/*
 * mrst_s0i3.c - super-deep sleep state for the Moorestown MID platform
 *
 * Copyright (c) 2010, Intel Corporation.
 * H. Peter Anvin <hpa@linux.intel.com>
 * Len Brown <len.brown@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/clockchips.h>
#include <linux/hrtimer.h>	/* ktime_get_real() */
#include <linux/pci.h>
#include <linux/cpu.h>
#include <trace/events/power.h>
#include <linux/sched.h>
#include <linux/suspend.h>
#include <linux/sfi.h>
#include <linux/memblock.h>
#include <asm/apic.h>
#include <asm/i387.h>
#include <asm/msr.h>
#include <asm/mtrr.h>
#include <asm/mwait.h>
#include "mrst_s0i3.h"

/* PMU register interface */
struct mrst_pmu_reg {
	u32 pm_sts;		/* 0x00 */
	u32 pm_cmd;		/* 0x04 */
	u32 pm_ics;		/* 0x08 */
	u32 _resv1;
	u32 pm_wkc[2];		/* 0x10 */
	u32 pm_wks[2];		/* 0x18 */
	u32 pm_ssc[4];		/* 0x20 */
	u32 pm_sss[4];		/* 0x30 */
	u32 pm_wssc[4];		/* 0x40 */
	u32 pm_c3c4;		/* 0x50 */
	u32 pm_c5c6;		/* 0x54 */
	u32 pm_msic;		/* 0x58 */
};
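
/*
 * Note: struct mrst_pmu_reg mirrors the PMU's MMIO register file; it is
 * only ever accessed through the pci_iomap()ed pointer below, via
 * readl()/writel().
 */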

static void do_s0i3(void);
static volatile struct mrst_pmu_reg *pmu_reg;
static struct pci_dev *pmu_dev;		/* South Complex PMU unit */
static u64 *wakeup_ptr;
static phys_addr_t s0i3_trampoline_phys;
static void *s0i3_trampoline_base;
static volatile bool s0i3_pmu_command_pending;

/**
 * mrst_check_state_availability - mask states the governor must not pick
 * @dev: cpuidle_device
 *
 * Certain states are not appropriate for the governor to pick in some
 * situations.  This function is called as the cpuidle_device's prepare
 * callback and thus tells the governor to ignore such states when
 * selecting the next state to enter.
 */
int mrst_check_state_availability(struct cpuidle_device *dev)
{
	int cpu = smp_processor_id();

	/*
	 * If another CPU is online, the PMU is uninitialized, or there
	 * is a still-unprocessed PMU command, we cannot enter S0i3.
	 */
	if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
	    s0i3_pmu_command_pending)
		dev->states[5].flags |= CPUIDLE_FLAG_IGNORE;
	else
		dev->states[5].flags &= ~CPUIDLE_FLAG_IGNORE;

	/*
	 * If there is a pending PMU command, we cannot enter C6.
	 */
	if (s0i3_pmu_command_pending)
		dev->states[4].flags |= CPUIDLE_FLAG_IGNORE;
	else
		dev->states[4].flags &= ~CPUIDLE_FLAG_IGNORE;

	return 0;
}
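
/*
 * A minimal sketch of how this prepare callback is assumed to be wired
 * up by the platform cpuidle driver (the registration itself lives in
 * intel_idle, not in this file; the .prepare hook is the 2.6.3x-era
 * cpuidle interface this function is written against):
 *
 *	dev->prepare = mrst_check_state_availability;
 *	err = cpuidle_register_device(dev);
 *
 * cpuidle invokes ->prepare before each state selection, so the
 * CPUIDLE_FLAG_IGNORE bits set above are seen by the governor.
 */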

/**
 * mrst_idle - enter a deep idle or S0i3 state
 * @dev: cpuidle_device
 * @state: cpuidle state
 *
 * This enters S0i3, C6 or C4 depending on what is currently permitted.
 * C1-C4 are handled via the normal intel_idle entry.
 */
int mrst_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
{
	unsigned long ecx = 1; /* break on interrupt flag */
	unsigned long eax = (unsigned long)cpuidle_get_statedata(state);
	ktime_t kt_before, kt_after;
	s64 usec_delta;
	int cpu = smp_processor_id();

	local_irq_disable();

	/*
	 * leave_mm() to avoid costly and often unnecessary wakeups
	 * for flushing the user TLB's associated with the active mm.
	 */
	if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED)
		leave_mm(cpu);

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);

	kt_before = ktime_get_real();

	stop_critical_timings();

	if (!need_resched()) {
		if (eax == -1UL) {
			do_s0i3();
		} else {
			/* Conventional MWAIT */

			__monitor((void *)&current_thread_info()->flags, 0, 0);
			smp_mb();
			if (!need_resched())
				__mwait(eax, ecx);
		}
	}

	start_critical_timings();

	kt_after = ktime_get_real();
	usec_delta = ktime_to_us(ktime_sub(kt_after, kt_before));

	local_irq_enable();

	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);

	return usec_delta;
}
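
/*
 * A sketch of the state-table entry mrst_idle() is written against
 * (illustrative values; the real table lives in the intel_idle driver).
 * The statedata is the MWAIT hint passed in EAX, with -1 selecting
 * S0i3 in the function above:
 *
 *	state->flags |= CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED;
 *	state->enter = mrst_idle;
 *	cpuidle_set_statedata(state, (void *)-1UL);
 */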

/*
 * List of MSRs to be saved/restored, *other* than what is handled by
 * save_processor_state/restore_processor_state.  This is specific to
 * Langwell, and any future processors would need a new list.
 *
 * XXX: check how much on this list is actually necessary.
 */
static const u32 s0i3_msr_list[] =
{
	MSR_IA32_EBL_CR_POWERON,
	MSR_IA32_FEATURE_CONTROL,
	MSR_IA32_PERFCTR0,
	MSR_IA32_PERFCTR1,
	MSR_IA32_MPERF,
	MSR_IA32_THERM_INTERRUPT,
	MSR_CORE_PERF_FIXED_CTR0,
	MSR_CORE_PERF_FIXED_CTR1,
	MSR_CORE_PERF_FIXED_CTR2,
	MSR_IA32_DS_AREA,
	MSR_IA32_CR_PAT,
};

static struct msr s0i3_msr_data[ARRAY_SIZE(s0i3_msr_list)];

static void s0i3_save_msrs(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(s0i3_msr_list); i++)
		s0i3_msr_data[i].q = native_read_msr(s0i3_msr_list[i]);
}

static void s0i3_restore_msrs(void)
{
	int i;

	for (i = ARRAY_SIZE(s0i3_msr_list) - 1; i >= 0; i--)
		native_write_msr(s0i3_msr_list[i],
				 s0i3_msr_data[i].l, s0i3_msr_data[i].h);
}

/*
 * List of APIC registers to be saved/restored.
 * XXX: Verify that this list is actually complete.
 * XXX: Try to figure out a better way to do this using kernel facilities.
 *
 * Note: these are open-coded to minimize delay and thereby reduce
 * power consumption.
 */
static const u32 s0i3_lapic_list[] =
{
	APIC_ID,
	APIC_TASKPRI,
	APIC_LDR,
	APIC_DFR,
	APIC_SPIV,
	APIC_ICR,
	APIC_ICR2,
	APIC_LVTT,
	APIC_LVTTHMR,
	APIC_LVTPC,
	APIC_LVT0,
	APIC_LVT1,
	APIC_LVTERR,
	APIC_TMICT,
	APIC_TMCCT,		/* Current timer count... questionable */
	APIC_TDCR
};

static u32 s0i3_lapic_data[ARRAY_SIZE(s0i3_lapic_list)];

static void s0i3_save_lapic(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(s0i3_lapic_list); i++) {
		volatile u32 *addr = (volatile u32 *)
			(APIC_BASE + s0i3_lapic_list[i]);

		s0i3_lapic_data[i] = readl(addr);
	}
}

static void s0i3_restore_lapic(void)
{
	int i;

	for (i = ARRAY_SIZE(s0i3_lapic_list) - 1; i >= 0; i--) {
		volatile u32 *addr = (volatile u32 *)
			(APIC_BASE + s0i3_lapic_list[i]);

		writel(s0i3_lapic_data[i], addr);
	}
}

/*
 * Leaving S0i3 will have put the other CPU thread into wait for SIPI;
 * we need to put it back into C6 in order to be able to use S0i3
 * again.
 *
 * XXX: this should probably be turned into a
 * mrst_wakeup_secondary_cpu function.
 */
static void s0i3_poke_other_cpu(void)
{
	const struct init_wakeup_delays delays = {
		.assert_init	= 0,
		.icr_accept	= 30,
		.cpu_accept	= 20,
	};

	wakeup_secondary_cpu_via_init_delays(1, s0i3_trampoline_phys, &delays);
}

/*
 * Send a command to the PMU to shut down the south complex
 */

#define WAKE_CAPABLE		0x80000000
#define AUTO_CLK_GATE_VALUE	0x00555551
#define SUB_SYS_D0I2_VALUE	0x00aaaaaa
#define WAKE_ENABLE_VALUE	0x0786 /* 0x4786? */
#define SUSPEND_GFX		0xc
#define PM_S0I3_COMMAND				\
  ((0 << 31) |	/* Reserved */			\
   (0 << 30) |	/* Core must be idle */		\
   (0xc2 << 22) |	/* ACK C6 trigger */	\
   (3 << 19) |	/* Trigger on DMI message */	\
   (3 << 16) |	/* Enter S0i3 */		\
   (0 << 13) |	/* Numeric mode ID (sw) */	\
   (3 << 9) |	/* Trigger mode */		\
   (0 << 8) |	/* Do not interrupt */		\
   (1 << 0))	/* Set configuration */
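
/*
 * With the field values above, PM_S0I3_COMMAND evaluates to 0x309b0601.
 * A hedged sketch of the same encoding as a helper (field meanings are
 * taken from the comments above, not from a published datasheet):
 */
static inline u32 mrst_pmu_command(u32 mode, u32 trigger)
{
	return (0xc2 << 22) |		/* ACK C6 trigger */
	       (3 << 19) |		/* Trigger on DMI message */
	       (mode << 16) |		/* Target state; 3 = S0i3 */
	       (trigger << 9) |		/* Trigger mode */
	       (1 << 0);		/* Set configuration */
}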

static void s0i3_wait_for_pmu(void)
{
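	/* PM_STS bit 8 appears, from its use here, to be the PMU-busy flag */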
	while (readl(&pmu_reg->pm_sts) & (1 << 8))
		cpu_relax();
}

static noinline void s0i3_prep_pmu(void)
{
	s0i3_wait_for_pmu();

	/* Clear any possible error conditions */
	writel(0x300, &pmu_reg->pm_ics);

	/* Program the wakeup */
	writel(WAKE_ENABLE_VALUE, &pmu_reg->pm_wkc[0]);
	writel(AUTO_CLK_GATE_VALUE, &pmu_reg->pm_wssc[0]);

	/* Clock gate Langwell */
	writel(SUB_SYS_D0I2_VALUE, &pmu_reg->pm_ssc[0]);

	/* Avoid entering conventional C6 until the PMU command has cleared */
	s0i3_pmu_command_pending = true;
}

static inline void s0i3_update_wake_pointer(void)
{
	*wakeup_ptr = virt_to_phys(mrst_s0i3_resume);
}

static noinline void do_s0i3(void)
{
	s0i3_update_wake_pointer();
	s0i3_save_lapic();
	s0i3_save_msrs();
	save_processor_state();
	s0i3_prep_pmu();
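	/*
	 * mrst_s0i3_entry() returns true if S0i3 was actually entered and
	 * execution resumed through the trampoline (full state restore
	 * needed); false means the entry aborted.
	 */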
	if (mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd)) {
		s0i3_restore_msrs();
		restore_processor_state();
		s0i3_restore_lapic();

		/* The PMU command executed correctly, so no longer pending */
		s0i3_pmu_command_pending = false;

		/* HACK HACK HACK Enable MSI interrupts again */
		writel(0x0, &pmu_reg->pm_msic);

		s0i3_poke_other_cpu();
	} else {
		/* save_processor_state() did execute kernel_fpu_begin() */
		kernel_fpu_end();
	}
}

static int s0i3_sfi_parse_wake(struct sfi_table_header *table)
{
	struct sfi_table_simple *sb;
	struct sfi_wake_table_entry *pentry;
	int num;

	sb = (struct sfi_table_simple *)table;
	pentry = (struct sfi_wake_table_entry *)sb->pentry;
	num = SFI_GET_NUM_ENTRIES(sb, struct sfi_wake_table_entry);

	if (num < 1)		/* num == 1? */
		return -EINVAL;

	wakeup_ptr = ioremap_cache(pentry->phys_addr, 8);

	printk("s0i3: wakeup pointer at 0x%llx mapped to %p\n",
	       pentry->phys_addr, wakeup_ptr);

	return wakeup_ptr ? 0 : -ENOMEM;
}

/*
 * Interrupt handler.  The purpose of the interrupt is to break us
 * out of an error condition where we ended up in C6 rather than S0i3;
 * the cpuidle loop will then retry the S0i3 condition at a later time.
 */
static irqreturn_t s0i3_pmu_irq(int irq, void *dummy)
{
	u32 status;

	(void)dummy;

	status = readl(&pmu_reg->pm_ics);

	if (!(status & 0x200))
		return IRQ_NONE; /* Not ours */

	/* Clear the status */
	writel(status, &pmu_reg->pm_ics);
	s0i3_pmu_command_pending = false;

	return IRQ_HANDLED;
}

/*
 * Reserve memory for the return-to-C6 trampoline.  This is called
 * extremely early in initialization in order to allocate low memory.
 *
 * XXX: Replace this with unified trampoline code.
 */
extern const char s0i3_trampoline_data[], s0i3_trampoline_data_end[];

void __init s0i3_reserve_memory(void)
{
	phys_addr_t mem;
	size_t size;

	size = s0i3_trampoline_data_end - s0i3_trampoline_data;
	size = ALIGN(size, PAGE_SIZE);

	/* Has to be in very low memory so we can execute real-mode AP code. */
	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
	if (mem == MEMBLOCK_ERROR)
		panic("Cannot allocate S0i3 trampoline\n");

	s0i3_trampoline_phys = mem;
	s0i3_trampoline_base = __va(mem);
	memblock_x86_reserve_range(mem, mem + size, "S0I3");
}

/* Hacky - should be replaced with a registered PCI ID driver;
 * see the sketch after this function. */
static int __init s0i3_prepare(void)
{
	int err;
	volatile struct mrst_pmu_reg *pmu;
	u32 tmp;

	/* Map the PMU unit */
	pmu_dev = pci_get_device(0x8086, 0x0810, NULL);
	if (!pmu_dev)
		return -ENODEV;

	err = pci_enable_device(pmu_dev);
	if (err)
		return err;

	err = pci_request_regions(pmu_dev, "mrst_s0i3");
	if (err)
		goto err_disable_pdev;

	wakeup_ptr = NULL;
	err = sfi_table_parse(SFI_SIG_WAKE, NULL, NULL, s0i3_sfi_parse_wake);
	if (err)
		goto err_release_regions;

	pmu = pci_iomap(pmu_dev, 0, 0);
	if (!pmu) {
		err = -ENOMEM;
		goto err_release_regions;
	}

	err = request_irq(pmu_dev->irq, s0i3_pmu_irq, 0, "mrst_s0i3", NULL);
	if (err)
		goto err_iounmap;

	pmu_reg = pmu;

	/* Enable auto clockgating for the south complex (Langwell) */
	s0i3_wait_for_pmu();
	writel(AUTO_CLK_GATE_VALUE, &pmu_reg->pm_ssc[0]);
	writel(0x201, &pmu_reg->pm_cmd);

	/* Enable the hardware interrupt */
	tmp = readl(&pmu_reg->pm_ics);
	tmp |= 0x100;		/* Enable interrupts */
	writel(tmp, &pmu_reg->pm_ics);

	/* Set up the return-to-C6 code trampoline in low memory */
	memcpy(s0i3_trampoline_base, s0i3_trampoline_data,
	       s0i3_trampoline_data_end - s0i3_trampoline_data);

	return 0;

err_iounmap:
	pci_iounmap(pmu_dev, (void __iomem *)pmu);
err_release_regions:
	pci_release_regions(pmu_dev);
err_disable_pdev:
	pci_disable_device(pmu_dev);
	return err;
}
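
/*
 * A hedged sketch of the "registered PCI ID driver" shape suggested
 * above.  The vendor/device IDs match the open-coded probe;
 * mrst_pmu_probe() is a hypothetical wrapper around the body of
 * s0i3_prepare():
 *
 *	static DEFINE_PCI_DEVICE_TABLE(mrst_pmu_ids) = {
 *		{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0810) },
 *		{ }
 *	};
 *
 *	static struct pci_driver mrst_pmu_driver = {
 *		.name		= "mrst_s0i3",
 *		.id_table	= mrst_pmu_ids,
 *		.probe		= mrst_pmu_probe,
 *	};
 *
 * registered from an initcall with pci_register_driver(&mrst_pmu_driver).
 */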

device_initcall(s0i3_prepare);