1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
|
/*
* processor_idle - idle state cpuidle driver.
* Adapted from drivers/idle/intel_idle.c and
* drivers/acpi/processor_idle.c
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/moduleparam.h>
#include <linux/cpuidle.h>
#include <linux/cpu.h>
#include <asm/paca.h>
#include <asm/reg.h>
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/runlatch.h>
#include "plpar_wrappers.h"
#include "pseries.h"
struct cpuidle_driver pseries_idle_driver = {
.name = "pseries_idle",
.owner = THIS_MODULE,
};
#define MAX_IDLE_STATE_COUNT 2
static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
static struct cpuidle_device __percpu *pseries_cpuidle_devices;
static struct cpuidle_state *cpuidle_state_table;
void update_smt_snooze_delay(int snooze)
{
struct cpuidle_driver *drv = cpuidle_get_driver();
if (drv)
drv->states[0].target_residency = snooze;
}
static inline void idle_loop_prolog(unsigned long *in_purr, ktime_t *kt_before)
{
*kt_before = ktime_get_real();
*in_purr = mfspr(SPRN_PURR);
/*
* Indicate to the HV that we are idle. Now would be
* a good time to find other work to dispatch.
*/
get_lppaca()->idle = 1;
}
static inline s64 idle_loop_epilog(unsigned long in_purr, ktime_t kt_before)
{
get_lppaca()->wait_state_cycles += mfspr(SPRN_PURR) - in_purr;
get_lppaca()->idle = 0;
return ktime_to_us(ktime_sub(ktime_get_real(), kt_before));
}
static int snooze_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
unsigned long start_snooze;
long snooze = drv->states[0].target_residency;
idle_loop_prolog(&in_purr, &kt_before);
if (snooze) {
start_snooze = get_tb() + snooze * tb_ticks_per_usec;
local_irq_enable();
set_thread_flag(TIF_POLLING_NRFLAG);
while ((snooze < 0) || (get_tb() < start_snooze)) {
if (need_resched() || cpu_is_offline(dev->cpu))
goto out;
ppc64_runlatch_off();
HMT_low();
HMT_very_low();
}
HMT_medium();
clear_thread_flag(TIF_POLLING_NRFLAG);
smp_mb();
local_irq_disable();
}
out:
HMT_medium();
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
static void check_and_cede_processor(void)
{
/*
* Interrupts are soft-disabled at this point,
* but not hard disabled. So an interrupt might have
* occurred before entering NAP, and would be potentially
* lost (edge events, decrementer events, etc...) unless
* we first hard disable then check.
*/
hard_irq_disable();
if (!lazy_irq_pending())
cede_processor();
}
static int dedicated_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
idle_loop_prolog(&in_purr, &kt_before);
get_lppaca()->donate_dedicated_cpu = 1;
ppc64_runlatch_off();
HMT_medium();
check_and_cede_processor();
get_lppaca()->donate_dedicated_cpu = 0;
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
static int shared_cede_loop(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
unsigned long in_purr;
ktime_t kt_before;
idle_loop_prolog(&in_purr, &kt_before);
/*
* Yield the processor to the hypervisor. We return if
* an external interrupt occurs (which are driven prior
* to returning here) or if a prod occurs from another
* processor. When returning here, external interrupts
* are enabled.
*/
check_and_cede_processor();
dev->last_residency =
(int)idle_loop_epilog(in_purr, kt_before);
return index;
}
/*
* States for dedicated partition case.
*/
static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
{ /* Snooze */
.name = "snooze",
.desc = "snooze",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 0,
.target_residency = 0,
.enter = &snooze_loop },
{ /* CEDE */
.name = "CEDE",
.desc = "CEDE",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1,
.target_residency = 10,
.enter = &dedicated_cede_loop },
};
/*
* States for shared partition case.
*/
static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
{ /* Shared Cede */
.name = "Shared Cede",
.desc = "Shared Cede",
.flags = CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 0,
.target_residency = 0,
.enter = &shared_cede_loop },
};
int pseries_notify_cpuidle_add_cpu(int cpu)
{
struct cpuidle_device *dev =
per_cpu_ptr(pseries_cpuidle_devices, cpu);
if (dev && cpuidle_get_driver()) {
cpuidle_disable_device(dev);
cpuidle_enable_device(dev);
}
return 0;
}
/*
* pseries_cpuidle_driver_init()
*/
static int pseries_cpuidle_driver_init(void)
{
int idle_state;
struct cpuidle_driver *drv = &pseries_idle_driver;
drv->state_count = 0;
for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
if (idle_state > max_idle_state)
break;
/* is the state not enabled? */
if (cpuidle_state_table[idle_state].enter == NULL)
continue;
drv->states[drv->state_count] = /* structure copy */
cpuidle_state_table[idle_state];
if (cpuidle_state_table == dedicated_states)
drv->states[drv->state_count].target_residency =
__get_cpu_var(smt_snooze_delay);
drv->state_count += 1;
}
return 0;
}
/* pseries_idle_devices_uninit(void)
* unregister cpuidle devices and de-allocate memory
*/
static void pseries_idle_devices_uninit(void)
{
int i;
struct cpuidle_device *dev;
for_each_possible_cpu(i) {
dev = per_cpu_ptr(pseries_cpuidle_devices, i);
cpuidle_unregister_device(dev);
}
free_percpu(pseries_cpuidle_devices);
return;
}
/* pseries_idle_devices_init()
* allocate, initialize and register cpuidle device
*/
static int pseries_idle_devices_init(void)
{
int i;
struct cpuidle_driver *drv = &pseries_idle_driver;
struct cpuidle_device *dev;
pseries_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (pseries_cpuidle_devices == NULL)
return -ENOMEM;
for_each_possible_cpu(i) {
dev = per_cpu_ptr(pseries_cpuidle_devices, i);
dev->state_count = drv->state_count;
dev->cpu = i;
if (cpuidle_register_device(dev)) {
printk(KERN_DEBUG \
"cpuidle_register_device %d failed!\n", i);
return -EIO;
}
}
return 0;
}
/*
* pseries_idle_probe()
* Choose state table for shared versus dedicated partition
*/
static int pseries_idle_probe(void)
{
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return -ENODEV;
if (cpuidle_disable != IDLE_NO_OVERRIDE)
return -ENODEV;
if (max_idle_state == 0) {
printk(KERN_DEBUG "pseries processor idle disabled.\n");
return -EPERM;
}
if (get_lppaca()->shared_proc)
cpuidle_state_table = shared_states;
else
cpuidle_state_table = dedicated_states;
return 0;
}
static int __init pseries_processor_idle_init(void)
{
int retval;
retval = pseries_idle_probe();
if (retval)
return retval;
pseries_cpuidle_driver_init();
retval = cpuidle_register_driver(&pseries_idle_driver);
if (retval) {
printk(KERN_DEBUG "Registration of pseries driver failed.\n");
return retval;
}
retval = pseries_idle_devices_init();
if (retval) {
pseries_idle_devices_uninit();
cpuidle_unregister_driver(&pseries_idle_driver);
return retval;
}
printk(KERN_DEBUG "pseries_idle_driver registered\n");
return 0;
}
static void __exit pseries_processor_idle_exit(void)
{
pseries_idle_devices_uninit();
cpuidle_unregister_driver(&pseries_idle_driver);
return;
}
module_init(pseries_processor_idle_init);
module_exit(pseries_processor_idle_exit);
MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("Cpuidle driver for POWER");
MODULE_LICENSE("GPL");
|