diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/asm-generic/preempt.h | 5 | ||||
-rw-r--r-- | include/linux/init_task.h | 10 | ||||
-rw-r--r-- | include/linux/kthread.h | 1 | ||||
-rw-r--r-- | include/linux/preempt.h | 19 | ||||
-rw-r--r-- | include/linux/sched.h | 98 | ||||
-rw-r--r-- | include/linux/stop_machine.h | 28 | ||||
-rw-r--r-- | include/trace/events/sched.h | 30 |
7 files changed, 97 insertions, 94 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d0a7a4753db2..0bec580a4885 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!preempt_count() && tif_need_resched()); + return unlikely(preempt_count() == preempt_offset && + tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e8493fee8160..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,6 +32,14 @@ extern struct fs_struct init_fs; #define INIT_CPUSET_SEQ(tsk) #endif +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#define INIT_PREV_CPUTIME(x) .prev_cputime = { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \ +}, +#else +#define INIT_PREV_CPUTIME(x) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -46,6 +54,7 @@ extern struct fs_struct init_fs; .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = 0, \ }, \ + INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } @@ -246,6 +255,7 @@ extern struct task_group root_task_group; INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ + INIT_PREV_CPUTIME(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 13d55206ccf6..869b21dcf503 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), }) void kthread_bind(struct task_struct *k, unsigned int cpu); +void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask); int kthread_stop(struct task_struct *k); bool kthread_should_stop(void); bool kthread_should_park(void); diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 84991f185173..bea8dd8ff5e0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -84,13 +84,21 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * The preempt_count offset after preempt_disable(); + */ #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_DISABLE_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else -# define PREEMPT_DISABLE_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif /* + * The preempt_count offset after spin_lock() + */ +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET + +/* * The preempt_count offset needed for things like: * * spin_lock_bh() @@ -103,7 +111,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot @@ -124,7 +132,8 @@ #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); -#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) +#define preempt_count_dec_and_test() \ + ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) @@ -184,7 +193,7 @@ do { \ #define preempt_check_resched() \ do { \ - if (should_resched()) \ + if (should_resched(0)) \ __preempt_schedule(); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 04b5ada460b4..119823decc46 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -530,39 +530,49 @@ struct cpu_itimer { }; /** - * struct cputime - snaphsot of system and user cputime + * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode + * @lock: protects the above two fields * - * Gathers a generic snapshot of user and system time. + * Stores previous user/system time values such that we can guarantee + * monotonicity. */ -struct cputime { +struct prev_cputime { +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE cputime_t utime; cputime_t stime; + raw_spinlock_t lock; +#endif }; +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * - * This is an extension of struct cputime that includes the total runtime - * spent by the task from the scheduler point of view. - * - * As a result, this structure groups together three kinds of CPU time - * that are tracked for threads and thread groups. Most things considering - * CPU time want to group these counts together and treat all three - * of them in parallel. + * This structure groups together three kinds of CPU time that are tracked for + * threads and thread groups. Most things considering CPU time want to group + * these counts together and treat all three of them in parallel. */ struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; }; + /* Alternate field names when used to cache expirations. */ -#define prof_exp stime #define virt_exp utime +#define prof_exp stime #define sched_exp sum_exec_runtime #define INIT_CPUTIME \ @@ -715,9 +725,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1167,29 +1175,24 @@ struct load_weight { u32 inv_weight; }; +/* + * The load_avg/util_avg accumulates an infinite geometric series. + * 1) load_avg factors the amount of time that a sched_entity is + * runnable on a rq into its weight. For cfs_rq, it is the aggregated + * such weights of all runnable and blocked sched_entities. + * 2) util_avg factors frequency scaling into the amount of time + * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. + * For cfs_rq, it is the aggregated such times of all runnable and + * blocked sched_entities. + * The 64 bit load_sum can: + * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with + * the highest weight (=88761) always runnable, we should not overflow + * 2) for entity, support any load.weight always runnable + */ struct sched_avg { - u64 last_runnable_update; - s64 decay_count; - /* - * utilization_avg_contrib describes the amount of time that a - * sched_entity is running on a CPU. It is based on running_avg_sum - * and is scaled in the range [0..SCHED_LOAD_SCALE]. - * load_avg_contrib described the amount of time that a sched_entity - * is runnable on a rq. It is based on both runnable_avg_sum and the - * weight of the task. - */ - unsigned long load_avg_contrib, utilization_avg_contrib; - /* - * These sums represent an infinite geometric series and so are bound - * above by 1024/(1-y). Thus we only need a u32 to store them for all - * choices of y < 1-2^(-32)*1024. - * running_avg_sum reflects the time that the sched_entity is - * effectively running on the CPU. - * runnable_avg_sum represents the amount of time a sched_entity is on - * a runqueue which includes the running time that is monitored by - * running_avg_sum. - */ - u32 runnable_avg_sum, avg_period, running_avg_sum; + u64 last_update_time, load_sum; + u32 util_sum, period_contrib; + unsigned long load_avg, util_avg; }; #ifdef CONFIG_SCHEDSTATS @@ -1255,7 +1258,7 @@ struct sched_entity { #endif #ifdef CONFIG_SMP - /* Per-entity load-tracking */ + /* Per entity load average tracking */ struct sched_avg avg; #endif }; @@ -1351,9 +1354,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; - struct task_struct *last_wakee; - unsigned long wakee_flips; + unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; int wake_cpu; #endif @@ -1481,9 +1484,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_t vtime_seqlock; unsigned long long vtime_snap; @@ -2214,13 +2215,6 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ -#ifndef CONFIG_CPUMASK_OFFSTACK -static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) -{ - return set_cpus_allowed_ptr(p, &new_mask); -} -#endif - /* * Do not use outside of architecture code which knows its limitations. * @@ -2897,12 +2891,6 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET -#else -#define PREEMPT_LOCK_OFFSET 0 -#endif - #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index d2abbdb8c6aa..414d924318ce 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -112,25 +112,13 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. */ -int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); +int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -/** - * __stop_machine: freeze the machine on all CPUs and run this function - * @fn: the function to run - * @data: the data ptr for the @fn - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) - * - * Description: This is a special version of the above, which assumes cpus - * won't come or go while it's being called. Used by hotplug cpu. - */ -int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); - -int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); - #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int __stop_machine(int (*fn)(void *), void *data, +static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -141,16 +129,10 @@ static inline int __stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) -{ - return __stop_machine(fn, data, cpus); -} - -static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { - return __stop_machine(fn, data, cpus); + return stop_machine(fn, data, cpus); } #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index d57a575fe31f..539d6bc3216a 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -55,9 +55,9 @@ TRACE_EVENT(sched_kthread_stop_ret, */ DECLARE_EVENT_CLASS(sched_wakeup_template, - TP_PROTO(struct task_struct *p, int success), + TP_PROTO(struct task_struct *p), - TP_ARGS(__perf_task(p), success), + TP_ARGS(__perf_task(p)), TP_STRUCT__entry( __array( char, comm, TASK_COMM_LEN ) @@ -71,25 +71,37 @@ DECLARE_EVENT_CLASS(sched_wakeup_template, memcpy(__entry->comm, p->comm, TASK_COMM_LEN); __entry->pid = p->pid; __entry->prio = p->prio; - __entry->success = success; + __entry->success = 1; /* rudiment, kill when possible */ __entry->target_cpu = task_cpu(p); ), - TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d", + TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d", __entry->comm, __entry->pid, __entry->prio, - __entry->success, __entry->target_cpu) + __entry->target_cpu) ); +/* + * Tracepoint called when waking a task; this tracepoint is guaranteed to be + * called from the waking context. + */ +DEFINE_EVENT(sched_wakeup_template, sched_waking, + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); + +/* + * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG. + * It it not always called from the waking context. + */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup, - TP_PROTO(struct task_struct *p, int success), - TP_ARGS(p, success)); + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); /* * Tracepoint for waking up a new task: */ DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new, - TP_PROTO(struct task_struct *p, int success), - TP_ARGS(p, success)); + TP_PROTO(struct task_struct *p), + TP_ARGS(p)); #ifdef CREATE_TRACE_POINTS static inline long __trace_sched_switch_state(struct task_struct *p) |