summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/preempt.h5
-rw-r--r--include/linux/init_task.h10
-rw-r--r--include/linux/kthread.h1
-rw-r--r--include/linux/preempt.h19
-rw-r--r--include/linux/sched.h98
-rw-r--r--include/linux/stop_machine.h28
-rw-r--r--include/trace/events/sched.h30
7 files changed, 97 insertions, 94 deletions
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index d0a7a4753db2..0bec580a4885 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void)
/*
* Returns true when we need to resched and can (barring IRQ state).
*/
-static __always_inline bool should_resched(void)
+static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(!preempt_count() && tif_need_resched());
+ return unlikely(preempt_count() == preempt_offset &&
+ tif_need_resched());
}
#ifdef CONFIG_PREEMPT
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e8493fee8160..d0b380ee7d67 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -32,6 +32,14 @@ extern struct fs_struct init_fs;
#define INIT_CPUSET_SEQ(tsk)
#endif
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#define INIT_PREV_CPUTIME(x) .prev_cputime = { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \
+},
+#else
+#define INIT_PREV_CPUTIME(x)
+#endif
+
#define INIT_SIGNALS(sig) { \
.nr_threads = 1, \
.thread_head = LIST_HEAD_INIT(init_task.thread_node), \
@@ -46,6 +54,7 @@ extern struct fs_struct init_fs;
.cputime_atomic = INIT_CPUTIME_ATOMIC, \
.running = 0, \
}, \
+ INIT_PREV_CPUTIME(sig) \
.cred_guard_mutex = \
__MUTEX_INITIALIZER(sig.cred_guard_mutex), \
}
@@ -246,6 +255,7 @@ extern struct task_group root_task_group;
INIT_TASK_RCU_TASKS(tsk) \
INIT_CPUSET_SEQ(tsk) \
INIT_RT_MUTEXES(tsk) \
+ INIT_PREV_CPUTIME(tsk) \
INIT_VTIME(tsk) \
INIT_NUMA_BALANCING(tsk) \
INIT_KASAN(tsk) \
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 13d55206ccf6..869b21dcf503 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -38,6 +38,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
})
void kthread_bind(struct task_struct *k, unsigned int cpu);
+void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
bool kthread_should_stop(void);
bool kthread_should_park(void);
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 84991f185173..bea8dd8ff5e0 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -84,13 +84,21 @@
*/
#define in_nmi() (preempt_count() & NMI_MASK)
+/*
+ * The preempt_count offset after preempt_disable();
+ */
#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_DISABLE_OFFSET 1
+# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET
#else
-# define PREEMPT_DISABLE_OFFSET 0
+# define PREEMPT_DISABLE_OFFSET 0
#endif
/*
+ * The preempt_count offset after spin_lock()
+ */
+#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET
+
+/*
* The preempt_count offset needed for things like:
*
* spin_lock_bh()
@@ -103,7 +111,7 @@
*
* Work as expected.
*/
-#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET)
+#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET)
/*
* Are we running in atomic context? WARNING: this macro cannot
@@ -124,7 +132,8 @@
#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER)
extern void preempt_count_add(int val);
extern void preempt_count_sub(int val);
-#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); })
+#define preempt_count_dec_and_test() \
+ ({ preempt_count_sub(1); should_resched(0); })
#else
#define preempt_count_add(val) __preempt_count_add(val)
#define preempt_count_sub(val) __preempt_count_sub(val)
@@ -184,7 +193,7 @@ do { \
#define preempt_check_resched() \
do { \
- if (should_resched()) \
+ if (should_resched(0)) \
__preempt_schedule(); \
} while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04b5ada460b4..119823decc46 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -530,39 +530,49 @@ struct cpu_itimer {
};
/**
- * struct cputime - snaphsot of system and user cputime
+ * struct prev_cputime - snaphsot of system and user cputime
* @utime: time spent in user mode
* @stime: time spent in system mode
+ * @lock: protects the above two fields
*
- * Gathers a generic snapshot of user and system time.
+ * Stores previous user/system time values such that we can guarantee
+ * monotonicity.
*/
-struct cputime {
+struct prev_cputime {
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
cputime_t utime;
cputime_t stime;
+ raw_spinlock_t lock;
+#endif
};
+static inline void prev_cputime_init(struct prev_cputime *prev)
+{
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+ prev->utime = prev->stime = 0;
+ raw_spin_lock_init(&prev->lock);
+#endif
+}
+
/**
* struct task_cputime - collected CPU time counts
* @utime: time spent in user mode, in &cputime_t units
* @stime: time spent in kernel mode, in &cputime_t units
* @sum_exec_runtime: total time spent on the CPU, in nanoseconds
*
- * This is an extension of struct cputime that includes the total runtime
- * spent by the task from the scheduler point of view.
- *
- * As a result, this structure groups together three kinds of CPU time
- * that are tracked for threads and thread groups. Most things considering
- * CPU time want to group these counts together and treat all three
- * of them in parallel.
+ * This structure groups together three kinds of CPU time that are tracked for
+ * threads and thread groups. Most things considering CPU time want to group
+ * these counts together and treat all three of them in parallel.
*/
struct task_cputime {
cputime_t utime;
cputime_t stime;
unsigned long long sum_exec_runtime;
};
+
/* Alternate field names when used to cache expirations. */
-#define prof_exp stime
#define virt_exp utime
+#define prof_exp stime
#define sched_exp sum_exec_runtime
#define INIT_CPUTIME \
@@ -715,9 +725,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- struct cputime prev_cputime;
-#endif
+ struct prev_cputime prev_cputime;
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
unsigned long inblock, oublock, cinblock, coublock;
@@ -1167,29 +1175,24 @@ struct load_weight {
u32 inv_weight;
};
+/*
+ * The load_avg/util_avg accumulates an infinite geometric series.
+ * 1) load_avg factors the amount of time that a sched_entity is
+ * runnable on a rq into its weight. For cfs_rq, it is the aggregated
+ * such weights of all runnable and blocked sched_entities.
+ * 2) util_avg factors frequency scaling into the amount of time
+ * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE].
+ * For cfs_rq, it is the aggregated such times of all runnable and
+ * blocked sched_entities.
+ * The 64 bit load_sum can:
+ * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with
+ * the highest weight (=88761) always runnable, we should not overflow
+ * 2) for entity, support any load.weight always runnable
+ */
struct sched_avg {
- u64 last_runnable_update;
- s64 decay_count;
- /*
- * utilization_avg_contrib describes the amount of time that a
- * sched_entity is running on a CPU. It is based on running_avg_sum
- * and is scaled in the range [0..SCHED_LOAD_SCALE].
- * load_avg_contrib described the amount of time that a sched_entity
- * is runnable on a rq. It is based on both runnable_avg_sum and the
- * weight of the task.
- */
- unsigned long load_avg_contrib, utilization_avg_contrib;
- /*
- * These sums represent an infinite geometric series and so are bound
- * above by 1024/(1-y). Thus we only need a u32 to store them for all
- * choices of y < 1-2^(-32)*1024.
- * running_avg_sum reflects the time that the sched_entity is
- * effectively running on the CPU.
- * runnable_avg_sum represents the amount of time a sched_entity is on
- * a runqueue which includes the running time that is monitored by
- * running_avg_sum.
- */
- u32 runnable_avg_sum, avg_period, running_avg_sum;
+ u64 last_update_time, load_sum;
+ u32 util_sum, period_contrib;
+ unsigned long load_avg, util_avg;
};
#ifdef CONFIG_SCHEDSTATS
@@ -1255,7 +1258,7 @@ struct sched_entity {
#endif
#ifdef CONFIG_SMP
- /* Per-entity load-tracking */
+ /* Per entity load average tracking */
struct sched_avg avg;
#endif
};
@@ -1351,9 +1354,9 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
- struct task_struct *last_wakee;
- unsigned long wakee_flips;
+ unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
+ struct task_struct *last_wakee;
int wake_cpu;
#endif
@@ -1481,9 +1484,7 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- struct cputime prev_cputime;
-#endif
+ struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock;
unsigned long long vtime_snap;
@@ -2214,13 +2215,6 @@ static inline void calc_load_enter_idle(void) { }
static inline void calc_load_exit_idle(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
-#ifndef CONFIG_CPUMASK_OFFSTACK
-static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
-{
- return set_cpus_allowed_ptr(p, &new_mask);
-}
-#endif
-
/*
* Do not use outside of architecture code which knows its limitations.
*
@@ -2897,12 +2891,6 @@ extern int _cond_resched(void);
extern int __cond_resched_lock(spinlock_t *lock);
-#ifdef CONFIG_PREEMPT_COUNT
-#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET
-#else
-#define PREEMPT_LOCK_OFFSET 0
-#endif
-
#define cond_resched_lock(lock) ({ \
___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
__cond_resched_lock(lock); \
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index d2abbdb8c6aa..414d924318ce 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -112,25 +112,13 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
*
* This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */
-int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
+int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
-/**
- * __stop_machine: freeze the machine on all CPUs and run this function
- * @fn: the function to run
- * @data: the data ptr for the @fn
- * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
- *
- * Description: This is a special version of the above, which assumes cpus
- * won't come or go while it's being called. Used by hotplug cpu.
- */
-int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
-
-int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
-
#else /* CONFIG_STOP_MACHINE && CONFIG_SMP */
-static inline int __stop_machine(int (*fn)(void *), void *data,
+static inline int stop_machine(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
unsigned long flags;
@@ -141,16 +129,10 @@ static inline int __stop_machine(int (*fn)(void *), void *data,
return ret;
}
-static inline int stop_machine(int (*fn)(void *), void *data,
- const struct cpumask *cpus)
-{
- return __stop_machine(fn, data, cpus);
-}
-
-static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
+static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
- return __stop_machine(fn, data, cpus);
+ return stop_machine(fn, data, cpus);
}
#endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index d57a575fe31f..539d6bc3216a 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -55,9 +55,9 @@ TRACE_EVENT(sched_kthread_stop_ret,
*/
DECLARE_EVENT_CLASS(sched_wakeup_template,
- TP_PROTO(struct task_struct *p, int success),
+ TP_PROTO(struct task_struct *p),
- TP_ARGS(__perf_task(p), success),
+ TP_ARGS(__perf_task(p)),
TP_STRUCT__entry(
__array( char, comm, TASK_COMM_LEN )
@@ -71,25 +71,37 @@ DECLARE_EVENT_CLASS(sched_wakeup_template,
memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
__entry->pid = p->pid;
__entry->prio = p->prio;
- __entry->success = success;
+ __entry->success = 1; /* rudiment, kill when possible */
__entry->target_cpu = task_cpu(p);
),
- TP_printk("comm=%s pid=%d prio=%d success=%d target_cpu=%03d",
+ TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
__entry->comm, __entry->pid, __entry->prio,
- __entry->success, __entry->target_cpu)
+ __entry->target_cpu)
);
+/*
+ * Tracepoint called when waking a task; this tracepoint is guaranteed to be
+ * called from the waking context.
+ */
+DEFINE_EVENT(sched_wakeup_template, sched_waking,
+ TP_PROTO(struct task_struct *p),
+ TP_ARGS(p));
+
+/*
+ * Tracepoint called when the task is actually woken; p->state == TASK_RUNNNG.
+ * It it not always called from the waking context.
+ */
DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
- TP_PROTO(struct task_struct *p, int success),
- TP_ARGS(p, success));
+ TP_PROTO(struct task_struct *p),
+ TP_ARGS(p));
/*
* Tracepoint for waking up a new task:
*/
DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
- TP_PROTO(struct task_struct *p, int success),
- TP_ARGS(p, success));
+ TP_PROTO(struct task_struct *p),
+ TP_ARGS(p));
#ifdef CREATE_TRACE_POINTS
static inline long __trace_sched_switch_state(struct task_struct *p)