summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/audit.c67
-rw-r--r--kernel/audit.h8
-rw-r--r--kernel/auditsc.c25
-rw-r--r--kernel/bpf/core.c12
-rw-r--r--kernel/bpf/syscall.c8
-rw-r--r--kernel/bpf/verifier.c64
-rw-r--r--kernel/cgroup/cgroup.c9
-rw-r--r--kernel/irq/affinity.c20
-rw-r--r--kernel/kthread.c3
-rw-r--r--kernel/locking/lockdep_internals.h6
-rw-r--r--kernel/padata.c5
-rw-r--r--kernel/ptrace.c14
-rw-r--r--kernel/sched/clock.c46
-rw-r--r--kernel/sched/cputime.c27
-rw-r--r--kernel/sched/sched.h9
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/trace/ftrace.c29
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c9
-rw-r--r--kernel/trace/trace.h2
20 files changed, 241 insertions, 149 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index 2f4964cfde0b..a871bf80fde1 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -160,7 +160,6 @@ static LIST_HEAD(audit_freelist);
/* queue msgs to send via kauditd_task */
static struct sk_buff_head audit_queue;
-static void kauditd_hold_skb(struct sk_buff *skb);
/* queue msgs due to temporary unicast send problems */
static struct sk_buff_head audit_retry_queue;
/* queue msgs waiting for new auditd connection */
@@ -454,30 +453,6 @@ static void auditd_set(int pid, u32 portid, struct net *net)
}
/**
- * auditd_reset - Disconnect the auditd connection
- *
- * Description:
- * Break the auditd/kauditd connection and move all the queued records into the
- * hold queue in case auditd reconnects.
- */
-static void auditd_reset(void)
-{
- struct sk_buff *skb;
-
- /* if it isn't already broken, break the connection */
- rcu_read_lock();
- if (auditd_conn.pid)
- auditd_set(0, 0, NULL);
- rcu_read_unlock();
-
- /* flush all of the main and retry queues to the hold queue */
- while ((skb = skb_dequeue(&audit_retry_queue)))
- kauditd_hold_skb(skb);
- while ((skb = skb_dequeue(&audit_queue)))
- kauditd_hold_skb(skb);
-}
-
-/**
* kauditd_print_skb - Print the audit record to the ring buffer
* @skb: audit record
*
@@ -505,9 +480,6 @@ static void kauditd_rehold_skb(struct sk_buff *skb)
{
/* put the record back in the queue at the same place */
skb_queue_head(&audit_hold_queue, skb);
-
- /* fail the auditd connection */
- auditd_reset();
}
/**
@@ -544,9 +516,6 @@ static void kauditd_hold_skb(struct sk_buff *skb)
/* we have no other options - drop the message */
audit_log_lost("kauditd hold queue overflow");
kfree_skb(skb);
-
- /* fail the auditd connection */
- auditd_reset();
}
/**
@@ -567,6 +536,30 @@ static void kauditd_retry_skb(struct sk_buff *skb)
}
/**
+ * auditd_reset - Disconnect the auditd connection
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the queued records into the
+ * hold queue in case auditd reconnects.
+ */
+static void auditd_reset(void)
+{
+ struct sk_buff *skb;
+
+ /* if it isn't already broken, break the connection */
+ rcu_read_lock();
+ if (auditd_conn.pid)
+ auditd_set(0, 0, NULL);
+ rcu_read_unlock();
+
+ /* flush all of the main and retry queues to the hold queue */
+ while ((skb = skb_dequeue(&audit_retry_queue)))
+ kauditd_hold_skb(skb);
+ while ((skb = skb_dequeue(&audit_queue)))
+ kauditd_hold_skb(skb);
+}
+
+/**
* auditd_send_unicast_skb - Send a record via unicast to auditd
* @skb: audit record
*
@@ -758,6 +751,7 @@ static int kauditd_thread(void *dummy)
NULL, kauditd_rehold_skb);
if (rc < 0) {
sk = NULL;
+ auditd_reset();
goto main_queue;
}
@@ -767,6 +761,7 @@ static int kauditd_thread(void *dummy)
NULL, kauditd_hold_skb);
if (rc < 0) {
sk = NULL;
+ auditd_reset();
goto main_queue;
}
@@ -775,16 +770,18 @@ main_queue:
* unicast, dump failed record sends to the retry queue; if
* sk == NULL due to previous failures we will just do the
* multicast send and move the record to the retry queue */
- kauditd_send_queue(sk, portid, &audit_queue, 1,
- kauditd_send_multicast_skb,
- kauditd_retry_skb);
+ rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+ kauditd_send_multicast_skb,
+ kauditd_retry_skb);
+ if (sk == NULL || rc < 0)
+ auditd_reset();
+ sk = NULL;
/* drop our netns reference, no auditd sends past this line */
if (net) {
put_net(net);
net = NULL;
}
- sk = NULL;
/* we have processed all the queues so wake everyone */
wake_up(&audit_backlog_wait);
diff --git a/kernel/audit.h b/kernel/audit.h
index 0f1cf6d1878a..0d87f8ab8778 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -333,13 +333,7 @@ extern u32 audit_sig_sid;
extern int audit_filter(int msgtype, unsigned int listtype);
#ifdef CONFIG_AUDITSYSCALL
-extern int __audit_signal_info(int sig, struct task_struct *t);
-static inline int audit_signal_info(int sig, struct task_struct *t)
-{
- if (auditd_test_task(t) || (audit_signals && !audit_dummy_context()))
- return __audit_signal_info(sig, t);
- return 0;
-}
+extern int audit_signal_info(int sig, struct task_struct *t);
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
extern struct list_head *audit_killed_trees(void);
#else
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index e59ffc7fc522..1c2333155893 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2249,26 +2249,27 @@ void __audit_ptrace(struct task_struct *t)
* If the audit subsystem is being terminated, record the task (pid)
* and uid that is doing that.
*/
-int __audit_signal_info(int sig, struct task_struct *t)
+int audit_signal_info(int sig, struct task_struct *t)
{
struct audit_aux_data_pids *axp;
struct task_struct *tsk = current;
struct audit_context *ctx = tsk->audit_context;
kuid_t uid = current_uid(), t_uid = task_uid(t);
- if (auditd_test_task(t)) {
- if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
- audit_sig_pid = task_tgid_nr(tsk);
- if (uid_valid(tsk->loginuid))
- audit_sig_uid = tsk->loginuid;
- else
- audit_sig_uid = uid;
- security_task_getsecid(tsk, &audit_sig_sid);
- }
- if (!audit_signals || audit_dummy_context())
- return 0;
+ if (auditd_test_task(t) &&
+ (sig == SIGTERM || sig == SIGHUP ||
+ sig == SIGUSR1 || sig == SIGUSR2)) {
+ audit_sig_pid = task_tgid_nr(tsk);
+ if (uid_valid(tsk->loginuid))
+ audit_sig_uid = tsk->loginuid;
+ else
+ audit_sig_uid = uid;
+ security_task_getsecid(tsk, &audit_sig_sid);
}
+ if (!audit_signals || audit_dummy_context())
+ return 0;
+
/* optimize the common case by putting first signal recipient directly
* in audit_context */
if (!ctx->target_pid) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f45827e205d3..b4f1cb0c5ac7 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1162,12 +1162,12 @@ out:
LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
off = IMM;
load_word:
- /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
- * only appearing in the programs where ctx ==
- * skb. All programs keep 'ctx' in regs[BPF_REG_CTX]
- * == BPF_R6, bpf_convert_filter() saves it in BPF_R6,
- * internal BPF verifier will check that BPF_R6 ==
- * ctx.
+ /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
+ * appearing in the programs where ctx == skb
+ * (see may_access_skb() in the verifier). All programs
+ * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6,
+ * bpf_convert_filter() saves it in BPF_R6, internal BPF
+ * verifier will check that BPF_R6 == ctx.
*
* BPF_ABS and BPF_IND are wrappers of function calls,
* so they scratch BPF_R1-BPF_R5 registers, preserve
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d755..821f9e807de5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
+ /* If we tail call into other programs, we
+ * cannot make any assumptions since they
+ * can be replaced dynamically during runtime
+ * in the program array.
+ */
+ prog->cb_access = 1;
+ prog->xdp_adjust_head = 1;
+
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
* interpeter for every normal call
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 796b68d00119..a834068a400e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -765,38 +765,56 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
}
}
-static int check_ptr_alignment(struct bpf_verifier_env *env,
- struct bpf_reg_state *reg, int off, int size)
+static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
{
- if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
- if (off % size != 0) {
- verbose("misaligned access off %d size %d\n",
- off, size);
- return -EACCES;
- } else {
- return 0;
- }
- }
-
- if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
- /* misaligned access to packet is ok on x86,arm,arm64 */
- return 0;
-
if (reg->id && size != 1) {
- verbose("Unknown packet alignment. Only byte-sized access allowed\n");
+ verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
return -EACCES;
}
/* skb->data is NET_IP_ALIGN-ed */
- if (reg->type == PTR_TO_PACKET &&
- (NET_IP_ALIGN + reg->off + off) % size != 0) {
+ if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
verbose("misaligned packet access off %d+%d+%d size %d\n",
NET_IP_ALIGN, reg->off, off, size);
return -EACCES;
}
+
return 0;
}
+static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
+ int size)
+{
+ if (size != 1) {
+ verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
+ return -EACCES;
+ }
+
+ return 0;
+}
+
+static int check_ptr_alignment(const struct bpf_reg_state *reg,
+ int off, int size)
+{
+ switch (reg->type) {
+ case PTR_TO_PACKET:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_pkt_ptr_alignment(reg, off, size);
+ case PTR_TO_MAP_VALUE_ADJ:
+ return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
+ check_val_ptr_alignment(reg, size);
+ default:
+ if (off % size != 0) {
+ verbose("misaligned access off %d size %d\n",
+ off, size);
+ return -EACCES;
+ }
+
+ return 0;
+ }
+}
+
/* check whether memory at (regno + off) is accessible for t = (read | write)
* if t==write, value_regno is a register which value is stored into memory
* if t==read, value_regno is a register which will receive the value from memory
@@ -818,7 +836,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
if (size < 0)
return size;
- err = check_ptr_alignment(env, reg, off, size);
+ err = check_ptr_alignment(reg, off, size);
if (err)
return err;
@@ -1925,6 +1943,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
* register as unknown.
*/
if (env->allow_ptr_leaks &&
+ BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
(dst_reg->type == PTR_TO_MAP_VALUE ||
dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
@@ -1973,14 +1992,15 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
for (i = 0; i < MAX_BPF_REG; i++)
if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
- regs[i].range = dst_reg->off;
+ /* keep the maximum range already checked */
+ regs[i].range = max(regs[i].range, dst_reg->off);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = dst_reg->off;
+ reg->range = max(reg->range, dst_reg->off);
}
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 48851327a15e..687f5e0194ef 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2425,11 +2425,12 @@ ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
tsk = tsk->group_leader;
/*
- * Workqueue threads may acquire PF_NO_SETAFFINITY and become
- * trapped in a cpuset, or RT worker may be born in a cgroup
- * with no rt_runtime allocated. Just say no.
+ * kthreads may acquire PF_NO_SETAFFINITY during initialization.
+ * If userland migrates such a kthread to a non-root cgroup, it can
+ * become trapped in a cpuset, or RT kthread may be born in a
+ * cgroup with no rt_runtime allocated. Just say no.
*/
- if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
+ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) {
ret = -EINVAL;
goto out_unlock_rcu;
}
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 4544b115f5eb..e2d356dd7581 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -59,7 +59,7 @@ static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
struct cpumask *
irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
{
- int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec;
+ int n, nodes, cpus_per_vec, extra_vecs, curvec;
int affv = nvecs - affd->pre_vectors - affd->post_vectors;
int last_affv = affv + affd->pre_vectors;
nodemask_t nodemsk = NODE_MASK_NONE;
@@ -94,19 +94,21 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
goto done;
}
- /* Spread the vectors per node */
- vecs_per_node = affv / nodes;
- /* Account for rounding errors */
- extra_vecs = affv - (nodes * vecs_per_node);
-
for_each_node_mask(n, nodemsk) {
- int ncpus, v, vecs_to_assign = vecs_per_node;
+ int ncpus, v, vecs_to_assign, vecs_per_node;
+
+ /* Spread the vectors per node */
+ vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
/* Get the cpus on this node which are in the mask */
cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
/* Calculate the number of cpus per vector */
ncpus = cpumask_weight(nmsk);
+ vecs_to_assign = min(vecs_per_node, ncpus);
+
+ /* Account for rounding errors */
+ extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);
for (v = 0; curvec < last_affv && v < vecs_to_assign;
curvec++, v++) {
@@ -115,14 +117,14 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
/* Account for extra vectors to compensate rounding errors */
if (extra_vecs) {
cpus_per_vec++;
- if (!--extra_vecs)
- vecs_per_node++;
+ --extra_vecs;
}
irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
}
if (curvec >= last_affv)
break;
+ --nodes;
}
done:
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 2f26adea0f84..26db528c1d88 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -20,6 +20,7 @@
#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
+#include <linux/cgroup.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
@@ -225,6 +226,7 @@ static int kthread(void *_create)
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
+ cgroup_kthread_ready();
__kthread_parkme(self);
ret = threadfn(data);
}
@@ -538,6 +540,7 @@ int kthreadd(void *unused)
set_mems_allowed(node_states[N_MEMORY]);
current->flags |= PF_NOFREEZE;
+ cgroup_init_kthreadd();
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index c2b88490d857..c08fbd2f5ba9 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -46,13 +46,13 @@ enum {
(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
/*
- * CONFIG_PROVE_LOCKING_SMALL is defined for sparc. Sparc requires .text,
+ * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
* .data and .bss to fit in required 32MB limit for the kernel. With
- * PROVE_LOCKING we could go over this limit and cause system boot-up problems.
+ * CONFIG_LOCKDEP we could go over this limit and cause system boot-up problems.
* So, reduce the static allocations for lockdeps related structures so that
* everything fits in current required size limit.
*/
-#ifdef CONFIG_PROVE_LOCKING_SMALL
+#ifdef CONFIG_LOCKDEP_SMALL
/*
* MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
* we track.
diff --git a/kernel/padata.c b/kernel/padata.c
index 05316c9f32da..3202aa17492c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -186,19 +186,20 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
reorder = &next_queue->reorder;
+ spin_lock(&reorder->lock);
if (!list_empty(&reorder->list)) {
padata = list_entry(reorder->list.next,
struct padata_priv, list);
- spin_lock(&reorder->lock);
list_del_init(&padata->list);
atomic_dec(&pd->reorder_objects);
- spin_unlock(&reorder->lock);
pd->processed++;
+ spin_unlock(&reorder->lock);
goto out;
}
+ spin_unlock(&reorder->lock);
if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
padata = ERR_PTR(-ENODATA);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 0af928712174..266ddcc1d8bb 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -184,11 +184,17 @@ static void ptrace_unfreeze_traced(struct task_struct *task)
WARN_ON(!task->ptrace || task->parent != current);
+ /*
+ * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely.
+ * Recheck state under the lock to close this race.
+ */
spin_lock_irq(&task->sighand->siglock);
- if (__fatal_signal_pending(task))
- wake_up_state(task, __TASK_TRACED);
- else
- task->state = TASK_TRACED;
+ if (task->state == __TASK_TRACED) {
+ if (__fatal_signal_pending(task))
+ wake_up_state(task, __TASK_TRACED);
+ else
+ task->state = TASK_TRACED;
+ }
spin_unlock_irq(&task->sighand->siglock);
}
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index a08795e21628..00a45c45beca 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
static int __sched_clock_stable_early = 1;
/*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
*/
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
struct sched_clock_data {
u64 tick_raw;
@@ -131,17 +131,24 @@ static void __set_sched_clock_stable(void)
/*
* Attempt to make the (initial) unstable->stable transition continuous.
*/
- raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+ __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
static_branch_enable(&__sched_clock_stable);
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}
-static void __clear_sched_clock_stable(struct work_struct *work)
+static void __sched_clock_work(struct work_struct *work)
+{
+ static_branch_disable(&__sched_clock_stable);
+}
+
+static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+
+static void __clear_sched_clock_stable(void)
{
struct sched_clock_data *scd = this_scd();
@@ -154,17 +161,17 @@ static void __clear_sched_clock_stable(struct work_struct *work)
*
* Still do what we can.
*/
- gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+ __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
- scd->tick_gtod, gtod_offset,
- scd->tick_raw, raw_offset);
+ scd->tick_gtod, __gtod_offset,
+ scd->tick_raw, __sched_clock_offset);
- static_branch_disable(&__sched_clock_stable);
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-}
-static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
+ if (sched_clock_stable())
+ schedule_work(&sched_clock_work);
+}
void clear_sched_clock_stable(void)
{
@@ -173,7 +180,7 @@ void clear_sched_clock_stable(void)
smp_mb(); /* matches sched_clock_init_late() */
if (sched_clock_running == 2)
- schedule_work(&sched_clock_work);
+ __clear_sched_clock_stable();
}
void sched_clock_init_late(void)
@@ -214,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y)
*/
static u64 sched_clock_local(struct sched_clock_data *scd)
{
- u64 now, clock, old_clock, min_clock, max_clock;
+ u64 now, clock, old_clock, min_clock, max_clock, gtod;
s64 delta;
again:
@@ -231,9 +238,10 @@ again:
* scd->tick_gtod + TICK_NSEC);
*/
- clock = scd->tick_gtod + gtod_offset + delta;
- min_clock = wrap_max(scd->tick_gtod, old_clock);
- max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
+ gtod = scd->tick_gtod + __gtod_offset;
+ clock = gtod + delta;
+ min_clock = wrap_max(gtod, old_clock);
+ max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
clock = wrap_max(clock, min_clock);
clock = wrap_min(clock, max_clock);
@@ -317,7 +325,7 @@ u64 sched_clock_cpu(int cpu)
u64 clock;
if (sched_clock_stable())
- return sched_clock() + raw_offset;
+ return sched_clock() + __sched_clock_offset;
if (unlikely(!sched_clock_running))
return 0ull;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index f3778e2b46c8..aea3135c5d90 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -34,6 +34,18 @@ void disable_sched_clock_irqtime(void)
sched_clock_irqtime = 0;
}
+static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
+ enum cpu_usage_stat idx)
+{
+ u64 *cpustat = kcpustat_this_cpu->cpustat;
+
+ u64_stats_update_begin(&irqtime->sync);
+ cpustat[idx] += delta;
+ irqtime->total += delta;
+ irqtime->tick_delta += delta;
+ u64_stats_update_end(&irqtime->sync);
+}
+
/*
* Called before incrementing preempt_count on {soft,}irq_enter
* and before decrementing preempt_count on {soft,}irq_exit.
@@ -41,7 +53,6 @@ void disable_sched_clock_irqtime(void)
void irqtime_account_irq(struct task_struct *curr)
{
struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
- u64 *cpustat = kcpustat_this_cpu->cpustat;
s64 delta;
int cpu;
@@ -52,22 +63,16 @@ void irqtime_account_irq(struct task_struct *curr)
delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
irqtime->irq_start_time += delta;
- u64_stats_update_begin(&irqtime->sync);
/*
* We do not account for softirq time from ksoftirqd here.
* We want to continue accounting softirq time to ksoftirqd thread
* in that case, so as not to confuse scheduler with a special task
* that do not consume any time, but still wants to run.
*/
- if (hardirq_count()) {
- cpustat[CPUTIME_IRQ] += delta;
- irqtime->tick_delta += delta;
- } else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) {
- cpustat[CPUTIME_SOFTIRQ] += delta;
- irqtime->tick_delta += delta;
- }
-
- u64_stats_update_end(&irqtime->sync);
+ if (hardirq_count())
+ irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
+ else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
+ irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 5cbf92214ad8..767aab3505a8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1869,6 +1869,7 @@ static inline void nohz_balance_exit_idle(unsigned int cpu) { }
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
struct irqtime {
+ u64 total;
u64 tick_delta;
u64 irq_start_time;
struct u64_stats_sync sync;
@@ -1876,16 +1877,20 @@ struct irqtime {
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
+/*
+ * Returns the irqtime minus the softirq time computed by ksoftirqd.
+ * Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime
+ * and never move forward.
+ */
static inline u64 irq_time_read(int cpu)
{
struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
- u64 *cpustat = kcpustat_cpu(cpu).cpustat;
unsigned int seq;
u64 total;
do {
seq = __u64_stats_fetch_begin(&irqtime->sync);
- total = cpustat[CPUTIME_SOFTIRQ] + cpustat[CPUTIME_IRQ];
+ total = irqtime->total;
} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
return total;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index acf0a5a06da7..8c8714fcb53c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2133,9 +2133,12 @@ static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
if (write) {
if (*negp)
return -EINVAL;
+ if (*lvalp > UINT_MAX)
+ return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
+ *negp = false;
*lvalp = (unsigned long)val;
}
return 0;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b9691ee8f6c1..dd3e91d68dc7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3755,23 +3755,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash)
ftrace_probe_registered = 1;
}
-static void __disable_ftrace_function_probe(void)
+static bool __disable_ftrace_function_probe(void)
{
int i;
if (!ftrace_probe_registered)
- return;
+ return false;
for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
struct hlist_head *hhd = &ftrace_func_hash[i];
if (hhd->first)
- return;
+ return false;
}
/* no more funcs left */
ftrace_shutdown(&trace_probe_ops, 0);
ftrace_probe_registered = 0;
+ return true;
}
@@ -3901,6 +3902,7 @@ static void
__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
void *data, int flags)
{
+ struct ftrace_ops_hash old_hash_ops;
struct ftrace_func_entry *rec_entry;
struct ftrace_func_probe *entry;
struct ftrace_func_probe *p;
@@ -3912,6 +3914,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
struct hlist_node *tmp;
char str[KSYM_SYMBOL_LEN];
int i, ret;
+ bool disabled;
if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
func_g.search = NULL;
@@ -3930,6 +3933,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
mutex_lock(&trace_probe_ops.func_hash->regex_lock);
+ old_hash_ops.filter_hash = old_hash;
+ /* Probes only have filters */
+ old_hash_ops.notrace_hash = NULL;
+
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
if (!hash)
/* Hmm, should report this somehow */
@@ -3967,12 +3974,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
}
}
mutex_lock(&ftrace_lock);
- __disable_ftrace_function_probe();
+ disabled = __disable_ftrace_function_probe();
/*
* Remove after the disable is called. Otherwise, if the last
* probe is removed, a null hash means *all enabled*.
*/
ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+ /* still need to update the function call sites */
+ if (ftrace_enabled && !disabled)
+ ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+ &old_hash_ops);
synchronize_sched();
if (!ret)
free_ftrace_hash_rcu(old_hash);
@@ -5554,6 +5566,15 @@ static void clear_ftrace_pids(struct trace_array *tr)
trace_free_pid_list(pid_list);
}
+void ftrace_clear_pids(struct trace_array *tr)
+{
+ mutex_lock(&ftrace_lock);
+
+ clear_ftrace_pids(tr);
+
+ mutex_unlock(&ftrace_lock);
+}
+
static void ftrace_pid_reset(struct trace_array *tr)
{
mutex_lock(&ftrace_lock);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 96fc3c043ad6..ca47a4fa2986 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3405,11 +3405,23 @@ EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
{
struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *reader;
+ struct buffer_page *head_page;
+ struct buffer_page *commit_page;
+ unsigned commit;
cpu_buffer = iter->cpu_buffer;
- return iter->head_page == cpu_buffer->commit_page &&
- iter->head == rb_commit_index(cpu_buffer);
+ /* Remember, trace recording is off when iterator is in use */
+ reader = cpu_buffer->reader_page;
+ head_page = cpu_buffer->head_page;
+ commit_page = cpu_buffer->commit_page;
+ commit = rb_page_commit(commit_page);
+
+ return ((iter->head_page == commit_page && iter->head == commit) ||
+ (iter->head_page == reader && commit_page == head_page &&
+ head_page->read == commit &&
+ iter->head == rb_page_commit(cpu_buffer->reader_page)));
}
EXPORT_SYMBOL_GPL(ring_buffer_iter_empty);
@@ -4826,9 +4838,9 @@ static __init int test_ringbuffer(void)
rb_data[cpu].cnt = cpu;
rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
"rbtester/%d", cpu);
- if (WARN_ON(!rb_threads[cpu])) {
+ if (WARN_ON(IS_ERR(rb_threads[cpu]))) {
pr_cont("FAILED\n");
- ret = -1;
+ ret = PTR_ERR(rb_threads[cpu]);
goto out_free;
}
@@ -4838,9 +4850,9 @@ static __init int test_ringbuffer(void)
/* Now create the rb hammer! */
rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
- if (WARN_ON(!rb_hammer)) {
+ if (WARN_ON(IS_ERR(rb_hammer))) {
pr_cont("FAILED\n");
- ret = -1;
+ ret = PTR_ERR(rb_hammer);
goto out_free;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index f35109514a01..0ad75e9698f6 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6733,11 +6733,13 @@ ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
return ret;
out_reg:
- ret = register_ftrace_function_probe(glob, ops, count);
+ ret = alloc_snapshot(&global_trace);
+ if (ret < 0)
+ goto out;
- if (ret >= 0)
- alloc_snapshot(&global_trace);
+ ret = register_ftrace_function_probe(glob, ops, count);
+ out:
return ret < 0 ? ret : 0;
}
@@ -7402,6 +7404,7 @@ static int instance_rmdir(const char *name)
tracing_set_nop(tr);
event_trace_del_tracer(tr);
+ ftrace_clear_pids(tr);
ftrace_destroy_function_files(tr);
tracefs_remove_recursive(tr->dir);
free_trace_buffers(tr);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ae1cce91fead..d19d52d600d6 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -896,6 +896,7 @@ int using_ftrace_ops_list_func(void);
void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
void ftrace_init_tracefs_toplevel(struct trace_array *tr,
struct dentry *d_tracer);
+void ftrace_clear_pids(struct trace_array *tr);
#else
static inline int ftrace_trace_task(struct trace_array *tr)
{
@@ -914,6 +915,7 @@ ftrace_init_global_array_ops(struct trace_array *tr) { }
static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_clear_pids(struct trace_array *tr) { }
/* ftace_func_t type is not defined, use macro instead of static inline */
#define ftrace_init_array_ops(tr, func) do { } while (0)
#endif /* CONFIG_FUNCTION_TRACER */