diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2019-07-03 10:50:21 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2019-07-03 10:50:21 +0200 |
commit | 341924049558e5f7c1a148a2c461a417933d35d9 (patch) | |
tree | 1340b4f2131737ef4be63f18ee74cd73ad12cd3e /kernel | |
parent | 516337048fa40496ae5ca9863c367ec991a44d9a (diff) | |
parent | 4a8f81b8c053aad250ee247b219904ada72df9a4 (diff) |
Merge branch 'timers/vdso' into timers/core
so the hyper-v clocksource update can be applied.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/cgroup.c | 5 | ||||
-rw-r--r-- | kernel/bpf/core.c | 1 | ||||
-rw-r--r-- | kernel/bpf/devmap.c | 9 | ||||
-rw-r--r-- | kernel/bpf/inode.c | 5 | ||||
-rw-r--r-- | kernel/bpf/lpm_trie.c | 14 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 8 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 12 | ||||
-rw-r--r-- | kernel/cgroup/pids.c | 5 | ||||
-rw-r--r-- | kernel/cgroup/rdma.c | 5 | ||||
-rw-r--r-- | kernel/compat.c | 5 | ||||
-rw-r--r-- | kernel/crash_core.c | 4 | ||||
-rw-r--r-- | kernel/kexec.c | 4 | ||||
-rw-r--r-- | kernel/kexec_core.c | 4 | ||||
-rw-r--r-- | kernel/kexec_file.c | 4 | ||||
-rw-r--r-- | kernel/power/poweroff.c | 3 | ||||
-rw-r--r-- | kernel/sched/debug.c | 5 | ||||
-rw-r--r-- | kernel/sysctl.c | 44 | ||||
-rw-r--r-- | kernel/time/Makefile | 1 | ||||
-rw-r--r-- | kernel/time/vsyscall.c | 133 | ||||
-rw-r--r-- | kernel/trace/bpf_trace.c | 100 |
20 files changed, 283 insertions, 88 deletions
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index fcde0f7b2585..92a7d0cf8d13 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Functions to manage eBPF programs attached to cgroups * * Copyright (c) 2016 Daniel Mack - * - * This file is subject to the terms and conditions of version 2 of the GNU - * General Public License. See the file COPYING in the main directory of the - * Linux distribution for more details. */ #include <linux/kernel.h> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7c473f208a10..080e2bb644cc 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2097,7 +2097,6 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key); EXPORT_SYMBOL(bpf_stats_enabled_key); -int sysctl_bpf_stats_enabled __read_mostly; /* All definitions of tracepoints related to BPF. */ #define CREATE_TRACE_POINTS diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 15dbc15c5b0c..cd8297b3bdb9 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -178,6 +178,7 @@ static void dev_map_free(struct bpf_map *map) if (!dev) continue; + free_percpu(dev->bulkq); dev_put(dev->dev); kfree(dev); } @@ -273,6 +274,7 @@ void __dev_map_flush(struct bpf_map *map) unsigned long *bitmap = this_cpu_ptr(dtab->flush_needed); u32 bit; + rcu_read_lock(); for_each_set_bit(bit, bitmap, map->max_entries) { struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]); struct xdp_bulk_queue *bq; @@ -283,11 +285,12 @@ void __dev_map_flush(struct bpf_map *map) if (unlikely(!dev)) continue; - __clear_bit(bit, bitmap); - bq = this_cpu_ptr(dev->bulkq); bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, true); + + __clear_bit(bit, bitmap); } + rcu_read_unlock(); } /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or @@ -380,6 +383,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) int cpu; + rcu_read_lock(); for_each_online_cpu(cpu) { bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu); __clear_bit(dev->bit, bitmap); @@ -387,6 +391,7 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev) bq = per_cpu_ptr(dev->bulkq, cpu); bq_xmit_all(dev, bq, XDP_XMIT_FLUSH, false); } + rcu_read_unlock(); } } diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 84a80b02db99..cc0d0cf114e3 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Minimal file system backend for holding eBPF maps and programs, * used by bpf(2) object pinning. @@ -5,10 +6,6 @@ * Authors: * * Daniel Borkmann <daniel@iogearbox.net> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. */ #include <linux/init.h> diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index e61630c2e50b..57b59cca4db7 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Longest prefix match list implementation * * Copyright (c) 2016,2017 Daniel Mack * Copyright (c) 2016 David Herrmann - * - * This file is subject to the terms and conditions of version 2 of the GNU - * General Public License. See the file COPYING in the main directory of the - * Linux distribution for more details. */ #include <linux/bpf.h> @@ -716,9 +713,14 @@ find_leftmost: * have exact two children, so this function will never return NULL. */ for (node = search_root; node;) { - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) + if (node->flags & LPM_TREE_NODE_FLAG_IM) { + node = rcu_dereference(node->child[0]); + } else { next_node = node; - node = rcu_dereference(node->child[0]); + node = rcu_dereference(node->child[0]); + if (!node) + node = rcu_dereference(next_node->child[1]); + } } do_copy: next_key->prefixlen = next_node->prefixlen; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 96c8928b468b..5b30f8baaf02 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1573,6 +1573,8 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: return 0; default: return -EINVAL; @@ -1867,6 +1869,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -1952,6 +1956,8 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: ptype = BPF_PROG_TYPE_CGROUP_SOCK_ADDR; break; case BPF_CGROUP_SOCK_OPS: @@ -2003,6 +2009,8 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_INET6_CONNECT: case BPF_CGROUP_UDP4_SENDMSG: case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: case BPF_CGROUP_SOCK_OPS: case BPF_CGROUP_DEVICE: case BPF_CGROUP_SYSCTL: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d15cc4fafa89..a5c369e60343 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5353,9 +5353,12 @@ static int check_return_code(struct bpf_verifier_env *env) struct tnum range = tnum_range(0, 1); switch (env->prog->type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || + env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG) + range = tnum_range(1, 1); case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: @@ -5372,16 +5375,17 @@ static int check_return_code(struct bpf_verifier_env *env) } if (!tnum_in(range, reg->var_off)) { + char tn_buf[48]; + verbose(env, "At program exit the register R0 "); if (!tnum_is_unknown(reg->var_off)) { - char tn_buf[48]; - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, "has value %s", tn_buf); } else { verbose(env, "has unknown scalar value"); } - verbose(env, " should have been 0 or 1\n"); + tnum_strn(tn_buf, sizeof(tn_buf), range); + verbose(env, " should have been in %s\n", tn_buf); return -EINVAL; } return 0; diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index c9960baaa14f..8e513a573fe9 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Process number limiting controller for cgroups. * @@ -25,10 +26,6 @@ * a superset of parent/child/pids.current. * * Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com> - * - * This file is subject to the terms and conditions of version 2 of the GNU - * General Public License. See the file COPYING in the main directory of the - * Linux distribution for more details. */ #include <linux/kernel.h> diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c index 1d75ae7f1cb7..ae042c347c64 100644 --- a/kernel/cgroup/rdma.c +++ b/kernel/cgroup/rdma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * RDMA resource limiting controller for cgroups. * @@ -5,10 +6,6 @@ * additional RDMA resources after a certain limit is reached. * * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> - * - * This file is subject to the terms and conditions of version 2 of the GNU - * General Public License. See the file COPYING in the main directory of the - * Linux distribution for more details. */ #include <linux/bitops.h> diff --git a/kernel/compat.c b/kernel/compat.c index b5f7063c0db6..a2bc1d6ceb57 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/compat.c * @@ -5,10 +6,6 @@ * on 64 bit kernels. * * Copyright (C) 2002-2003 Stephen Rothwell, IBM Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include <linux/linkage.h> diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 093c9f917ed0..9f1557b98468 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * crash.c - kernel crash support code. * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #include <linux/crash_core.h> diff --git a/kernel/kexec.c b/kernel/kexec.c index 68559808fdfa..1b018f1a6e0d 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * kexec.c - kexec_load system call * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index fd5c95ff9251..d5870723b8ad 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -1,9 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * kexec.c - kexec system call core code. * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 072b6ee55e3f..ef7b951a8087 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -1,12 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * kexec: kexec_file_load system call * * Copyright (C) 2014 Red Hat Inc. * Authors: * Vivek Goyal <vgoyal@redhat.com> - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index 7ef6866b521d..6d475281c730 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -1,7 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * poweroff.c - sysrq handler to gracefully power down machine. - * - * This file is released under the GPL v2 */ #include <linux/kernel.h> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 678bfb9bd87f..14c6a8716ba1 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1,13 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * kernel/sched/debug.c * * Print the CFS rbtree and other debugging details * * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. */ #include "sched.h" diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7d1008be6173..1beca96fb625 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -230,11 +230,6 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #endif static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -#ifdef CONFIG_BPF_SYSCALL -static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -#endif #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses its own private copy */ @@ -1253,12 +1248,10 @@ static struct ctl_table kern_table[] = { }, { .procname = "bpf_stats_enabled", - .data = &sysctl_bpf_stats_enabled, - .maxlen = sizeof(sysctl_bpf_stats_enabled), + .data = &bpf_stats_enabled_key.key, + .maxlen = sizeof(bpf_stats_enabled_key), .mode = 0644, - .proc_handler = proc_dointvec_minmax_bpf_stats, - .extra1 = &zero, - .extra2 = &one, + .proc_handler = proc_do_static_key, }, #endif #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) @@ -3374,26 +3367,35 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, #endif /* CONFIG_PROC_SYSCTL */ -#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) -static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +#if defined(CONFIG_SYSCTL) +int proc_do_static_key(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - int ret, bpf_stats = *(int *)table->data; - struct ctl_table tmp = *table; + struct static_key *key = (struct static_key *)table->data; + static DEFINE_MUTEX(static_key_mutex); + int val, ret; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + .extra1 = &zero, + .extra2 = &one, + }; if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; - tmp.data = &bpf_stats; + mutex_lock(&static_key_mutex); + val = static_key_enabled(key); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { - *(int *)table->data = bpf_stats; - if (bpf_stats) - static_branch_enable(&bpf_stats_enabled_key); + if (val) + static_key_enable(key); else - static_branch_disable(&bpf_stats_enabled_key); + static_key_disable(key); } + mutex_unlock(&static_key_mutex); return ret; } #endif diff --git a/kernel/time/Makefile b/kernel/time/Makefile index f1e46f338a9c..1867044800bb 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -16,5 +16,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o +obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c new file mode 100644 index 000000000000..a80893180826 --- /dev/null +++ b/kernel/time/vsyscall.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 ARM Ltd. + * + * Generic implementation of update_vsyscall and update_vsyscall_tz. + * + * Based on the x86 specific implementation. + */ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> + +static inline void update_vdso_data(struct vdso_data *vdata, + struct timekeeper *tk) +{ + struct vdso_timestamp *vdso_ts; + u64 nsec; + + vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; + vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; + vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult; + vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift; + vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; + vdata[CS_RAW].mask = tk->tkr_raw.mask; + vdata[CS_RAW].mult = tk->tkr_raw.mult; + vdata[CS_RAW].shift = tk->tkr_raw.shift; + + /* CLOCK_REALTIME */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + + /* CLOCK_MONOTONIC */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + + nsec = tk->tkr_mono.xtime_nsec; + nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + /* CLOCK_MONOTONIC_RAW */ + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts->sec = tk->raw_sec; + vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + + /* CLOCK_BOOTTIME */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec; + nsec += ((u64)(tk->wall_to_monotonic.tv_nsec + + ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift); + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + /* CLOCK_TAI */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; + vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + + /* + * Read without the seqlock held by clock_getres(). + * Note: No need to have a second copy. + */ + WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_timestamp *vdso_ts; + u64 nsec; + + if (__arch_update_vdso_data()) { + /* + * Some architectures might want to skip the update of the + * data page. + */ + return; + } + + /* copy vsyscall data */ + vdso_write_begin(vdata); + + vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); + vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); + + /* CLOCK_REALTIME_COARSE */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + + /* CLOCK_MONOTONIC_COARSE */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec = nsec + tk->wall_to_monotonic.tv_nsec; + while (nsec >= NSEC_PER_SEC) { + nsec = nsec - NSEC_PER_SEC; + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + if (__arch_use_vsyscall(vdata)) + update_vdso_data(vdata, tk); + + __arch_update_vsyscall(vdata, tk); + + vdso_write_end(vdata); + + __arch_sync_vdso_data(vdata); +} + +void update_vsyscall_tz(void) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + + if (__arch_use_vsyscall(vdata)) { + vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; + vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; + } + + __arch_sync_vdso_data(vdata); +} diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f92d6ad5e080..1c9a4745e596 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -410,8 +410,6 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { .arg4_type = ARG_CONST_SIZE, }; -static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd); - static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, u64 flags, struct perf_sample_data *sd) @@ -442,24 +440,50 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return perf_event_output(event, sd, regs); } +/* + * Support executing tracepoints in normal, irq, and nmi context that each call + * bpf_perf_event_output + */ +struct bpf_trace_sample_data { + struct perf_sample_data sds[3]; +}; + +static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_trace_sds); +static DEFINE_PER_CPU(int, bpf_trace_nest_level); BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd); + struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds); + int nest_level = this_cpu_inc_return(bpf_trace_nest_level); struct perf_raw_record raw = { .frag = { .size = size, .data = data, }, }; + struct perf_sample_data *sd; + int err; - if (unlikely(flags & ~(BPF_F_INDEX_MASK))) - return -EINVAL; + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) { + err = -EBUSY; + goto out; + } + + sd = &sds->sds[nest_level - 1]; + + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) { + err = -EINVAL; + goto out; + } perf_sample_data_init(sd, 0, 0); sd->raw = &raw; - return __bpf_perf_event_output(regs, map, flags, sd); + err = __bpf_perf_event_output(regs, map, flags, sd); + +out: + this_cpu_dec(bpf_trace_nest_level); + return err; } static const struct bpf_func_proto bpf_perf_event_output_proto = { @@ -822,16 +846,48 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) /* * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp * to avoid potential recursive reuse issue when/if tracepoints are added - * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack + * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack. + * + * Since raw tracepoints run despite bpf_prog_active, support concurrent usage + * in normal, irq, and nmi context. */ -static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs); +struct bpf_raw_tp_regs { + struct pt_regs regs[3]; +}; +static DEFINE_PER_CPU(struct bpf_raw_tp_regs, bpf_raw_tp_regs); +static DEFINE_PER_CPU(int, bpf_raw_tp_nest_level); +static struct pt_regs *get_bpf_raw_tp_regs(void) +{ + struct bpf_raw_tp_regs *tp_regs = this_cpu_ptr(&bpf_raw_tp_regs); + int nest_level = this_cpu_inc_return(bpf_raw_tp_nest_level); + + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(tp_regs->regs))) { + this_cpu_dec(bpf_raw_tp_nest_level); + return ERR_PTR(-EBUSY); + } + + return &tp_regs->regs[nest_level - 1]; +} + +static void put_bpf_raw_tp_regs(void) +{ + this_cpu_dec(bpf_raw_tp_nest_level); +} + BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags, void *, data, u64, size) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return ____bpf_perf_event_output(regs, map, flags, data, size); + ret = ____bpf_perf_event_output(regs, map, flags, data, size); + + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { @@ -848,12 +904,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); /* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */ - return bpf_get_stackid((unsigned long) regs, (unsigned long) map, - flags, 0, 0); + ret = bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { @@ -868,11 +930,17 @@ static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = { BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args, void *, buf, u32, size, u64, flags) { - struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs); + struct pt_regs *regs = get_bpf_raw_tp_regs(); + int ret; + + if (IS_ERR(regs)) + return PTR_ERR(regs); perf_fetch_caller_regs(regs); - return bpf_get_stack((unsigned long) regs, (unsigned long) buf, - (unsigned long) size, flags, 0); + ret = bpf_get_stack((unsigned long) regs, (unsigned long) buf, + (unsigned long) size, flags, 0); + put_bpf_raw_tp_regs(); + return ret; } static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = { |