summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorKP Singh <kpsingh@google.com>2020-11-06 10:37:40 +0000
committerAlexei Starovoitov <ast@kernel.org>2020-11-06 08:08:37 -0800
commit4cf1bc1f10452065a29d576fc5693fc4fab5b919 (patch)
tree142a7cf6f1baf696dc72b54d510a59823ca139eb /kernel
parent9e7a4d9831e836eb03dedab89902277ee94eb7a6 (diff)
bpf: Implement task local storage
Similar to bpf_local_storage for sockets and inodes add local storage for task_struct. The life-cycle of storage is managed with the life-cycle of the task_struct. i.e. the storage is destroyed along with the owning task with a callback to the bpf_task_storage_free from the task_free LSM hook. The BPF LSM allocates an __rcu pointer to the bpf_local_storage in the security blob which are now stackable and can co-exist with other LSMs. The userspace map operations can be done by using a pid fd as a key passed to the lookup, update and delete operations. Signed-off-by: KP Singh <kpsingh@google.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Song Liu <songliubraving@fb.com> Acked-by: Martin KaFai Lau <kafai@fb.com> Link: https://lore.kernel.org/bpf/20201106103747.2780972-3-kpsingh@chromium.org
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/bpf_lsm.c4
-rw-r--r--kernel/bpf/bpf_task_storage.c315
-rw-r--r--kernel/bpf/syscall.c3
-rw-r--r--kernel/bpf/verifier.c10
5 files changed, 332 insertions, 1 deletions
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index bdc8cd1b6767..f0b93ced5a7f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
+obj-${CONFIG_BPF_LSM} += bpf_task_storage.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index cd8a617f2109..e92c51bebb47 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -63,6 +63,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
return &bpf_spin_unlock_proto;
+ case BPF_FUNC_task_storage_get:
+ return &bpf_task_storage_get_proto;
+ case BPF_FUNC_task_storage_delete:
+ return &bpf_task_storage_delete_proto;
default:
return tracing_prog_func_proto(func_id, prog);
}
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
new file mode 100644
index 000000000000..39a45fba4fb0
--- /dev/null
+++ b/kernel/bpf/bpf_task_storage.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Facebook
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/pid.h>
+#include <linux/sched.h>
+#include <linux/rculist.h>
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/bpf.h>
+#include <linux/bpf_local_storage.h>
+#include <linux/filter.h>
+#include <uapi/linux/btf.h>
+#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
+#include <linux/fdtable.h>
+
+DEFINE_BPF_STORAGE_CACHE(task_cache);
+
+static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
+{
+ struct task_struct *task = owner;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return NULL;
+ return &bsb->storage;
+}
+
+static struct bpf_local_storage_data *
+task_storage_lookup(struct task_struct *task, struct bpf_map *map,
+ bool cacheit_lockit)
+{
+ struct bpf_local_storage *task_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_storage_blob *bsb;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return NULL;
+
+ task_storage = rcu_dereference(bsb->storage);
+ if (!task_storage)
+ return NULL;
+
+ smap = (struct bpf_local_storage_map *)map;
+ return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
+}
+
+void bpf_task_storage_free(struct task_struct *task)
+{
+ struct bpf_local_storage_elem *selem;
+ struct bpf_local_storage *local_storage;
+ bool free_task_storage = false;
+ struct bpf_storage_blob *bsb;
+ struct hlist_node *n;
+
+ bsb = bpf_task(task);
+ if (!bsb)
+ return;
+
+ rcu_read_lock();
+
+ local_storage = rcu_dereference(bsb->storage);
+ if (!local_storage) {
+ rcu_read_unlock();
+ return;
+ }
+
+ /* Neither the bpf_prog nor the bpf-map's syscall
+ * could be modifying the local_storage->list now.
+ * Thus, no elem can be added-to or deleted-from the
+ * local_storage->list by the bpf_prog or by the bpf-map's syscall.
+ *
+ * It is racing with bpf_local_storage_map_free() alone
+ * when unlinking elem from the local_storage->list and
+ * the map's bucket->list.
+ */
+ raw_spin_lock_bh(&local_storage->lock);
+ hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
+ /* Always unlink from map before unlinking from
+ * local_storage.
+ */
+ bpf_selem_unlink_map(selem);
+ free_task_storage = bpf_selem_unlink_storage_nolock(
+ local_storage, selem, false);
+ }
+ raw_spin_unlock_bh(&local_storage->lock);
+ rcu_read_unlock();
+
+ /* free_task_storage should always be true as long as
+ * local_storage->list was non-empty.
+ */
+ if (free_task_storage)
+ kfree_rcu(local_storage, rcu);
+}
+
+static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_local_storage_data *sdata;
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return ERR_CAST(pid);
+
+ /* We should be in an RCU read side critical section, it should be safe
+ * to call pid_task.
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ sdata = task_storage_lookup(task, map, true);
+ put_pid(pid);
+ return sdata ? sdata->data : NULL;
+out:
+ put_pid(pid);
+ return ERR_PTR(err);
+}
+
+static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
+ void *value, u64 map_flags)
+{
+ struct bpf_local_storage_data *sdata;
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ /* We should be in an RCU read side critical section, it should be safe
+ * to call pid_task.
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ sdata = bpf_local_storage_update(
+ task, (struct bpf_local_storage_map *)map, value, map_flags);
+
+ err = PTR_ERR_OR_ZERO(sdata);
+out:
+ put_pid(pid);
+ return err;
+}
+
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
+{
+ struct bpf_local_storage_data *sdata;
+
+ sdata = task_storage_lookup(task, map, false);
+ if (!sdata)
+ return -ENOENT;
+
+ bpf_selem_unlink(SELEM(sdata));
+
+ return 0;
+}
+
+static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
+{
+ struct task_struct *task;
+ unsigned int f_flags;
+ struct pid *pid;
+ int fd, err;
+
+ fd = *(int *)key;
+ pid = pidfd_get_pid(fd, &f_flags);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+
+ /* We should be in an RCU read side critical section, it should be safe
+ * to call pid_task.
+ */
+ WARN_ON_ONCE(!rcu_read_lock_held());
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = task_storage_delete(task, map);
+out:
+ put_pid(pid);
+ return err;
+}
+
+BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags)
+{
+ struct bpf_local_storage_data *sdata;
+
+ if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
+ return (unsigned long)NULL;
+
+ /* explicitly check that the task_storage_ptr is not
+ * NULL as task_storage_lookup returns NULL in this case and
+ * bpf_local_storage_update expects the owner to have a
+ * valid storage pointer.
+ */
+ if (!task_storage_ptr(task))
+ return (unsigned long)NULL;
+
+ sdata = task_storage_lookup(task, map, true);
+ if (sdata)
+ return (unsigned long)sdata->data;
+
+ /* This helper must only be called from places where the lifetime of the task
+ * is guaranteed. Either by being refcounted or by being protected
+ * by an RCU read-side critical section.
+ */
+ if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
+ sdata = bpf_local_storage_update(
+ task, (struct bpf_local_storage_map *)map, value,
+ BPF_NOEXIST);
+ return IS_ERR(sdata) ? (unsigned long)NULL :
+ (unsigned long)sdata->data;
+ }
+
+ return (unsigned long)NULL;
+}
+
+BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
+ task)
+{
+ /* This helper must only be called from places where the lifetime of the task
+ * is guaranteed. Either by being refcounted or by being protected
+ * by an RCU read-side critical section.
+ */
+ return task_storage_delete(task, map);
+}
+
+static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ return -ENOTSUPP;
+}
+
+static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = bpf_local_storage_map_alloc(attr);
+ if (IS_ERR(smap))
+ return ERR_CAST(smap);
+
+ smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+ return &smap->map;
+}
+
+static void task_storage_map_free(struct bpf_map *map)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = (struct bpf_local_storage_map *)map;
+ bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+ bpf_local_storage_map_free(smap);
+}
+
+static int task_storage_map_btf_id;
+const struct bpf_map_ops task_storage_map_ops = {
+ .map_meta_equal = bpf_map_meta_equal,
+ .map_alloc_check = bpf_local_storage_map_alloc_check,
+ .map_alloc = task_storage_map_alloc,
+ .map_free = task_storage_map_free,
+ .map_get_next_key = notsupp_get_next_key,
+ .map_lookup_elem = bpf_pid_task_storage_lookup_elem,
+ .map_update_elem = bpf_pid_task_storage_update_elem,
+ .map_delete_elem = bpf_pid_task_storage_delete_elem,
+ .map_check_btf = bpf_local_storage_map_check_btf,
+ .map_btf_name = "bpf_local_storage_map",
+ .map_btf_id = &task_storage_map_btf_id,
+ .map_owner_storage_ptr = task_storage_ptr,
+};
+
+BTF_ID_LIST_SINGLE(bpf_task_storage_btf_ids, struct, task_struct)
+
+const struct bpf_func_proto bpf_task_storage_get_proto = {
+ .func = bpf_task_storage_get,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+ .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
+
+const struct bpf_func_proto bpf_task_storage_delete_proto = {
+ .func = bpf_task_storage_delete,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_PTR_TO_BTF_ID,
+ .arg2_btf_id = &bpf_task_storage_btf_ids[0],
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8f50c9c19f1b..f3fe9f53f93c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -773,7 +773,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
map->map_type != BPF_MAP_TYPE_ARRAY &&
map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
map->map_type != BPF_MAP_TYPE_SK_STORAGE &&
- map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+ map->map_type != BPF_MAP_TYPE_INODE_STORAGE &&
+ map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
return -ENOTSUPP;
if (map->spin_lock_off + sizeof(struct bpf_spin_lock) >
map->value_size) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f863aa84d0a2..00960f6a83ec 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4469,6 +4469,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_inode_storage_delete)
goto error;
break;
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ if (func_id != BPF_FUNC_task_storage_get &&
+ func_id != BPF_FUNC_task_storage_delete)
+ goto error;
+ break;
default:
break;
}
@@ -4547,6 +4552,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
goto error;
break;
+ case BPF_FUNC_task_storage_get:
+ case BPF_FUNC_task_storage_delete:
+ if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
+ goto error;
+ break;
default:
break;
}