summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/bpf_sk_storage.c285
-rw-r--r--net/core/datagram.c25
-rw-r--r--net/core/dev.c3
-rw-r--r--net/core/devlink.c37
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/flow_dissector.c4
-rw-r--r--net/core/flow_offload.c7
-rw-r--r--net/core/netclassid_cgroup.c47
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/skmsg.c8
-rw-r--r--net/core/sock.c5
11 files changed, 365 insertions, 62 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 3ab23f698221..756b63b6f7b3 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -8,6 +8,7 @@
#include <linux/bpf.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
+#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
static atomic_t cache_idx;
@@ -60,7 +61,7 @@ struct bpf_sk_storage_data {
* the number of cachelines access during the cache hit case.
*/
struct bpf_sk_storage_map __rcu *smap;
- u8 data[0] __aligned(8);
+ u8 data[] __aligned(8);
};
/* Linked to bpf_sk_storage and bpf_sk_storage_map */
@@ -606,6 +607,14 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
kfree(map);
}
+/* U16_MAX is much more than enough for sk local storage
+ * considering a tcp_sock is ~2k.
+ */
+#define MAX_VALUE_SIZE \
+ min_t(u32, \
+ (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \
+ (U16_MAX - sizeof(struct bpf_sk_storage_elem)))
+
static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
{
if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK ||
@@ -619,12 +628,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (attr->value_size >= KMALLOC_MAX_SIZE -
- MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) ||
- /* U16_MAX is much more than enough for sk local storage
- * considering a tcp_sock is ~2k.
- */
- attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem))
+ if (attr->value_size > MAX_VALUE_SIZE)
return -E2BIG;
return 0;
@@ -910,3 +914,270 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_SOCKET,
};
+
+struct bpf_sk_storage_diag {
+ u32 nr_maps;
+ struct bpf_map *maps[];
+};
+
+/* The reply will be like:
+ * INET_DIAG_BPF_SK_STORAGES (nla_nest)
+ * SK_DIAG_BPF_STORAGE (nla_nest)
+ * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+ * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+ * SK_DIAG_BPF_STORAGE (nla_nest)
+ * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+ * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+ * ....
+ */
+static int nla_value_size(u32 value_size)
+{
+ /* SK_DIAG_BPF_STORAGE (nla_nest)
+ * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
+ * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
+ */
+ return nla_total_size(0) + nla_total_size(sizeof(u32)) +
+ nla_total_size_64bit(value_size);
+}
+
+void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
+{
+ u32 i;
+
+ if (!diag)
+ return;
+
+ for (i = 0; i < diag->nr_maps; i++)
+ bpf_map_put(diag->maps[i]);
+
+ kfree(diag);
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);
+
+static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
+ const struct bpf_map *map)
+{
+ u32 i;
+
+ for (i = 0; i < diag->nr_maps; i++) {
+ if (diag->maps[i] == map)
+ return true;
+ }
+
+ return false;
+}
+
+struct bpf_sk_storage_diag *
+bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
+{
+ struct bpf_sk_storage_diag *diag;
+ struct nlattr *nla;
+ u32 nr_maps = 0;
+ int rem, err;
+
+ /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
+ * the map_alloc_check() side also does.
+ */
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ nla_for_each_nested(nla, nla_stgs, rem) {
+ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ nr_maps++;
+ }
+
+ diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps,
+ GFP_KERNEL);
+ if (!diag)
+ return ERR_PTR(-ENOMEM);
+
+ nla_for_each_nested(nla, nla_stgs, rem) {
+ struct bpf_map *map;
+ int map_fd;
+
+ if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ continue;
+
+ map_fd = nla_get_u32(nla);
+ map = bpf_map_get(map_fd);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto err_free;
+ }
+ if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
+ bpf_map_put(map);
+ err = -EINVAL;
+ goto err_free;
+ }
+ if (diag_check_dup(diag, map)) {
+ bpf_map_put(map);
+ err = -EEXIST;
+ goto err_free;
+ }
+ diag->maps[diag->nr_maps++] = map;
+ }
+
+ return diag;
+
+err_free:
+ bpf_sk_storage_diag_free(diag);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
+
+static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb)
+{
+ struct nlattr *nla_stg, *nla_value;
+ struct bpf_sk_storage_map *smap;
+
+ /* It cannot exceed max nlattr's payload */
+ BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE);
+
+ nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
+ if (!nla_stg)
+ return -EMSGSIZE;
+
+ smap = rcu_dereference(sdata->smap);
+ if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
+ goto errout;
+
+ nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
+ smap->map.value_size,
+ SK_DIAG_BPF_STORAGE_PAD);
+ if (!nla_value)
+ goto errout;
+
+ if (map_value_has_spin_lock(&smap->map))
+ copy_map_value_locked(&smap->map, nla_data(nla_value),
+ sdata->data, true);
+ else
+ copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
+
+ nla_nest_end(skb, nla_stg);
+ return 0;
+
+errout:
+ nla_nest_cancel(skb, nla_stg);
+ return -EMSGSIZE;
+}
+
+static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size)
+{
+ /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
+ unsigned int diag_size = nla_total_size(0);
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_elem *selem;
+ struct bpf_sk_storage_map *smap;
+ struct nlattr *nla_stgs;
+ unsigned int saved_len;
+ int err = 0;
+
+ rcu_read_lock();
+
+ sk_storage = rcu_dereference(sk->sk_bpf_storage);
+ if (!sk_storage || hlist_empty(&sk_storage->list)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ nla_stgs = nla_nest_start(skb, stg_array_type);
+ if (!nla_stgs)
+ /* Continue to learn diag_size */
+ err = -EMSGSIZE;
+
+ saved_len = skb->len;
+ hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
+ smap = rcu_dereference(SDATA(selem)->smap);
+ diag_size += nla_value_size(smap->map.value_size);
+
+ if (nla_stgs && diag_get(SDATA(selem), skb))
+ /* Continue to learn diag_size */
+ err = -EMSGSIZE;
+ }
+
+ rcu_read_unlock();
+
+ if (nla_stgs) {
+ if (saved_len == skb->len)
+ nla_nest_cancel(skb, nla_stgs);
+ else
+ nla_nest_end(skb, nla_stgs);
+ }
+
+ if (diag_size == nla_total_size(0)) {
+ *res_diag_size = 0;
+ return 0;
+ }
+
+ *res_diag_size = diag_size;
+ return err;
+}
+
+int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
+ struct sock *sk, struct sk_buff *skb,
+ int stg_array_type,
+ unsigned int *res_diag_size)
+{
+ /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
+ unsigned int diag_size = nla_total_size(0);
+ struct bpf_sk_storage *sk_storage;
+ struct bpf_sk_storage_data *sdata;
+ struct nlattr *nla_stgs;
+ unsigned int saved_len;
+ int err = 0;
+ u32 i;
+
+ *res_diag_size = 0;
+
+ /* No map has been specified. Dump all. */
+ if (!diag->nr_maps)
+ return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
+ res_diag_size);
+
+ rcu_read_lock();
+ sk_storage = rcu_dereference(sk->sk_bpf_storage);
+ if (!sk_storage || hlist_empty(&sk_storage->list)) {
+ rcu_read_unlock();
+ return 0;
+ }
+
+ nla_stgs = nla_nest_start(skb, stg_array_type);
+ if (!nla_stgs)
+ /* Continue to learn diag_size */
+ err = -EMSGSIZE;
+
+ saved_len = skb->len;
+ for (i = 0; i < diag->nr_maps; i++) {
+ sdata = __sk_storage_lookup(sk_storage,
+ (struct bpf_sk_storage_map *)diag->maps[i],
+ false);
+
+ if (!sdata)
+ continue;
+
+ diag_size += nla_value_size(diag->maps[i]->value_size);
+
+ if (nla_stgs && diag_get(sdata, skb))
+ /* Continue to learn diag_size */
+ err = -EMSGSIZE;
+ }
+ rcu_read_unlock();
+
+ if (nla_stgs) {
+ if (saved_len == skb->len)
+ nla_nest_cancel(skb, nla_stgs);
+ else
+ nla_nest_end(skb, nla_stgs);
+ }
+
+ if (diag_size == nla_total_size(0)) {
+ *res_diag_size = 0;
+ return 0;
+ }
+
+ *res_diag_size = diag_size;
+ return err;
+}
+EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a78e7f864c1e..4213081c6ed3 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -166,8 +166,6 @@ done:
struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
struct sk_buff_head *queue,
unsigned int flags,
- void (*destructor)(struct sock *sk,
- struct sk_buff *skb),
int *off, int *err,
struct sk_buff **last)
{
@@ -198,8 +196,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
refcount_inc(&skb->users);
} else {
__skb_unlink(skb, queue);
- if (destructor)
- destructor(sk, skb);
}
*off = _off;
return skb;
@@ -212,7 +208,6 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
* @sk: socket
* @queue: socket queue from which to receive
* @flags: MSG\_ flags
- * @destructor: invoked under the receive lock on successful dequeue
* @off: an offset in bytes to peek skb from. Returns an offset
* within an skb where data actually starts
* @err: error code returned
@@ -245,10 +240,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
*/
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
struct sk_buff_head *queue,
- unsigned int flags,
- void (*destructor)(struct sock *sk,
- struct sk_buff *skb),
- int *off, int *err,
+ unsigned int flags, int *off, int *err,
struct sk_buff **last)
{
struct sk_buff *skb;
@@ -269,8 +261,8 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
* However, this function was correct in any case. 8)
*/
spin_lock_irqsave(&queue->lock, cpu_flags);
- skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
- off, &error, last);
+ skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
+ last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
if (error)
goto no_packet;
@@ -293,10 +285,7 @@ EXPORT_SYMBOL(__skb_try_recv_datagram);
struct sk_buff *__skb_recv_datagram(struct sock *sk,
struct sk_buff_head *sk_queue,
- unsigned int flags,
- void (*destructor)(struct sock *sk,
- struct sk_buff *skb),
- int *off, int *err)
+ unsigned int flags, int *off, int *err)
{
struct sk_buff *skb, *last;
long timeo;
@@ -304,8 +293,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk,
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
- skb = __skb_try_recv_datagram(sk, sk_queue, flags, destructor,
- off, err, &last);
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
+ &last);
if (skb)
return skb;
@@ -326,7 +315,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
return __skb_recv_datagram(sk, &sk->sk_receive_queue,
flags | (noblock ? MSG_DONTWAIT : 0),
- NULL, &off, err);
+ &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
diff --git a/net/core/dev.c b/net/core/dev.c
index 25dab1598803..d84541c24446 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4848,7 +4848,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
+ switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
+ &cl_res, false)) {
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
skb->tc_index = TC_H_MIN(cl_res.classid);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f8af5e2d748b..f51bebc8c33f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -545,6 +545,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER,
attrs->phys.port_number))
return -EMSGSIZE;
@@ -3352,34 +3353,41 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
struct genl_info *info,
union devlink_param_value *value)
{
+ struct nlattr *param_data;
int len;
- if (param->type != DEVLINK_PARAM_TYPE_BOOL &&
- !info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA])
+ param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA];
+
+ if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data)
return -EINVAL;
switch (param->type) {
case DEVLINK_PARAM_TYPE_U8:
- value->vu8 = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u8))
+ return -EINVAL;
+ value->vu8 = nla_get_u8(param_data);
break;
case DEVLINK_PARAM_TYPE_U16:
- value->vu16 = nla_get_u16(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u16))
+ return -EINVAL;
+ value->vu16 = nla_get_u16(param_data);
break;
case DEVLINK_PARAM_TYPE_U32:
- value->vu32 = nla_get_u32(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]);
+ if (nla_len(param_data) != sizeof(u32))
+ return -EINVAL;
+ value->vu32 = nla_get_u32(param_data);
break;
case DEVLINK_PARAM_TYPE_STRING:
- len = strnlen(nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]),
- nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
- if (len == nla_len(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]) ||
+ len = strnlen(nla_data(param_data), nla_len(param_data));
+ if (len == nla_len(param_data) ||
len >= __DEVLINK_PARAM_MAX_STRING_VALUE)
return -EINVAL;
- strcpy(value->vstr,
- nla_data(info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]));
+ strcpy(value->vstr, nla_data(param_data));
break;
case DEVLINK_PARAM_TYPE_BOOL:
- value->vbool = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA] ?
- true : false;
+ if (param_data && nla_len(param_data))
+ return -EINVAL;
+ value->vbool = nla_get_flag(param_data);
break;
}
return 0;
@@ -4232,7 +4240,7 @@ struct devlink_fmsg_item {
int attrtype;
u8 nla_type;
u16 len;
- int value[0];
+ int value[];
};
struct devlink_fmsg {
@@ -6032,6 +6040,8 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
[DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
@@ -6806,6 +6816,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
if (!attrs->split)
n = snprintf(name, len, "p%u", attrs->phys.port_number);
else
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d58c1c45a895..8e33cec9fc4e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -68,7 +68,7 @@ struct net_dm_hw_entry {
struct net_dm_hw_entries {
u32 num_entries;
- struct net_dm_hw_entry entries[0];
+ struct net_dm_hw_entry entries[];
};
struct per_cpu_dm_data {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a1670dff0629..3eff84824c8b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -920,9 +920,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
(int)FLOW_DISSECTOR_F_STOP_AT_ENCAP);
flow_keys->flags = flags;
- preempt_disable();
- result = BPF_PROG_RUN(prog, ctx);
- preempt_enable();
+ result = bpf_prog_run_pin_on_cpu(prog, ctx);
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen);
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index d21348202ba6..7440e6117c81 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -188,6 +188,13 @@ void flow_action_cookie_destroy(struct flow_action_cookie *cookie)
}
EXPORT_SYMBOL(flow_action_cookie_destroy);
+void flow_rule_match_ct(const struct flow_rule *rule,
+ struct flow_match_ct *out)
+{
+ FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_CT, out);
+}
+EXPORT_SYMBOL(flow_rule_match_ct);
+
struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 0642f91c4038..b4c87fe31be2 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -53,30 +53,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css)
kfree(css_cls_state(css));
}
+/*
+ * To avoid freezing of sockets creation for tasks with big number of threads
+ * and opened sockets lets release file_lock every 1000 iterated descriptors.
+ * New sockets will already have been created with new classid.
+ */
+
+struct update_classid_context {
+ u32 classid;
+ unsigned int batch;
+};
+
+#define UPDATE_CLASSID_BATCH 1000
+
static int update_classid_sock(const void *v, struct file *file, unsigned n)
{
int err;
+ struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file, &err);
if (sock) {
spin_lock(&cgroup_sk_update_lock);
- sock_cgroup_set_classid(&sock->sk->sk_cgrp_data,
- (unsigned long)v);
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
spin_unlock(&cgroup_sk_update_lock);
}
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+ }
return 0;
}
+static void update_classid_task(struct task_struct *p, u32 classid)
+{
+ struct update_classid_context ctx = {
+ .classid = classid,
+ .batch = UPDATE_CLASSID_BATCH
+ };
+ unsigned int fd = 0;
+
+ do {
+ task_lock(p);
+ fd = iterate_fd(p->files, fd, update_classid_sock, &ctx);
+ task_unlock(p);
+ cond_resched();
+ } while (fd);
+}
+
static void cgrp_attach(struct cgroup_taskset *tset)
{
struct cgroup_subsys_state *css;
struct task_struct *p;
cgroup_taskset_for_each(p, css, tset) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)css_cls_state(css)->classid);
- task_unlock(p);
+ update_classid_task(p, css_cls_state(css)->classid);
}
}
@@ -98,10 +128,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
css_task_iter_start(css, 0, &it);
while ((p = css_task_iter_next(&it))) {
- task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock,
- (void *)(unsigned long)cs->classid);
- task_unlock(p);
+ update_classid_task(p, cs->classid);
cond_resched();
}
css_task_iter_end(&it);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index acc849df60b5..f2b3d8dd40f4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2003,8 +2003,8 @@ static int pktgen_setup_dev(const struct pktgen_net *pn,
return -ENODEV;
}
- if (odev->type != ARPHRD_ETHER) {
- pr_err("not an ethernet device: \"%s\"\n", ifname);
+ if (odev->type != ARPHRD_ETHER && odev->type != ARPHRD_LOOPBACK) {
+ pr_err("not an ethernet or loopback device: \"%s\"\n", ifname);
err = -EINVAL;
} else if (!netif_running(odev)) {
pr_err("device is down: \"%s\"\n", ifname);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index eeb28cb85664..c479372f2cd2 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -628,7 +628,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
struct bpf_prog *prog;
int ret;
- preempt_disable();
rcu_read_lock();
prog = READ_ONCE(psock->progs.msg_parser);
if (unlikely(!prog)) {
@@ -638,7 +637,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
sk_msg_compute_data_pointers(msg);
msg->sk = sk;
- ret = BPF_PROG_RUN(prog, msg);
+ ret = bpf_prog_run_pin_on_cpu(prog, msg);
ret = sk_psock_map_verd(ret, msg->sk_redir);
psock->apply_bytes = msg->apply_bytes;
if (ret == __SK_REDIRECT) {
@@ -653,7 +652,6 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
}
out:
rcu_read_unlock();
- preempt_enable();
return ret;
}
EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
@@ -665,9 +663,7 @@ static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
skb->sk = psock->sk;
bpf_compute_data_end_sk_skb(skb);
- preempt_disable();
- ret = BPF_PROG_RUN(prog, skb);
- preempt_enable();
+ ret = bpf_prog_run_pin_on_cpu(prog, skb);
/* strparser clones the skb before handing it to a upper layer,
* meaning skb_orphan has been called. We NULL sk on the way out
* to ensure we don't trigger a BUG_ON() in skb/sk operations
diff --git a/net/core/sock.c b/net/core/sock.c
index e4af4dbc1c9e..0fc8937a7ff4 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1832,7 +1832,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_zckey, 0);
sock_reset_flag(newsk, SOCK_DONE);
- mem_cgroup_sk_alloc(newsk);
+
+ /* sk->sk_memcg will be populated at accept() time */
+ newsk->sk_memcg = NULL;
+
cgroup_sk_alloc(&newsk->sk_cgrp_data);
rcu_read_lock();