summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-03-25 15:31:22 -0700
committerDavid S. Miller <davem@davemloft.net>2021-03-25 15:31:22 -0700
commitefd13b71a3fa31413f8d15342e01d44b60b0a432 (patch)
tree2ed1b299e25538c5a60485a1047507b49d3e0ecf /net/core
parent84c7f6c33f42a12eb036ebf0f0e3670799304120 (diff)
parent002322402dafd846c424ffa9240a937f49b48c42 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c33
-rw-r--r--net/core/drop_monitor.c23
-rw-r--r--net/core/dst.c59
-rw-r--r--net/core/filter.c12
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/sock.c44
6 files changed, 135 insertions, 38 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 4bb6dcdbed8b..48b529d59157 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1230,6 +1230,18 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
return -ENOMEM;
for_each_netdev(net, d) {
+ struct netdev_name_node *name_node;
+ list_for_each_entry(name_node, &d->name_node->list, list) {
+ if (!sscanf(name_node->name, name, &i))
+ continue;
+ if (i < 0 || i >= max_netdevices)
+ continue;
+
+ /* avoid cases where sscanf is not exact inverse of printf */
+ snprintf(buf, IFNAMSIZ, name, i);
+ if (!strncmp(buf, name_node->name, IFNAMSIZ))
+ set_bit(i, inuse);
+ }
if (!sscanf(d->name, name, &i))
continue;
if (i < 0 || i >= max_netdevices)
@@ -4345,6 +4357,13 @@ static inline void ____napi_schedule(struct softnet_data *sd,
*/
thread = READ_ONCE(napi->thread);
if (thread) {
+ /* Avoid doing set_bit() if the thread is in
+ * INTERRUPTIBLE state, cause napi_thread_wait()
+ * makes sure to proceed with napi polling
+ * if the thread is explicitly woken from here.
+ */
+ if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
+ set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
wake_up_process(thread);
return;
}
@@ -6534,6 +6553,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+ NAPIF_STATE_SCHED_THREADED |
NAPIF_STATE_PREFER_BUSY_POLL);
/* If STATE_MISSED was set, leave STATE_SCHED set,
@@ -7017,16 +7037,25 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
static int napi_thread_wait(struct napi_struct *napi)
{
+ bool woken = false;
+
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) {
- if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ /* Testing SCHED_THREADED bit here to make sure the current
+ * kthread owns this napi and could poll on this napi.
+ * Testing SCHED bit is not enough because SCHED bit might be
+ * set by some other busy poll thread or by napi_disable().
+ */
+ if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
WARN_ON(!list_empty(&napi->poll_list));
__set_current_state(TASK_RUNNING);
return 0;
}
schedule();
+ /* woken being true indicates this thread owns this napi. */
+ woken = true;
set_current_state(TASK_INTERRUPTIBLE);
}
__set_current_state(TASK_RUNNING);
@@ -11412,7 +11441,7 @@ static void __net_exit default_device_exit(struct net *net)
continue;
/* Leave virtual devices for the generic cleanup */
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
continue;
/* Push remaining network devices to init_net */
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 1eb02c2236f2..ead2a8aa57b4 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -1053,6 +1053,20 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
return 0;
err_module_put:
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&hw_data->send_timer);
+ cancel_work_sync(&hw_data->dm_alert_work);
+ while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+ struct devlink_trap_metadata *hw_metadata;
+
+ hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+ net_dm_hw_metadata_free(hw_metadata);
+ consume_skb(skb);
+ }
+ }
module_put(THIS_MODULE);
return rc;
}
@@ -1134,6 +1148,15 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
err_unregister_trace:
unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
err_module_put:
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct sk_buff *skb;
+
+ del_timer_sync(&data->send_timer);
+ cancel_work_sync(&data->dm_alert_work);
+ while ((skb = __skb_dequeue(&data->drop_queue)))
+ consume_skb(skb);
+ }
module_put(THIS_MODULE);
return rc;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index 0c01bd8d9d81..fb3bcba87744 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -237,37 +237,62 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
-static struct dst_ops md_dst_ops = {
- .family = AF_UNSPEC,
-};
+struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie)
+{
+ return NULL;
+}
-static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
+u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old)
{
- WARN_ONCE(1, "Attempting to call output on metadata dst\n");
- kfree_skb(skb);
- return 0;
+ return NULL;
}
-static int dst_md_discard(struct sk_buff *skb)
+struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
- WARN_ONCE(1, "Attempting to call input on metadata dst\n");
- kfree_skb(skb);
- return 0;
+ return NULL;
+}
+
+void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
+{
+}
+EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu);
+
+void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+ struct sk_buff *skb)
+{
+}
+EXPORT_SYMBOL_GPL(dst_blackhole_redirect);
+
+unsigned int dst_blackhole_mtu(const struct dst_entry *dst)
+{
+ unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+ return mtu ? : dst->dev->mtu;
}
+EXPORT_SYMBOL_GPL(dst_blackhole_mtu);
+
+static struct dst_ops dst_blackhole_ops = {
+ .family = AF_UNSPEC,
+ .neigh_lookup = dst_blackhole_neigh_lookup,
+ .check = dst_blackhole_check,
+ .cow_metrics = dst_blackhole_cow_metrics,
+ .update_pmtu = dst_blackhole_update_pmtu,
+ .redirect = dst_blackhole_redirect,
+ .mtu = dst_blackhole_mtu,
+};
static void __metadata_dst_init(struct metadata_dst *md_dst,
enum metadata_type type, u8 optslen)
-
{
struct dst_entry *dst;
dst = &md_dst->dst;
- dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+ dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCOUNT);
-
- dst->input = dst_md_discard;
- dst->output = dst_md_discard_out;
-
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
md_dst->type = type;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index b6732000d8a2..f5eeebf6a16f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5600,7 +5600,7 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
return -EINVAL;
- if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+ if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
return -EINVAL;
dev = __dev_via_ifindex(dev, ifindex);
@@ -5610,7 +5610,11 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
mtu = READ_ONCE(dev->mtu);
dev_len = mtu + dev->hard_header_len;
- skb_len = skb->len + len_diff; /* minus result pass check */
+
+ /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+ skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
+
+ skb_len += len_diff; /* minus result pass check */
if (skb_len <= dev_len) {
ret = BPF_MTU_CHK_RET_SUCCESS;
goto out;
@@ -5655,6 +5659,10 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
/* Add L2-header as dev MTU is L3 size */
dev_len = mtu + dev->hard_header_len;
+ /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+ if (*mtu_len)
+ xdp_len = *mtu_len + dev->hard_header_len;
+
xdp_len += len_diff; /* minus result pass check */
if (xdp_len > dev_len)
ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2ed380d096ce..5985029e43d4 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -176,7 +176,7 @@ void skb_flow_get_icmp_tci(const struct sk_buff *skb,
* avoid confusion with packets without such field
*/
if (icmp_has_id(ih->type))
- key_icmp->id = ih->un.echo.id ? : 1;
+ key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1;
else
key_icmp->id = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0ed98f20448a..cc31b601ae10 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3440,6 +3440,32 @@ static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
twsk_prot->twsk_slab = NULL;
}
+static int tw_prot_init(const struct proto *prot)
+{
+ struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
+
+ if (!twsk_prot)
+ return 0;
+
+ twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
+ prot->name);
+ if (!twsk_prot->twsk_slab_name)
+ return -ENOMEM;
+
+ twsk_prot->twsk_slab =
+ kmem_cache_create(twsk_prot->twsk_slab_name,
+ twsk_prot->twsk_obj_size, 0,
+ SLAB_ACCOUNT | prot->slab_flags,
+ NULL);
+ if (!twsk_prot->twsk_slab) {
+ pr_crit("%s: Can't create timewait sock SLAB cache!\n",
+ prot->name);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
if (!rsk_prot)
@@ -3496,22 +3522,8 @@ int proto_register(struct proto *prot, int alloc_slab)
if (req_prot_init(prot))
goto out_free_request_sock_slab;
- if (prot->twsk_prot != NULL) {
- prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
-
- if (prot->twsk_prot->twsk_slab_name == NULL)
- goto out_free_request_sock_slab;
-
- prot->twsk_prot->twsk_slab =
- kmem_cache_create(prot->twsk_prot->twsk_slab_name,
- prot->twsk_prot->twsk_obj_size,
- 0,
- SLAB_ACCOUNT |
- prot->slab_flags,
- NULL);
- if (prot->twsk_prot->twsk_slab == NULL)
- goto out_free_timewait_sock_slab;
- }
+ if (tw_prot_init(prot))
+ goto out_free_timewait_sock_slab;
}
mutex_lock(&proto_list_mutex);