summary | refs | log | tree | commit | diff
path: root/net/core
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-06-17 15:55:34 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-06-17 15:55:34 -0700
commit da0f382029868806e88c046eb2560fdee7a9457c (patch)
tree fe8c8248c5d2023430e2a129fe7dc0c424365aea /net/core
parent eb7c825bf74755a9ea975b7a463c6d13ffa7f447 (diff)
parent 4fddbf8a99ee5a65bdd31b3ebbf5a84b9395d496 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: "Lots of bug fixes here: 1) Out of bounds access in __bpf_skc_lookup, from Lorenz Bauer. 2) Fix rate reporting in cfg80211_calculate_bitrate_he(), from John Crispin. 3) Use after free in psock backlog workqueue, from John Fastabend. 4) Fix source port matching in fdb peer flow rule of mlx5, from Raed Salem. 5) Use atomic_inc_not_zero() in fl6_sock_lookup(), from Eric Dumazet. 6) Network header needs to be set for packet redirect in nfp, from John Hurley. 7) Fix udp zerocopy refcnt, from Willem de Bruijn. 8) Don't assume linear buffers in vxlan and geneve error handlers, from Stefano Brivio. 9) Fix TOS matching in mlxsw, from Jiri Pirko. 10) More SCTP cookie memory leak fixes, from Neil Horman. 11) Fix VLAN filtering in rtl8366, from Linus Walleij. 12) Various TCP SACK payload size and fragmentation memory limit fixes, from Eric Dumazet. 13) Use after free in pneigh_get_next(), also from Eric Dumazet. 14) LAPB control block leak fix, from Jeremy Sowden." * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (145 commits) lapb: fixed leak of control-blocks.
tipc: purge deferredq list for each grp member in tipc_group_delete
ax25: fix inconsistent lock state in ax25_destroy_timer
neigh: fix use-after-free read in pneigh_get_next
tcp: fix compile error if !CONFIG_SYSCTL
hv_sock: Suppress bogus "may be used uninitialized" warnings
be2net: Fix number of Rx queues used for flow hashing
net: handle 802.1P vlan 0 packets properly
tcp: enforce tcp_min_snd_mss in tcp_mtu_probing()
tcp: add tcp_min_snd_mss sysctl
tcp: tcp_fragment() should apply sane memory limits
tcp: limit payload size of sacked skbs
Revert "net: phylink: set the autoneg state in phylink_phy_change"
bpf: fix nested bpf tracepoints with per-cpu data
bpf: Fix out of bounds memory access in bpf_sk_storage
vsock/virtio: set SOCK_DONE on peer shutdown
net: dsa: rtl8366: Fix up VLAN filtering
net: phylink: set the autoneg state in phylink_phy_change
net: add high_order_alloc_disable sysctl/static key
tcp: add tcp_tx_skb_cache sysctl
...
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/bpf_sk_storage.c    3
-rw-r--r-- net/core/dev.c              30
-rw-r--r-- net/core/ethtool.c           5
-rw-r--r-- net/core/filter.c           26
-rw-r--r-- net/core/neighbour.c         7
-rw-r--r-- net/core/skbuff.c            1
-rw-r--r-- net/core/sock.c              7
-rw-r--r-- net/core/sysctl_net_core.c   7
8 files changed, 80 insertions, 6 deletions
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cc9597a87770..d1c4e1f3be2c 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -633,7 +633,8 @@ static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&smap->map, attr);
- smap->bucket_log = ilog2(roundup_pow_of_two(num_possible_cpus()));
+ /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
+ smap->bucket_log = max_t(u32, 1, ilog2(roundup_pow_of_two(num_possible_cpus())));
nbuckets = 1U << smap->bucket_log;
smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
GFP_USER | __GFP_NOWARN);
diff --git a/net/core/dev.c b/net/core/dev.c
index eb7fb6daa1ef..d6edd218babd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4923,8 +4923,36 @@ skip_classify:
}
if (unlikely(skb_vlan_tag_present(skb))) {
- if (skb_vlan_tag_get_id(skb))
+check_vlan_id:
+ if (skb_vlan_tag_get_id(skb)) {
+ /* Vlan id is non 0 and vlan_do_receive() above couldn't
+ * find vlan device.
+ */
skb->pkt_type = PACKET_OTHERHOST;
+ } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+ skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
+ /* Outer header is 802.1P with vlan 0, inner header is
+ * 802.1Q or 802.1AD and vlan_do_receive() above could
+ * not find vlan dev for vlan id 0.
+ */
+ __vlan_hwaccel_clear_tag(skb);
+ skb = skb_vlan_untag(skb);
+ if (unlikely(!skb))
+ goto out;
+ if (vlan_do_receive(&skb))
+ /* After stripping off 802.1P header with vlan 0
+ * vlan dev is found for inner header.
+ */
+ goto another_round;
+ else if (unlikely(!skb))
+ goto out;
+ else
+ /* We have stripped outer 802.1P vlan 0 header.
+ * But could not find vlan dev.
+ * check again for vlan id to set OTHERHOST.
+ */
+ goto check_vlan_id;
+ }
/* Note: we might in the future use prio bits
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index d08b1e19ce9c..4d1011b2e24f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -3020,6 +3020,11 @@ ethtool_rx_flow_rule_create(const struct ethtool_rx_flow_spec_input *input)
match->mask.vlan.vlan_id =
ntohs(ext_m_spec->vlan_tci) & 0x0fff;
+ match->key.vlan.vlan_dei =
+ !!(ext_h_spec->vlan_tci & htons(0x1000));
+ match->mask.vlan.vlan_dei =
+ !!(ext_m_spec->vlan_tci & htons(0x1000));
+
match->key.vlan.vlan_priority =
(ntohs(ext_h_spec->vlan_tci) & 0xe000) >> 13;
match->mask.vlan.vlan_priority =
diff --git a/net/core/filter.c b/net/core/filter.c
index cd09bf5d21f4..f615e42cf4ef 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5300,7 +5300,13 @@ __bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct net *net;
int sdif;
- family = len == sizeof(tuple->ipv4) ? AF_INET : AF_INET6;
+ if (len == sizeof(tuple->ipv4))
+ family = AF_INET;
+ else if (len == sizeof(tuple->ipv6))
+ family = AF_INET6;
+ else
+ return NULL;
+
if (unlikely(family == AF_UNSPEC || flags ||
!((s32)netns_id < 0 || netns_id <= S32_MAX)))
goto out;
@@ -5333,8 +5339,14 @@ __bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
ifindex, proto, netns_id, flags);
- if (sk)
+ if (sk) {
sk = sk_to_full_sk(sk);
+ if (!sk_fullsock(sk)) {
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return NULL;
+ }
+ }
return sk;
}
@@ -5365,8 +5377,14 @@ bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
flags);
- if (sk)
+ if (sk) {
sk = sk_to_full_sk(sk);
+ if (!sk_fullsock(sk)) {
+ if (!sock_flag(sk, SOCK_RCU_FREE))
+ sock_gen_put(sk);
+ return NULL;
+ }
+ }
return sk;
}
@@ -6726,6 +6744,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET4_BIND:
case BPF_CGROUP_INET4_CONNECT:
case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP4_RECVMSG:
break;
default:
return false;
@@ -6736,6 +6755,7 @@ static bool sock_addr_is_valid_access(int off, int size,
case BPF_CGROUP_INET6_BIND:
case BPF_CGROUP_INET6_CONNECT:
case BPF_CGROUP_UDP6_SENDMSG:
+ case BPF_CGROUP_UDP6_RECVMSG:
break;
default:
return false;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0e2c07355463..9e7fc929bc50 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3203,6 +3203,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
+ __acquires(tbl->lock)
__acquires(rcu_bh)
{
struct neigh_seq_state *state = seq->private;
@@ -3213,6 +3214,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
rcu_read_lock_bh();
state->nht = rcu_dereference_bh(tbl->nht);
+ read_lock(&tbl->lock);
return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
@@ -3246,8 +3248,13 @@ out:
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
+ __releases(tbl->lock)
__releases(rcu_bh)
{
+ struct neigh_seq_state *state = seq->private;
+ struct neigh_table *tbl = state->tbl;
+
+ read_unlock(&tbl->lock);
rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 47c1aa9ee045..c8cd99c3603f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2337,6 +2337,7 @@ do_frag_list:
kv.iov_base = skb->data + offset;
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen);
if (ret <= 0)
diff --git a/net/core/sock.c b/net/core/sock.c
index 2b3701958486..af09a23e4822 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1850,6 +1850,9 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
+#ifdef CONFIG_BPF_SYSCALL
+ RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
+#endif
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
@@ -2320,6 +2323,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
* skb_page_frag_refill - check that a page_frag contains enough room
@@ -2344,7 +2348,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
pfrag->offset = 0;
- if (SKB_FRAG_PAGE_ORDER) {
+ if (SKB_FRAG_PAGE_ORDER &&
+ !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_NOWARN |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1a2685694abd..f9204719aeee 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
.extra1 = &zero,
.extra2 = &two,
},
+ {
+ .procname = "high_order_alloc_disable",
+ .data = &net_high_order_alloc_disable_key.key,
+ .maxlen = sizeof(net_high_order_alloc_disable_key),
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
{ }
};