summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/filter.c26
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/dsa/tag_sja1105.c1
-rw-r--r--net/ipv4/fib_frontend.c5
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/ip6_fib.c18
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c13
-rw-r--r--net/rds/ib.h1
-rw-r--r--net/rds/ib_cm.c9
-rw-r--r--net/rds/ib_frmr.c84
-rw-r--r--net/rds/ib_mr.h4
-rw-r--r--net/rds/ib_rdma.c60
-rw-r--r--net/sched/Kconfig2
-rw-r--r--net/sched/cls_api.c1
-rw-r--r--net/sched/sch_fq_codel.c2
-rw-r--r--net/sched/sch_sfq.c2
-rw-r--r--net/sched/sch_taprio.c6
-rw-r--r--net/sctp/sm_make_chunk.c12
-rw-r--r--net/tipc/node.c1
-rw-r--r--net/xdp/xdp_umem.c16
-rw-r--r--net/xdp/xsk.c13
25 files changed, 192 insertions, 102 deletions
diff --git a/net/core/filter.c b/net/core/filter.c
index 47f6386fb17a..4e2a79b2fd77 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4335,7 +4335,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
ret = tcp_set_congestion_control(sk, name, false,
- reinit);
+ reinit, true);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -6884,20 +6884,30 @@ static bool sock_addr_is_valid_access(int off, int size,
case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
msg_src_ip6[3]):
- /* Only narrow read access allowed for now. */
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
+
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ user_ip6))
+ return true;
+
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ msg_src_ip6))
+ return true;
+
if (!bpf_ctx_narrow_access_ok(off, size, size_default))
return false;
} else {
- if (bpf_ctx_wide_store_ok(off, size,
- struct bpf_sock_addr,
- user_ip6))
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ user_ip6))
return true;
- if (bpf_ctx_wide_store_ok(off, size,
- struct bpf_sock_addr,
- msg_src_ip6))
+ if (bpf_ctx_wide_access_ok(off, size,
+ struct bpf_sock_addr,
+ msg_src_ip6))
return true;
if (size != size_default)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 26da97359d5b..f79e61c570ea 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1124,6 +1124,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
atomic_set(&neigh->probes,
NEIGH_VAR(neigh->parms, UCAST_PROBES));
+ neigh_del_timer(neigh);
neigh->nud_state = NUD_INCOMPLETE;
neigh->updated = now;
next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
@@ -1140,6 +1141,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
}
} else if (neigh->nud_state & NUD_STALE) {
neigh_dbg(2, "neigh %p is delayed\n", neigh);
+ neigh_del_timer(neigh);
neigh->nud_state = NUD_DELAY;
neigh->updated = jiffies;
neigh_add_timer(neigh, jiffies +
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6f1e31f674a3..0338820ee0ec 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -762,7 +762,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt)
printk("%sdev name=%s feat=0x%pNF\n",
level, dev->name, &dev->features);
if (sk)
- printk("%ssk family=%hu type=%hu proto=%hu\n",
+ printk("%ssk family=%hu type=%u proto=%u\n",
level, sk->sk_family, sk->sk_type, sk->sk_protocol);
if (full_pkt && headroom)
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 1d96c9d4a8e9..26363d72d25b 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -216,6 +216,7 @@ static struct sk_buff
if (!skb) {
dev_err_ratelimited(dp->ds->dev,
"Failed to copy stampable skb\n");
+ spin_unlock(&sp->data->meta_lock);
return NULL;
}
sja1105_transfer_meta(skb, meta);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 317339cd7f03..e8bc939b56dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -388,6 +388,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
+ /* This is not common, loopback packets retain skb_dst so normally they
+ * would not even hit this slow path.
+ */
+ dev_match = dev_match || (res.type == RTN_LOCAL &&
+ dev == net->loopback_dev);
if (dev_match) {
ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
return ret;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7846afacdf0b..776905899ac0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2785,7 +2785,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true, true);
+ err = tcp_set_congestion_control(sk, name, true, true,
+ ns_capable(sock_net(sk)->user_ns,
+ CAP_NET_ADMIN));
release_sock(sk);
return err;
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index e1862b64a90f..c445a81d144e 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -333,7 +333,8 @@ out:
* tcp_reinit_congestion_control (if the current congestion control was
* already initialized.
*/
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
+ bool reinit, bool cap_net_admin)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -369,8 +370,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
} else {
err = -EBUSY;
}
- } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
- ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
+ } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) {
err = -EPERM;
} else if (!try_module_get(ca->owner)) {
err = -EBUSY;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c21862ba9c02..d88821c794fb 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2170,7 +2170,7 @@ start_lookup:
/* Initialize UDP checksum. If exited with zero value (success),
* CHECKSUM_UNNECESSARY means, that no more checks are required.
- * Otherwise, csum completion requires chacksumming packet body,
+ * Otherwise, csum completion requires checksumming packet body,
* including udp header and folding it to skb->csum.
*/
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 49884f96232b..87f47bc55c5e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1151,8 +1151,24 @@ add:
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_ADD,
rt, extack);
- if (err)
+ if (err) {
+ struct fib6_info *sibling, *next_sibling;
+
+ /* If the route has siblings, then it first
+ * needs to be unlinked from them.
+ */
+ if (!rt->fib6_nsiblings)
+ return err;
+
+ list_for_each_entry_safe(sibling, next_sibling,
+ &rt->fib6_siblings,
+ fib6_siblings)
+ sibling->fib6_nsiblings--;
+ rt->fib6_nsiblings = 0;
+ list_del_init(&rt->fib6_siblings);
+ rt6_multipath_rebalance(next_sibling);
return err;
+ }
}
rcu_assign_pointer(rt->fib6_next, iter);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8b0c33fb19a2..e49fec767a10 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2563,7 +2563,7 @@ static struct dst_entry *rt6_check(struct rt6_info *rt,
{
u32 rt_cookie = 0;
- if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
+ if (!from || !fib6_get_cookie_safe(from, &rt_cookie) ||
rt_cookie != cookie)
return NULL;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 80610899a323..b2ccbc473127 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -900,12 +900,17 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPV6,
0, dst, tiph->saddr, 0, 0,
sock_net_uid(tunnel->net, NULL));
- rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
- goto tx_error_icmp;
+ rt = dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr);
+ if (!rt) {
+ rt = ip_route_output_flow(tunnel->net, &fl4, NULL);
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error_icmp;
+ }
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, fl4.saddr);
}
+
if (rt->rt_type != RTN_UNICAST) {
ip_rt_put(rt);
dev->stats.tx_carrier_errors++;
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 66c03c7665b2..303c6ee8bdb7 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -156,6 +156,7 @@ struct rds_ib_connection {
/* To control the number of wrs from fastreg */
atomic_t i_fastreg_wrs;
+ atomic_t i_fastreg_inuse_count;
/* interrupt handling */
struct tasklet_struct i_send_tasklet;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index c36d89cd14a1..fddaa09f7b0d 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -40,6 +40,7 @@
#include "rds_single_path.h"
#include "rds.h"
#include "ib.h"
+#include "ib_mr.h"
/*
* Set the selected protocol version
@@ -526,7 +527,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
attr.qp_type = IB_QPT_RC;
attr.send_cq = ic->i_send_cq;
attr.recv_cq = ic->i_recv_cq;
- atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
/*
* XXX this can fail if max_*_wr is too large? Are we supposed
@@ -993,6 +993,11 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_cm_id, err);
}
+ /* kick off "flush_worker" for all pools in order to reap
+ * all FRMR registrations that are still marked "FRMR_IS_INUSE"
+ */
+ rds_ib_flush_mrs();
+
/*
* We want to wait for tx and rx completion to finish
* before we tear down the connection, but we have to be
@@ -1005,6 +1010,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
wait_event(rds_ib_ring_empty_wait,
rds_ib_ring_empty(&ic->i_recv_ring) &&
(atomic_read(&ic->i_signaled_sends) == 0) &&
+ (atomic_read(&ic->i_fastreg_inuse_count) == 0) &&
(atomic_read(&ic->i_fastreg_wrs) == RDS_IB_DEFAULT_FR_WR));
tasklet_kill(&ic->i_send_tasklet);
tasklet_kill(&ic->i_recv_tasklet);
@@ -1132,6 +1138,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
spin_lock_init(&ic->i_ack_lock);
#endif
atomic_set(&ic->i_signaled_sends, 0);
+ atomic_set(&ic->i_fastreg_wrs, RDS_IB_DEFAULT_FR_WR);
/*
* rds_ib_conn_shutdown() waits for these to be emptied so they
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 32ae26ed58a0..06ecf9d2d4bf 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -32,6 +32,24 @@
#include "ib_mr.h"
+static inline void
+rds_transition_frwr_state(struct rds_ib_mr *ibmr,
+ enum rds_ib_fr_state old_state,
+ enum rds_ib_fr_state new_state)
+{
+ if (cmpxchg(&ibmr->u.frmr.fr_state,
+ old_state, new_state) == old_state &&
+ old_state == FRMR_IS_INUSE) {
+ /* enforce order of ibmr->u.frmr.fr_state update
+ * before decrementing i_fastreg_inuse_count
+ */
+ smp_mb__before_atomic();
+ atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
+ if (waitqueue_active(&rds_ib_ring_empty_wait))
+ wake_up(&rds_ib_ring_empty_wait);
+ }
+}
+
static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
int npages)
{
@@ -75,6 +93,8 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
pool->max_items_soft = pool->max_items;
frmr->fr_state = FRMR_IS_FREE;
+ init_waitqueue_head(&frmr->fr_inv_done);
+ init_waitqueue_head(&frmr->fr_reg_done);
return ibmr;
out_no_cigar:
@@ -116,13 +136,19 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
if (unlikely(ret != ibmr->sg_len))
return ret < 0 ? ret : -EINVAL;
+ if (cmpxchg(&frmr->fr_state,
+ FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
+ return -EBUSY;
+
+ atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
+
/* Perform a WR for the fast_reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
* inside the fast_reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
- frmr->fr_state = FRMR_IS_INUSE;
+ frmr->fr_reg = true;
memset(&reg_wr, 0, sizeof(reg_wr));
reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
@@ -138,12 +164,23 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
if (unlikely(ret)) {
/* Failure here can be because of -ENOMEM as well */
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
+
atomic_inc(&ibmr->ic->i_fastreg_wrs);
if (printk_ratelimit())
pr_warn("RDS/IB: %s returned error(%d)\n",
__func__, ret);
+ goto out;
}
+
+ /* Wait for the registration to complete in order to prevent an invalid
+ * access error resulting from a race between the memory region already
+ * being accessed while registration is still pending.
+ */
+ wait_event(frmr->fr_reg_done, !frmr->fr_reg);
+
+out:
+
return ret;
}
@@ -255,12 +292,29 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
if (unlikely(ret)) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
frmr->fr_inv = false;
+ /* enforce order of frmr->fr_inv update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
atomic_inc(&ibmr->ic->i_fastreg_wrs);
pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
goto out;
}
+
+ /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
+ * 1) avoid a silly bouncing between "clean_list" and "drop_list"
+ * triggered by function "rds_ib_reg_frmr" as it is releases frmr
+ * regions whose state is not "FRMR_IS_FREE" right away.
+ * 2) prevents an invalid access error in a race
+ * from a pending "IB_WR_LOCAL_INV" operation
+ * with a teardown ("dma_unmap_sg", "put_page")
+ * and de-registration ("ib_dereg_mr") of the corresponding
+ * memory region.
+ */
+ wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
+
out:
return ret;
}
@@ -271,7 +325,7 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
struct rds_ib_frmr *frmr = &ibmr->u.frmr;
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_IS_STALE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
if (rds_conn_up(ic->conn))
rds_ib_conn_error(ic->conn,
"frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
@@ -283,10 +337,20 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
}
if (frmr->fr_inv) {
- frmr->fr_state = FRMR_IS_FREE;
+ rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
frmr->fr_inv = false;
+ wake_up(&frmr->fr_inv_done);
}
+ if (frmr->fr_reg) {
+ frmr->fr_reg = false;
+ wake_up(&frmr->fr_reg_done);
+ }
+
+ /* enforce order of frmr->{fr_reg,fr_inv} update
+ * before incrementing i_fastreg_wrs
+ */
+ smp_mb__before_atomic();
atomic_inc(&ic->i_fastreg_wrs);
}
@@ -295,14 +359,18 @@ void rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
{
struct rds_ib_mr *ibmr, *next;
struct rds_ib_frmr *frmr;
- int ret = 0;
+ int ret = 0, ret2;
unsigned int freed = *nfreed;
/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
list_for_each_entry(ibmr, list, unmap_list) {
- if (ibmr->sg_dma_len)
- ret |= rds_ib_post_inv(ibmr);
+ if (ibmr->sg_dma_len) {
+ ret2 = rds_ib_post_inv(ibmr);
+ if (ret2 && !ret)
+ ret = ret2;
+ }
}
+
if (ret)
pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 5da12c248431..9045a8c0edff 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -57,6 +57,9 @@ struct rds_ib_frmr {
struct ib_mr *mr;
enum rds_ib_fr_state fr_state;
bool fr_inv;
+ wait_queue_head_t fr_inv_done;
+ bool fr_reg;
+ wait_queue_head_t fr_reg_done;
struct ib_send_wr fr_wr;
unsigned int dma_npages;
unsigned int sg_byte_len;
@@ -97,6 +100,7 @@ struct rds_ib_mr_pool {
struct llist_head free_list; /* unused MRs */
struct llist_head clean_list; /* unused & unmapped MRs */
wait_queue_head_t flush_wait;
+ spinlock_t clean_lock; /* "clean_list" concurrency */
atomic_t free_pinned; /* memory pinned by free MRs */
unsigned long max_items;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0b347f46b2f4..c8c1e3ae8d84 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -40,9 +40,6 @@
struct workqueue_struct *rds_ib_mr_wq;
-static DEFINE_PER_CPU(unsigned long, clean_list_grace);
-#define CLEAN_LIST_BUSY_BIT 0
-
static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
{
struct rds_ib_device *rds_ibdev;
@@ -195,12 +192,11 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
{
struct rds_ib_mr *ibmr = NULL;
struct llist_node *ret;
- unsigned long *flag;
+ unsigned long flags;
- preempt_disable();
- flag = this_cpu_ptr(&clean_list_grace);
- set_bit(CLEAN_LIST_BUSY_BIT, flag);
+ spin_lock_irqsave(&pool->clean_lock, flags);
ret = llist_del_first(&pool->clean_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
if (ret) {
ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
if (pool->pool_type == RDS_IB_MR_8K_POOL)
@@ -209,23 +205,9 @@ struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
}
- clear_bit(CLEAN_LIST_BUSY_BIT, flag);
- preempt_enable();
return ibmr;
}
-static inline void wait_clean_list_grace(void)
-{
- int cpu;
- unsigned long *flag;
-
- for_each_online_cpu(cpu) {
- flag = &per_cpu(clean_list_grace, cpu);
- while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
- cpu_relax();
- }
-}
-
void rds_ib_sync_mr(void *trans_private, int direction)
{
struct rds_ib_mr *ibmr = trans_private;
@@ -324,8 +306,7 @@ static unsigned int llist_append_to_list(struct llist_head *llist,
* of clusters. Each cluster has linked llist nodes of
* MR_CLUSTER_SIZE mrs that are ready for reuse.
*/
-static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
- struct list_head *list,
+static void list_to_llist_nodes(struct list_head *list,
struct llist_node **nodes_head,
struct llist_node **nodes_tail)
{
@@ -402,8 +383,13 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
*/
dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
- if (free_all)
+ if (free_all) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_append_to_list(&pool->clean_list, &unmap_list);
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
free_goal = rds_ib_flush_goal(pool, free_all);
@@ -416,27 +402,20 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
if (!list_empty(&unmap_list)) {
- /* we have to make sure that none of the things we're about
- * to put on the clean list would race with other cpus trying
- * to pull items off. The llist would explode if we managed to
- * remove something from the clean list and then add it back again
- * while another CPU was spinning on that same item in llist_del_first.
- *
- * This is pretty unlikely, but just in case wait for an llist grace period
- * here before adding anything back into the clean list.
- */
- wait_clean_list_grace();
-
- list_to_llist_nodes(pool, &unmap_list, &clean_nodes, &clean_tail);
+ unsigned long flags;
+
+ list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
if (ibmr_ret) {
*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
clean_nodes = clean_nodes->next;
}
/* more than one entry in llist nodes */
- if (clean_nodes)
+ if (clean_nodes) {
+ spin_lock_irqsave(&pool->clean_lock, flags);
llist_add_batch(clean_nodes, clean_tail,
&pool->clean_list);
-
+ spin_unlock_irqrestore(&pool->clean_lock, flags);
+ }
}
atomic_sub(unpinned, &pool->free_pinned);
@@ -471,7 +450,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
else
rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
- return ERR_PTR(-EAGAIN);
+ break;
}
/* We do have some empty MRs. Flush them out. */
@@ -485,7 +464,7 @@ struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
return ibmr;
}
- return ibmr;
+ return NULL;
}
static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
@@ -610,6 +589,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
init_llist_head(&pool->free_list);
init_llist_head(&pool->drop_list);
init_llist_head(&pool->clean_list);
+ spin_lock_init(&pool->clean_lock);
mutex_init(&pool->flush_lock);
init_waitqueue_head(&pool->flush_wait);
INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index dd55b9ac3a66..afd2ba157a13 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -942,7 +942,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
- depends on NET_CLS_ACT && NF_CONNTRACK
+ depends on NET_CLS_ACT && NF_CONNTRACK && NF_NAT
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 278014e26aec..d144233423c5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -2152,6 +2152,7 @@ replay:
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
tfilter_put(tp, fh);
+ q->flags &= ~TCQ_F_CAN_BYPASS;
}
errout:
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index e2faf33d282b..d59fbcc745d1 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -596,8 +596,6 @@ static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid)
static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- /* we cannot bypass queue discipline anymore */
- sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 420bd8411677..68404a9d2ce4 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -824,8 +824,6 @@ static unsigned long sfq_find(struct Qdisc *sch, u32 classid)
static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
u32 classid)
{
- /* we cannot bypass queue discipline anymore */
- sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 388750ddc57a..c39db507ba3f 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -75,7 +75,7 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
- int txtime_delay;
+ u32 txtime_delay;
};
static ktime_t sched_base_time(const struct sched_gate_list *sched)
@@ -1113,7 +1113,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto unlock;
}
- q->txtime_delay = nla_get_s32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
+ q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]);
}
if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) &&
@@ -1430,7 +1430,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
goto options_error;
if (q->txtime_delay &&
- nla_put_s32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
+ nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
if (oper && dump_schedule(skb, oper))
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ed39396b9bba..36bd8a6e82df 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2582,8 +2582,7 @@ do_addr_param:
case SCTP_PARAM_STATE_COOKIE:
asoc->peer.cookie_len =
ntohs(param.p->length) - sizeof(struct sctp_paramhdr);
- if (asoc->peer.cookie)
- kfree(asoc->peer.cookie);
+ kfree(asoc->peer.cookie);
asoc->peer.cookie = kmemdup(param.cookie->body, asoc->peer.cookie_len, gfp);
if (!asoc->peer.cookie)
retval = 0;
@@ -2648,8 +2647,7 @@ do_addr_param:
goto fall_through;
/* Save peer's random parameter */
- if (asoc->peer.peer_random)
- kfree(asoc->peer.peer_random);
+ kfree(asoc->peer.peer_random);
asoc->peer.peer_random = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_random) {
@@ -2663,8 +2661,7 @@ do_addr_param:
goto fall_through;
/* Save peer's HMAC list */
- if (asoc->peer.peer_hmacs)
- kfree(asoc->peer.peer_hmacs);
+ kfree(asoc->peer.peer_hmacs);
asoc->peer.peer_hmacs = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_hmacs) {
@@ -2680,8 +2677,7 @@ do_addr_param:
if (!ep->auth_enable)
goto fall_through;
- if (asoc->peer.peer_chunks)
- kfree(asoc->peer.peer_chunks);
+ kfree(asoc->peer.peer_chunks);
asoc->peer.peer_chunks = kmemdup(param.p,
ntohs(param.p->length), gfp);
if (!asoc->peer.peer_chunks)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 324a1f91b394..3a5be1d7e572 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1807,6 +1807,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
__skb_queue_head_init(&xmitq);
/* Ensure message is well-formed before touching the header */
+ TIPC_SKB_CB(skb)->validated = false;
if (unlikely(!tipc_msg_validate(&skb)))
goto discard;
hdr = buf_msg(skb);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 20c91f02d3d8..83de74ca729a 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -87,21 +87,20 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
struct netdev_bpf bpf;
int err = 0;
+ ASSERT_RTNL();
+
force_zc = flags & XDP_ZEROCOPY;
force_copy = flags & XDP_COPY;
if (force_zc && force_copy)
return -EINVAL;
- rtnl_lock();
- if (xdp_get_umem_from_qid(dev, queue_id)) {
- err = -EBUSY;
- goto out_rtnl_unlock;
- }
+ if (xdp_get_umem_from_qid(dev, queue_id))
+ return -EBUSY;
err = xdp_reg_umem_at_qid(dev, umem, queue_id);
if (err)
- goto out_rtnl_unlock;
+ return err;
umem->dev = dev;
umem->queue_id = queue_id;
@@ -110,7 +109,7 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
if (force_copy)
/* For copy-mode, we are done. */
- goto out_rtnl_unlock;
+ return 0;
if (!dev->netdev_ops->ndo_bpf ||
!dev->netdev_ops->ndo_xsk_async_xmit) {
@@ -125,7 +124,6 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
err = dev->netdev_ops->ndo_bpf(dev, &bpf);
if (err)
goto err_unreg_umem;
- rtnl_unlock();
umem->zc = true;
return 0;
@@ -135,8 +133,6 @@ err_unreg_umem:
err = 0; /* fallback to copy mode */
if (err)
xdp_clear_umem_at_qid(dev, queue_id);
-out_rtnl_unlock:
- rtnl_unlock();
return err;
}
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d4d6f10aa936..59b57d708697 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -240,6 +240,9 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
mutex_lock(&xs->mutex);
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
+ goto out;
+
while (xskq_peek_desc(xs->tx, &desc)) {
char *buffer;
u64 addr;
@@ -250,12 +253,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- if (xskq_reserve_addr(xs->umem->cq))
- goto out;
-
- if (xs->queue_id >= xs->dev->real_num_tx_queues)
- goto out;
-
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
@@ -267,7 +264,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
addr = desc.addr;
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err)) {
+ if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
kfree_skb(skb);
goto out;
}
@@ -433,6 +430,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY))
return -EINVAL;
+ rtnl_lock();
mutex_lock(&xs->mutex);
if (xs->state != XSK_READY) {
err = -EBUSY;
@@ -518,6 +516,7 @@ out_unlock:
xs->state = XSK_BOUND;
out_release:
mutex_unlock(&xs->mutex);
+ rtnl_unlock();
return err;
}