From 1f4c17a03ba7f430d63dba8c8e08ff1e2712581d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Aug 2016 13:36:08 -0400 Subject: SUNRPC: Handle EADDRNOTAVAIL on connection failures If the connect attempt immediately fails with an EADDRNOTAVAIL error, then that means our choice of source port number was bad. This error is expected when we set the SO_REUSEPORT socket option and we have 2 sockets sharing the same source and destination address and port combinations. Signed-off-by: Trond Myklebust Fixes: 402e23b4ed9ed ("SUNRPC: Fix stupid typo in xs_sock_set_reuseport") Cc: stable@vger.kernel.org # v4.0+ --- net/sunrpc/xprtsock.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 111767ab124a..c6b1d48c4319 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2295,6 +2295,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* SYN_SENT! */ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + break; + case -EADDRNOTAVAIL: + /* Source port number is unavailable. Try a new one! */ + transport->srcport = 0; } out: return ret; -- cgit v1.2.3 From ad3331acb1464024d20a184d3358f3cecc373b54 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Aug 2016 13:47:43 -0400 Subject: SUNRPC: Fix up socket autodisconnect Ensure that we don't forget to set up the disconnection timer for the case when a connect request is fulfilled after the RPC request that initiated it has timed out or been interrupted. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8313960cac52..ea244b29138b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -680,6 +680,20 @@ out: spin_unlock_bh(&xprt->transport_lock); } +static bool +xprt_has_timer(const struct rpc_xprt *xprt) +{ + return xprt->idle_timeout != 0; +} + +static void +xprt_schedule_autodisconnect(struct rpc_xprt *xprt) + __must_hold(&xprt->transport_lock) +{ + if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) + mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); +} + static void xprt_init_autodisconnect(unsigned long data) { @@ -688,6 +702,8 @@ xprt_init_autodisconnect(unsigned long data) spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv)) goto out_abort; + /* Reset xprt->last_used to avoid connect/autodisconnect cycling */ + xprt->last_used = jiffies; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); @@ -725,6 +741,7 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) goto out; xprt->snd_task =NULL; xprt->ops->release_xprt(xprt, NULL); + xprt_schedule_autodisconnect(xprt); out: spin_unlock_bh(&xprt->transport_lock); wake_up_bit(&xprt->state, XPRT_LOCKED); @@ -888,11 +905,6 @@ static void xprt_timer(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } -static inline int xprt_has_timer(struct rpc_xprt *xprt) -{ - return xprt->idle_timeout != 0; -} - /** * xprt_prepare_transmit - reserve the transport before sending a request * @task: RPC task about to send a request @@ -1280,9 +1292,7 @@ void xprt_release(struct rpc_task *task) if (!list_empty(&req->rq_list)) list_del(&req->rq_list); xprt->last_used = jiffies; - if (list_empty(&xprt->recv) && xprt_has_timer(xprt)) - mod_timer(&xprt->timer, - xprt->last_used + xprt->idle_timeout); + xprt_schedule_autodisconnect(xprt); spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); -- cgit v1.2.3 From 9130b8dbc6ac20f2dc5846e1647f5b60eafab6e3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 3 Aug 2016 20:19:48 -0400 Subject: SUNRPC: allow for upcalls for same uid but different gss service It's possible to have simultaneous upcalls for the same UIDs but different GSS service. In that case, we need to allow for the upcall to gssd to proceed so that not the same context is used by two different GSS services. Some servers lock the use of context to the GSS service. Signed-off-by: Olga Kornievskaia Cc: stable@vger.kernel.org # v3.9+ Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 23c8e7c39656..976c7812bbd5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -340,12 +340,14 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) } static struct gss_upcall_msg * -__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid) +__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; + if (auth && pos->auth->service != auth->service) + continue; atomic_inc(&pos->count); dprintk("RPC: %s found msg %p\n", __func__, pos); return pos; @@ -365,7 +367,7 @@ gss_add_msg(struct gss_upcall_msg *gss_msg) struct gss_upcall_msg *old; spin_lock(&pipe->lock); - old = __gss_find_upcall(pipe, gss_msg->uid); + old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth); if (old == NULL) { atomic_inc(&gss_msg->count); list_add(&gss_msg->list, &pipe->in_downcall); @@ -714,7 +716,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid); + gss_msg = __gss_find_upcall(pipe, uid, NULL); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; -- cgit v1.2.3 From d88e4d82efc7aca47f1e31808717c9718e466d93 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2016 16:24:28 +1000 Subject: SUNRPC: disable the use of IPv6 temporary addresses. If the net.ipv6.conf.*.use_temp_addr sysctl is set to '2', then TCP connections over IPv6 will prefer a 'private' source address. These eventually expire and become invalid, typically after a week, but the time is configurable. When the local address becomes invalid the client will not be able to receive replies from the server. Eventually the connection will timeout or break and a new connection will be established, but this can take half an hour (typically TCP connection break time). RFC 4941, which describes private IPv6 addresses, acknowledges that some applications might not work well with them and that the application may explicitly a request non-temporary (i.e. "public") address. I believe this is correct for SUNRPC clients. Without this change, a client will occasionally experience a long delay if private addresses have been enabled. The privacy offered by private addresses is of little value for an NFS server which requires client authentication. For NFSv3 this will often not be a problem because idle connections are closed after 5 minutes. For NFSv4 connections never go idle due to the period RENEW (or equivalent) request. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c6b1d48c4319..bf79212fdbf8 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2236,6 +2236,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) unsigned int keepcnt = xprt->timeout->to_retries + 1; unsigned int opt_on = 1; unsigned int timeo; + unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC; /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2247,6 +2248,16 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); + /* Avoid temporary address, they are bad for long-lived + * connections such as NFS mounts. + * RFC4941, section 3.6 suggests that: + * Individual applications, which have specific + * knowledge about the normal duration of connections, + * MAY override this as appropriate. + */ + kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES, + (char *)&addr_pref, sizeof(addr_pref)); + /* TCP user timeout (see RFC5482) */ timeo = jiffies_to_msecs(xprt->timeout->to_initval) * (xprt->timeout->to_retries + 1); -- cgit v1.2.3 From 02910177aede34d6f49e2dc14b1c5c6cd468d94f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Aug 2016 00:00:33 -0400 Subject: SUNRPC: Fix reconnection timeouts When the connect attempt fails and backs off, we should start the clock at the last connection attempt, not time at which we queue up the reconnect job. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf79212fdbf8..04b0c4190dd7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2173,6 +2173,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_unlock_bh(&sk->sk_callback_lock); } xs_udp_do_set_buffer_size(xprt); + + xprt->stat.connect_start = jiffies; } static void xs_udp_setup_socket(struct work_struct *work) @@ -2384,6 +2386,16 @@ out: xprt_wake_pending_tasks(xprt, status); } +static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt) +{ + unsigned long start, now = jiffies; + + start = xprt->stat.connect_start + xprt->reestablish_timeout; + if (time_after(start, now)) + return start - now; + return 0; +} + /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2401,6 +2413,7 @@ out: static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + unsigned long delay = 0; WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); @@ -2412,19 +2425,19 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) /* Start by resetting any existing state */ xs_reset_transport(transport); - queue_delayed_work(xprtiod_workqueue, - &transport->connect_worker, - xprt->reestablish_timeout); + delay = xs_reconnect_delay(xprt); + xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; - } else { + } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - queue_delayed_work(xprtiod_workqueue, - &transport->connect_worker, 0); - } + + queue_delayed_work(xprtiod_workqueue, + &transport->connect_worker, + delay); } /** -- cgit v1.2.3 From 3851f1cdb2b8d507b10395fc110d4c37d6121285 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 4 Aug 2016 00:08:45 -0400 Subject: SUNRPC: Limit the reconnect backoff timer to the max RPC message timeout ...and ensure that we propagate it to new transports on the same client. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 +++ net/sunrpc/xprtsock.c | 18 ++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cb49898a5a58..faac5472d14d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2638,6 +2638,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; + unsigned long reconnect_timeout; unsigned char resvport; int ret = 0; @@ -2649,6 +2650,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, return -EAGAIN; } resvport = xprt->resvport; + reconnect_timeout = xprt->max_reconnect_timeout; rcu_read_unlock(); xprt = xprt_create_transport(xprtargs); @@ -2657,6 +2659,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, goto out_put_switch; } xprt->resvport = resvport; + xprt->max_reconnect_timeout = reconnect_timeout; rpc_xprt_switch_set_roundrobin(xps); if (setup) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 04b0c4190dd7..8ede3bc52481 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -177,7 +177,6 @@ static struct ctl_table sunrpc_table[] = { * increase over time if the server is down or not responding. */ #define XS_TCP_INIT_REEST_TO (3U * HZ) -#define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) /* * TCP idle timeout; client drops the transport socket if it is idle @@ -2396,6 +2395,15 @@ static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt) return 0; } +static void xs_reconnect_backoff(struct rpc_xprt *xprt) +{ + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > xprt->max_reconnect_timeout) + xprt->reestablish_timeout = xprt->max_reconnect_timeout; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; +} + /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2426,12 +2434,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) xs_reset_transport(transport); delay = xs_reconnect_delay(xprt); + xs_reconnect_backoff(xprt); - xprt->reestablish_timeout <<= 1; - if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) - xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; - if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) - xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); @@ -2989,6 +2993,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->ops = &xs_tcp_ops; xprt->timeout = &xs_tcp_default_timeout; + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn); INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket); -- cgit v1.2.3 From 8d480326c3d6921ff5f1cc988c993bd572248deb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 5 Aug 2016 19:03:31 -0400 Subject: NFSv4: Cap the transport reconnection timer at 1/2 lease period We don't want to miss a lease period renewal due to the TCP connection failing to reconnect in a timely fashion. To ensure this doesn't happen, cap the reconnection timer so that we retry the connection attempt at least every 1/2 lease period. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index faac5472d14d..7f79fb7dc6a0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2676,6 +2676,27 @@ out_put_switch: } EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt); +static int +rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt, + struct rpc_xprt *xprt, + void *data) +{ + unsigned long timeout = *((unsigned long *)data); + + if (timeout < xprt->max_reconnect_timeout) + xprt->max_reconnect_timeout = timeout; + return 0; +} + +void +rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo) +{ + rpc_clnt_iterate_for_each_xprt(clnt, + rpc_xprt_cap_max_reconnect_timeout, + &timeo); +} +EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout); + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) static void rpc_show_header(void) { -- cgit v1.2.3