diff options
author | David S. Miller <davem@davemloft.net> | 2017-06-16 12:45:16 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-06-16 12:45:16 -0400 |
commit | 34a95132e0e44bd2e0235ee90f8c3eafe90f269d (patch) | |
tree | 65fc470f7f2ab3c73c3d3bfd46caa3473098e68e /net | |
parent | 5f886eefbb2d4b207c43b96bbe951545be4f462d (diff) | |
parent | 10beea7d7408d0b1c9208757f445c5c710239e0e (diff) |
Merge branch 'rds-tcp-misc-bug-fixes'
Sowmini Varadhan says:
====================
rds: tcp: misc bug fixes
This series contains 2 bug fixes (patch2, patch3) and one bit of
code cleanup (patch1) identified during database testing
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/rds/connection.c | 5 | ||||
-rw-r--r-- | net/rds/rds.h | 4 | ||||
-rw-r--r-- | net/rds/recv.c | 12 | ||||
-rw-r--r-- | net/rds/send.c | 11 | ||||
-rw-r--r-- | net/rds/tcp_connect.c | 3 | ||||
-rw-r--r-- | net/rds/tcp_listen.c | 40 | ||||
-rw-r--r-- | net/rds/threads.c | 5 |
7 files changed, 43 insertions, 37 deletions
diff --git a/net/rds/connection.c b/net/rds/connection.c index 6a5ebdea7d2e..382443b060cb 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -124,11 +124,6 @@ static void __rds_conn_path_init(struct rds_connection *conn, cp->cp_conn = conn; atomic_set(&cp->cp_state, RDS_CONN_DOWN); cp->cp_send_gen = 0; - /* cp_outgoing is per-path. So we can only set it here - * for the single-path transports. - */ - if (!conn->c_trans->t_mp_capable) - cp->cp_outgoing = (is_outgoing ? 1 : 0); cp->cp_reconnect_jiffies = 0; INIT_DELAYED_WORK(&cp->cp_send_w, rds_send_worker); INIT_DELAYED_WORK(&cp->cp_recv_w, rds_recv_worker); diff --git a/net/rds/rds.h b/net/rds/rds.h index 82d38ccf5e8b..d6a04a05eb79 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -92,6 +92,8 @@ enum { #define RDS_MPATH_HASH(rs, n) (jhash_1word((rs)->rs_bound_port, \ (rs)->rs_hash_initval) & ((n) - 1)) +#define IS_CANONICAL(laddr, faddr) (htonl(laddr) < htonl(faddr)) + /* Per mpath connection state */ struct rds_conn_path { struct rds_connection *cp_conn; @@ -125,8 +127,6 @@ struct rds_conn_path { unsigned int cp_unacked_packets; unsigned int cp_unacked_bytes; - unsigned int cp_outgoing:1, - cp_pad_to_32:31; unsigned int cp_index; }; diff --git a/net/rds/recv.c b/net/rds/recv.c index c70c32cb05f5..49493dbc43a1 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -215,10 +215,10 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, switch (type) { case RDS_EXTHDR_NPATHS: conn->c_npaths = min_t(int, RDS_MPATH_WORKERS, - buffer.rds_npaths); + be16_to_cpu(buffer.rds_npaths)); break; case RDS_EXTHDR_GEN_NUM: - new_peer_gen_num = buffer.rds_gen_num; + new_peer_gen_num = be32_to_cpu(buffer.rds_gen_num); break; default: pr_warn_ratelimited("ignoring unknown exthdr type " @@ -254,7 +254,8 @@ static void rds_start_mprds(struct rds_connection *conn) int i; struct rds_conn_path *cp; - if (conn->c_npaths > 1 && conn->c_laddr < conn->c_faddr) { + if (conn->c_npaths > 1 && + IS_CANONICAL(conn->c_laddr, conn->c_faddr)) { for (i = 1; i < conn->c_npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_connect_if_down(cp); @@ -339,14 +340,15 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, rds_stats_inc(s_recv_ping); rds_send_pong(cp, inc->i_hdr.h_sport); /* if this is a handshake ping, start multipath if necessary */ - if (RDS_HS_PROBE(inc->i_hdr.h_sport, inc->i_hdr.h_dport)) { + if (RDS_HS_PROBE(be16_to_cpu(inc->i_hdr.h_sport), + be16_to_cpu(inc->i_hdr.h_dport))) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); rds_start_mprds(cp->cp_conn); } goto out; } - if (inc->i_hdr.h_dport == RDS_FLAG_PROBE_PORT && + if (be16_to_cpu(inc->i_hdr.h_dport) == RDS_FLAG_PROBE_PORT && inc->i_hdr.h_sport == 0) { rds_recv_hs_exthdrs(&inc->i_hdr, cp->cp_conn); /* if this is a handshake pong, start multipath if necessary */ diff --git a/net/rds/send.c b/net/rds/send.c index 5cc64039caf7..3652a50397c7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1246,15 +1246,17 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport, rm->m_inc.i_hdr.h_flags |= h_flags; cp->cp_next_tx_seq++; - if (RDS_HS_PROBE(sport, dport) && cp->cp_conn->c_trans->t_mp_capable) { - u16 npaths = RDS_MPATH_WORKERS; + if (RDS_HS_PROBE(be16_to_cpu(sport), be16_to_cpu(dport)) && + cp->cp_conn->c_trans->t_mp_capable) { + u16 npaths = cpu_to_be16(RDS_MPATH_WORKERS); + u32 my_gen_num = cpu_to_be32(cp->cp_conn->c_my_gen_num); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_NPATHS, &npaths, sizeof(npaths)); rds_message_add_extension(&rm->m_inc.i_hdr, RDS_EXTHDR_GEN_NUM, - &cp->cp_conn->c_my_gen_num, + &my_gen_num, sizeof(u32)); } spin_unlock_irqrestore(&cp->cp_lock, flags); @@ -1293,5 +1295,6 @@ rds_send_ping(struct rds_connection *conn) } conn->c_ping_triggered = 1; spin_unlock_irqrestore(&cp->cp_lock, flags); - rds_send_probe(&conn->c_path[0], RDS_FLAG_PROBE_PORT, 0, 0); + rds_send_probe(&conn->c_path[0], cpu_to_be16(RDS_FLAG_PROBE_PORT), + 0, 0); } diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index d6839d96d539..5a62a083bb5a 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -66,7 +66,7 @@ void rds_tcp_state_change(struct sock *sk) * RDS connection as RDS_CONN_UP until the reconnect, * to avoid RDS datagram loss. */ - if (cp->cp_conn->c_laddr > cp->cp_conn->c_faddr && + if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { rds_conn_path_drop(cp); @@ -135,7 +135,6 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp) ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), O_NONBLOCK); - cp->cp_outgoing = 1; rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret); if (ret == -EINPROGRESS) ret = 0; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 507678853e6c..df291ac245d6 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -83,7 +83,7 @@ static struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) { int i; - bool peer_is_smaller = (conn->c_faddr < conn->c_laddr); + bool peer_is_smaller = IS_CANONICAL(conn->c_faddr, conn->c_laddr); int npaths = max_t(int, 1, conn->c_npaths); /* for mprds, all paths MUST be initiated by the peer @@ -112,6 +112,17 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } +static void rds_tcp_set_linger(struct socket *sock) +{ + struct linger no_linger = { + .l_onoff = 1, + .l_linger = 0, + }; + + kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER, + (char *)&no_linger, sizeof(no_linger)); +} + int rds_tcp_accept_one(struct socket *sock) { struct socket *new_sock = NULL; @@ -171,21 +182,10 @@ int rds_tcp_accept_one(struct socket *sock) if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_ERROR) goto rst_nsk; if (rs_tcp->t_sock) { - /* Need to resolve a duelling SYN between peers. - * We have an outstanding SYN to this peer, which may - * potentially have transitioned to the RDS_CONN_UP state, - * so we must quiesce any send threads before resetting - * c_transport_data. - */ - if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr) || - !cp->cp_outgoing) { - goto rst_nsk; - } else { - rds_tcp_reset_callbacks(new_sock, cp); - cp->cp_outgoing = 0; - /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(cp, RDS_CONN_RESETTING); - } + /* Duelling SYN has been handled in rds_tcp_accept_one() */ + rds_tcp_reset_callbacks(new_sock, cp); + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(cp, RDS_CONN_RESETTING); } else { rds_tcp_set_callbacks(new_sock, cp); rds_connect_path_complete(cp, RDS_CONN_CONNECTING); @@ -194,7 +194,13 @@ int rds_tcp_accept_one(struct socket *sock) ret = 0; goto out; rst_nsk: - /* reset the newly returned accept sock and bail */ + /* reset the newly returned accept sock and bail. + * It is safe to set linger on new_sock because the RDS connection + * has not been brought up on new_sock, so no RDS-level data could + * be pending on it. By setting linger, we achieve the side-effect + * of avoiding TIME_WAIT state on new_sock. + */ + rds_tcp_set_linger(new_sock); kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: diff --git a/net/rds/threads.c b/net/rds/threads.c index 3e447d056d09..2852bc1d37d4 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -127,7 +127,7 @@ void rds_queue_reconnect(struct rds_conn_path *cp) /* let peer with smaller addr initiate reconnect, to avoid duels */ if (conn->c_trans->t_type == RDS_TRANS_TCP && - conn->c_laddr > conn->c_faddr) + !IS_CANONICAL(conn->c_laddr, conn->c_faddr)) return; set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); @@ -156,7 +156,8 @@ void rds_connect_worker(struct work_struct *work) struct rds_connection *conn = cp->cp_conn; int ret; - if (cp->cp_index > 0 && cp->cp_conn->c_laddr > cp->cp_conn->c_faddr) + if (cp->cp_index > 0 && + !IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr)) return; clear_bit(RDS_RECONNECT_PENDING, &cp->cp_flags); ret = rds_conn_path_transition(cp, RDS_CONN_DOWN, RDS_CONN_CONNECTING); |