From f0ec4f1d32ad49a23b93156949208dd9348e3590 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:37 +0200 Subject: net/smc: save state of last sent CDC message When a link goes down and all connections of this link need to be switched to another link, the producer cursor and the sequence number of the last successfully sent CDC message must be known. Add the two fields to the SMC connection and update them in the tx completion handler. To allow matching of sequence numbers in error cases, reset the seqno to the old value in smc_cdc_msg_send() when the actual send failed. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc.h | 4 ++++ net/smc/smc_cdc.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/net/smc/smc.h b/net/smc/smc.h index 1a084afa7372..1e9113771600 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -143,6 +143,9 @@ struct smc_connection { * .prod cf. TCP snd_nxt * .cons cf. TCP sends ack */ + union smc_host_cursor local_tx_ctrl_fin; + /* prod crsr - confirmed by peer + */ union smc_host_cursor tx_curs_prep; /* tx - prepared data * snd_max..wmem_alloc */ @@ -154,6 +157,7 @@ struct smc_connection { */ atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ + u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index f64589d823aa..c5e33296e55c 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -47,6 +47,9 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, /* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */ smp_mb__after_atomic(); smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn); + smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor, + conn); + conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); @@ -104,6 +107,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0; + } else { + conn->tx_cdc_seq--; + conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; } return rc; -- cgit v1.2.3 From c6f02ebeea3a0ff4bddddf0fd82303190ebb3dd1 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:38 +0200 Subject: net/smc: switch connections to alternate link Add smc_switch_conns() to switch all connections from a link that is going down. Find another link to switch the connections to, and switch each connection to the new link. smc_switch_cursor() updates the cursors of a connection to the state of the last successfully sent CDC message. When there is no link to switch to, terminate the link group. Call smc_switch_conns() when a link is going down. Since the link of a connection can now change, adapt the CDC and TX functions to detect and handle link switches. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_cdc.c | 18 +++++++- net/smc/smc_cdc.h | 1 + net/smc/smc_core.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++- net/smc/smc_core.h | 2 + net/smc/smc_llc.c | 6 +-- net/smc/smc_tx.c | 12 ++++- 6 files changed, 162 insertions(+), 9 deletions(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index c5e33296e55c..3ca986066f32 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -56,11 +56,11 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, } int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_link *link, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend) { - struct smc_link *link = conn->lnk; int rc; rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, @@ -119,13 +119,27 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) { struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; + struct smc_link *link; + bool again = false; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend); +again: + link = conn->lnk; + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); if (rc) return rc; spin_lock_bh(&conn->send_lock); + if (link != conn->lnk) { + /* link of connection changed, try again one time*/ + spin_unlock_bh(&conn->send_lock); + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); + if (again) + return -ENOLINK; + again = true; + goto again; + } rc = smc_cdc_msg_send(conn, wr_buf, pend); spin_unlock_bh(&conn->send_lock); return rc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 861dc24c588c..42246b4bdcc9 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -304,6 +304,7 @@ struct smc_cdc_tx_pend { }; int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_link *link, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 32a6cadc5c1f..21bc1ec07e99 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -432,6 +432,135 @@ out: return rc; } +static int smc_write_space(struct smc_connection *conn) +{ + int buffer_len = conn->peer_rmbe_size; + union smc_host_cursor prod; + union smc_host_cursor cons; + int space; + + smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn); + smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); + /* determine rx_buf space */ + space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod); + return space; +} + +static int smc_switch_cursor(struct smc_sock *smc) +{ + struct smc_connection *conn = &smc->conn; + union smc_host_cursor cons, fin; + int rc = 0; + int diff; + + smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn); + smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn); + /* set prod cursor to old state, enforce tx_rdma_writes() */ + smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn); + smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn); + + if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) { + /* cons cursor advanced more than fin, and prod was set + * fin above, so now prod is smaller than cons. Fix that. 
+ */ + diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons); + smc_curs_add(conn->sndbuf_desc->len, + &conn->tx_curs_sent, diff); + smc_curs_add(conn->sndbuf_desc->len, + &conn->tx_curs_fin, diff); + + smp_mb__before_atomic(); + atomic_add(diff, &conn->sndbuf_space); + smp_mb__after_atomic(); + + smc_curs_add(conn->peer_rmbe_size, + &conn->local_tx_ctrl.prod, diff); + smc_curs_add(conn->peer_rmbe_size, + &conn->local_tx_ctrl_fin, diff); + } + /* recalculate, value is used by tx_rdma_writes() */ + atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn)); + + if (smc->sk.sk_state != SMC_INIT && + smc->sk.sk_state != SMC_CLOSED) { + /* tbd: call rc = smc_cdc_get_slot_and_msg_send(conn); */ + if (!rc) { + schedule_delayed_work(&conn->tx_work, 0); + smc->sk.sk_data_ready(&smc->sk); + } + } + return rc; +} + +struct smc_link *smc_switch_conns(struct smc_link_group *lgr, + struct smc_link *from_lnk, bool is_dev_err) +{ + struct smc_link *to_lnk = NULL; + struct smc_connection *conn; + struct smc_sock *smc; + struct rb_node *node; + int i, rc = 0; + + /* link is inactive, wake up tx waiters */ + smc_wr_wakeup_tx_wait(from_lnk); + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_ACTIVE || + i == from_lnk->link_idx) + continue; + if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && + from_lnk->ibport == lgr->lnk[i].ibport) { + continue; + } + to_lnk = &lgr->lnk[i]; + break; + } + if (!to_lnk) { + smc_lgr_terminate_sched(lgr); + return NULL; + } +again: + read_lock_bh(&lgr->conns_lock); + for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { + conn = rb_entry(node, struct smc_connection, alert_node); + if (conn->lnk != from_lnk) + continue; + smc = container_of(conn, struct smc_sock, conn); + /* conn->lnk not yet set in SMC_INIT state */ + if (smc->sk.sk_state == SMC_INIT) + continue; + if (smc->sk.sk_state == SMC_CLOSED || + smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || + smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || + smc->sk.sk_state == SMC_APPFINCLOSEWAIT || + smc->sk.sk_state == SMC_APPCLOSEWAIT1 || + smc->sk.sk_state == SMC_APPCLOSEWAIT2 || + smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || + smc->sk.sk_state == SMC_PEERABORTWAIT || + smc->sk.sk_state == SMC_PROCESSABORT) { + spin_lock_bh(&conn->send_lock); + conn->lnk = to_lnk; + spin_unlock_bh(&conn->send_lock); + continue; + } + sock_hold(&smc->sk); + read_unlock_bh(&lgr->conns_lock); + /* avoid race with smcr_tx_sndbuf_nonempty() */ + spin_lock_bh(&conn->send_lock); + conn->lnk = to_lnk; + rc = smc_switch_cursor(smc); + spin_unlock_bh(&conn->send_lock); + sock_put(&smc->sk); + if (rc) { + smcr_link_down_cond_sched(to_lnk); + return NULL; + } + goto again; + } + read_unlock_bh(&lgr->conns_lock); + return to_lnk; +} + static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, struct smc_link_group *lgr) { @@ -943,8 +1072,7 @@ static void smcr_link_down(struct smc_link *lnk) return; smc_ib_modify_qp_reset(lnk); - to_lnk = NULL; - /* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */ + to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk); return; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 7fe53feb9dc4..584f11230c4f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -380,6 +380,8 @@ void smcr_link_clear(struct smc_link *lnk); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); +struct smc_link 
*smc_switch_conns(struct smc_link_group *lgr, + struct smc_link *from_lnk, bool is_dev_err); void smcr_link_down_cond(struct smc_link *lnk); void smcr_link_down_cond_sched(struct smc_link *lnk); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 7675ccd6f3c3..8d2368accbad 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -933,7 +933,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr) return; /* no asymmetric link */ if (!smc_link_downing(&lnk_asym->state)) return; - /* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */ + lnk_new = smc_switch_conns(lgr, lnk_asym, false); smc_wr_tx_wait_no_pending_sends(lnk_asym); if (!lnk_new) goto out_free; @@ -1195,7 +1195,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) smc_llc_send_message(lnk, &qentry->msg); /* response */ if (smc_link_downing(&lnk_del->state)) { - /* tbd: call smc_switch_conns(lgr, lnk_del, false); */ + smc_switch_conns(lgr, lnk_del, false); smc_wr_tx_wait_no_pending_sends(lnk_del); } smcr_link_clear(lnk_del); @@ -1245,7 +1245,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) goto out; /* asymmetric link already deleted */ if (smc_link_downing(&lnk_del->state)) { - /* tbd: call smc_switch_conns(lgr, lnk_del, false); */ + smc_switch_conns(lgr, lnk_del, false); smc_wr_tx_wait_no_pending_sends(lnk_del); } if (!list_empty(&lgr->list)) { diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 417204572a69..54ba0443847e 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -482,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; + struct smc_link *link = conn->lnk; struct smc_rdma_wr *wr_rdma_buf; struct smc_cdc_tx_pend *pend; struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend); + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -505,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) } spin_lock_bh(&conn->send_lock); + if (link != conn->lnk) { + /* link of connection changed, tx_work will restart */ + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); + rc = -ENOLINK; + goto out_unlock; + } if (!pflags->urg_data_present) { rc = smc_tx_rdma_writes(conn, wr_rdma_buf); if (rc) { - smc_wr_tx_put_slot(conn->lnk, + smc_wr_tx_put_slot(link, (struct smc_wr_tx_pend_priv *)pend); goto out_unlock; } -- cgit v1.2.3 From 29bd73dba4f72970895a2459f7190d388f5204f7 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:39 +0200 Subject: net/smc: send failover validation message When a connection is switched to a new link, a link validation message must be sent to the peer over the new link, containing the sequence number of the last CDC message that was sent over the old link. The peer validates that this sequence number is the same as or lower than the number it has received, and aborts the connection if messages were lost. Add smcr_cdc_msg_send_validation() to send the validation message, and call it when a connection was switched in smc_switch_cursor(). Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_cdc.c | 25 +++++++++++++++++++++++++ net/smc/smc_cdc.h | 1 + net/smc/smc_core.c | 2 +- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index 3ca986066f32..e6b7eef71831 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -115,6 +115,31 @@ int smc_cdc_msg_send(struct smc_connection *conn, return rc; } +/* send a validation msg indicating the move of a conn to an other QP link */ +int smcr_cdc_msg_send_validation(struct smc_connection *conn) +{ + struct smc_host_cdc_msg *local = &conn->local_tx_ctrl; + struct smc_link *link = conn->lnk; + struct smc_cdc_tx_pend *pend; + struct smc_wr_buf *wr_buf; + struct smc_cdc_msg *peer; + int rc; + + rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); + if (rc) + return rc; + + peer = (struct smc_cdc_msg *)wr_buf; + peer->common.type = local->common.type; + peer->len = local->len; + peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */ + peer->token = htonl(local->token); + peer->prod_flags.failover_validation = 1; + + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + return rc; +} + static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) { struct smc_cdc_tx_pend *pend; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 42246b4bdcc9..9cfabc9af120 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -313,6 +313,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); int smcd_cdc_msg_send(struct smc_connection *conn); +int smcr_cdc_msg_send_validation(struct smc_connection *conn); int smc_cdc_init(void) __init; void smcd_cdc_rx_init(struct smc_connection *conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 21bc1ec07e99..a558ce0bde97 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -483,7 +483,7 @@ static int smc_switch_cursor(struct smc_sock *smc) if (smc->sk.sk_state != SMC_INIT && smc->sk.sk_state != SMC_CLOSED) { - /* tbd: call rc = smc_cdc_get_slot_and_msg_send(conn); */ + rc = smcr_cdc_msg_send_validation(conn); if (!rc) { schedule_delayed_work(&conn->tx_work, 0); smc->sk.sk_data_ready(&smc->sk); -- cgit v1.2.3 From b286a0651e4404ab96cdfdcdad8a839a26b3751e Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:40 +0200 Subject: net/smc: handle incoming CDC validation message Call smc_cdc_msg_validate() when a CDC message with the failover validation bit enabled was received. Validate that the sequence number sent with the message is one we already have received. If not, messages were lost and the connection is terminated using a new abort_work. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc.h | 2 ++ net/smc/smc_cdc.c | 37 +++++++++++++++++++++++++++++++------ net/smc/smc_core.c | 15 +++++++++++++++ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 1e9113771600..6f1c42da7a4c 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -188,12 +188,14 @@ struct smc_connection { spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ + struct work_struct abort_work; /* abort the connection */ struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ u8 rx_off; /* receive offset: * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ + u8 out_of_sync : 1; /* out of sync with peer */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index e6b7eef71831..b2b85e1be72c 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -282,6 +282,28 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc, sk_send_sigurg(&smc->sk); } +static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc, + struct smc_link *link) +{ + struct smc_connection *conn = &smc->conn; + u16 recv_seq = ntohs(cdc->seqno); + s16 diff; + + /* check that seqnum was seen before */ + diff = conn->local_rx_ctrl.seqno - recv_seq; + if (diff < 0) { /* diff larger than 0x7fff */ + /* drop connection */ + conn->out_of_sync = 1; /* prevent any further receives */ + spin_lock_bh(&conn->send_lock); + conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + conn->lnk = link; + spin_unlock_bh(&conn->send_lock); + sock_hold(&smc->sk); /* sock_put in abort_work */ + if (!schedule_work(&conn->abort_work)) + sock_put(&smc->sk); + } +} + static void smc_cdc_msg_recv_action(struct smc_sock *smc, struct smc_cdc_msg *cdc) { @@ -412,16 +434,19 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf) read_lock_bh(&lgr->conns_lock); conn = smc_lgr_find_conn(ntohl(cdc->token), lgr); read_unlock_bh(&lgr->conns_lock); - if (!conn) + if (!conn || conn->out_of_sync) return; smc = container_of(conn, struct smc_sock, conn); - if (!cdc->prod_flags.failover_validation) { - if (smc_cdc_before(ntohs(cdc->seqno), - conn->local_rx_ctrl.seqno)) - /* received seqno is old */ - return; + if (cdc->prod_flags.failover_validation) { + smc_cdc_msg_validate(smc, cdc, link); + return; } + if (smc_cdc_before(ntohs(cdc->seqno), + conn->local_rx_ctrl.seqno)) + /* received seqno is old */ + return; + smc_cdc_msg_recv(smc, cdc); } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index a558ce0bde97..b5633fa19b6d 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -615,6 +615,8 @@ void smc_conn_free(struct smc_connection *conn) tasklet_kill(&conn->rx_tsklet); } else { smc_cdc_tx_dismiss_slots(conn); + if (current_work() != &conn->abort_work) + cancel_work_sync(&conn->abort_work); } if (!list_empty(&lgr->list)) { smc_lgr_unregister_conn(conn); @@ -996,6 +998,18 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) } } +/* abort connection, abort_work scheduled from tasklet context */ +static void smc_conn_abort_work(struct work_struct *work) +{ + struct smc_connection *conn = container_of(work, + struct smc_connection, + abort_work); + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + + smc_conn_kill(conn, true); + sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ +} + /* link is up - establish alternate link if applicable */ static void 
smcr_link_up(struct smc_link_group *lgr, struct smc_ib_device *smcibdev, u8 ibport) @@ -1302,6 +1316,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); smcd_cdc_rx_init(conn); /* init tasklet for this conn */ -- cgit v1.2.3 From 09c61d24f96dfef7791debfcaf96efe067ab2ba8 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:41 +0200 Subject: net/smc: wait for departure of an IB message Introduce smc_wr_tx_send_wait() to send an IB message and wait for the tx completion event of the message. This makes sure that the message is no longer in-flight when the function returns. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 1 + net/smc/smc_wr.c | 39 +++++++++++++++++++++++++++++++++++++++ net/smc/smc_wr.h | 2 ++ 3 files changed, 42 insertions(+) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 584f11230c4f..86eebbadc8f6 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -85,6 +85,7 @@ struct smc_link { struct smc_rdma_sges *wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/ struct smc_rdma_wr *wr_tx_rdmas; /* WR RDMA WRITE */ struct smc_wr_tx_pend *wr_tx_pends; /* WR send waiting for CQE */ + struct completion *wr_tx_compl; /* WR send CQE completion */ /* above four vectors have wr_tx_cnt elements and use the same index */ dma_addr_t wr_tx_dma_addr; /* DMA address of wr_tx_bufs */ atomic_long_t wr_tx_id; /* seq # of last sent WR */ diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 3fd27bea4f7a..7239ba9b99dc 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -44,6 +44,7 @@ struct smc_wr_tx_pend { /* control data for a pending send request */ struct smc_link *link; u32 idx; struct smc_wr_tx_pend_priv priv; + u8 compl_requested; }; /******************************** send queue *********************************/ @@ -103,6 +104,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) if (pnd_snd_idx == link->wr_tx_cnt) return; link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status; + if (link->wr_tx_pends[pnd_snd_idx].compl_requested) + complete(&link->wr_tx_compl[pnd_snd_idx]); memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd)); /* clear the full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[pnd_snd_idx], 0, @@ -275,6 +278,33 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +/* Send prepared WR slot via ib_post_send and wait for send completion + * notification. + * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer + */ +int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + unsigned long timeout) +{ + struct smc_wr_tx_pend *pend; + int rc; + + pend = container_of(priv, struct smc_wr_tx_pend, priv); + pend->compl_requested = 1; + init_completion(&link->wr_tx_compl[pend->idx]); + + rc = smc_wr_tx_send(link, priv); + if (rc) + return rc; + /* wait for completion by smc_wr_tx_process_cqe() */ + rc = wait_for_completion_interruptible_timeout( + &link->wr_tx_compl[pend->idx], timeout); + if (rc <= 0) + rc = -ENODATA; + if (rc > 0) + rc = 0; + return rc; +} + /* Register a memory region and wait for result. 
*/ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) { @@ -555,6 +585,8 @@ void smc_wr_free_link(struct smc_link *lnk) void smc_wr_free_link_mem(struct smc_link *lnk) { + kfree(lnk->wr_tx_compl); + lnk->wr_tx_compl = NULL; kfree(lnk->wr_tx_pends); lnk->wr_tx_pends = NULL; kfree(lnk->wr_tx_mask); @@ -625,8 +657,15 @@ int smc_wr_alloc_link_mem(struct smc_link *link) GFP_KERNEL); if (!link->wr_tx_pends) goto no_mem_wr_tx_mask; + link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT, + sizeof(link->wr_tx_compl[0]), + GFP_KERNEL); + if (!link->wr_tx_compl) + goto no_mem_wr_tx_pends; return 0; +no_mem_wr_tx_pends: + kfree(link->wr_tx_pends); no_mem_wr_tx_mask: kfree(link->wr_tx_mask); no_mem_wr_rx_sges: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index f7eaeb3391f3..423b8709f1c9 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -101,6 +101,8 @@ int smc_wr_tx_put_slot(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *wr_pend_priv); +int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, + unsigned long timeout); void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context); void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, -- cgit v1.2.3 From f3811fd7bc97587b142fed9edf8c726694220cb2 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:42 +0200 Subject: net/smc: send DELETE_LINK, ALL message and wait for send to complete Add smc_llc_send_message_wait() which uses smc_wr_tx_send_wait() to send an LLC message and waits for the message send to complete. smc_llc_send_link_delete_all() calls the new function to send an DELETE_LINK,ALL LLC message. The RFC states that the sender of this type of message needs to wait for the completion event of the message transmission and can terminate the link afterwards. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 5 +++++ net/smc/smc_llc.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_llc.h | 2 ++ 3 files changed, 51 insertions(+) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index b5633fa19b6d..8f630b76c5a4 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -238,6 +238,9 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(lgr_lock); cancel_delayed_work(&lgr->free_work); + if (!lgr->is_smcd && !lgr->terminating) + smc_llc_send_link_delete_all(lgr, true, + SMC_LLC_DEL_PROG_INIT_TERM); if (lgr->is_smcd && !lgr->terminating) smc_ism_signal_shutdown(lgr); if (!lgr->is_smcd) { @@ -847,6 +850,8 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); } else { + smc_llc_send_link_delete_all(lgr, false, + SMC_LLC_DEL_OP_INIT_TERM); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 8d2368accbad..0ea7ad6188ae 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -560,6 +560,25 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) return smc_wr_tx_send(link, pend); } +/* schedule an llc send on link, may wait for buffers, + * and wait for send completion notification. 
+ * @return 0 on success + */ +static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf) +{ + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + if (!smc_link_usable(link)) + return -ENOLINK; + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); + return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); +} + /********************************* receive ***********************************/ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr, @@ -1215,6 +1234,29 @@ out: kfree(qentry); } +/* try to send a DELETE LINK ALL request on any active link, + * waiting for send completion + */ +void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) +{ + struct smc_llc_msg_del_link delllc = {0}; + int i; + + delllc.hd.common.type = SMC_LLC_DELETE_LINK; + delllc.hd.length = sizeof(delllc); + if (ord) + delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY; + delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL; + delllc.reason = htonl(rsn); + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (!smc_link_usable(&lgr->lnk[i])) + continue; + if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) + break; + } +} + static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) { struct smc_llc_msg_del_link *del_llc; @@ -1230,6 +1272,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) { /* delete entire lgr */ + smc_llc_send_link_delete_all(lgr, true, ntohl( + qentry->msg.delete_link.reason)); smc_lgr_terminate_sched(lgr); goto out; } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index c335fc5f363c..6d2a5d943b83 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -89,6 +89,8 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, int time_out, u8 exp_msg); struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow); void smc_llc_flow_qentry_del(struct smc_llc_flow *flow); +void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, + u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_srv_add_link(struct smc_link *link); void smc_llc_srv_add_link_local(struct smc_link *link); -- cgit v1.2.3 From 56bc3b2094b428d808dd1704fdb3086c66bcb310 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:43 +0200 Subject: net/smc: assign link to a new connection For new connections, assign a link from the link group, using some simple load balancing. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 65 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 8f630b76c5a4..9c19b9aa3719 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -121,16 +121,59 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn) rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); } +/* assign an SMC-R link to the connection */ +static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) +{ + enum smc_link_state expected = first ? 
SMC_LNK_ACTIVATING : + SMC_LNK_ACTIVE; + int i, j; + + /* do link balancing */ + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &conn->lgr->lnk[i]; + + if (lnk->state != expected) + continue; + if (conn->lgr->role == SMC_CLNT) { + conn->lnk = lnk; /* temporary, SMC server assigns link*/ + break; + } + if (conn->lgr->conns_num % 2) { + for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { + struct smc_link *lnk2; + + lnk2 = &conn->lgr->lnk[j]; + if (lnk2->state == expected) { + conn->lnk = lnk2; + break; + } + } + } + if (!conn->lnk) + conn->lnk = lnk; + break; + } + if (!conn->lnk) + return SMC_CLC_DECL_NOACTLINK; + return 0; +} + /* Register connection in link group by assigning an alert token * registered in a search tree. * Requires @conns_lock * Note that '0' is a reserved value and not assigned. */ -static int smc_lgr_register_conn(struct smc_connection *conn) +static int smc_lgr_register_conn(struct smc_connection *conn, bool first) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); static atomic_t nexttoken = ATOMIC_INIT(0); + int rc; + if (!conn->lgr->is_smcd) { + rc = smcr_lgr_conn_assign_link(conn, first); + if (rc) + return rc; + } /* find a new alert_token_local value not yet used by some connection * in this link group */ @@ -141,22 +184,6 @@ static int smc_lgr_register_conn(struct smc_connection *conn) conn->alert_token_local = 0; } smc_lgr_add_alert_token(conn); - - /* assign the new connection to a link */ - if (!conn->lgr->is_smcd) { - struct smc_link *lnk; - int i; - - /* tbd - link balancing */ - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - lnk = &conn->lgr->lnk[i]; - if (lnk->state == SMC_LNK_ACTIVATING || - lnk->state == SMC_LNK_ACTIVE) - conn->lnk = lnk; - } - if (!conn->lnk) - return SMC_CLC_DECL_NOACTLINK; - } conn->lgr->conns_num++; return 0; } @@ -1285,7 +1312,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; - rc = smc_lgr_register_conn(conn); /* add conn to lgr */ + rc = smc_lgr_register_conn(conn, false); write_unlock_bh(&lgr->conns_lock); if (!rc && delayed_work_pending(&lgr->free_work)) cancel_delayed_work(&lgr->free_work); @@ -1313,7 +1340,7 @@ create: goto out; lgr = conn->lgr; write_lock_bh(&lgr->conns_lock); - rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */ + rc = smc_lgr_register_conn(conn, true); write_unlock_bh(&lgr->conns_lock); if (rc) goto out; -- cgit v1.2.3 From ad6c111b8ae760114df6765d5a5ed1b09020d45d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:44 +0200 Subject: net/smc: asymmetric link tagging New connections must not be assigned to asymmetric links. Add asymmetric link tagging using new link variable link_is_asym. The new helpers smcr_lgr_set_type() and smcr_lgr_set_type_asym() are called to set the state of the link group, and tag all links accordingly. smcr_lgr_conn_assign_link() respects the link tagging and will not assign new connections to links tagged as asymmetric link. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 26 +++++++++++++++++++++++--- net/smc/smc_core.h | 4 ++++ net/smc/smc_llc.c | 20 ++++++++++++++------ 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 9c19b9aa3719..be15b30a1234 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -132,7 +132,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &conn->lgr->lnk[i]; - if (lnk->state != expected) + if (lnk->state != expected || lnk->link_is_asym) continue; if (conn->lgr->role == SMC_CLNT) { conn->lnk = lnk; /* temporary, SMC server assigns link*/ @@ -143,7 +143,8 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) struct smc_link *lnk2; lnk2 = &conn->lgr->lnk[j]; - if (lnk2->state == expected) { + if (lnk2->state == expected && + !lnk2->link_is_asym) { conn->lnk = lnk2; break; } @@ -1030,6 +1031,25 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) } } +/* set new lgr type and clear all asymmetric link tagging */ +void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) +{ + int i; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) + if (smc_link_usable(&lgr->lnk[i])) + lgr->lnk[i].link_is_asym = false; + lgr->type = new_type; +} + +/* set new lgr type and tag a link as asymmetric */ +void smcr_lgr_set_type_asym(struct smc_link_group *lgr, + enum smc_lgr_type new_type, int asym_lnk_idx) +{ + smcr_lgr_set_type(lgr, new_type); + lgr->lnk[asym_lnk_idx].link_is_asym = true; +} + /* abort connection, abort_work scheduled from tasklet context */ static void smc_conn_abort_work(struct work_struct *work) { @@ -1123,7 +1143,7 @@ static void smcr_link_down(struct smc_link *lnk) smcr_link_clear(lnk); return; } - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); del_link_id = lnk->link_id; if (lgr->role == SMC_SERV) { diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 86eebbadc8f6..6ed7ab6d89d5 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -117,6 +117,7 @@ struct smc_link { u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ u8 link_idx; /* index in lgr link array */ + u8 link_is_asym; /* is link asymmetric? 
*/ struct smc_link_group *lgr; /* parent link group */ struct work_struct link_down_wrk; /* wrk to bring link down */ @@ -380,6 +381,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, void smcr_link_clear(struct smc_link *lnk); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); +void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); +void smcr_lgr_set_type_asym(struct smc_link_group *lgr, + enum smc_lgr_type new_type, int asym_lnk_idx); int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc); struct smc_link *smc_switch_conns(struct smc_link_group *lgr, struct smc_link *from_lnk, bool is_dev_err); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 0ea7ad6188ae..f65b2aac6b52 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -796,7 +796,11 @@ static int smc_llc_cli_conf_link(struct smc_link *link, return -ENOLINK; } smc_llc_link_active(link_new); - lgr->type = lgr_new_t; + if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) + smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); + else + smcr_lgr_set_type(lgr, lgr_new_t); return 0; } @@ -1038,7 +1042,11 @@ static int smc_llc_srv_conf_link(struct smc_link *link, return -ENOLINK; } smc_llc_link_active(link_new); - lgr->type = lgr_new_t; + if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || + lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) + smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx); + else + smcr_lgr_set_type(lgr, lgr_new_t); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return 0; } @@ -1223,9 +1231,9 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) if (lnk_del == lnk_asym) { /* expected deletion of asym link, don't change lgr state */ } else if (active_links == 1) { - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); } else if (!active_links) { - lgr->type = SMC_LGR_NONE; + smcr_lgr_set_type(lgr, SMC_LGR_NONE); smc_lgr_terminate_sched(lgr); } out_unlock: @@ -1314,9 +1322,9 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) active_links = smc_llc_active_link_count(lgr); if (active_links == 1) { - lgr->type = SMC_LGR_SINGLE; + smcr_lgr_set_type(lgr, SMC_LGR_SINGLE); } else if (!active_links) { - lgr->type = SMC_LGR_NONE; + smcr_lgr_set_type(lgr, SMC_LGR_NONE); smc_lgr_terminate_sched(lgr); } -- cgit v1.2.3 From 3e0c40afce4ea5b08bb7e3f65c55157817116640 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:45 +0200 Subject: net/smc: add termination reason and handle LLC protocol violation Allow to set the reason code for the link group termination, and set meaningful values before termination processing is triggered. This reason code is sent to the peer in the final delete link message. When the LLC request or response layer receives a message type that was not handled, drop a warning and terminate the link group. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 8 ++++++-- net/smc/smc_core.h | 2 ++ net/smc/smc_llc.c | 14 ++++++++++++++ net/smc/smc_llc.h | 8 ++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index be15b30a1234..b6f93b44f9c7 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -878,8 +878,11 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) smc_ism_put_vlan(lgr->smcd, lgr->vlan_id); put_device(&lgr->smcd->dev); } else { - smc_llc_send_link_delete_all(lgr, false, - SMC_LLC_DEL_OP_INIT_TERM); + u32 rsn = lgr->llc_termination_rsn; + + if (!rsn) + rsn = SMC_LLC_DEL_PROG_INIT_TERM; + smc_llc_send_link_delete_all(lgr, false, rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; @@ -1018,6 +1021,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { list_del_init(&lgr->list); + smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); __smc_lgr_terminate(lgr, false); } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 6ed7ab6d89d5..32bc45af9a1a 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -271,6 +271,8 @@ struct smc_link_group { /* protects llc flow */ int llc_testlink_time; /* link keep alive time */ + u32 llc_termination_rsn; + /* rsn code for termination */ }; struct { /* SMC-D */ u64 peer_gid; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index f65b2aac6b52..482acf80e26e 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1420,6 +1420,14 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) smc_llc_flow_qentry_del(&lgr->llc_flow_rmt); } +static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type) +{ + pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: " + "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type); + smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL); + smc_lgr_terminate_sched(lgr); +} + /* flush the llc event queue */ static void smc_llc_event_flush(struct smc_link_group *lgr) { @@ -1520,6 +1528,9 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt); } return; + default: + smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type); + break; } out: kfree(qentry); @@ -1579,6 +1590,9 @@ static void smc_llc_rx_response(struct smc_link *link, case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; + default: + smc_llc_protocol_violation(link->lgr, llc_type); + break; } kfree(qentry); } diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 6d2a5d943b83..f5882ebf357b 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -60,6 +60,14 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr) return NULL; } +/* set the termination reason code for the link group */ +static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr, + u32 rsn) +{ + if (!lgr->llc_termination_rsn) + lgr->llc_termination_rsn = rsn; +} + /* transmit */ int smc_llc_send_confirm_link(struct smc_link *lnk, enum smc_llc_reqresp reqresp); -- cgit v1.2.3 From a52bcc919b14c9d78f03b2b4ff604e5ca69c7e6d Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:46 +0200 Subject: net/smc: improve termination processing Add helper smcr_lgr_link_deactivate_all() and eliminate duplicate code. In smc_lgr_free(), clear the smc-r links before smc_lgr_free_bufs() is called so buffers are already prepared for free. 
The usage of the soft parameter in __smc_lgr_terminate() is no longer needed, smc_lgr_free() can be called directly. smc_lgr_terminate_sched() and smc_smcd_terminate() set lgr->freeing to indicate that the link group will be freed soon to avoid unnecessary schedules of the free worker. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 61 +++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index b6f93b44f9c7..fb391bc6781e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -237,6 +237,19 @@ void smc_lgr_cleanup_early(struct smc_connection *conn) smc_lgr_schedule_free_work_fast(lgr); } +static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) +{ + int i; + + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + struct smc_link *lnk = &lgr->lnk[i]; + + if (smc_link_usable(lnk)) + lnk->state = SMC_LNK_INACTIVE; + } + wake_up_interruptible_all(&lgr->llc_waiter); +} + static void smc_lgr_free(struct smc_link_group *lgr); static void smc_lgr_free_work(struct work_struct *work) @@ -246,7 +259,6 @@ static void smc_lgr_free_work(struct work_struct *work) free_work); spinlock_t *lgr_lock; bool conns; - int i; smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); @@ -271,15 +283,8 @@ static void smc_lgr_free_work(struct work_struct *work) SMC_LLC_DEL_PROG_INIT_TERM); if (lgr->is_smcd && !lgr->terminating) smc_ism_signal_shutdown(lgr); - if (!lgr->is_smcd) { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - struct smc_link *lnk = &lgr->lnk[i]; - - if (smc_link_usable(lnk)) - lnk->state = SMC_LNK_INACTIVE; - } - wake_up_interruptible_all(&lgr->llc_waiter); - } + if (!lgr->is_smcd) + smcr_lgr_link_deactivate_all(lgr); smc_lgr_free(lgr); } @@ -802,6 +807,16 @@ static void smc_lgr_free(struct smc_link_group *lgr) { int i; + if (!lgr->is_smcd) { + mutex_lock(&lgr->llc_conf_mutex); + for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { + if (lgr->lnk[i].state != SMC_LNK_UNUSED) + smcr_link_clear(&lgr->lnk[i]); + } + mutex_unlock(&lgr->llc_conf_mutex); + smc_llc_lgr_clear(lgr); + } + smc_lgr_free_bufs(lgr); if (lgr->is_smcd) { if (!lgr->terminating) { @@ -811,11 +826,6 @@ static void smc_lgr_free(struct smc_link_group *lgr) if (!atomic_dec_return(&lgr->smcd->lgr_cnt)) wake_up(&lgr->smcd->lgrs_deleted); } else { - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_UNUSED) - smcr_link_clear(&lgr->lnk[i]); - } - smc_llc_lgr_clear(lgr); if (!atomic_dec_return(&lgr_cnt)) wake_up(&lgrs_deleted); } @@ -870,8 +880,6 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) static void smc_lgr_cleanup(struct smc_link_group *lgr) { - int i; - if (lgr->is_smcd) { smc_ism_signal_shutdown(lgr); smcd_unregister_all_dmbs(lgr); @@ -883,13 +891,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr) if (!rsn) rsn = SMC_LLC_DEL_PROG_INIT_TERM; smc_llc_send_link_delete_all(lgr, false, rsn); - for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - struct smc_link *lnk = &lgr->lnk[i]; - - if (smc_link_usable(lnk)) - lnk->state = SMC_LNK_INACTIVE; - } - wake_up_interruptible_all(&lgr->llc_waiter); + smcr_lgr_link_deactivate_all(lgr); } } @@ -905,8 +907,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) if (lgr->terminating) return; /* lgr already terminating */ - if (!soft) - cancel_delayed_work_sync(&lgr->free_work); + /* cancel free_work sync, will terminate when lgr->freeing is set */ + 
cancel_delayed_work_sync(&lgr->free_work); lgr->terminating = 1; /* kill remaining link group connections */ @@ -926,10 +928,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) } read_unlock_bh(&lgr->conns_lock); smc_lgr_cleanup(lgr); - if (soft) - smc_lgr_schedule_free_work_fast(lgr); - else - smc_lgr_free(lgr); + smc_lgr_free(lgr); } /* unlink link group and schedule termination */ @@ -944,6 +943,7 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr) return; /* lgr already terminating */ } list_del_init(&lgr->list); + lgr->freeing = 1; spin_unlock_bh(lgr_lock); schedule_work(&lgr->terminate_work); } @@ -962,6 +962,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) if (peer_gid) /* peer triggered termination */ lgr->peer_shutdown = 1; list_move(&lgr->list, &lgr_free_list); + lgr->freeing = 1; } } spin_unlock_bh(&dev->lgr_lock); -- cgit v1.2.3 From 45fa8da0bf5cb447fcf835d184e2d3b745376e69 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:47 +0200 Subject: net/smc: create improved SMC-R link_uid The link_uid of an SMC-R link is exchanged between SMC peers and its value can be used for debugging purposes. Create a unique link_uid during link initialization and use it in communication with SMC-R peers. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.c | 1 + net/smc/smc_core.h | 4 +++- net/smc/smc_llc.c | 18 ++++++++++++++---- net/smc/smc_llc.h | 1 + 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index fb391bc6781e..fb5f685ff494 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -331,6 +331,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu; + smc_llc_link_set_uid(lnk); INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); if (!ini->ib_dev->initialized) { rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 32bc45af9a1a..e2ace20db7fd 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -70,6 +70,8 @@ struct smc_rdma_wr { /* work requests per message struct ib_rdma_wr wr_tx_rdma[SMC_MAX_RDMA_WRITES]; }; +#define SMC_LGR_ID_SIZE 4 + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -116,6 +118,7 @@ struct smc_link { u8 peer_mac[ETH_ALEN]; /* = gid[8:10||13:15] */ u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ + u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? 
*/ struct smc_link_group *lgr; /* parent link group */ @@ -178,7 +181,6 @@ struct smc_rtoken { /* address/key of remote RMB */ u32 rkey; }; -#define SMC_LGR_ID_SIZE 4 #define SMC_BUF_MIN_SIZE 16384 /* minimum size of an RMB */ #define SMC_RMBE_SIZES 16 /* number of distinct RMBE sizes */ /* theoretically, the RFC states that largest size would be 512K, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 482acf80e26e..afb889d60881 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -361,7 +361,6 @@ static int smc_llc_add_pending_send(struct smc_link *link, int smc_llc_send_confirm_link(struct smc_link *link, enum smc_llc_reqresp reqresp) { - struct smc_link_group *lgr = smc_get_lgr(link); struct smc_llc_msg_confirm_link *confllc; struct smc_wr_tx_pend_priv *pend; struct smc_wr_buf *wr_buf; @@ -382,7 +381,7 @@ int smc_llc_send_confirm_link(struct smc_link *link, memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE); hton24(confllc->sender_qp_num, link->roce_qp->qp_num); confllc->link_num = link->link_id; - memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE); + memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE); confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* send llc message */ rc = smc_wr_tx_send(link, pend); @@ -845,7 +844,8 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (rc) goto out_reject; smc_llc_save_add_link_info(lnk_new, llc); - lnk_new->link_id = llc->link_num; + lnk_new->link_id = llc->link_num; /* SMC server assigns link id */ + smc_llc_link_set_uid(lnk_new); rc = smc_ib_ready_link(lnk_new); if (rc) @@ -1775,12 +1775,22 @@ out: return rc; } +void smc_llc_link_set_uid(struct smc_link *link) +{ + __be32 link_uid; + + link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id); + memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE); +} + /* evaluate confirm link request or response */ int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type) { - if (type == SMC_LLC_REQ) /* SMC server assigns link_id */ + if (type == SMC_LLC_REQ) { /* SMC server assigns link_id */ qentry->link->link_id = qentry->msg.confirm_link.link_num; + smc_llc_link_set_uid(qentry->link); + } if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)) return -ENOTSUPP; return 0; diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index f5882ebf357b..1b68f229cb99 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -92,6 +92,7 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr, void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow); int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type); +void smc_llc_link_set_uid(struct smc_link *link); struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, struct smc_link *lnk, int time_out, u8 exp_msg); -- cgit v1.2.3 From 649758fff327eeb184713db8b0b0ebfa28693077 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 4 May 2020 14:18:48 +0200 Subject: net/smc: save SMC-R peer link_uid During SMC-R link establishment the peers exchange the link_uid that is used for debugging purposes. Save the peer link_uid in smc_link so it can be retrieved by the smc_diag netlink interface. Signed-off-by: Karsten Graul Reviewed-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 2 ++ net/smc/smc_core.h | 1 + net/smc/smc_llc.c | 9 +++++++++ net/smc/smc_llc.h | 1 + 4 files changed, 13 insertions(+) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index c67272007f41..4e4421c95ca1 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -390,6 +390,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc) SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } + smc_llc_save_peer_uid(qentry); rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ); smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); if (rc) @@ -1056,6 +1057,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc) SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } + smc_llc_save_peer_uid(qentry); rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP); smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl); if (rc) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index e2ace20db7fd..4ae76802214f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -119,6 +119,7 @@ struct smc_link { u8 peer_gid[SMC_GID_SIZE]; /* gid of peer*/ u8 link_id; /* unique # within link group */ u8 link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */ + u8 peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */ u8 link_idx; /* index in lgr link array */ u8 link_is_asym; /* is link asymmetric? */ struct smc_link_group *lgr; /* parent link group */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index afb889d60881..66ddc9cf5e2f 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -770,6 +770,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link, smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return -ENOLINK; } + smc_llc_save_peer_uid(qentry); smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); rc = smc_ib_modify_qp_rts(link_new); @@ -1041,6 +1042,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link, false, SMC_LLC_DEL_LOST_PATH); return -ENOLINK; } + smc_llc_save_peer_uid(qentry); smc_llc_link_active(link_new); if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL || lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) @@ -1783,6 +1785,13 @@ void smc_llc_link_set_uid(struct smc_link *link) memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE); } +/* save peers link user id, used for debug purposes */ +void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry) +{ + memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid, + SMC_LGR_ID_SIZE); +} + /* evaluate confirm link request or response */ int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type) diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 1b68f229cb99..55287376112d 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -93,6 +93,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow); int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry, enum smc_llc_reqresp type); void smc_llc_link_set_uid(struct smc_link *link); +void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry); struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr, struct smc_link *lnk, int time_out, u8 exp_msg); -- cgit v1.2.3
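
The failover validation added above hinges on a wraparound-safe 16-bit sequence comparison: smc_cdc_msg_validate() computes the difference between the locally last-received seqno and the peer-reported last-sent seqno in an s16, so a negative result means the peer sent CDC messages that never arrived. The following user-space sketch is purely illustrative, not kernel code; the helper name seen_before() and the sample values are made up to show how that comparison behaves, including across the 16-bit wraparound.

/* illustrative sketch of the s16 "diff = local_seqno - recv_seq" check */
#include <stdint.h>
#include <stdio.h>

/* return 1 if recv_seq was already received locally, 0 if messages were lost */
static int seen_before(uint16_t local_seqno, uint16_t recv_seq)
{
	int16_t diff = (int16_t)(local_seqno - recv_seq);

	return diff >= 0;	/* negative: recv_seq is ahead of what we saw */
}

int main(void)
{
	printf("%d\n", seen_before(100, 99));   /* 1: peer's last msg already received */
	printf("%d\n", seen_before(100, 101));  /* 0: a CDC message was lost -> abort */
	printf("%d\n", seen_before(2, 65535));  /* 1: still in sync across wraparound */
	return 0;
}

The signed-difference idiom is what lets the 16-bit CDC sequence counters wrap without causing false aborts, as long as the two sides never drift apart by more than 0x7fff messages.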