diff options
author | Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com> | 2021-02-05 09:14:28 +0100 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2021-02-16 20:01:50 -0400 |
commit | 2b5715fc17386a6223490d5b8f08d031999b0c0b (patch) | |
tree | 532a56c17078e9d8eb4859e9871aed64eaad1868 /drivers/infiniband | |
parent | 68ad4d1cc679c1704faf9db6ddd0550702b5d093 (diff) |
RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
The current code computes a number of channels per SRP target and spreads
them equally across all online NUMA nodes. Each channel is then assigned
a CPU within this node.
In the case of unbalanced, or even unpopulated nodes, some channels do not
get a CPU associated and thus do not get connected. This causes the SRP
connection to fail.
This patch solves the issue by rewriting channel computation and
allocation:
- Drop channel to node/CPU association as it had no real effect on
locality but added unnecessary complexity.
- Tweak the number of channels allocated to reduce CPU contention when
possible:
- Up to one channel per CPU (instead of up to 4 by node)
- At least 4 channels per node, unless ch_count module parameter is
used.
Link: https://lore.kernel.org/r/9cb4d9d3-30ad-2276-7eff-e85f7ddfb411@suse.com
Signed-off-by: Nicolas Morey-Chaisemartin <nmoreychaisemartin@suse.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 110 |
1 files changed, 45 insertions, 65 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5492b66a8153..31f8aa2c40ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_rdma_ch *ch; struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - int ret, node_idx, node, cpu, i; + int ret, i, ch_idx; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; uint32_t max_iu_len; @@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - if (target->ch_count == 0) + if (target->ch_count == 0) { target->ch_count = - max_t(unsigned int, num_online_nodes(), - min(ch_count ?: - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) goto out; - node_idx = 0; - for_each_online_node(node) { - const int ch_start = (node_idx * target->ch_count / - num_online_nodes()); - const int ch_end = ((node_idx + 1) * target->ch_count / - num_online_nodes()); - const int cv_start = node_idx * ibdev->num_comp_vectors / - num_online_nodes(); - const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes(); - int cpu_idx = 0; - - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) != node) - continue; - if (ch_start + cpu_idx >= ch_end) - continue; - ch = &target->ch[ch_start + cpu_idx]; - ch->target = target; - ch->comp_vector = cv_start == cv_end ? cv_start : - cv_start + cpu_idx % (cv_end - cv_start); - spin_lock_init(&ch->lock); - INIT_LIST_HEAD(&ch->free_tx); - ret = srp_new_cm_id(ch); - if (ret) - goto err_disconnect; + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; - ret = srp_create_ch_ib(ch); - if (ret) - goto err_disconnect; + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; + ret = srp_alloc_req_data(ch); + if (ret) + goto err_disconnect; - ret = srp_connect_ch(ch, max_iu_len, multich); - if (ret) { - char dst[64]; - - if (target->using_rdma_cm) - snprintf(dst, sizeof(dst), "%pIS", - &target->rdma_cm.dst); - else - snprintf(dst, sizeof(dst), "%pI6", - target->ib_cm.orig_dgid.raw); - shost_printk(KERN_ERR, target->scsi_host, - PFX "Connection %d/%d to %s failed\n", - ch_start + cpu_idx, - target->ch_count, dst); - if (node_idx == 0 && cpu_idx == 0) { - goto free_ch; - } else { - srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); - target->ch_count = ch - target->ch; - goto connected; - } - } + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; - multich = true; - cpu_idx++; + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + srp_free_req_data(target, ch); + target->ch_count = ch - target->ch; + goto connected; + } } - node_idx++; + multich = true; } connected: |