diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.c | 414 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/affinity.h | 10 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/chip.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/hfi.h | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/init.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/trace_dbg.h | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/hfi1/verbs.c | 7 |
8 files changed, 444 insertions, 16 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index eca9e6354017..fbe7198a715a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -208,7 +208,13 @@ int node_affinity_init(void) return 0; } -void node_affinity_destroy(void) +static void node_affinity_destroy(struct hfi1_affinity_node *entry) +{ + free_percpu(entry->comp_vect_affinity); + kfree(entry); +} + +void node_affinity_destroy_all(void) { struct list_head *pos, *q; struct hfi1_affinity_node *entry; @@ -218,7 +224,7 @@ void node_affinity_destroy(void) entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); - kfree(entry); + node_affinity_destroy(entry); } mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); @@ -232,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node) if (!entry) return NULL; entry->node = node; + entry->comp_vect_affinity = alloc_percpu(u16); INIT_LIST_HEAD(&entry->list); return entry; @@ -261,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node) return NULL; } +static int per_cpu_affinity_get(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + u16 cntr; + u16 prev_cntr; + int ret_cpu; + + if (!possible_cpumask) { + ret_cpu = -EINVAL; + goto fail; + } + + if (!comp_vect_affinity) { + ret_cpu = -EINVAL; + goto fail; + } + + ret_cpu = cpumask_first(possible_cpumask); + if (ret_cpu >= nr_cpu_ids) { + ret_cpu = -EINVAL; + goto fail; + } + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr < prev_cntr) { + ret_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1; + +fail: + return ret_cpu; +} + +static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + int max_cpu; + u16 cntr; + u16 prev_cntr; + + if (!possible_cpumask) + return -EINVAL; + + if (!comp_vect_affinity) + return -EINVAL; + + max_cpu = cpumask_first(possible_cpumask); + if (max_cpu >= nr_cpu_ids) + return -EINVAL; + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr > prev_cntr) { + max_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1; + + return max_cpu; +} + +/* + * Non-interrupt CPUs are used first, then interrupt CPUs. + * Two already allocated cpu masks must be passed. + */ +static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + cpumask_var_t non_intr_cpus, + cpumask_var_t available_cpus) + __must_hold(&node_affinity.lock) +{ + int cpu; + struct cpu_mask_set *set = dd->comp_vect; + + lockdep_assert_held(&node_affinity.lock); + if (!non_intr_cpus) { + cpu = -1; + goto fail; + } + + if (!available_cpus) { + cpu = -1; + goto fail; + } + + /* Available CPUs for pinning completion vectors */ + _cpu_mask_set_gen_inc(set); + cpumask_andnot(available_cpus, &set->mask, &set->used); + + /* Available CPUs without SDMA engine interrupts */ + cpumask_andnot(non_intr_cpus, available_cpus, + &entry->def_intr.used); + + /* If there are non-interrupt CPUs available, use them first */ + if (!cpumask_empty(non_intr_cpus)) + cpu = cpumask_first(non_intr_cpus); + else /* Otherwise, use interrupt CPUs */ + cpu = cpumask_first(available_cpus); + + if (cpu >= nr_cpu_ids) { /* empty */ + cpu = -1; + goto fail; + } + cpumask_set_cpu(cpu, &set->used); + +fail: + return cpu; +} + +static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu) +{ + struct cpu_mask_set *set = dd->comp_vect; + + if (cpu < 0) + return; + + cpu_mask_set_put(set, cpu); +} + +/* _dev_comp_vect_mappings_destroy() is reentrant */ +static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd) +{ + int i, cpu; + + if (!dd->comp_vect_mappings) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = dd->comp_vect_mappings[i]; + _dev_comp_vect_cpu_put(dd, cpu); + dd->comp_vect_mappings[i] = -1; + hfi1_cdbg(AFFINITY, + "[%s] Release CPU %d from completion vector %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i); + } + + kfree(dd->comp_vect_mappings); + dd->comp_vect_mappings = NULL; +} + +/* + * This function creates the table for looking up CPUs for completion vectors. + * num_comp_vectors needs to have been initilized before calling this function. + */ +static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu, ret; + cpumask_var_t non_intr_cpus; + cpumask_var_t available_cpus; + + lockdep_assert_held(&node_affinity.lock); + + if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) { + free_cpumask_var(non_intr_cpus); + return -ENOMEM; + } + + dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus, + sizeof(*dd->comp_vect_mappings), + GFP_KERNEL); + if (!dd->comp_vect_mappings) { + ret = -ENOMEM; + goto fail; + } + for (i = 0; i < dd->comp_vect_possible_cpus; i++) + dd->comp_vect_mappings[i] = -1; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus, + available_cpus); + if (cpu < 0) { + ret = -EINVAL; + goto fail; + } + + dd->comp_vect_mappings[i] = cpu; + hfi1_cdbg(AFFINITY, + "[%s] Completion Vector %d -> CPU %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); + } + + return 0; + +fail: + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); + _dev_comp_vect_mappings_destroy(dd); + + return ret; +} + +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd) +{ + int ret; + struct hfi1_affinity_node *entry; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) { + ret = -EINVAL; + goto unlock; + } + ret = _dev_comp_vect_mappings_create(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + + return ret; +} + +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd) +{ + _dev_comp_vect_mappings_destroy(dd); +} + +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect) +{ + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + + if (!dd->comp_vect_mappings) + return -EINVAL; + if (comp_vect >= dd->comp_vect_possible_cpus) + return -EINVAL; + + return dd->comp_vect_mappings[comp_vect]; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + bool first_dev_init) + __must_hold(&node_affinity.lock) +{ + int i, j, curr_cpu; + int possible_cpus_comp_vect = 0; + struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask; + + lockdep_assert_held(&node_affinity.lock); + /* + * If there's only one CPU available for completion vectors, then + * there will only be one completion vector available. Othewise, + * the number of completion vector available will be the number of + * available CPUs divide it by the number of devices in the + * local NUMA node. + */ + if (cpumask_weight(&entry->comp_vect_mask) == 1) { + possible_cpus_comp_vect = 1; + dd_dev_warn(dd, + "Number of kernel receive queues is too large for completion vector affinity to be effective\n"); + } else { + possible_cpus_comp_vect += + cpumask_weight(&entry->comp_vect_mask) / + hfi1_per_node_cntr[dd->node]; + + /* + * If the completion vector CPUs available doesn't divide + * evenly among devices, then the first device device to be + * initialized gets an extra CPU. + */ + if (first_dev_init && + cpumask_weight(&entry->comp_vect_mask) % + hfi1_per_node_cntr[dd->node] != 0) + possible_cpus_comp_vect++; + } + + dd->comp_vect_possible_cpus = possible_cpus_comp_vect; + + /* Reserving CPUs for device completion vector */ + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask, + entry->comp_vect_affinity); + if (curr_cpu < 0) + goto fail; + + cpumask_set_cpu(curr_cpu, dev_comp_vect_mask); + } + + hfi1_cdbg(AFFINITY, + "[%s] Completion vector affinity CPU set(s) %*pbl", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), + cpumask_pr_args(dev_comp_vect_mask)); + + return 0; + +fail: + for (j = 0; j < i; j++) + per_cpu_affinity_put_max(&entry->comp_vect_mask, + entry->comp_vect_affinity); + + return curr_cpu; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu; + + lockdep_assert_held(&node_affinity.lock); + if (!dd->comp_vect_possible_cpus) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask, + entry->comp_vect_affinity); + /* Clearing CPU in device completion vector cpu mask */ + if (cpu >= 0) + cpumask_clear_cpu(cpu, &dd->comp_vect->mask); + } + + dd->comp_vect_possible_cpus = 0; +} + /* * Interrupt affinity. * @@ -277,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) int node = pcibus_to_node(dd->pcidev->bus); struct hfi1_affinity_node *entry; const struct cpumask *local_mask; - int curr_cpu, possible, i; + int curr_cpu, possible, i, ret; + bool new_entry = false; if (node < 0) node = numa_node_id(); @@ -299,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); - mutex_unlock(&node_affinity.lock); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } + new_entry = true; + init_cpu_mask_set(&entry->def_intr); init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->comp_vect_mask); cpumask_clear(&entry->general_intr_mask); /* Use the "real" cpu mask of this node as the default */ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, @@ -356,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - node_affinity_add_tail(entry); + /* Determine completion vector CPUs for the entire node */ + cpumask_and(&entry->comp_vect_mask, + &node_affinity.real_cpu_mask, local_mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->rcv_intr.mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->general_intr_mask); + + /* + * If there ends up being 0 CPU cores leftover for completion + * vectors, use the same CPU core as the general/control + * context. + */ + if (cpumask_weight(&entry->comp_vect_mask) == 0) + cpumask_copy(&entry->comp_vect_mask, + &entry->general_intr_mask); } + + ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry); + if (ret < 0) + goto fail; + + if (new_entry) + node_affinity_add_tail(entry); + mutex_unlock(&node_affinity.lock); + return 0; + +fail: + if (new_entry) + node_affinity_destroy(entry); + mutex_unlock(&node_affinity.lock); + return ret; +} + +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) +{ + struct hfi1_affinity_node *entry; + + if (dd->node < 0) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + /* + * Free device completion vector CPUs to be used by future + * completion vectors + */ + _dev_comp_vect_cpu_mask_clean_up(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + dd->node = -1; } /* diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 2a1e374169c0..6a7e6ea4e426 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; + u16 __percpu *comp_vect_affinity; struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; struct cpumask general_intr_mask; + struct cpumask comp_vect_mask; struct list_head list; }; @@ -116,7 +118,11 @@ struct hfi1_affinity_node_list { }; int node_affinity_init(void); -void node_affinity_destroy(void); +void node_affinity_destroy_all(void); extern struct hfi1_affinity_node_list node_affinity; +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd); +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect); +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd); +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd); #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 0fab6df0a345..46e9e4ffcba4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -15233,6 +15233,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + ret = hfi1_comp_vectors_set_up(dd); + if (ret) + goto bail_clear_intr; + /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); @@ -15275,6 +15279,7 @@ bail_free_rcverr: bail_free_cntrs: free_cntrs(dd); bail_clear_intr: + hfi1_comp_vectors_clean_up(dd); hfi1_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 9cd758ce7764..dd84238c1aac 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1263,6 +1263,9 @@ struct hfi1_devdata { /* Save the enabled LCB error bits */ u64 lcb_err_en; + struct cpu_mask_set *comp_vect; + int *comp_vect_mappings; + u32 comp_vect_possible_cpus; /* * Capability to have different send engines simply by changing a diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 790542ce89a5..5d1adfc450d3 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -1244,6 +1244,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) dd->rcv_limit = NULL; dd->send_schedule = NULL; dd->tx_opstats = NULL; + kfree(dd->comp_vect); + dd->comp_vect = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1300,6 +1302,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->unit = ret; list_add(&dd->list, &hfi1_dev_list); } + dd->node = -1; spin_unlock_irqrestore(&hfi1_devs_lock, flags); idr_preload_end(); @@ -1352,6 +1355,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); + if (!dd->comp_vect) { + ret = -ENOMEM; + goto bail; + } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; @@ -1521,7 +1530,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); - node_affinity_destroy(); + node_affinity_destroy_all(); hfi1_wss_exit(); hfi1_dbg_exit(); @@ -1605,6 +1614,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd) static void postinit_cleanup(struct hfi1_devdata *dd) { hfi1_start_cleanup(dd); + hfi1_comp_vectors_clean_up(dd); + hfi1_dev_affinity_clean_up(dd); hfi1_pcie_ddcleanup(dd); hfi1_pcie_cleanup(dd->pcidev); diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 89bd9851065b..332b9b7c554a 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -374,6 +374,7 @@ const char *print_u32_array( return ret; } +__hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 0e7d929530c5..e62171fb7379 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all * the debugfs stuff. */ +__hfi1_trace_def(AFFINITY); __hfi1_trace_def(PKT); __hfi1_trace_def(PROC); __hfi1_trace_def(SDMA); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 9554e912af98..fc2e44cde161 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -64,6 +64,7 @@ #include "debugfs.h" #include "vnic.h" #include "fault.h" +#include "affinity.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -1934,11 +1935,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; + dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = + hfi1_comp_vect_mappings_lookup; /* completeion queue */ - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "hfi1_cq%d", dd->unit); + dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; dd->verbs_dev.rdi.dparms.node = dd->node; /* misc settings */ |