diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-07 16:08:41 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-07 16:08:41 -0700 |
commit | 787f74fc5050c77e29a96f480f40421336eed5ac (patch) | |
tree | 44108cccfc18696e23014748df7d9bc4f40cc1e3 /drivers | |
parent | a2b447066cacb6db82a1f69b46d9f894f695badf (diff) | |
parent | 2130c0ba69d69bb21f5c52787f2587db00d13d8a (diff) |
Merge tag 'ntb-5.8' of git://github.com/jonmason/ntb
Pull NTB updates from Jon Mason:
"Intel Icelake NTB support, Intel driver bug fixes, and lots of bug
fixes for ntb tests"
* tag 'ntb-5.8' of git://github.com/jonmason/ntb:
NTB: ntb_test: Fix bug when counting remote files
NTB: perf: Fix race condition when run with ntb_test
NTB: perf: Fix support for hardware that doesn't have port numbers
NTB: perf: Don't require one more memory window than number of peers
NTB: ntb_pingpong: Choose doorbells based on port number
NTB: Fix the default port and peer numbers for legacy drivers
NTB: Revert the change to use the NTB device dev for DMA allocations
NTB: ntb_tool: reading the link file should not end in a NULL byte
ntb_perf: avoid false dma unmap of destination address
ntb_perf: increase sleep time from one milli sec to one sec
ntb_tool: pass correct struct device to dma_alloc_coherent
ntb_perf: pass correct struct device to dma_alloc_coherent
ntb: hw: remove the code that sets the DMA mask
NTB: correct ntb_peer_spad_addr and ntb_peer_spad_read comment typos
ntb: intel: fix static declaration
ntb: intel: add hw workaround for NTB BAR alignment
ntb: intel: Add Icelake (gen4) support for Intel NTB
NTB: Fix static check warning in perf_clear_test
include/ntb: Fix typo in ntb_unregister_device description
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/ntb/core.c | 9 | ||||
-rw-r--r-- | drivers/ntb/hw/amd/ntb_hw_amd.c | 4 | ||||
-rw-r--r-- | drivers/ntb/hw/idt/ntb_hw_idt.c | 6 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/Makefile | 2 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen1.c | 49 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen1.h | 1 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen3.c | 13 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen3.h | 8 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen4.c | 552 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_gen4.h | 100 | ||||
-rw-r--r-- | drivers/ntb/hw/intel/ntb_hw_intel.h | 12 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_perf.c | 49 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_pingpong.c | 14 | ||||
-rw-r--r-- | drivers/ntb/test/ntb_tool.c | 9 |
14 files changed, 747 insertions, 81 deletions
diff --git a/drivers/ntb/core.c b/drivers/ntb/core.c index 2581ab724c34..f8f75a504a58 100644 --- a/drivers/ntb/core.c +++ b/drivers/ntb/core.c @@ -214,10 +214,8 @@ int ntb_default_port_number(struct ntb_dev *ntb) case NTB_TOPO_B2B_DSD: return NTB_PORT_SEC_DSD; default: - break; + return 0; } - - return -EINVAL; } EXPORT_SYMBOL(ntb_default_port_number); @@ -240,10 +238,8 @@ int ntb_default_peer_port_number(struct ntb_dev *ntb, int pidx) case NTB_TOPO_B2B_DSD: return NTB_PORT_PRI_USD; default: - break; + return 0; } - - return -EINVAL; } EXPORT_SYMBOL(ntb_default_peer_port_number); @@ -315,4 +311,3 @@ static void __exit ntb_driver_exit(void) bus_unregister(&ntb_bus); } module_exit(ntb_driver_exit); - diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 9e310e1ad4d0..88e1db65be02 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1191,10 +1191,6 @@ static int amd_ntb_init_pci(struct amd_ntb_dev *ndev, goto err_dma_mask; dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); } - rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev, - dma_get_mask(&pdev->dev)); - if (rc) - goto err_dma_mask; ndev->self_mmio = pci_iomap(pdev, 0, 0); if (!ndev->self_mmio) { diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index edae52384b8a..d54261f50851 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2660,12 +2660,6 @@ static int idt_init_pci(struct idt_ntb_dev *ndev) dev_warn(&pdev->dev, "Cannot set consistent DMA highmem bit mask\n"); } - ret = dma_coerce_mask_and_coherent(&ndev->ntb.dev, - dma_get_mask(&pdev->dev)); - if (ret != 0) { - dev_err(&pdev->dev, "Failed to set NTB device DMA bit mask\n"); - return ret; - } /* * Enable the device advanced error reporting. It's not critical to diff --git a/drivers/ntb/hw/intel/Makefile b/drivers/ntb/hw/intel/Makefile index 60ec8a773eea..f80da0ba15b2 100644 --- a/drivers/ntb/hw/intel/Makefile +++ b/drivers/ntb/hw/intel/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_NTB_INTEL) += ntb_hw_intel.o -ntb_hw_intel-y := ntb_hw_gen1.o ntb_hw_gen3.o +ntb_hw_intel-y := ntb_hw_gen1.o ntb_hw_gen3.o ntb_hw_gen4.o diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index bb57ec239029..423f9b8fbbcf 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -60,6 +60,7 @@ #include "ntb_hw_intel.h" #include "ntb_hw_gen1.h" #include "ntb_hw_gen3.h" +#include "ntb_hw_gen4.h" #define NTB_NAME "ntb_hw_intel" #define NTB_DESC "Intel(R) PCI-E Non-Transparent Bridge Driver" @@ -762,6 +763,8 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, return ndev_ntb_debugfs_read(filp, ubuf, count, offp); else if (pdev_is_gen3(ndev->ntb.pdev)) return ndev_ntb3_debugfs_read(filp, ubuf, count, offp); + else if (pdev_is_gen4(ndev->ntb.pdev)) + return ndev_ntb4_debugfs_read(filp, ubuf, count, offp); return -ENXIO; } @@ -1783,10 +1786,6 @@ static int intel_ntb_init_pci(struct intel_ntb_dev *ndev, struct pci_dev *pdev) goto err_dma_mask; dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n"); } - rc = dma_coerce_mask_and_coherent(&ndev->ntb.dev, - dma_get_mask(&pdev->dev)); - if (rc) - goto err_dma_mask; ndev->self_mmio = pci_iomap(pdev, 0, 0); if (!ndev->self_mmio) { @@ -1858,16 +1857,15 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, int rc, node; node = dev_to_node(&pdev->dev); + ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); + if (!ndev) { + rc = -ENOMEM; + goto err_ndev; + } - if (pdev_is_gen1(pdev)) { - ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); - if (!ndev) { - rc = -ENOMEM; - goto err_ndev; - } - - ndev_init_struct(ndev, pdev); + ndev_init_struct(ndev, pdev); + if (pdev_is_gen1(pdev)) { rc = intel_ntb_init_pci(ndev, pdev); if (rc) goto err_init_pci; @@ -1875,17 +1873,8 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, rc = xeon_init_dev(ndev); if (rc) goto err_init_dev; - } else if (pdev_is_gen3(pdev)) { - ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node); - if (!ndev) { - rc = -ENOMEM; - goto err_ndev; - } - - ndev_init_struct(ndev, pdev); ndev->ntb.ops = &intel_ntb3_ops; - rc = intel_ntb_init_pci(ndev, pdev); if (rc) goto err_init_pci; @@ -1893,7 +1882,15 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, rc = gen3_init_dev(ndev); if (rc) goto err_init_dev; + } else if (pdev_is_gen4(pdev)) { + ndev->ntb.ops = &intel_ntb4_ops; + rc = intel_ntb_init_pci(ndev, pdev); + if (rc) + goto err_init_pci; + rc = gen4_init_dev(ndev); + if (rc) + goto err_init_dev; } else { rc = -EINVAL; goto err_ndev; @@ -1915,7 +1912,7 @@ static int intel_ntb_pci_probe(struct pci_dev *pdev, err_register: ndev_deinit_debugfs(ndev); - if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev)) + if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || pdev_is_gen4(pdev)) xeon_deinit_dev(ndev); err_init_dev: intel_ntb_deinit_pci(ndev); @@ -1931,7 +1928,7 @@ static void intel_ntb_pci_remove(struct pci_dev *pdev) ntb_unregister_device(&ndev->ntb); ndev_deinit_debugfs(ndev); - if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev)) + if (pdev_is_gen1(pdev) || pdev_is_gen3(pdev) || pdev_is_gen4(pdev)) xeon_deinit_dev(ndev); intel_ntb_deinit_pci(ndev); kfree(ndev); @@ -2036,6 +2033,7 @@ static const struct file_operations intel_ntb_debugfs_info = { }; static const struct pci_device_id intel_ntb_pci_tbl[] = { + /* GEN1 */ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, @@ -2051,7 +2049,12 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)}, + + /* GEN3 */ {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SKX)}, + + /* GEN4 */ + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_ICX)}, {0} }; MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.h b/drivers/ntb/hw/intel/ntb_hw_gen1.h index 544cf5c06f4d..1b759942d8af 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.h @@ -140,6 +140,7 @@ #define NTB_HWERR_SB01BASE_LOCKUP BIT_ULL(1) #define NTB_HWERR_B2BDOORBELL_BIT14 BIT_ULL(2) #define NTB_HWERR_MSIX_VECTOR32_BAD BIT_ULL(3) +#define NTB_HWERR_BAR_ALIGN BIT_ULL(4) extern struct intel_b2b_addr xeon_b2b_usd_addr; extern struct intel_b2b_addr xeon_b2b_dsd_addr; diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c index c3397160db7f..ffcfc3e02c35 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c @@ -415,9 +415,8 @@ ssize_t ndev_ntb3_debugfs_read(struct file *filp, char __user *ubuf, return ret; } -static int intel_ntb3_link_enable(struct ntb_dev *ntb, - enum ntb_speed max_speed, - enum ntb_width max_width) +int intel_ntb3_link_enable(struct ntb_dev *ntb, enum ntb_speed max_speed, + enum ntb_width max_width) { struct intel_ntb_dev *ndev; u32 ntb_ctl; @@ -532,7 +531,7 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, return 0; } -static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, +int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, resource_size_t *db_size, u64 *db_data, int db_bit) { @@ -563,7 +562,7 @@ static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, return 0; } -static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits) +int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits) { struct intel_ntb_dev *ndev = ntb_ndev(ntb); int bit; @@ -581,7 +580,7 @@ static int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits) return 0; } -static u64 intel_ntb3_db_read(struct ntb_dev *ntb) +u64 intel_ntb3_db_read(struct ntb_dev *ntb) { struct intel_ntb_dev *ndev = ntb_ndev(ntb); @@ -590,7 +589,7 @@ static u64 intel_ntb3_db_read(struct ntb_dev *ntb) ndev->self_reg->db_clear); } -static int intel_ntb3_db_clear(struct ntb_dev *ntb, u64 db_bits) +int intel_ntb3_db_clear(struct ntb_dev *ntb, u64 db_bits) { struct intel_ntb_dev *ndev = ntb_ndev(ntb); diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.h b/drivers/ntb/hw/intel/ntb_hw_gen3.h index 75fb86ca27bb..2bc5d8356045 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen3.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen3.h @@ -104,6 +104,14 @@ static inline void gen3_db_iowrite(u64 bits, void __iomem *mmio) ssize_t ndev_ntb3_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp); int gen3_init_dev(struct intel_ntb_dev *ndev); +int intel_ntb3_link_enable(struct ntb_dev *ntb, enum ntb_speed max_speed, + enum ntb_width max_width); +u64 intel_ntb3_db_read(struct ntb_dev *ntb); +int intel_ntb3_db_clear(struct ntb_dev *ntb, u64 db_bits); +int intel_ntb3_peer_db_set(struct ntb_dev *ntb, u64 db_bits); +int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr, + resource_size_t *db_size, + u64 *db_data, int db_bit); extern const struct ntb_dev_ops intel_ntb3_ops; diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c new file mode 100644 index 000000000000..bc4541cbf8c6 --- /dev/null +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -0,0 +1,552 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include <linux/debugfs.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/random.h> +#include <linux/slab.h> +#include <linux/ntb.h> +#include <linux/log2.h> + +#include "ntb_hw_intel.h" +#include "ntb_hw_gen1.h" +#include "ntb_hw_gen3.h" +#include "ntb_hw_gen4.h" + +static int gen4_poll_link(struct intel_ntb_dev *ndev); +static int gen4_link_is_up(struct intel_ntb_dev *ndev); + +static const struct intel_ntb_reg gen4_reg = { + .poll_link = gen4_poll_link, + .link_is_up = gen4_link_is_up, + .db_ioread = gen3_db_ioread, + .db_iowrite = gen3_db_iowrite, + .db_size = sizeof(u32), + .ntb_ctl = GEN4_NTBCNTL_OFFSET, + .mw_bar = {2, 4}, +}; + +static const struct intel_ntb_alt_reg gen4_pri_reg = { + .db_clear = GEN4_IM_INT_STATUS_OFFSET, + .db_mask = GEN4_IM_INT_DISABLE_OFFSET, + .spad = GEN4_IM_SPAD_OFFSET, +}; + +static const struct intel_ntb_xlat_reg gen4_sec_xlat = { + .bar2_limit = GEN4_IM23XLMT_OFFSET, + .bar2_xlat = GEN4_IM23XBASE_OFFSET, + .bar2_idx = GEN4_IM23XBASEIDX_OFFSET, +}; + +static const struct intel_ntb_alt_reg gen4_b2b_reg = { + .db_bell = GEN4_IM_DOORBELL_OFFSET, + .spad = GEN4_EM_SPAD_OFFSET, +}; + +static int gen4_poll_link(struct intel_ntb_dev *ndev) +{ + u16 reg_val; + + /* + * We need to write to DLLSCS bit in the SLOTSTS before we + * can clear the hardware link interrupt on ICX NTB. + */ + iowrite16(GEN4_SLOTSTS_DLLSCS, ndev->self_mmio + GEN4_SLOTSTS); + ndev->reg->db_iowrite(ndev->db_link_mask, + ndev->self_mmio + + ndev->self_reg->db_clear); + + reg_val = ioread16(ndev->self_mmio + GEN4_LINK_STATUS_OFFSET); + if (reg_val == ndev->lnk_sta) + return 0; + + ndev->lnk_sta = reg_val; + + return 1; +} + +static int gen4_link_is_up(struct intel_ntb_dev *ndev) +{ + return NTB_LNK_STA_ACTIVE(ndev->lnk_sta); +} + +static int gen4_init_isr(struct intel_ntb_dev *ndev) +{ + int i; + + /* + * The MSIX vectors and the interrupt status bits are not lined up + * on Gen3 (Skylake) and Gen4. By default the link status bit is bit + * 32, however it is by default MSIX vector0. We need to fixup to + * line them up. The vectors at reset is 1-32,0. We need to reprogram + * to 0-32. + */ + for (i = 0; i < GEN4_DB_MSIX_VECTOR_COUNT; i++) + iowrite8(i, ndev->self_mmio + GEN4_INTVEC_OFFSET + i); + + return ndev_init_isr(ndev, GEN4_DB_MSIX_VECTOR_COUNT, + GEN4_DB_MSIX_VECTOR_COUNT, + GEN4_DB_MSIX_VECTOR_SHIFT, + GEN4_DB_TOTAL_SHIFT); +} + +static int gen4_setup_b2b_mw(struct intel_ntb_dev *ndev, + const struct intel_b2b_addr *addr, + const struct intel_b2b_addr *peer_addr) +{ + struct pci_dev *pdev; + void __iomem *mmio; + phys_addr_t bar_addr; + + pdev = ndev->ntb.pdev; + mmio = ndev->self_mmio; + + /* setup incoming bar limits == base addrs (zero length windows) */ + bar_addr = addr->bar2_addr64; + iowrite64(bar_addr, mmio + GEN4_IM23XLMT_OFFSET); + bar_addr = ioread64(mmio + GEN4_IM23XLMT_OFFSET); + dev_dbg(&pdev->dev, "IM23XLMT %#018llx\n", bar_addr); + + bar_addr = addr->bar4_addr64; + iowrite64(bar_addr, mmio + GEN4_IM45XLMT_OFFSET); + bar_addr = ioread64(mmio + GEN4_IM45XLMT_OFFSET); + dev_dbg(&pdev->dev, "IM45XLMT %#018llx\n", bar_addr); + + /* zero incoming translation addrs */ + iowrite64(0, mmio + GEN4_IM23XBASE_OFFSET); + iowrite64(0, mmio + GEN4_IM45XBASE_OFFSET); + + ndev->peer_mmio = ndev->self_mmio; + + return 0; +} + +static int gen4_init_ntb(struct intel_ntb_dev *ndev) +{ + int rc; + + + ndev->mw_count = XEON_MW_COUNT; + ndev->spad_count = GEN4_SPAD_COUNT; + ndev->db_count = GEN4_DB_COUNT; + ndev->db_link_mask = GEN4_DB_LINK_BIT; + + ndev->self_reg = &gen4_pri_reg; + ndev->xlat_reg = &gen4_sec_xlat; + ndev->peer_reg = &gen4_b2b_reg; + + if (ndev->ntb.topo == NTB_TOPO_B2B_USD) + rc = gen4_setup_b2b_mw(ndev, &xeon_b2b_dsd_addr, + &xeon_b2b_usd_addr); + else + rc = gen4_setup_b2b_mw(ndev, &xeon_b2b_usd_addr, + &xeon_b2b_dsd_addr); + if (rc) + return rc; + + ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1; + + ndev->reg->db_iowrite(ndev->db_valid_mask, + ndev->self_mmio + + ndev->self_reg->db_mask); + + return 0; +} + +static enum ntb_topo gen4_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +{ + switch (ppd & GEN4_PPD_TOPO_MASK) { + case GEN4_PPD_TOPO_B2B_USD: + return NTB_TOPO_B2B_USD; + case GEN4_PPD_TOPO_B2B_DSD: + return NTB_TOPO_B2B_DSD; + } + + return NTB_TOPO_NONE; +} + +int gen4_init_dev(struct intel_ntb_dev *ndev) +{ + struct pci_dev *pdev = ndev->ntb.pdev; + u32 ppd1/*, ppd0*/; + u16 lnkctl; + int rc; + + ndev->reg = &gen4_reg; + + if (pdev_is_ICX(pdev)) + ndev->hwerr_flags |= NTB_HWERR_BAR_ALIGN; + + ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); + ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); + dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1, + ntb_topo_string(ndev->ntb.topo)); + if (ndev->ntb.topo == NTB_TOPO_NONE) + return -EINVAL; + + rc = gen4_init_ntb(ndev); + if (rc) + return rc; + + /* init link setup */ + lnkctl = ioread16(ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE; + iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + + return gen4_init_isr(ndev); +} + +ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct intel_ntb_dev *ndev; + void __iomem *mmio; + char *buf; + size_t buf_size; + ssize_t ret, off; + union { u64 v64; u32 v32; u16 v16; } u; + + ndev = filp->private_data; + mmio = ndev->self_mmio; + + buf_size = min(count, 0x800ul); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + off = 0; + + off += scnprintf(buf + off, buf_size - off, + "NTB Device Information:\n"); + + off += scnprintf(buf + off, buf_size - off, + "Connection Topology -\t%s\n", + ntb_topo_string(ndev->ntb.topo)); + + off += scnprintf(buf + off, buf_size - off, + "NTB CTL -\t\t%#06x\n", ndev->ntb_ctl); + off += scnprintf(buf + off, buf_size - off, + "LNK STA (cached) -\t\t%#06x\n", ndev->lnk_sta); + + if (!ndev->reg->link_is_up(ndev)) + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tDown\n"); + else { + off += scnprintf(buf + off, buf_size - off, + "Link Status -\t\tUp\n"); + off += scnprintf(buf + off, buf_size - off, + "Link Speed -\t\tPCI-E Gen %u\n", + NTB_LNK_STA_SPEED(ndev->lnk_sta)); + off += scnprintf(buf + off, buf_size - off, + "Link Width -\t\tx%u\n", + NTB_LNK_STA_WIDTH(ndev->lnk_sta)); + } + + off += scnprintf(buf + off, buf_size - off, + "Memory Window Count -\t%u\n", ndev->mw_count); + off += scnprintf(buf + off, buf_size - off, + "Scratchpad Count -\t%u\n", ndev->spad_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Count -\t%u\n", ndev->db_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Count -\t%u\n", ndev->db_vec_count); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Vector Shift -\t%u\n", ndev->db_vec_shift); + + off += scnprintf(buf + off, buf_size - off, + "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Link Mask -\t%#llx\n", ndev->db_link_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask Cached -\t%#llx\n", ndev->db_mask); + + u.v64 = ndev_db_read(ndev, mmio + ndev->self_reg->db_mask); + off += scnprintf(buf + off, buf_size - off, + "Doorbell Mask -\t\t%#llx\n", u.v64); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Incoming XLAT:\n"); + + u.v64 = ioread64(mmio + GEN4_IM23XBASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "IM23XBASE -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + GEN4_IM45XBASE_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "IM45XBASE -\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + GEN4_IM23XLMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "IM23XLMT -\t\t\t%#018llx\n", u.v64); + + u.v64 = ioread64(mmio + GEN4_IM45XLMT_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "IM45XLMT -\t\t\t%#018llx\n", u.v64); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Statistics:\n"); + + off += scnprintf(buf + off, buf_size - off, + "\nNTB Hardware Errors:\n"); + + if (!pci_read_config_word(ndev->ntb.pdev, + GEN4_DEVSTS_OFFSET, &u.v16)) + off += scnprintf(buf + off, buf_size - off, + "DEVSTS -\t\t%#06x\n", u.v16); + + u.v16 = ioread16(mmio + GEN4_LINK_STATUS_OFFSET); + off += scnprintf(buf + off, buf_size - off, + "LNKSTS -\t\t%#06x\n", u.v16); + + if (!pci_read_config_dword(ndev->ntb.pdev, + GEN4_UNCERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "UNCERRSTS -\t\t%#06x\n", u.v32); + + if (!pci_read_config_dword(ndev->ntb.pdev, + GEN4_CORERRSTS_OFFSET, &u.v32)) + off += scnprintf(buf + off, buf_size - off, + "CORERRSTS -\t\t%#06x\n", u.v32); + + ret = simple_read_from_buffer(ubuf, count, offp, buf, off); + kfree(buf); + return ret; +} + +static int intel_ntb4_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx, + dma_addr_t addr, resource_size_t size) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + unsigned long xlat_reg, limit_reg, idx_reg; + unsigned short base_idx, reg_val16; + resource_size_t bar_size, mw_size; + void __iomem *mmio; + u64 base, limit, reg_val; + int bar; + + if (pidx != NTB_DEF_PEER_IDX) + return -EINVAL; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + bar_size = pci_resource_len(ndev->ntb.pdev, bar); + + if (idx == ndev->b2b_idx) + mw_size = bar_size - ndev->b2b_off; + else + mw_size = bar_size; + + if (ndev->hwerr_flags & NTB_HWERR_BAR_ALIGN) { + /* hardware requires that addr is aligned to bar size */ + if (addr & (bar_size - 1)) + return -EINVAL; + } else { + if (addr & (PAGE_SIZE - 1)) + return -EINVAL; + } + + /* make sure the range fits in the usable mw size */ + if (size > mw_size) + return -EINVAL; + + mmio = ndev->self_mmio; + xlat_reg = ndev->xlat_reg->bar2_xlat + (idx * 0x10); + limit_reg = ndev->xlat_reg->bar2_limit + (idx * 0x10); + base = pci_resource_start(ndev->ntb.pdev, bar); + + /* Set the limit if supported, if size is not mw_size */ + if (limit_reg && size != mw_size) { + limit = base + size; + base_idx = __ilog2_u64(size); + } else { + limit = base + mw_size; + base_idx = __ilog2_u64(mw_size); + } + + + /* set and verify setting the translation address */ + iowrite64(addr, mmio + xlat_reg); + reg_val = ioread64(mmio + xlat_reg); + if (reg_val != addr) { + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + + dev_dbg(&ntb->pdev->dev, "BAR %d IMXBASE: %#Lx\n", bar, reg_val); + + /* set and verify setting the limit */ + iowrite64(limit, mmio + limit_reg); + reg_val = ioread64(mmio + limit_reg); + if (reg_val != limit) { + iowrite64(base, mmio + limit_reg); + iowrite64(0, mmio + xlat_reg); + return -EIO; + } + + dev_dbg(&ntb->pdev->dev, "BAR %d IMXLMT: %#Lx\n", bar, reg_val); + + if (ndev->hwerr_flags & NTB_HWERR_BAR_ALIGN) { + idx_reg = ndev->xlat_reg->bar2_idx + (idx * 0x2); + iowrite16(base_idx, mmio + idx_reg); + reg_val16 = ioread16(mmio + idx_reg); + if (reg_val16 != base_idx) { + iowrite64(base, mmio + limit_reg); + iowrite64(0, mmio + xlat_reg); + iowrite16(0, mmio + idx_reg); + return -EIO; + } + dev_dbg(&ntb->pdev->dev, "BAR %d IMBASEIDX: %#x\n", bar, reg_val16); + } + + + return 0; +} + +static int intel_ntb4_link_enable(struct ntb_dev *ntb, + enum ntb_speed max_speed, enum ntb_width max_width) +{ + struct intel_ntb_dev *ndev; + u32 ntb_ctl, ppd0; + u16 lnkctl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + dev_dbg(&ntb->pdev->dev, + "Enabling link with max_speed %d max_width %d\n", + max_speed, max_width); + + if (max_speed != NTB_SPEED_AUTO) + dev_dbg(&ntb->pdev->dev, + "ignoring max_speed %d\n", max_speed); + if (max_width != NTB_WIDTH_AUTO) + dev_dbg(&ntb->pdev->dev, + "ignoring max_width %d\n", max_width); + + ntb_ctl = NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP; + ntb_ctl |= NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP; + iowrite32(ntb_ctl, ndev->self_mmio + ndev->reg->ntb_ctl); + + lnkctl = ioread16(ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + lnkctl &= ~GEN4_LINK_CTRL_LINK_DISABLE; + iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + + /* start link training in PPD0 */ + ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); + ppd0 |= GEN4_PPD_LINKTRN; + iowrite32(ppd0, ndev->self_mmio + GEN4_PPD0_OFFSET); + + /* make sure link training has started */ + ppd0 = ioread32(ndev->self_mmio + GEN4_PPD0_OFFSET); + if (!(ppd0 & GEN4_PPD_LINKTRN)) { + dev_warn(&ntb->pdev->dev, "Link is not training\n"); + return -ENXIO; + } + + ndev->dev_up = 1; + + return 0; +} + +static int intel_ntb4_link_disable(struct ntb_dev *ntb) +{ + struct intel_ntb_dev *ndev; + u32 ntb_cntl; + u16 lnkctl; + + ndev = container_of(ntb, struct intel_ntb_dev, ntb); + + dev_dbg(&ntb->pdev->dev, "Disabling link\n"); + + /* clear the snoop bits */ + ntb_cntl = ioread32(ndev->self_mmio + ndev->reg->ntb_ctl); + ntb_cntl &= ~(NTB_CTL_E2I_BAR23_SNOOP | NTB_CTL_I2E_BAR23_SNOOP); + ntb_cntl &= ~(NTB_CTL_E2I_BAR45_SNOOP | NTB_CTL_I2E_BAR45_SNOOP); + iowrite32(ntb_cntl, ndev->self_mmio + ndev->reg->ntb_ctl); + + lnkctl = ioread16(ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + lnkctl |= GEN4_LINK_CTRL_LINK_DISABLE; + iowrite16(lnkctl, ndev->self_mmio + GEN4_LINK_CTRL_OFFSET); + + ndev->dev_up = 0; + + return 0; +} + +static int intel_ntb4_mw_get_align(struct ntb_dev *ntb, int pidx, int idx, + resource_size_t *addr_align, + resource_size_t *size_align, + resource_size_t *size_max) +{ + struct intel_ntb_dev *ndev = ntb_ndev(ntb); + resource_size_t bar_size, mw_size; + int bar; + + if (pidx != NTB_DEF_PEER_IDX) + return -EINVAL; + + if (idx >= ndev->b2b_idx && !ndev->b2b_off) + idx += 1; + + bar = ndev_mw_to_bar(ndev, idx); + if (bar < 0) + return bar; + + bar_size = pci_resource_len(ndev->ntb.pdev, bar); + + if (idx == ndev->b2b_idx) + mw_size = bar_size - ndev->b2b_off; + else + mw_size = bar_size; + + if (addr_align) { + if (ndev->hwerr_flags & NTB_HWERR_BAR_ALIGN) + *addr_align = pci_resource_len(ndev->ntb.pdev, bar); + else + *addr_align = PAGE_SIZE; + } + + if (size_align) + *size_align = 1; + + if (size_max) + *size_max = mw_size; + + return 0; +} + +const struct ntb_dev_ops intel_ntb4_ops = { + .mw_count = intel_ntb_mw_count, + .mw_get_align = intel_ntb4_mw_get_align, + .mw_set_trans = intel_ntb4_mw_set_trans, + .peer_mw_count = intel_ntb_peer_mw_count, + .peer_mw_get_addr = intel_ntb_peer_mw_get_addr, + .link_is_up = intel_ntb_link_is_up, + .link_enable = intel_ntb4_link_enable, + .link_disable = intel_ntb4_link_disable, + .db_valid_mask = intel_ntb_db_valid_mask, + .db_vector_count = intel_ntb_db_vector_count, + .db_vector_mask = intel_ntb_db_vector_mask, + .db_read = intel_ntb3_db_read, + .db_clear = intel_ntb3_db_clear, + .db_set_mask = intel_ntb_db_set_mask, + .db_clear_mask = intel_ntb_db_clear_mask, + .peer_db_addr = intel_ntb3_peer_db_addr, + .peer_db_set = intel_ntb3_peer_db_set, + .spad_is_unsafe = intel_ntb_spad_is_unsafe, + .spad_count = intel_ntb_spad_count, + .spad_read = intel_ntb_spad_read, + .spad_write = intel_ntb_spad_write, + .peer_spad_addr = intel_ntb_peer_spad_addr, + .peer_spad_read = intel_ntb_peer_spad_read, + .peer_spad_write = intel_ntb_peer_spad_write, +}; + diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h new file mode 100644 index 000000000000..a868c788de02 --- /dev/null +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#ifndef _NTB_INTEL_GEN4_H_ +#define _NTB_INTEL_GEN4_H_ + +#include "ntb_hw_intel.h" + +/* Supported PCI device revision range for ICX */ +#define PCI_DEVICE_REVISION_ICX_MIN 0x2 +#define PCI_DEVICE_REVISION_ICX_MAX 0xF + +/* Intel Gen4 NTB hardware */ +/* PCIe config space */ +#define GEN4_IMBAR23SZ_OFFSET 0x00c4 +#define GEN4_IMBAR45SZ_OFFSET 0x00c5 +#define GEN4_EMBAR23SZ_OFFSET 0x00c6 +#define GEN4_EMBAR45SZ_OFFSET 0x00c7 +#define GEN4_DEVCTRL_OFFSET 0x0048 +#define GEN4_DEVSTS_OFFSET 0x004a +#define GEN4_UNCERRSTS_OFFSET 0x0104 +#define GEN4_CORERRSTS_OFFSET 0x0110 + +/* BAR0 MMIO */ +#define GEN4_NTBCNTL_OFFSET 0x0000 +#define GEN4_IM23XBASE_OFFSET 0x0010 /* IMBAR1XBASE */ +#define GEN4_IM23XLMT_OFFSET 0x0018 /* IMBAR1XLMT */ +#define GEN4_IM45XBASE_OFFSET 0x0020 /* IMBAR2XBASE */ +#define GEN4_IM45XLMT_OFFSET 0x0028 /* IMBAR2XLMT */ +#define GEN4_IM_INT_STATUS_OFFSET 0x0040 +#define GEN4_IM_INT_DISABLE_OFFSET 0x0048 +#define GEN4_INTVEC_OFFSET 0x0050 /* 0-32 vecs */ +#define GEN4_IM23XBASEIDX_OFFSET 0x0074 +#define GEN4_IM45XBASEIDX_OFFSET 0x0076 +#define GEN4_IM_SPAD_OFFSET 0x0080 /* 0-15 SPADs */ +#define GEN4_IM_SPAD_SEM_OFFSET 0x00c0 /* SPAD hw semaphore */ +#define GEN4_IM_SPAD_STICKY_OFFSET 0x00c4 /* sticky SPAD */ +#define GEN4_IM_DOORBELL_OFFSET 0x0100 /* 0-31 doorbells */ +#define GEN4_EM_SPAD_OFFSET 0x8080 +/* note, link status is now in MMIO and not config space for NTB */ +#define GEN4_LINK_CTRL_OFFSET 0xb050 +#define GEN4_LINK_STATUS_OFFSET 0xb052 +#define GEN4_PPD0_OFFSET 0xb0d4 +#define GEN4_PPD1_OFFSET 0xb4c0 +#define GEN4_LTSSMSTATEJMP 0xf040 + +#define GEN4_PPD_CLEAR_TRN 0x0001 +#define GEN4_PPD_LINKTRN 0x0008 +#define GEN4_PPD_CONN_MASK 0x0300 +#define GEN4_PPD_CONN_B2B 0x0200 +#define GEN4_PPD_DEV_MASK 0x1000 +#define GEN4_PPD_DEV_DSD 0x1000 +#define GEN4_PPD_DEV_USD 0x0000 +#define GEN4_LINK_CTRL_LINK_DISABLE 0x0010 + +#define GEN4_SLOTSTS 0xb05a +#define GEN4_SLOTSTS_DLLSCS 0x100 + +#define GEN4_PPD_TOPO_MASK (GEN4_PPD_CONN_MASK | GEN4_PPD_DEV_MASK) +#define GEN4_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_USD) +#define GEN4_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_DSD) + +#define GEN4_DB_COUNT 32 +#define GEN4_DB_LINK 32 +#define GEN4_DB_LINK_BIT BIT_ULL(GEN4_DB_LINK) +#define GEN4_DB_MSIX_VECTOR_COUNT 33 +#define GEN4_DB_MSIX_VECTOR_SHIFT 1 +#define GEN4_DB_TOTAL_SHIFT 33 +#define GEN4_SPAD_COUNT 16 + +#define NTB_CTL_E2I_BAR23_SNOOP 0x000004 +#define NTB_CTL_E2I_BAR23_NOSNOOP 0x000008 +#define NTB_CTL_I2E_BAR23_SNOOP 0x000010 +#define NTB_CTL_I2E_BAR23_NOSNOOP 0x000020 +#define NTB_CTL_E2I_BAR45_SNOOP 0x000040 +#define NTB_CTL_E2I_BAR45_NOSNOO 0x000080 +#define NTB_CTL_I2E_BAR45_SNOOP 0x000100 +#define NTB_CTL_I2E_BAR45_NOSNOOP 0x000200 +#define NTB_CTL_BUSNO_DIS_INC 0x000400 +#define NTB_CTL_LINK_DOWN 0x010000 + +#define NTB_SJC_FORCEDETECT 0x000004 + +ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp); +int gen4_init_dev(struct intel_ntb_dev *ndev); +ssize_t ndev_ntb4_debugfs_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp); + +extern const struct ntb_dev_ops intel_ntb4_ops; + +static inline int pdev_is_ICX(struct pci_dev *pdev) +{ + if (pdev_is_gen4(pdev) && + pdev->revision >= PCI_DEVICE_REVISION_ICX_MIN && + pdev->revision <= PCI_DEVICE_REVISION_ICX_MAX) + return 1; + return 0; +} + +#endif diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h index e071e28bca3f..d61fcd91714b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.h +++ b/drivers/ntb/hw/intel/ntb_hw_intel.h @@ -72,6 +72,7 @@ #define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E #define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F #define PCI_DEVICE_ID_INTEL_NTB_B2B_SKX 0x201C +#define PCI_DEVICE_ID_INTEL_NTB_B2B_ICX 0x347e /* Ntb control and link status */ #define NTB_CTL_CFG_LOCK BIT(0) @@ -120,6 +121,7 @@ struct intel_ntb_xlat_reg { unsigned long bar0_base; unsigned long bar2_xlat; unsigned long bar2_limit; + unsigned short bar2_idx; }; struct intel_b2b_addr { @@ -182,6 +184,9 @@ struct intel_ntb_dev { struct dentry *debugfs_dir; struct dentry *debugfs_info; + + /* gen4 entries */ + int dev_up; }; #define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb) @@ -219,4 +224,11 @@ static inline int pdev_is_gen3(struct pci_dev *pdev) return 0; } +static inline int pdev_is_gen4(struct pci_dev *pdev) +{ + if (pdev->device == PCI_DEVICE_ID_INTEL_NTB_B2B_ICX) + return 1; + + return 0; +} #endif diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 972f6d984f6d..89df1350fefd 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -101,8 +101,8 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool"); #define DMA_MDELAY 10 #define MSG_TRIES 1000 -#define MSG_UDELAY_LOW 1000 -#define MSG_UDELAY_HIGH 2000 +#define MSG_UDELAY_LOW 1000000 +#define MSG_UDELAY_HIGH 2000000 #define PERF_BUF_LEN 1024 @@ -159,6 +159,8 @@ struct perf_peer { /* NTB connection setup service */ struct work_struct service; unsigned long sts; + + struct completion init_comp; }; #define to_peer_service(__work) \ container_of(__work, struct perf_peer, service) @@ -547,6 +549,7 @@ static int perf_setup_outbuf(struct perf_peer *peer) /* Initialization is finally done */ set_bit(PERF_STS_DONE, &peer->sts); + complete_all(&peer->init_comp); return 0; } @@ -557,7 +560,7 @@ static void perf_free_inbuf(struct perf_peer *peer) return; (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx); - dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size, + dma_free_coherent(&peer->perf->ntb->pdev->dev, peer->inbuf_size, peer->inbuf, peer->inbuf_xlat); peer->inbuf = NULL; } @@ -586,8 +589,9 @@ static int perf_setup_inbuf(struct perf_peer *peer) perf_free_inbuf(peer); - peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size, - &peer->inbuf_xlat, GFP_KERNEL); + peer->inbuf = dma_alloc_coherent(&perf->ntb->pdev->dev, + peer->inbuf_size, &peer->inbuf_xlat, + GFP_KERNEL); if (!peer->inbuf) { dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n", &peer->inbuf_size); @@ -637,6 +641,7 @@ static void perf_service_work(struct work_struct *work) perf_setup_outbuf(peer); if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) { + init_completion(&peer->init_comp); clear_bit(PERF_STS_DONE, &peer->sts); if (test_bit(0, &peer->perf->busy_flag) && peer == peer->perf->test_peer) { @@ -653,7 +658,7 @@ static int perf_init_service(struct perf_ctx *perf) { u64 mask; - if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) { + if (ntb_peer_mw_count(perf->ntb) < perf->pcnt) { dev_err(&perf->ntb->dev, "Not enough memory windows\n"); return -EINVAL; } @@ -803,7 +808,7 @@ static int perf_copy_chunk(struct perf_thread *pthr, dst_vaddr = dst; dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase); - unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT); + unmap = dmaengine_get_unmap_data(dma_dev, 1, GFP_NOWAIT); if (!unmap) return -ENOMEM; @@ -816,15 +821,8 @@ static int perf_copy_chunk(struct perf_thread *pthr, } unmap->to_cnt = 1; - unmap->addr[1] = dst_dma_addr; - if (dma_mapping_error(dma_dev, unmap->addr[1])) { - ret = -EIO; - goto err_free_resource; - } - unmap->from_cnt = 1; - do { - tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1], + tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, dst_dma_addr, unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!tx) msleep(DMA_MDELAY); @@ -1010,8 +1008,8 @@ static void perf_clear_test(struct perf_thread *pthr) pthr->perf->test_peer->dma_dst_addr, pthr->perf->test_peer->outbuf_size, DMA_FROM_DEVICE, 0); - if (pthr->dma_chan) - dma_release_channel(pthr->dma_chan); + + dma_release_channel(pthr->dma_chan); no_dma_notify: atomic_dec(&perf->tsync); @@ -1083,8 +1081,9 @@ static int perf_submit_test(struct perf_peer *peer) struct perf_thread *pthr; int tidx, ret; - if (!test_bit(PERF_STS_DONE, &peer->sts)) - return -ENOLINK; + ret = wait_for_completion_interruptible(&peer->init_comp); + if (ret < 0) + return ret; if (test_and_set_bit_lock(0, &perf->busy_flag)) return -EBUSY; @@ -1455,10 +1454,21 @@ static int perf_init_peers(struct perf_ctx *perf) peer->gidx = pidx; } INIT_WORK(&peer->service, perf_service_work); + init_completion(&peer->init_comp); } if (perf->gidx == -1) perf->gidx = pidx; + /* + * Hardware with only two ports may not have unique port + * numbers. In this case, the gidxs should all be zero. + */ + if (perf->pcnt == 1 && ntb_port_number(perf->ntb) == 0 && + ntb_peer_port_number(perf->ntb, 0) == 0) { + perf->gidx = 0; + perf->peers[0].gidx = 0; + } + for (pidx = 0; pidx < perf->pcnt; pidx++) { ret = perf_setup_peer_mw(&perf->peers[pidx]); if (ret) @@ -1554,4 +1564,3 @@ static void __exit perf_exit(void) destroy_workqueue(perf_wq); } module_exit(perf_exit); - diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index 04dd46647db3..2164e8492772 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -121,15 +121,14 @@ static int pp_find_next_peer(struct pp_ctx *pp) link = ntb_link_is_up(pp->ntb, NULL, NULL); /* Find next available peer */ - if (link & pp->nmask) { + if (link & pp->nmask) pidx = __ffs64(link & pp->nmask); - out_db = BIT_ULL(pidx + 1); - } else if (link & pp->pmask) { + else if (link & pp->pmask) pidx = __ffs64(link & pp->pmask); - out_db = BIT_ULL(pidx); - } else { + else return -ENODEV; - } + + out_db = BIT_ULL(ntb_peer_port_number(pp->ntb, pidx)); spin_lock(&pp->lock); pp->out_pidx = pidx; @@ -303,7 +302,7 @@ static void pp_init_flds(struct pp_ctx *pp) break; } - pp->in_db = BIT_ULL(pidx); + pp->in_db = BIT_ULL(lport); pp->pmask = GENMASK_ULL(pidx, 0) >> 1; pp->nmask = GENMASK_ULL(pcnt - 1, pidx); @@ -432,4 +431,3 @@ static void __exit pp_exit(void) debugfs_remove_recursive(pp_dbgfs_topdir); } module_exit(pp_exit); - diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 69da758fe64c..b7bf3f863d79 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -504,7 +504,7 @@ static ssize_t tool_peer_link_read(struct file *filep, char __user *ubuf, buf[1] = '\n'; buf[2] = '\0'; - return simple_read_from_buffer(ubuf, size, offp, buf, 3); + return simple_read_from_buffer(ubuf, size, offp, buf, 2); } static TOOL_FOPS_RDWR(tool_peer_link_fops, @@ -590,7 +590,7 @@ static int tool_setup_mw(struct tool_ctx *tc, int pidx, int widx, inmw->size = min_t(resource_size_t, req_size, size); inmw->size = round_up(inmw->size, addr_align); inmw->size = round_up(inmw->size, size_align); - inmw->mm_base = dma_alloc_coherent(&tc->ntb->dev, inmw->size, + inmw->mm_base = dma_alloc_coherent(&tc->ntb->pdev->dev, inmw->size, &inmw->dma_base, GFP_KERNEL); if (!inmw->mm_base) return -ENOMEM; @@ -612,7 +612,7 @@ static int tool_setup_mw(struct tool_ctx *tc, int pidx, int widx, return 0; err_free_dma: - dma_free_coherent(&tc->ntb->dev, inmw->size, inmw->mm_base, + dma_free_coherent(&tc->ntb->pdev->dev, inmw->size, inmw->mm_base, inmw->dma_base); inmw->mm_base = NULL; inmw->dma_base = 0; @@ -629,7 +629,7 @@ static void tool_free_mw(struct tool_ctx *tc, int pidx, int widx) if (inmw->mm_base != NULL) { ntb_mw_clear_trans(tc->ntb, pidx, widx); - dma_free_coherent(&tc->ntb->dev, inmw->size, + dma_free_coherent(&tc->ntb->pdev->dev, inmw->size, inmw->mm_base, inmw->dma_base); } @@ -1690,4 +1690,3 @@ static void __exit tool_exit(void) debugfs_remove_recursive(tool_dbgfs_topdir); } module_exit(tool_exit); - |