diff options
-rw-r--r-- | Documentation/PCI/pci-error-recovery.rst | 2 | ||||
-rw-r--r-- | MAINTAINERS | 6 | ||||
-rw-r--r-- | arch/x86/pci/mmconfig-shared.c | 10 | ||||
-rw-r--r-- | drivers/pci/ecam.c | 54 | ||||
-rw-r--r-- | drivers/pci/hotplug/cpci_hotplug.h | 3 | ||||
-rw-r--r-- | drivers/pci/hotplug/cpci_hotplug_pci.c | 2 | ||||
-rw-r--r-- | drivers/pci/hotplug/pciehp_hpc.c | 36 | ||||
-rw-r--r-- | drivers/pci/p2pdma.c | 372 | ||||
-rw-r--r-- | drivers/pci/pci.c | 20 | ||||
-rw-r--r-- | drivers/pci/pci.h | 4 | ||||
-rw-r--r-- | drivers/pci/pcie/aer.c | 4 | ||||
-rw-r--r-- | drivers/pci/pcie/dpc.c | 74 | ||||
-rw-r--r-- | drivers/pci/probe.c | 1 | ||||
-rw-r--r-- | drivers/pci/quirks.c | 76 | ||||
-rw-r--r-- | include/linux/pci-ecam.h | 1 | ||||
-rw-r--r-- | include/linux/pci.h | 2 |
16 files changed, 461 insertions, 206 deletions
diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 84ceebb08cac..187f43a03200 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -295,7 +295,7 @@ and let the driver restart normal I/O processing. A driver can still return a critical failure for this function if it can't get the device operational after reset. If the platform previously tried a soft reset, it might now try a hard reset (power -cycle) and then call slot_reset() again. It the device still can't +cycle) and then call slot_reset() again. If the device still can't be recovered, there is nothing more that can be done; the platform will typically report a "permanent failure" in such a case. The device will be considered "dead" in this case. diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..2f2fb63246c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13942,8 +13942,7 @@ F: Documentation/devicetree/bindings/pci/aardvark-pci.txt F: drivers/pci/controller/pci-aardvark.c PCI DRIVER FOR ALTERA PCIE IP -M: Ley Foon Tan <ley.foon.tan@intel.com> -L: rfi@lists.rocketboards.org (moderated for non-subscribers) +M: Joyce Ooi <joyce.ooi@intel.com> L: linux-pci@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/pci/altera-pcie.txt @@ -14140,8 +14139,7 @@ S: Supported F: Documentation/PCI/pci-error-recovery.rst PCI MSI DRIVER FOR ALTERA MSI IP -M: Ley Foon Tan <ley.foon.tan@intel.com> -L: rfi@lists.rocketboards.org (moderated for non-subscribers) +M: Joyce Ooi <joyce.ooi@intel.com> L: linux-pci@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/pci/altera-pcie-msi.txt diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index de6bf0e7e8f8..758cbfe55daa 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -461,7 +461,7 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, } if (size < (16UL<<20) && size != 
old_size) - return 0; + return false; if (dev) dev_info(dev, "MMCONFIG at %pR reserved in %s\n", @@ -493,7 +493,7 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, &cfg->res, (unsigned long) cfg->address); } - return 1; + return true; } static bool __ref @@ -501,7 +501,7 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e { if (!early && !acpi_disabled) { if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) - return 1; + return true; if (dev) dev_info(dev, FW_INFO @@ -522,14 +522,14 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e * _CBA method, just assume it's reserved. */ if (pci_mmcfg_running_state) - return 1; + return true; /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1); - return 0; + return false; } static void __init pci_mmcfg_reject_broken(int early) diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index d2a1920bb055..1c40d2506aef 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -32,7 +32,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, struct pci_config_window *cfg; unsigned int bus_range, bus_range_max, bsz; struct resource *conflict; - int i, err; + int err; if (busr->start > busr->end) return ERR_PTR(-EINVAL); @@ -50,6 +50,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, cfg->busr.start = busr->start; cfg->busr.end = busr->end; cfg->busr.flags = IORESOURCE_BUS; + cfg->bus_shift = bus_shift; bus_range = resource_size(&cfg->busr); bus_range_max = resource_size(cfgres) >> bus_shift; if (bus_range > bus_range_max) { @@ -77,13 +78,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev, cfg->winp = kcalloc(bus_range, sizeof(*cfg->winp), GFP_KERNEL); if (!cfg->winp) goto err_exit_malloc; - for (i = 0; i < bus_range; i++) { - cfg->winp[i] = - pci_remap_cfgspace(cfgres->start + i * bsz, - bsz); 
- if (!cfg->winp[i]) - goto err_exit_iomap; - } } else { cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); if (!cfg->win) @@ -129,6 +123,44 @@ void pci_ecam_free(struct pci_config_window *cfg) } EXPORT_SYMBOL_GPL(pci_ecam_free); +static int pci_ecam_add_bus(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + unsigned int bsz = 1 << cfg->bus_shift; + unsigned int busn = bus->number; + phys_addr_t start; + + if (!per_bus_mapping) + return 0; + + if (busn < cfg->busr.start || busn > cfg->busr.end) + return -EINVAL; + + busn -= cfg->busr.start; + start = cfg->res.start + busn * bsz; + + cfg->winp[busn] = pci_remap_cfgspace(start, bsz); + if (!cfg->winp[busn]) + return -ENOMEM; + + return 0; +} + +static void pci_ecam_remove_bus(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + unsigned int busn = bus->number; + + if (!per_bus_mapping || busn < cfg->busr.start || busn > cfg->busr.end) + return; + + busn -= cfg->busr.start; + if (cfg->winp[busn]) { + iounmap(cfg->winp[busn]); + cfg->winp[busn] = NULL; + } +} + /* * Function to implement the pci_ops ->map_bus method */ @@ -167,6 +199,8 @@ EXPORT_SYMBOL_GPL(pci_ecam_map_bus); /* ECAM ops */ const struct pci_ecam_ops pci_generic_ecam_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, @@ -178,6 +212,8 @@ EXPORT_SYMBOL_GPL(pci_generic_ecam_ops); /* ECAM ops for 32-bit access only (non-compliant) */ const struct pci_ecam_ops pci_32b_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write32, @@ -187,6 +223,8 @@ const struct pci_ecam_ops pci_32b_ops = { /* ECAM ops for 32-bit read only (non-compliant) */ const struct pci_ecam_ops pci_32b_read_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = 
pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write, diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h index f33ff2bca414..3fdd1b9bd8c3 100644 --- a/drivers/pci/hotplug/cpci_hotplug.h +++ b/drivers/pci/hotplug/cpci_hotplug.h @@ -75,6 +75,9 @@ int cpci_hp_unregister_bus(struct pci_bus *bus); int cpci_hp_start(void); int cpci_hp_stop(void); +/* Global variables */ +extern int cpci_debug; + /* * Internal function prototypes, these functions should not be used by * board/chassis drivers. diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index 2c16adb7f4ec..6c48066acb44 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -19,8 +19,6 @@ #define MY_NAME "cpci_hotplug" -extern int cpci_debug; - #define dbg(format, arg...) \ do { \ if (cpci_debug) \ diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index fb3840e222ad..9d06939736c0 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -563,6 +563,32 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } +static void pciehp_ignore_dpc_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq) +{ + /* + * Ignore link changes which occurred while waiting for DPC recovery. + * Could be several if DPC triggered multiple times consecutively. + */ + synchronize_hardirq(irq); + atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + if (pciehp_poll_mode) + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_DLLSC); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", + slot_name(ctrl)); + + /* + * If the link is unexpectedly down after successful recovery, + * the corresponding link change may have been ignored above. + * Synthesize it to ensure that it is acted on. 
+ */ + down_read(&ctrl->reset_lock); + if (!pciehp_check_link_active(ctrl)) + pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + up_read(&ctrl->reset_lock); +} + static irqreturn_t pciehp_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; @@ -707,6 +733,16 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* + * Ignore Link Down/Up events caused by Downstream Port Containment + * if recovery from the error succeeded. + */ + if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + ctrl->state == ON_STATE) { + events &= ~PCI_EXP_SLTSTA_DLLSC; + pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + } + + /* * Disable requests have higher priority than Presence Detect Changed * or Data Link Layer State Changed events. */ diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 196382630363..69c25e71590a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -48,10 +48,14 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t size = 0; - if (pdev->p2pdma->pool) - size = gen_pool_size(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + size = gen_pool_size(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", size); } @@ -61,10 +65,14 @@ static ssize_t available_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t avail = 0; - if (pdev->p2pdma->pool) - avail = gen_pool_avail(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + avail = gen_pool_avail(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", avail); } @@ -74,9 +82,16 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = 
to_pci_dev(dev); + struct pci_p2pdma *p2pdma; + bool published = false; - return scnprintf(buf, PAGE_SIZE, "%d\n", - pdev->p2pdma->p2pmem_published); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + published = p2pdma->p2pmem_published; + rcu_read_unlock(); + + return scnprintf(buf, PAGE_SIZE, "%d\n", published); } static DEVICE_ATTR_RO(published); @@ -95,8 +110,9 @@ static const struct attribute_group p2pmem_group = { static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; - struct pci_p2pdma *p2pdma = pdev->p2pdma; + struct pci_p2pdma *p2pdma; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); if (!p2pdma) return; @@ -128,16 +144,14 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (error) goto out_pool_destroy; - pdev->p2pdma = p2p; - error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); if (error) goto out_pool_destroy; + rcu_assign_pointer(pdev->p2pdma, p2p); return 0; out_pool_destroy: - pdev->p2pdma = NULL; gen_pool_destroy(p2p->pool); out: devm_kfree(&pdev->dev, p2p); @@ -159,6 +173,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, { struct pci_p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; + struct pci_p2pdma *p2pdma; void *addr; int error; @@ -200,7 +215,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, goto pgmap_free; } - error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, range_len(&pgmap->range), dev_to_node(&pdev->dev), pgmap->ref); @@ -308,10 +324,41 @@ static const struct pci_p2pdma_whitelist_entry { {} }; +/* + * This lookup function tries to find the PCI device corresponding to a given + * host bridge. + * + * It assumes the host bridge device is the first PCI device in the + * bus->devices list and that the devfn is 00.0. 
These assumptions should hold + * for all the devices in the whitelist above. + * + * This function is equivalent to pci_get_slot(host->bus, 0), however it does + * not take the pci_bus_sem lock seeing __host_bridge_whitelist() must not + * sleep. + * + * For this to be safe, the caller should hold a reference to a device on the + * bridge, which should ensure the host_bridge device will not be freed + * or removed from the head of the devices list. + */ +static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host) +{ + struct pci_dev *root; + + root = list_first_entry_or_null(&host->bus->devices, + struct pci_dev, bus_list); + + if (!root) + return NULL; + if (root->devfn != PCI_DEVFN(0, 0)) + return NULL; + + return root; +} + static bool __host_bridge_whitelist(struct pci_host_bridge *host, - bool same_host_bridge) + bool same_host_bridge, bool warn) { - struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); + struct pci_dev *root = pci_host_bridge_dev(host); const struct pci_p2pdma_whitelist_entry *entry; unsigned short vendor, device; @@ -320,7 +367,6 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, vendor = root->vendor; device = root->device; - pci_dev_put(root); for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) { if (vendor != entry->vendor || device != entry->device) @@ -331,6 +377,10 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, return true; } + if (warn) + pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n", + vendor, device); + return false; } @@ -338,44 +388,90 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, * If we can't find a common upstream bridge take a look at the root * complex and compare it to a whitelist of known good hardware. 
*/ -static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) +static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, + bool warn) { struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus); struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus); if (host_a == host_b) - return __host_bridge_whitelist(host_a, true); + return __host_bridge_whitelist(host_a, true, warn); - if (__host_bridge_whitelist(host_a, false) && - __host_bridge_whitelist(host_b, false)) + if (__host_bridge_whitelist(host_a, false, warn) && + __host_bridge_whitelist(host_b, false, warn)) return true; return false; } +static unsigned long map_types_idx(struct pci_dev *client) +{ + return (pci_domain_nr(client->bus) << 16) | + (client->bus->number << 8) | client->devfn; +} + +/* + * Calculate the P2PDMA mapping type and distance between two PCI devices. + * + * If the two devices are the same PCI function, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0. + * + * If they are two functions of the same device, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge, + * then one hop back down to another function of the same device). + * + * In the case where two devices are connected to the same PCIe switch, + * return a distance of 4. This corresponds to the following PCI tree: + * + * -+ Root Port + * \+ Switch Upstream Port + * +-+ Switch Downstream Port 0 + * + \- Device A + * \-+ Switch Downstream Port 1 + * \- Device B + * + * The distance is 4 because we traverse from Device A to Downstream Port 0 + * to the common Switch Upstream Port, back down to Downstream Port 1 and + * then to Device B. The mapping type returned depends on the ACS + * redirection setting of the ports along the path. + * + * If ACS redirect is set on any port in the path, traffic between the + * devices will go through the host bridge, so return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return + * PCI_P2PDMA_MAP_BUS_ADDR. 
+ * + * Any two devices that have a data path that goes through the host bridge + * will consult a whitelist. If the host bridge is in the whitelist, return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of + * ports per above. If the device is not in the whitelist, return + * PCI_P2PDMA_MAP_NOT_SUPPORTED. + */ static enum pci_p2pdma_map_type -__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) +calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, + int *dist, bool verbose) { + enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; struct pci_dev *a = provider, *b = client, *bb; + bool acs_redirects = false; + struct pci_p2pdma *p2pdma; + struct seq_buf acs_list; + int acs_cnt = 0; int dist_a = 0; int dist_b = 0; - int acs_cnt = 0; + char buf[128]; - if (acs_redirects) - *acs_redirects = false; + seq_buf_init(&acs_list, buf, sizeof(buf)); /* * Note, we don't need to take references to devices returned by * pci_upstream_bridge() seeing we hold a reference to a child * device which will already hold a reference to the upstream bridge. 
*/ - while (a) { dist_b = 0; if (pci_bridge_has_acs_redir(a)) { - seq_buf_print_bus_devfn(acs_list, a); + seq_buf_print_bus_devfn(&acs_list, a); acs_cnt++; } @@ -393,10 +489,8 @@ __upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, dist_a++; } - if (dist) - *dist = dist_a + dist_b; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + *dist = dist_a + dist_b; + goto map_through_host_bridge; check_b_path_acs: bb = b; @@ -406,124 +500,45 @@ check_b_path_acs: break; if (pci_bridge_has_acs_redir(bb)) { - seq_buf_print_bus_devfn(acs_list, bb); + seq_buf_print_bus_devfn(&acs_list, bb); acs_cnt++; } bb = pci_upstream_bridge(bb); } - if (dist) - *dist = dist_a + dist_b; - - if (acs_cnt) { - if (acs_redirects) - *acs_redirects = true; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; - } - - return PCI_P2PDMA_MAP_BUS_ADDR; -} - -static unsigned long map_types_idx(struct pci_dev *client) -{ - return (pci_domain_nr(client->bus) << 16) | - (client->bus->number << 8) | client->devfn; -} - -/* - * Find the distance through the nearest common upstream bridge between - * two PCI devices. - * - * If the two devices are the same device then 0 will be returned. - * - * If there are two virtual functions of the same device behind the same - * bridge port then 2 will be returned (one step down to the PCIe switch, - * then one step back to the same device). - * - * In the case where two devices are connected to the same PCIe switch, the - * value 4 will be returned. This corresponds to the following PCI tree: - * - * -+ Root Port - * \+ Switch Upstream Port - * +-+ Switch Downstream Port - * + \- Device A - * \-+ Switch Downstream Port - * \- Device B - * - * The distance is 4 because we traverse from Device A through the downstream - * port of the switch, to the common upstream port, back up to the second - * downstream port and then to Device B. - * - * Any two devices that cannot communicate using p2pdma will return - * PCI_P2PDMA_MAP_NOT_SUPPORTED. 
- * - * Any two devices that have a data path that goes through the host bridge - * will consult a whitelist. If the host bridges are on the whitelist, - * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE. - * - * If either bridge is not on the whitelist this function returns - * PCI_P2PDMA_MAP_NOT_SUPPORTED. - * - * If a bridge which has any ACS redirection bits set is in the path, - * acs_redirects will be set to true. In this case, a list of all infringing - * bridge addresses will be populated in acs_list (assuming it's non-null) - * for printk purposes. - */ -static enum pci_p2pdma_map_type -upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) -{ - enum pci_p2pdma_map_type map_type; - - map_type = __upstream_bridge_distance(provider, client, dist, - acs_redirects, acs_list); + *dist = dist_a + dist_b; - if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { - if (!cpu_supports_p2pdma() && - !host_bridge_whitelist(provider, client)) - map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; + if (!acs_cnt) { + map_type = PCI_P2PDMA_MAP_BUS_ADDR; + goto done; } - if (provider->p2pdma) - xa_store(&provider->p2pdma->map_types, map_types_idx(client), - xa_mk_value(map_type), GFP_KERNEL); - - return map_type; -} - -static enum pci_p2pdma_map_type -upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, - int *dist) -{ - struct seq_buf acs_list; - bool acs_redirects; - int ret; - - seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); - if (!acs_list.buffer) - return -ENOMEM; - - ret = upstream_bridge_distance(provider, client, dist, &acs_redirects, - &acs_list); - if (acs_redirects) { + if (verbose) { + acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */ pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); - /* Drop final semicolon */ - acs_list.buffer[acs_list.len-1] = 0; pci_warn(client, "to disable 
ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n", acs_list.buffer); } + acs_redirects = true; - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) { - pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", - pci_name(provider)); +map_through_host_bridge: + if (!cpu_supports_p2pdma() && + !host_bridge_whitelist(provider, client, acs_redirects)) { + if (verbose) + pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", + pci_name(provider)); + map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } - - kfree(acs_list.buffer); - - return ret; +done: + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + if (p2pdma) + xa_store(&p2pdma->map_types, map_types_idx(client), + xa_mk_value(map_type), GFP_KERNEL); + rcu_read_unlock(); + return map_type; } /** @@ -546,11 +561,11 @@ upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose) { + enum pci_p2pdma_map_type map; bool not_supported = false; struct pci_dev *pci_client; int total_dist = 0; - int distance; - int i, ret; + int i, distance; if (num_clients == 0) return -1; @@ -564,16 +579,12 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; } - if (verbose) - ret = upstream_bridge_distance_warn(provider, - pci_client, &distance); - else - ret = upstream_bridge_distance(provider, pci_client, - &distance, NULL, NULL); + map = calc_map_type_and_dist(provider, pci_client, &distance, + verbose); pci_dev_put(pci_client); - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) + if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED) not_supported = true; if (not_supported && !verbose) @@ -595,7 +606,15 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); */ bool pci_has_p2pmem(struct 
pci_dev *pdev) { - return pdev->p2pdma && pdev->p2pdma->p2pmem_published; + struct pci_p2pdma *p2pdma; + bool res; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + res = p2pdma && p2pdma->p2pmem_published; + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(pci_has_p2pmem); @@ -675,6 +694,7 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) { void *ret = NULL; struct percpu_ref *ref; + struct pci_p2pdma *p2pdma; /* * Pairs with synchronize_rcu() in pci_p2pdma_release() to @@ -682,16 +702,16 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) * read-lock. */ rcu_read_lock(); - if (unlikely(!pdev->p2pdma)) + p2pdma = rcu_dereference(pdev->p2pdma); + if (unlikely(!p2pdma)) goto out; - ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, - (void **) &ref); + ret = (void *)gen_pool_alloc_owner(p2pdma->pool, size, (void **) &ref); if (!ret) goto out; if (unlikely(!percpu_ref_tryget_live(ref))) { - gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); + gen_pool_free(p2pdma->pool, (unsigned long) ret, size); ret = NULL; goto out; } @@ -710,8 +730,9 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) { struct percpu_ref *ref; + struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); - gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, + gen_pool_free_owner(p2pdma->pool, (uintptr_t)addr, size, (void **) &ref); percpu_ref_put(ref); } @@ -725,9 +746,13 @@ EXPORT_SYMBOL_GPL(pci_free_p2pmem); */ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) { + struct pci_p2pdma *p2pdma; + if (!addr) return 0; - if (!pdev->p2pdma) + + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + if (!p2pdma) return 0; /* @@ -735,7 +760,7 @@ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) * bus address as the physical address. So gen_pool_virt_to_phys() * actually returns the bus address despite the misleading name. 
*/ - return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr); + return gen_pool_virt_to_phys(p2pdma->pool, (unsigned long)addr); } EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus); @@ -806,19 +831,40 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl); */ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) { - if (pdev->p2pdma) - pdev->p2pdma->p2pmem_published = publish; + struct pci_p2pdma *p2pdma; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + p2pdma->p2pmem_published = publish; + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); -static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider, - struct pci_dev *client) +static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, + struct device *dev) { + enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; + struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; + struct pci_dev *client; + struct pci_p2pdma *p2pdma; + if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; - return xa_to_value(xa_load(&provider->p2pdma->map_types, - map_types_idx(client))); + if (!dev_is_pci(dev)) + return PCI_P2PDMA_MAP_NOT_SUPPORTED; + + client = to_pci_dev(dev); + + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + + if (p2pdma) + type = xa_to_value(xa_load(&p2pdma->map_types, + map_types_idx(client))); + rcu_read_unlock(); + return type; } static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, @@ -853,14 +899,8 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, { struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(sg_page(sg)->pgmap); - struct pci_dev *client; - - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return 0; - client = to_pci_dev(dev); - - switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) { + switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) { case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: return dma_map_sg_attrs(dev, sg, nents, dir, attrs); case PCI_P2PDMA_MAP_BUS_ADDR: @@ 
-884,17 +924,9 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct pci_p2pdma_pagemap *p2p_pgmap = - to_p2p_pgmap(sg_page(sg)->pgmap); enum pci_p2pdma_map_type map_type; - struct pci_dev *client; - - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return; - - client = to_pci_dev(dev); - map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client); + map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev); if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b717680377a9..452351025a09 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5020,6 +5020,16 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) return pci_reset_hotplug_slot(dev->slot->hotplug, probe); } +static int pci_reset_bus_function(struct pci_dev *dev, int probe) +{ + int rc; + + rc = pci_dev_reset_slot_function(dev, probe); + if (rc != -ENOTTY) + return rc; + return pci_parent_bus_reset(dev, probe); +} + static void pci_dev_lock(struct pci_dev *dev) { pci_cfg_access_lock(dev); @@ -5140,10 +5150,7 @@ int __pci_reset_function_locked(struct pci_dev *dev) rc = pci_pm_reset(dev, 0); if (rc != -ENOTTY) return rc; - rc = pci_dev_reset_slot_function(dev, 0); - if (rc != -ENOTTY) - return rc; - return pci_parent_bus_reset(dev, 0); + return pci_reset_bus_function(dev, 0); } EXPORT_SYMBOL_GPL(__pci_reset_function_locked); @@ -5175,11 +5182,8 @@ int pci_probe_reset_function(struct pci_dev *dev) rc = pci_pm_reset(dev, 1); if (rc != -ENOTTY) return rc; - rc = pci_dev_reset_slot_function(dev, 1); - if (rc != -ENOTTY) - return rc; - return pci_parent_bus_reset(dev, 1); + return pci_reset_bus_function(dev, 1); } /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 37c913bbc6e1..dac6922553b4 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -385,6 
+385,8 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 +#define PCI_DPC_RECOVERED 1 +#define PCI_DPC_RECOVERING 2 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { @@ -439,10 +441,12 @@ void pci_restore_dpc_state(struct pci_dev *dev); void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); +bool pci_dpc_recovered(struct pci_dev *pdev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) {} static inline void pci_restore_dpc_state(struct pci_dev *dev) {} static inline void pci_dpc_init(struct pci_dev *pdev) {} +static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCIEPORTBUS diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ec943cee5ecc..9ae012ef9266 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -983,7 +983,7 @@ static void aer_recover_work_func(struct work_struct *work) pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, entry.devfn); if (!pdev) { - pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", + pr_err("no pci_dev for %04x:%02x:%02x.%x\n", entry.domain, entry.bus, PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; @@ -1022,7 +1022,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, &aer_recover_ring_lock)) schedule_work(&aer_recover_work); else - pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", + pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n", domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); } EXPORT_SYMBOL_GPL(aer_recover_queue); diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e05aba86a317..c556e7beafe3 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -71,6 +71,58 @@ void pci_restore_dpc_state(struct pci_dev *dev) pci_write_config_word(dev, 
dev->dpc_cap + PCI_EXP_DPC_CTL, *cap); } +static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue); + +#ifdef CONFIG_HOTPLUG_PCI_PCIE +static bool dpc_completed(struct pci_dev *pdev) +{ + u16 status; + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status); + if ((status != 0xffff) && (status & PCI_EXP_DPC_STATUS_TRIGGER)) + return false; + + if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags)) + return false; + + return true; +} + +/** + * pci_dpc_recovered - whether DPC triggered and has recovered successfully + * @pdev: PCI device + * + * Return true if DPC was triggered for @pdev and has recovered successfully. + * Wait for recovery if it hasn't completed yet. Called from the PCIe hotplug + * driver to recognize and ignore Link Down/Up events caused by DPC. + */ +bool pci_dpc_recovered(struct pci_dev *pdev) +{ + struct pci_host_bridge *host; + + if (!pdev->dpc_cap) + return false; + + /* + * Synchronization between hotplug and DPC is not supported + * if DPC is owned by firmware and EDR is not enabled. + */ + host = pci_find_host_bridge(pdev->bus); + if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR)) + return false; + + /* + * Need a timeout in case DPC never completes due to failure of + * dpc_wait_rp_inactive(). The spec doesn't mandate a time limit, + * but reports indicate that DPC completes within 4 seconds. + */ + wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev), + msecs_to_jiffies(4000)); + + return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); +} +#endif /* CONFIG_HOTPLUG_PCI_PCIE */ + static int dpc_wait_rp_inactive(struct pci_dev *pdev) { unsigned long timeout = jiffies + HZ; @@ -91,8 +143,11 @@ static int dpc_wait_rp_inactive(struct pci_dev *pdev) pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { + pci_ers_result_t ret; u16 cap; + set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + /* * DPC disables the Link automatically in hardware, so it has * already been reset by the time we get here. 
@@ -106,18 +161,27 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) if (!pcie_wait_for_link(pdev, false)) pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); - if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) - return PCI_ERS_RESULT_DISCONNECT; + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) { + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + goto out; + } pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, PCI_EXP_DPC_STATUS_TRIGGER); if (!pcie_wait_for_link(pdev, true)) { pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n"); - return PCI_ERS_RESULT_DISCONNECT; + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + } else { + set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_RECOVERED; } - - return PCI_ERS_RESULT_RECOVERED; +out: + clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); + return ret; } static void dpc_process_rp_pio_error(struct pci_dev *pdev) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index bd862b612633..72fccb86330f 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2268,6 +2268,7 @@ static void pci_release_dev(struct device *dev) pci_bus_put(pci_dev->bus); kfree(pci_dev->driver_override); bitmap_free(pci_dev->dma_alias_mask); + dev_dbg(dev, "device released\n"); kfree(pci_dev); } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dcb229de1acb..8a4f56eebcef 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -27,6 +27,7 @@ #include <linux/nvme.h> #include <linux/platform_data/x86/apple.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include <linux/switchtec.h> #include <asm/dma.h> /* isa_dma_bridge_buggy */ #include "pci.h" @@ -3634,6 +3635,16 @@ static void quirk_apple_poweroff_thunderbolt(struct pci_dev *dev) return; if (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) return; + + /* + * 
SXIO/SXFP/SXLF turns off power to the Thunderbolt controller. + * We don't know how to turn it back on again, but firmware does, + * so we can only use SXIO/SXFP/SXLF if we're suspending via + * firmware. + */ + if (!pm_suspend_via_firmware()) + return; + bridge = ACPI_HANDLE(&dev->dev); if (!bridge) return; @@ -3901,6 +3912,69 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe) return 0; } +#define PCI_DEVICE_ID_HINIC_VF 0x375E +#define HINIC_VF_FLR_TYPE 0x1000 +#define HINIC_VF_FLR_CAP_BIT (1UL << 30) +#define HINIC_VF_OP 0xE80 +#define HINIC_VF_FLR_PROC_BIT (1UL << 18) +#define HINIC_OPERATION_TIMEOUT 15000 /* 15 seconds */ + +/* Device-specific reset method for Huawei Intelligent NIC virtual functions */ +static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe) +{ + unsigned long timeout; + void __iomem *bar; + u32 val; + + if (probe) + return 0; + + bar = pci_iomap(pdev, 0, 0); + if (!bar) + return -ENOTTY; + + /* Get and check firmware capabilities */ + val = ioread32be(bar + HINIC_VF_FLR_TYPE); + if (!(val & HINIC_VF_FLR_CAP_BIT)) { + pci_iounmap(pdev, bar); + return -ENOTTY; + } + + /* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */ + val = ioread32be(bar + HINIC_VF_OP); + val = val | HINIC_VF_FLR_PROC_BIT; + iowrite32be(val, bar + HINIC_VF_OP); + + pcie_flr(pdev); + + /* + * The device must recapture its Bus and Device Numbers after FLR + * in order to generate Completions. Issue a config write to let the + * device capture this information.
+ */ + pci_write_config_word(pdev, PCI_VENDOR_ID, 0); + + /* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */ + timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT); + do { + val = ioread32be(bar + HINIC_VF_OP); + if (!(val & HINIC_VF_FLR_PROC_BIT)) + goto reset_complete; + msleep(20); + } while (time_before(jiffies, timeout)); + + val = ioread32be(bar + HINIC_VF_OP); + if (!(val & HINIC_VF_FLR_PROC_BIT)) + goto reset_complete; + + pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val); + +reset_complete: + pci_iounmap(pdev, bar); + + return 0; +} + static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF, reset_intel_82599_sfp_virtfn }, @@ -3913,6 +3987,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, + { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, + reset_hinic_vf_dev }, { 0 } }; diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index fbdadd4d8377..adea5a4771cf 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -55,6 +55,7 @@ struct pci_ecam_ops { struct pci_config_window { struct resource res; struct resource busr; + unsigned int bus_shift; void *priv; const struct pci_ecam_ops *ops; union { diff --git a/include/linux/pci.h b/include/linux/pci.h index c20211e59a57..58a39c7239f3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -497,7 +497,7 @@ struct pci_dev { u16 pasid_features; #endif #ifdef CONFIG_PCI_P2PDMA - struct pci_p2pdma *p2pdma; + struct pci_p2pdma __rcu *p2pdma; #endif u16 acs_cap; /* ACS Capability offset */ phys_addr_t rom; /* Physical address if not from BAR */ |