From 6389d43745228de128e7b1a66eb18c0ccf43e6b4 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:04 -0600 Subject: PCI/P2PDMA: Rename upstream_bridge_distance() and rework doc The function upstream_bridge_distance() has evolved such that its name is no longer entirely reflective of what the function does. It not only calculates the distance between two peers but also calculates how the DMA addresses for those two peers should be mapped. Rename it to calc_map_type_and_dist() and rework the documentation to better describe the two pieces of information the function returns. [bhelgaas: tweak comment wording] Link: https://lore.kernel.org/r/20210610160609.28447-2-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 73 +++++++++++++++++++++++++++------------------------- 1 file changed, 38 insertions(+), 35 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 196382630363..a860137fac89 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -354,7 +354,7 @@ static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) } static enum pci_p2pdma_map_type -__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, +__calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, int *dist, bool *acs_redirects, struct seq_buf *acs_list) { struct pci_dev *a = provider, *b = client, *bb; @@ -433,52 +433,55 @@ static unsigned long map_types_idx(struct pci_dev *client) } /* - * Find the distance through the nearest common upstream bridge between - * two PCI devices. + * Calculate the P2PDMA mapping type and distance between two PCI devices. * - * If the two devices are the same device then 0 will be returned. + * If the two devices are the same PCI function, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0. * - * If there are two virtual functions of the same device behind the same - * bridge port then 2 will be returned (one step down to the PCIe switch, - * then one step back to the same device). + * If they are two functions of the same device, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge, + * then one hop back down to another function of the same device). * - * In the case where two devices are connected to the same PCIe switch, the - * value 4 will be returned. This corresponds to the following PCI tree: + * In the case where two devices are connected to the same PCIe switch, + * return a distance of 4. This corresponds to the following PCI tree: * * -+ Root Port * \+ Switch Upstream Port - * +-+ Switch Downstream Port + * +-+ Switch Downstream Port 0 * + \- Device A - * \-+ Switch Downstream Port + * \-+ Switch Downstream Port 1 * \- Device B * - * The distance is 4 because we traverse from Device A through the downstream - * port of the switch, to the common upstream port, back up to the second - * downstream port and then to Device B. + * The distance is 4 because we traverse from Device A to Downstream Port 0 + * to the common Switch Upstream Port, back down to Downstream Port 1 and + * then to Device B. The mapping type returned depends on the ACS + * redirection setting of the ports along the path. * - * Any two devices that cannot communicate using p2pdma will return - * PCI_P2PDMA_MAP_NOT_SUPPORTED. + * If ACS redirect is set on any port in the path, traffic between the + * devices will go through the host bridge, so return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return + * PCI_P2PDMA_MAP_BUS_ADDR. * * Any two devices that have a data path that goes through the host bridge - * will consult a whitelist. If the host bridges are on the whitelist, - * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE. - * - * If either bridge is not on the whitelist this function returns + * will consult a whitelist. If the host bridge is in the whitelist, return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of + * ports per above. If the device is not in the whitelist, return * PCI_P2PDMA_MAP_NOT_SUPPORTED. * - * If a bridge which has any ACS redirection bits set is in the path, - * acs_redirects will be set to true. In this case, a list of all infringing - * bridge addresses will be populated in acs_list (assuming it's non-null) - * for printk purposes. + * If any ACS redirect bits are set, then acs_redirects boolean will be set + * to true and their PCI device names will be appended to the acs_list + * seq_buf. This seq_buf is used to print a warning informing the user how + * to disable ACS using a command line parameter. (See + * calc_map_type_and_dist_warn() below) */ static enum pci_p2pdma_map_type -upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, +calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, int *dist, bool *acs_redirects, struct seq_buf *acs_list) { enum pci_p2pdma_map_type map_type; - map_type = __upstream_bridge_distance(provider, client, dist, - acs_redirects, acs_list); + map_type = __calc_map_type_and_dist(provider, client, dist, + acs_redirects, acs_list); if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { if (!cpu_supports_p2pdma() && @@ -494,8 +497,8 @@ upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, } static enum pci_p2pdma_map_type -upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, - int *dist) +calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, + int *dist) { struct seq_buf acs_list; bool acs_redirects; @@ -505,8 +508,8 @@ upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, if (!acs_list.buffer) return -ENOMEM; - ret = upstream_bridge_distance(provider, client, dist, &acs_redirects, - &acs_list); + ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, + &acs_list); if (acs_redirects) { pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); @@ -565,11 +568,11 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, } if (verbose) - ret = upstream_bridge_distance_warn(provider, - pci_client, &distance); + ret = calc_map_type_and_dist_warn(provider, pci_client, + &distance); else - ret = upstream_bridge_distance(provider, pci_client, - &distance, NULL, NULL); + ret = calc_map_type_and_dist(provider, pci_client, + &distance, NULL, NULL); pci_dev_put(pci_client); -- cgit v1.2.3 From e4ece59abd70d8f54e2163274dc996bb442832a6 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:05 -0600 Subject: PCI/P2PDMA: Collect acs list in stack buffer to avoid sleeping In order to call the calc_map_type_and_dist_warn() function from a dma_map operation, the function must not sleep. The only reason it sleeps is to allocate memory for the seq_buf to print a verbose warning telling the user how to disable ACS for that path. Instead of allocating the memory with kmalloc(), allocate a smaller buffer on the stack. A 128 byte buffer is enough to print 10 PCI device names. A system with 10 bridge ports between two devices that have ACS enabled would be unusually large, so this should still be a reasonable limit. This also cleans up the awkward (and broken) return with -ENOMEM which contradicts the return type and the caller was not prepared for. Link: https://lore.kernel.org/r/20210610160609.28447-3-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index a860137fac89..109bd7321962 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -502,11 +502,10 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, { struct seq_buf acs_list; bool acs_redirects; + char buf[128]; int ret; - seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); - if (!acs_list.buffer) - return -ENOMEM; + seq_buf_init(&acs_list, buf, sizeof(buf)); ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, &acs_list); @@ -524,8 +523,6 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, pci_name(provider)); } - kfree(acs_list.buffer); - return ret; } -- cgit v1.2.3 From f9c125b9eb30650356cf582003365b1ecbd7003b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:06 -0600 Subject: PCI/P2PDMA: Use correct calc_map_type_and_dist() return type Instead of using an int for the return value of this function, use the correct enum pci_p2pdma_map_type. Link: https://lore.kernel.org/r/20210610160609.28447-4-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 109bd7321962..abcdb2a6b1c0 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -546,11 +546,11 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose) { + enum pci_p2pdma_map_type map; bool not_supported = false; struct pci_dev *pci_client; int total_dist = 0; - int distance; - int i, ret; + int i, distance; if (num_clients == 0) return -1; @@ -565,15 +565,15 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, } if (verbose) - ret = calc_map_type_and_dist_warn(provider, pci_client, + map = calc_map_type_and_dist_warn(provider, pci_client, &distance); else - ret = calc_map_type_and_dist(provider, pci_client, + map = calc_map_type_and_dist(provider, pci_client, &distance, NULL, NULL); pci_dev_put(pci_client); - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) + if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED) not_supported = true; if (not_supported && !verbose) -- cgit v1.2.3 From cf201bfe8cdc9ba11c4f312945b908ed24c7b7b5 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:07 -0600 Subject: PCI/P2PDMA: Warn if host bridge not in whitelist If the host bridge is not in the whitelist print a warning in the calc_map_type_and_dist_warn() path detailing the vendor and device IDs that would need to be added to the whitelist. Suggested-by: Don Dutile Link: https://lore.kernel.org/r/20210610160609.28447-5-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index abcdb2a6b1c0..c07c928e3952 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -309,7 +309,7 @@ static const struct pci_p2pdma_whitelist_entry { }; static bool __host_bridge_whitelist(struct pci_host_bridge *host, - bool same_host_bridge) + bool same_host_bridge, bool warn) { struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); const struct pci_p2pdma_whitelist_entry *entry; @@ -331,6 +331,10 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, return true; } + if (warn) + pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n", + vendor, device); + return false; } @@ -338,16 +342,17 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, * If we can't find a common upstream bridge take a look at the root * complex and compare it to a whitelist of known good hardware. */ -static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) +static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, + bool warn) { struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus); struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus); if (host_a == host_b) - return __host_bridge_whitelist(host_a, true); + return __host_bridge_whitelist(host_a, true, warn); - if (__host_bridge_whitelist(host_a, false) && - __host_bridge_whitelist(host_b, false)) + if (__host_bridge_whitelist(host_a, false, warn) && + __host_bridge_whitelist(host_b, false, warn)) return true; return false; @@ -485,7 +490,7 @@ calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { if (!cpu_supports_p2pdma() && - !host_bridge_whitelist(provider, client)) + !host_bridge_whitelist(provider, client, acs_redirects)) map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } -- cgit v1.2.3 From 7e2faa1710c408712185bb6463eaa0ee4776350f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:08 -0600 Subject: PCI/P2PDMA: Refactor pci_p2pdma_map_type() All callers of pci_p2pdma_map_type() have a struct dev_pgmap and a struct device (of the client doing the DMA transfer). Thus move the conversion to struct pci_devs for the provider and client into this function. Link: https://lore.kernel.org/r/20210610160609.28447-6-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index c07c928e3952..0bb6792b828c 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -816,12 +816,20 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); -static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider, - struct pci_dev *client) +static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, + struct device *dev) { + struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; + struct pci_dev *client; + if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; + if (!dev_is_pci(dev)) + return PCI_P2PDMA_MAP_NOT_SUPPORTED; + + client = to_pci_dev(dev); + return xa_to_value(xa_load(&provider->p2pdma->map_types, map_types_idx(client))); } @@ -858,14 +866,8 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, { struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(sg_page(sg)->pgmap); - struct pci_dev *client; - - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return 0; - client = to_pci_dev(dev); - - switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) { + switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) { case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: return dma_map_sg_attrs(dev, sg, nents, dir, attrs); case PCI_P2PDMA_MAP_BUS_ADDR: @@ -889,17 +891,9 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct pci_p2pdma_pagemap *p2p_pgmap = - to_p2p_pgmap(sg_page(sg)->pgmap); enum pci_p2pdma_map_type map_type; - struct pci_dev *client; - - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return; - - client = to_pci_dev(dev); - map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client); + map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev); if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); -- cgit v1.2.3 From 3ec0c3ec2d92c09465534a1ff9c6f9d9506ffef6 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:09 -0600 Subject: PCI/P2PDMA: Avoid pci_get_slot(), which may sleep In order to use upstream_bridge_distance_warn() from a dma_map function, it must not sleep. However, pci_get_slot() takes the pci_bus_sem so it might sleep. In order to avoid this, try to get the host bridge's device from the first element in the device list. It should be impossible for the host bridge's device to go away while references are held on child devices, so the first element should not be able to change and, thus, this should be safe. Introduce a static function called pci_host_bridge_dev() to obtain the host bridge's root device. Link: https://lore.kernel.org/r/20210610160609.28447-7-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 0bb6792b828c..deb097ceaf41 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -308,10 +308,41 @@ static const struct pci_p2pdma_whitelist_entry { {} }; +/* + * This lookup function tries to find the PCI device corresponding to a given + * host bridge. + * + * It assumes the host bridge device is the first PCI device in the + * bus->devices list and that the devfn is 00.0. These assumptions should hold + * for all the devices in the whitelist above. + * + * This function is equivalent to pci_get_slot(host->bus, 0), however it does + * not take the pci_bus_sem lock seeing __host_bridge_whitelist() must not + * sleep. + * + * For this to be safe, the caller should hold a reference to a device on the + * bridge, which should ensure the host_bridge device will not be freed + * or removed from the head of the devices list. + */ +static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host) +{ + struct pci_dev *root; + + root = list_first_entry_or_null(&host->bus->devices, + struct pci_dev, bus_list); + + if (!root) + return NULL; + if (root->devfn != PCI_DEVFN(0, 0)) + return NULL; + + return root; +} + static bool __host_bridge_whitelist(struct pci_host_bridge *host, bool same_host_bridge, bool warn) { - struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); + struct pci_dev *root = pci_host_bridge_dev(host); const struct pci_p2pdma_whitelist_entry *entry; unsigned short vendor, device; @@ -320,7 +351,6 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, vendor = root->vendor; device = root->device; - pci_dev_put(root); for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) { if (vendor != entry->vendor || device != entry->device) -- cgit v1.2.3 From d1b8dc09dd71248f5098792af98caa497ec66d19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 07:53:10 +0200 Subject: PCI/P2PDMA: Simplify distance calculation Merge __calc_map_type_and_dist() and calc_map_type_and_dist_warn() into calc_map_type_and_dist() to simplify the code a bit. This now means we add the devfn strings to the acs_buf unconditionally even if the buffer is not printed, but that is not a lot of overhead and keeps the code much simpler. Link: https://lore.kernel.org/r/20210614055310.3960791-1-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/p2pdma.c | 190 ++++++++++++++++++++------------------------------- 1 file changed, 73 insertions(+), 117 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index deb097ceaf41..ca2574debb2d 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -388,79 +388,6 @@ static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, return false; } -static enum pci_p2pdma_map_type -__calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) -{ - struct pci_dev *a = provider, *b = client, *bb; - int dist_a = 0; - int dist_b = 0; - int acs_cnt = 0; - - if (acs_redirects) - *acs_redirects = false; - - /* - * Note, we don't need to take references to devices returned by - * pci_upstream_bridge() seeing we hold a reference to a child - * device which will already hold a reference to the upstream bridge. - */ - - while (a) { - dist_b = 0; - - if (pci_bridge_has_acs_redir(a)) { - seq_buf_print_bus_devfn(acs_list, a); - acs_cnt++; - } - - bb = b; - - while (bb) { - if (a == bb) - goto check_b_path_acs; - - bb = pci_upstream_bridge(bb); - dist_b++; - } - - a = pci_upstream_bridge(a); - dist_a++; - } - - if (dist) - *dist = dist_a + dist_b; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; - -check_b_path_acs: - bb = b; - - while (bb) { - if (a == bb) - break; - - if (pci_bridge_has_acs_redir(bb)) { - seq_buf_print_bus_devfn(acs_list, bb); - acs_cnt++; - } - - bb = pci_upstream_bridge(bb); - } - - if (dist) - *dist = dist_a + dist_b; - - if (acs_cnt) { - if (acs_redirects) - *acs_redirects = true; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; - } - - return PCI_P2PDMA_MAP_BUS_ADDR; -} - static unsigned long map_types_idx(struct pci_dev *client) { return (pci_domain_nr(client->bus) << 16) | @@ -502,63 +429,96 @@ static unsigned long map_types_idx(struct pci_dev *client) * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of * ports per above. If the device is not in the whitelist, return * PCI_P2PDMA_MAP_NOT_SUPPORTED. - * - * If any ACS redirect bits are set, then acs_redirects boolean will be set - * to true and their PCI device names will be appended to the acs_list - * seq_buf. This seq_buf is used to print a warning informing the user how - * to disable ACS using a command line parameter. (See - * calc_map_type_and_dist_warn() below) */ static enum pci_p2pdma_map_type calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) + int *dist, bool verbose) { - enum pci_p2pdma_map_type map_type; + enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + struct pci_dev *a = provider, *b = client, *bb; + bool acs_redirects = false; + struct seq_buf acs_list; + int acs_cnt = 0; + int dist_a = 0; + int dist_b = 0; + char buf[128]; + + seq_buf_init(&acs_list, buf, sizeof(buf)); + + /* + * Note, we don't need to take references to devices returned by + * pci_upstream_bridge() seeing we hold a reference to a child + * device which will already hold a reference to the upstream bridge. + */ + while (a) { + dist_b = 0; - map_type = __calc_map_type_and_dist(provider, client, dist, - acs_redirects, acs_list); + if (pci_bridge_has_acs_redir(a)) { + seq_buf_print_bus_devfn(&acs_list, a); + acs_cnt++; + } - if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { - if (!cpu_supports_p2pdma() && - !host_bridge_whitelist(provider, client, acs_redirects)) - map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; + bb = b; + + while (bb) { + if (a == bb) + goto check_b_path_acs; + + bb = pci_upstream_bridge(bb); + dist_b++; + } + + a = pci_upstream_bridge(a); + dist_a++; } - if (provider->p2pdma) - xa_store(&provider->p2pdma->map_types, map_types_idx(client), - xa_mk_value(map_type), GFP_KERNEL); + *dist = dist_a + dist_b; + goto map_through_host_bridge; - return map_type; -} +check_b_path_acs: + bb = b; -static enum pci_p2pdma_map_type -calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, - int *dist) -{ - struct seq_buf acs_list; - bool acs_redirects; - char buf[128]; - int ret; + while (bb) { + if (a == bb) + break; - seq_buf_init(&acs_list, buf, sizeof(buf)); + if (pci_bridge_has_acs_redir(bb)) { + seq_buf_print_bus_devfn(&acs_list, bb); + acs_cnt++; + } - ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, - &acs_list); - if (acs_redirects) { + bb = pci_upstream_bridge(bb); + } + + *dist = dist_a + dist_b; + + if (!acs_cnt) { + map_type = PCI_P2PDMA_MAP_BUS_ADDR; + goto done; + } + + if (verbose) { + acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */ pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); - /* Drop final semicolon */ - acs_list.buffer[acs_list.len-1] = 0; pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n", acs_list.buffer); } + acs_redirects = true; - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) { - pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", - pci_name(provider)); +map_through_host_bridge: + if (!cpu_supports_p2pdma() && + !host_bridge_whitelist(provider, client, acs_redirects)) { + if (verbose) + pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", + pci_name(provider)); + map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } - - return ret; +done: + if (provider->p2pdma) + xa_store(&provider->p2pdma->map_types, map_types_idx(client), + xa_mk_value(map_type), GFP_KERNEL); + return map_type; } /** @@ -599,12 +559,8 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; } - if (verbose) - map = calc_map_type_and_dist_warn(provider, pci_client, - &distance); - else - map = calc_map_type_and_dist(provider, pci_client, - &distance, NULL, NULL); + map = calc_map_type_and_dist(provider, pci_client, &distance, + verbose); pci_dev_put(pci_client); -- cgit v1.2.3 From ae21f835a5bda0ef1d00940373445693a764d89e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Jul 2021 16:48:23 -0500 Subject: PCI/P2PDMA: Finish RCU conversion of pdev->p2pdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking at pci_alloc_p2pmem() I found RCU protection was not properly applied there, as pdev->p2pdma was potentially read multiple times. Fix pci_alloc_p2pmem(), add __rcu qualifier to p2pdma field of struct pci_dev, and fix all other accesses to this field with proper RCU verbs. Link: https://lore.kernel.org/r/20210701095621.3129283-1-eric.dumazet@gmail.com Fixes: 1570175abd16 ("PCI/P2PDMA: track pgmap references per resource, not globally") Signed-off-by: Eric Dumazet Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Cc: Dan Williams Cc: Ira Weiny Cc: Greg Kroah-Hartman Cc: "Jérôme Glisse" Cc: "Rafael J. Wysocki" --- drivers/pci/p2pdma.c | 97 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 72 insertions(+), 25 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index ca2574debb2d..69c25e71590a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -48,10 +48,14 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t size = 0; - if (pdev->p2pdma->pool) - size = gen_pool_size(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + size = gen_pool_size(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", size); } @@ -61,10 +65,14 @@ static ssize_t available_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t avail = 0; - if (pdev->p2pdma->pool) - avail = gen_pool_avail(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + avail = gen_pool_avail(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", avail); } @@ -74,9 +82,16 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; + bool published = false; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + published = p2pdma->p2pmem_published; + rcu_read_unlock(); - return scnprintf(buf, PAGE_SIZE, "%d\n", - pdev->p2pdma->p2pmem_published); + return scnprintf(buf, PAGE_SIZE, "%d\n", published); } static DEVICE_ATTR_RO(published); @@ -95,8 +110,9 @@ static const struct attribute_group p2pmem_group = { static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; - struct pci_p2pdma *p2pdma = pdev->p2pdma; + struct pci_p2pdma *p2pdma; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); if (!p2pdma) return; @@ -128,16 +144,14 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (error) goto out_pool_destroy; - pdev->p2pdma = p2p; - error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); if (error) goto out_pool_destroy; + rcu_assign_pointer(pdev->p2pdma, p2p); return 0; out_pool_destroy: - pdev->p2pdma = NULL; gen_pool_destroy(p2p->pool); out: devm_kfree(&pdev->dev, p2p); @@ -159,6 +173,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, { struct pci_p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; + struct pci_p2pdma *p2pdma; void *addr; int error; @@ -200,7 +215,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, goto pgmap_free; } - error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, range_len(&pgmap->range), dev_to_node(&pdev->dev), pgmap->ref); @@ -437,6 +453,7 @@ calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; struct pci_dev *a = provider, *b = client, *bb; bool acs_redirects = false; + struct pci_p2pdma *p2pdma; struct seq_buf acs_list; int acs_cnt = 0; int dist_a = 0; @@ -515,9 +532,12 @@ map_through_host_bridge: map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } done: - if (provider->p2pdma) - xa_store(&provider->p2pdma->map_types, map_types_idx(client), + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + if (p2pdma) + xa_store(&p2pdma->map_types, map_types_idx(client), xa_mk_value(map_type), GFP_KERNEL); + rcu_read_unlock(); return map_type; } @@ -586,7 +606,15 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); */ bool pci_has_p2pmem(struct pci_dev *pdev) { - return pdev->p2pdma && pdev->p2pdma->p2pmem_published; + struct pci_p2pdma *p2pdma; + bool res; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + res = p2pdma && p2pdma->p2pmem_published; + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(pci_has_p2pmem); @@ -666,6 +694,7 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) { void *ret = NULL; struct percpu_ref *ref; + struct pci_p2pdma *p2pdma; /* * Pairs with synchronize_rcu() in pci_p2pdma_release() to @@ -673,16 +702,16 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) * read-lock. */ rcu_read_lock(); - if (unlikely(!pdev->p2pdma)) + p2pdma = rcu_dereference(pdev->p2pdma); + if (unlikely(!p2pdma)) goto out; - ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, - (void **) &ref); + ret = (void *)gen_pool_alloc_owner(p2pdma->pool, size, (void **) &ref); if (!ret) goto out; if (unlikely(!percpu_ref_tryget_live(ref))) { - gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); + gen_pool_free(p2pdma->pool, (unsigned long) ret, size); ret = NULL; goto out; } @@ -701,8 +730,9 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) { struct percpu_ref *ref; + struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); - gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, + gen_pool_free_owner(p2pdma->pool, (uintptr_t)addr, size, (void **) &ref); percpu_ref_put(ref); } @@ -716,9 +746,13 @@ EXPORT_SYMBOL_GPL(pci_free_p2pmem); */ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) { + struct pci_p2pdma *p2pdma; + if (!addr) return 0; - if (!pdev->p2pdma) + + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + if (!p2pdma) return 0; /* @@ -726,7 +760,7 @@ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) * bus address as the physical address. So gen_pool_virt_to_phys() * actually returns the bus address despite the misleading name. */ - return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr); + return gen_pool_virt_to_phys(p2pdma->pool, (unsigned long)addr); } EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus); @@ -797,16 +831,23 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl); */ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) { - if (pdev->p2pdma) - pdev->p2pdma->p2pmem_published = publish; + struct pci_p2pdma *p2pdma; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + p2pdma->p2pmem_published = publish; + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, struct device *dev) { + enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; struct pci_dev *client; + struct pci_p2pdma *p2pdma; if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; @@ -816,8 +857,14 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, client = to_pci_dev(dev); - return xa_to_value(xa_load(&provider->p2pdma->map_types, - map_types_idx(client))); + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + + if (p2pdma) + type = xa_to_value(xa_load(&p2pdma->map_types, + map_types_idx(client))); + rcu_read_unlock(); + return type; } static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, -- cgit v1.2.3