diff options
author | Jens Axboe <axboe@kernel.dk> | 2020-07-08 08:02:13 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-07-08 08:02:13 -0600 |
commit | 482c6b614a4750f71ed9c928bb5b2007a05dd694 (patch) | |
tree | 60f9140b0c24fb4b2b1059afcce8ac77ecc3d3c1 /drivers/nvme | |
parent | 0e6e255e7a58cdf4ee4163f83deeb5ce4946051e (diff) | |
parent | dcb7fd82c75ee2d6e6f9d8cc71c52519ed52e258 (diff) |
Merge tag 'v5.8-rc4' into for-5.9/drivers
Merge in 5.8-rc4 for-5.9/block to setup for-5.9/drivers, to provide
a clean base and making the life for the NVMe changes easier.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
* tag 'v5.8-rc4': (732 commits)
Linux 5.8-rc4
x86/ldt: use "pr_info_once()" instead of open-coding it badly
MIPS: Do not use smp_processor_id() in preemptible code
MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen
.gitignore: Do not track `defconfig` from `make savedefconfig`
io_uring: fix regression with always ignoring signals in io_cqring_wait()
x86/ldt: Disable 16-bit segments on Xen PV
x86/entry/32: Fix #MC and #DB wiring on x86_32
x86/entry/xen: Route #DB correctly on Xen PV
x86/entry, selftests: Further improve user entry sanity checks
x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER
i2c: mlxcpld: check correct size of maximum RECV_LEN packet
i2c: add Kconfig help text for slave mode
i2c: slave-eeprom: update documentation
i2c: eg20t: Load module automatically if ID matches
i2c: designware: platdrv: Set class based on DMI
i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665
mm/page_alloc: fix documentation error
vmalloc: fix the owner argument for the new __vmalloc_node_range callers
mm/cma.c: use exact_nid true to fix possible per-numa cma leak
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 14 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 53 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 2 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 2 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 4 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 4 |
7 files changed, 54 insertions, 31 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5192a024dc1b..cd763f31864b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1116,10 +1116,16 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, dev_warn(ctrl->device, "Identify Descriptors failed (%d)\n", status); /* - * Don't treat an error as fatal, as we potentially already - * have a NGUID or EUI-64. + * Don't treat non-retryable errors as fatal, as we potentially + * already have a NGUID or EUI-64. If we failed with DNR set, + * we want to silently ignore the error as we can still + * identify the device, but if the status has DNR set, we want + * to propagate the error back specifically for the disk + * revalidation flow to make sure we don't abandon the + * device just because of a temporal retry-able error (such + * as path of transport errors). */ - if (status > 0 && !(status & NVME_SC_DNR)) + if (status > 0 && (status & NVME_SC_DNR)) status = 0; goto free_data; } @@ -1974,7 +1980,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); - revalidate_disk(ns->head->disk); } #endif return 0; @@ -4175,6 +4180,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->dev = dev; ctrl->ops = ops; ctrl->quirks = quirks; + ctrl->numa_node = NUMA_NO_NODE; INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a986ac52c4cc..5a37a595411e 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -407,15 +407,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) { struct nvme_ns_head *head = ns->head; - lockdep_assert_held(&ns->head->lock); - if (!head->disk) return; - if (!(head->disk->flags & GENHD_FL_UP)) + if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { int node, srcu_idx; @@ -424,9 +423,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) __nvme_find_path(head, node); srcu_read_unlock(&head->srcu, srcu_idx); } + mutex_unlock(&head->lock); - synchronize_srcu(&ns->head->srcu); - kblockd_schedule_work(&ns->head->requeue_work); + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); } static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, @@ -481,14 +481,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, struct nvme_ns *ns) { - mutex_lock(&ns->head->lock); ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); if (nvme_state_is_live(ns->ana_state)) nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -638,38 +636,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); -static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl, +static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { - struct nvme_ns *ns = data; + struct nvme_ana_group_desc *dst = data; - if (ns->ana_grpid == le32_to_cpu(desc->grpid)) { - nvme_update_ns_ana_state(desc, ns); - return -ENXIO; /* just break out of the loop */ - } + if (desc->grpid != dst->grpid) + return 0; - return 0; + *dst = *desc; + return -ENXIO; /* just break out of the loop */ } void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) { if (nvme_ctrl_use_ana(ns->ctrl)) { + struct nvme_ana_group_desc desc = { + .grpid = id->anagrpid, + .state = 0, + }; + mutex_lock(&ns->ctrl->ana_lock); ns->ana_grpid = le32_to_cpu(id->anagrpid); - nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state); + nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc); mutex_unlock(&ns->ctrl->ana_lock); + if (desc.state) { + /* found the group desc: update */ + nvme_update_ns_ana_state(&desc, ns); + } } else { - mutex_lock(&ns->head->lock); ns->ana_state = NVME_ANA_OPTIMIZED; nvme_mpath_set_live(ns); - mutex_unlock(&ns->head->lock); } if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct backing_dev_info *info = - ns->head->disk->queue->backing_dev_info; + struct gendisk *disk = ns->head->disk; - info->capabilities |= BDI_CAP_STABLE_WRITES; + if (disk) + disk->queue->backing_dev_info->capabilities |= + BDI_CAP_STABLE_WRITES; } } @@ -684,6 +689,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); blk_cleanup_queue(head->disk->queue); + if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + /* + * if device_add_disk wasn't called, prevent + * disk release to put a bogus reference on the + * request queue + */ + head->disk->queue = NULL; + } put_disk(head->disk); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9f2b0e0b4558..b5a2e8b7e0be 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -364,6 +364,8 @@ struct nvme_ns_head { spinlock_t requeue_lock; struct work_struct requeue_work; struct mutex lock; + unsigned long flags; +#define NVME_NSHEAD_DISK_LIVE 0 struct nvme_ns __rcu *current_path[]; #endif }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0b3fedc9c6fe..74a2e2e00794 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1594,7 +1594,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; dev->admin_tagset.timeout = ADMIN_TIMEOUT; - dev->admin_tagset.numa_node = dev_to_node(dev->dev); + dev->admin_tagset.numa_node = dev->ctrl.numa_node; dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.driver_data = dev; @@ -1670,6 +1670,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) if (result) return result; + dev->ctrl.numa_node = dev_to_node(dev->dev); + nvmeq = &dev->queues[0]; aqa = nvmeq->q_depth - 1; aqa |= aqa << 16; @@ -2258,7 +2260,7 @@ static void nvme_dev_add(struct nvme_dev *dev) if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; dev->tagset.timeout = NVME_IO_TIMEOUT; - dev->tagset.numa_node = dev_to_node(dev->dev); + dev->tagset.numa_node = dev->ctrl.numa_node; dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; dev->tagset.cmd_size = sizeof(struct nvme_iod); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6d5c4495f352..e881f879ac63 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -471,7 +471,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) * Spread I/O queues completion vectors according their queue index. * Admin queues can always go on completion vector 0. */ - comp_vector = idx == 0 ? idx : idx - 1; + comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors; /* Polling queues need direct cq polling context */ if (nvme_rdma_poll_queue(queue)) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 157b5b2f1a1f..7006aca89456 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1534,7 +1534,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_tcp_admin_mq_ops; set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; @@ -1546,7 +1546,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->ops = &nvme_tcp_mq_ops; set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ - set->numa_node = NUMA_NO_NODE; + set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 1b9c246a849b..8a0d4fe7bc18 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -341,7 +341,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); ctrl->admin_tag_set.driver_data = ctrl; @@ -513,7 +513,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.ops = &nvme_loop_mq_ops; ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); |