path: root/drivers/nvme
author    Jens Axboe <axboe@kernel.dk>    2020-07-08 08:02:13 -0600
committer Jens Axboe <axboe@kernel.dk>    2020-07-08 08:02:13 -0600
commit    482c6b614a4750f71ed9c928bb5b2007a05dd694 (patch)
tree      60f9140b0c24fb4b2b1059afcce8ac77ecc3d3c1 /drivers/nvme
parent    0e6e255e7a58cdf4ee4163f83deeb5ce4946051e (diff)
parent    dcb7fd82c75ee2d6e6f9d8cc71c52519ed52e258 (diff)
Merge tag 'v5.8-rc4' into for-5.9/drivers
Merge in 5.8-rc4 for-5.9/block to set up for-5.9/drivers, to provide a
clean base and make life easier for the NVMe changes.

Signed-off-by: Jens Axboe <axboe@kernel.dk>

* tag 'v5.8-rc4': (732 commits)
  Linux 5.8-rc4
  x86/ldt: use "pr_info_once()" instead of open-coding it badly
  MIPS: Do not use smp_processor_id() in preemptible code
  MIPS: Add missing EHB in mtc0 -> mfc0 sequence for DSPen
  .gitignore: Do not track `defconfig` from `make savedefconfig`
  io_uring: fix regression with always ignoring signals in io_cqring_wait()
  x86/ldt: Disable 16-bit segments on Xen PV
  x86/entry/32: Fix #MC and #DB wiring on x86_32
  x86/entry/xen: Route #DB correctly on Xen PV
  x86/entry, selftests: Further improve user entry sanity checks
  x86/entry/compat: Clear RAX high bits on Xen PV SYSENTER
  i2c: mlxcpld: check correct size of maximum RECV_LEN packet
  i2c: add Kconfig help text for slave mode
  i2c: slave-eeprom: update documentation
  i2c: eg20t: Load module automatically if ID matches
  i2c: designware: platdrv: Set class based on DMI
  i2c: algo-pca: Add 0x78 as SCL stuck low status for PCA9665
  mm/page_alloc: fix documentation error
  vmalloc: fix the owner argument for the new __vmalloc_node_range callers
  mm/cma.c: use exact_nid true to fix possible per-numa cma leak
  ...
Diffstat (limited to 'drivers/nvme')
-rw-r--r--  drivers/nvme/host/core.c       | 14
-rw-r--r--  drivers/nvme/host/multipath.c  | 53
-rw-r--r--  drivers/nvme/host/nvme.h       |  2
-rw-r--r--  drivers/nvme/host/pci.c        |  6
-rw-r--r--  drivers/nvme/host/rdma.c       |  2
-rw-r--r--  drivers/nvme/host/tcp.c        |  4
-rw-r--r--  drivers/nvme/target/loop.c     |  4
7 files changed, 54 insertions(+), 31 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 5192a024dc1b..cd763f31864b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1116,10 +1116,16 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
dev_warn(ctrl->device,
"Identify Descriptors failed (%d)\n", status);
/*
- * Don't treat an error as fatal, as we potentially already
- * have a NGUID or EUI-64.
+ * Don't treat non-retryable errors as fatal, as we potentially
+ * already have a NGUID or EUI-64. If we failed with DNR set,
+ * we want to silently ignore the error as we can still
+ * identify the device, but if the status does not have DNR
+ * set, we want to propagate the error back specifically for
+ * the disk revalidation flow to make sure we don't abandon
+ * the device just because of a transient retryable error
+ * (such as path or transport errors).
*/
- if (status > 0 && !(status & NVME_SC_DNR))
+ if (status > 0 && (status & NVME_SC_DNR))
status = 0;
goto free_data;
}
@@ -1974,7 +1980,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
- revalidate_disk(ns->head->disk);
}
#endif
return 0;
@@ -4175,6 +4180,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
+ ctrl->numa_node = NUMA_NO_NODE;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index a986ac52c4cc..5a37a595411e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -407,15 +407,14 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
- lockdep_assert_held(&ns->head->lock);
-
if (!head->disk)
return;
- if (!(head->disk->flags & GENHD_FL_UP))
+ if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups);
+ mutex_lock(&head->lock);
if (nvme_path_is_optimized(ns)) {
int node, srcu_idx;
@@ -424,9 +423,10 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
__nvme_find_path(head, node);
srcu_read_unlock(&head->srcu, srcu_idx);
}
+ mutex_unlock(&head->lock);
- synchronize_srcu(&ns->head->srcu);
- kblockd_schedule_work(&ns->head->requeue_work);
+ synchronize_srcu(&head->srcu);
+ kblockd_schedule_work(&head->requeue_work);
}
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
@@ -481,14 +481,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns)
{
- mutex_lock(&ns->head->lock);
ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
}
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
@@ -638,38 +636,45 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
}
DEVICE_ATTR_RO(ana_state);
-static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
struct nvme_ana_group_desc *desc, void *data)
{
- struct nvme_ns *ns = data;
+ struct nvme_ana_group_desc *dst = data;
- if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
- nvme_update_ns_ana_state(desc, ns);
- return -ENXIO; /* just break out of the loop */
- }
+ if (desc->grpid != dst->grpid)
+ return 0;
- return 0;
+ *dst = *desc;
+ return -ENXIO; /* just break out of the loop */
}
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
{
if (nvme_ctrl_use_ana(ns->ctrl)) {
+ struct nvme_ana_group_desc desc = {
+ .grpid = id->anagrpid,
+ .state = 0,
+ };
+
mutex_lock(&ns->ctrl->ana_lock);
ns->ana_grpid = le32_to_cpu(id->anagrpid);
- nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+ nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
mutex_unlock(&ns->ctrl->ana_lock);
+ if (desc.state) {
+ /* found the group desc: update */
+ nvme_update_ns_ana_state(&desc, ns);
+ }
} else {
- mutex_lock(&ns->head->lock);
ns->ana_state = NVME_ANA_OPTIMIZED;
nvme_mpath_set_live(ns);
- mutex_unlock(&ns->head->lock);
}
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
- struct backing_dev_info *info =
- ns->head->disk->queue->backing_dev_info;
+ struct gendisk *disk = ns->head->disk;
- info->capabilities |= BDI_CAP_STABLE_WRITES;
+ if (disk)
+ disk->queue->backing_dev_info->capabilities |=
+ BDI_CAP_STABLE_WRITES;
}
}
@@ -684,6 +689,14 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
kblockd_schedule_work(&head->requeue_work);
flush_work(&head->requeue_work);
blk_cleanup_queue(head->disk->queue);
+ if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
+ /*
+ * if device_add_disk wasn't called, prevent the
+ * disk release from putting a bogus reference on
+ * the request queue
+ */
+ head->disk->queue = NULL;
+ }
put_disk(head->disk);
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9f2b0e0b4558..b5a2e8b7e0be 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -364,6 +364,8 @@ struct nvme_ns_head {
spinlock_t requeue_lock;
struct work_struct requeue_work;
struct mutex lock;
+ unsigned long flags;
+#define NVME_NSHEAD_DISK_LIVE 0
struct nvme_ns __rcu *current_path[];
#endif
};
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0b3fedc9c6fe..74a2e2e00794 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1594,7 +1594,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
dev->admin_tagset.timeout = ADMIN_TIMEOUT;
- dev->admin_tagset.numa_node = dev_to_node(dev->dev);
+ dev->admin_tagset.numa_node = dev->ctrl.numa_node;
dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
dev->admin_tagset.driver_data = dev;
@@ -1670,6 +1670,8 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
if (result)
return result;
+ dev->ctrl.numa_node = dev_to_node(dev->dev);
+
nvmeq = &dev->queues[0];
aqa = nvmeq->q_depth - 1;
aqa |= aqa << 16;
@@ -2258,7 +2260,7 @@ static void nvme_dev_add(struct nvme_dev *dev)
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT;
- dev->tagset.numa_node = dev_to_node(dev->dev);
+ dev->tagset.numa_node = dev->ctrl.numa_node;
dev->tagset.queue_depth =
min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
dev->tagset.cmd_size = sizeof(struct nvme_iod);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 6d5c4495f352..e881f879ac63 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -471,7 +471,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* Spread I/O queues completion vectors according their queue index.
* Admin queues can always go on completion vector 0.
*/
- comp_vector = idx == 0 ? idx : idx - 1;
+ comp_vector = (idx == 0 ? idx : idx - 1) % ibdev->num_comp_vectors;
/* Polling queues need direct cq polling context */
if (nvme_rdma_poll_queue(queue))
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 157b5b2f1a1f..7006aca89456 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1534,7 +1534,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_tcp_admin_mq_ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
@@ -1546,7 +1546,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->ops = &nvme_tcp_mq_ops;
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
- set->numa_node = NUMA_NO_NODE;
+ set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1b9c246a849b..8a0d4fe7bc18 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -341,7 +341,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
- ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
@@ -513,7 +513,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
ctrl->tag_set.ops = &nvme_loop_mq_ops;
ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
ctrl->tag_set.reserved_tags = 1; /* fabric connect */
- ctrl->tag_set.numa_node = NUMA_NO_NODE;
+ ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
NVME_INLINE_SG_CNT * sizeof(struct scatterlist);