summaryrefslogtreecommitdiff
path: root/drivers/nvme
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-24 16:02:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-24 16:02:14 -0700
commit7fbc78e3155a0c464bd832efc07fb3c2355fe9bd (patch)
tree729b33cb5762e890fa228e2a618be2a3d459ce0d /drivers/nvme
parent7f8b40e3dbcd7dbeabe6be8f157376ef0b890e06 (diff)
parent096c7a6d90082586ff265d99e8e4a052dee3a403 (diff)
Merge tag 'for-linus-20190524' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - NVMe pull request from Keith, with fixes from a few folks. - bio and sbitmap before atomic barrier fixes (Andrea) - Hang fix for blk-mq freeze and unfreeze (Bob) - Single segment count regression fix (Christoph) - AoE now has a new maintainer - tools/io_uring/ Makefile fix, and sync with liburing (me) * tag 'for-linus-20190524' of git://git.kernel.dk/linux-block: (23 commits) tools/io_uring: sync with liburing tools/io_uring: fix Makefile for pthread library link blk-mq: fix hang caused by freeze/unfreeze sequence block: remove the bi_seg_{front,back}_size fields in struct bio block: remove the segment size check in bio_will_gap block: force an unlimited segment size on queues with a virt boundary block: don't decrement nr_phys_segments for physically contigous segments sbitmap: fix improper use of smp_mb__before_atomic() bio: fix improper use of smp_mb__before_atomic() aoe: list new maintainer for aoe driver nvme-pci: use blk-mq mapping for unmanaged irqs nvme: update MAINTAINERS nvme: copy MTFA field from identify controller nvme: fix memory leak for power latency tolerance nvme: release namespace SRCU protection before performing controller ioctls nvme: merge nvme_ns_ioctl into nvme_ioctl nvme: remove the ifdef around nvme_nvm_ioctl nvme: fix srcu locking on error return in nvme_get_ns_from_disk nvme: Fix known effects nvme-pci: Sync queues on reset ...
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/core.c89
-rw-r--r--drivers/nvme/host/nvme.h1
-rw-r--r--drivers/nvme/host/pci.c27
3 files changed, 76 insertions, 41 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7da80f375315..1b7c2afd84cb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1257,9 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return 0;
}
- effects |= nvme_known_admin_effects(opcode);
if (ctrl->effects)
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+ effects |= nvme_known_admin_effects(opcode);
/*
* For simplicity, IO to all namespaces is quiesced even if the command
@@ -1361,9 +1361,14 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
{
#ifdef CONFIG_NVME_MULTIPATH
if (disk->fops == &nvme_ns_head_ops) {
+ struct nvme_ns *ns;
+
*head = disk->private_data;
*srcu_idx = srcu_read_lock(&(*head)->srcu);
- return nvme_find_path(*head);
+ ns = nvme_find_path(*head);
+ if (!ns)
+ srcu_read_unlock(&(*head)->srcu, *srcu_idx);
+ return ns;
}
#endif
*head = NULL;
@@ -1377,42 +1382,56 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}
-static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned cmd, unsigned long arg)
+static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
{
+ struct nvme_ns_head *head = NULL;
+ void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+
+ ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+ if (unlikely(!ns))
+ return -EWOULDBLOCK;
+
+ /*
+ * Handle ioctls that apply to the controller instead of the namespace
+ * seperately and drop the ns SRCU reference early. This avoids a
+ * deadlock when deleting namespaces using the passthrough interface.
+ */
+ if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
+ struct nvme_ctrl *ctrl = ns->ctrl;
+
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ if (cmd == NVME_IOCTL_ADMIN_CMD)
+ ret = nvme_user_cmd(ctrl, NULL, argp);
+ else
+ ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+
+ nvme_put_ctrl(ctrl);
+ return ret;
+ }
+
switch (cmd) {
case NVME_IOCTL_ID:
force_successful_syscall_return();
- return ns->head->ns_id;
- case NVME_IOCTL_ADMIN_CMD:
- return nvme_user_cmd(ns->ctrl, NULL, (void __user *)arg);
+ ret = ns->head->ns_id;
+ break;
case NVME_IOCTL_IO_CMD:
- return nvme_user_cmd(ns->ctrl, ns, (void __user *)arg);
+ ret = nvme_user_cmd(ns->ctrl, ns, argp);
+ break;
case NVME_IOCTL_SUBMIT_IO:
- return nvme_submit_io(ns, (void __user *)arg);
+ ret = nvme_submit_io(ns, argp);
+ break;
default:
-#ifdef CONFIG_NVM
if (ns->ndev)
- return nvme_nvm_ioctl(ns, cmd, arg);
-#endif
- if (is_sed_ioctl(cmd))
- return sed_ioctl(ns->ctrl->opal_dev, cmd,
- (void __user *) arg);
- return -ENOTTY;
+ ret = nvme_nvm_ioctl(ns, cmd, arg);
+ else
+ ret = -ENOTTY;
}
-}
-
-static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- struct nvme_ns_head *head = NULL;
- struct nvme_ns *ns;
- int srcu_idx, ret;
- ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
- if (unlikely(!ns))
- ret = -EWOULDBLOCK;
- else
- ret = nvme_ns_ioctl(ns, cmd, arg);
nvme_put_ns_from_disk(head, srcu_idx);
return ret;
}
@@ -2557,6 +2576,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oacs = le16_to_cpu(id->oacs);
ctrl->oncs = le16_to_cpu(id->oncs);
+ ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->oaes = le32_to_cpu(id->oaes);
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
@@ -3681,6 +3701,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
+ dev_pm_qos_hide_latency_tolerance(ctrl->device);
cdev_device_del(&ctrl->cdev, ctrl->device);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
@@ -3880,6 +3901,18 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
+
+void nvme_sync_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ down_read(&ctrl->namespaces_rwsem);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ blk_sync_queue(ns->queue);
+ up_read(&ctrl->namespaces_rwsem);
+}
+EXPORT_SYMBOL_GPL(nvme_sync_queues);
+
/*
* Check we didn't inadvertently grow the command structure sizes:
*/
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5ee75b5ff83f..55553d293a98 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -441,6 +441,7 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
+void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2a8708c9ac18..f562154551ce 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -464,7 +464,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
* affinity), so use the regular blk-mq cpu mapping
*/
map->queue_offset = qoff;
- if (i != HCTX_TYPE_POLL)
+ if (i != HCTX_TYPE_POLL && offset)
blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
else
blk_mq_map_queues(map);
@@ -1257,7 +1257,6 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct nvme_dev *dev = nvmeq->dev;
struct request *abort_req;
struct nvme_command cmd;
- bool shutdown = false;
u32 csts = readl(dev->bar + NVME_REG_CSTS);
/* If PCI error recovery process is happening, we cannot reset or
@@ -1294,17 +1293,18 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
* shutdown, so we return BLK_EH_DONE.
*/
switch (dev->ctrl.state) {
- case NVME_CTRL_DELETING:
- shutdown = true;
- /* fall through */
case NVME_CTRL_CONNECTING:
- case NVME_CTRL_RESETTING:
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
+ /* fall through */
+ case NVME_CTRL_DELETING:
dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
- nvme_dev_disable(dev, shutdown);
+ nvme_dev_disable(dev, true);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_DONE;
+ case NVME_CTRL_RESETTING:
+ return BLK_EH_RESET_TIMER;
default:
break;
}
@@ -2376,7 +2376,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- bool dead = true;
+ bool dead = true, freeze = false;
struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
@@ -2384,8 +2384,10 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
u32 csts = readl(dev->bar + NVME_REG_CSTS);
if (dev->ctrl.state == NVME_CTRL_LIVE ||
- dev->ctrl.state == NVME_CTRL_RESETTING)
+ dev->ctrl.state == NVME_CTRL_RESETTING) {
+ freeze = true;
nvme_start_freeze(&dev->ctrl);
+ }
dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
pdev->error_state != pci_channel_io_normal);
}
@@ -2394,10 +2396,8 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
* Give the controller a chance to complete all entered requests if
* doing a safe shutdown.
*/
- if (!dead) {
- if (shutdown)
- nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
- }
+ if (!dead && shutdown && freeze)
+ nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
nvme_stop_queues(&dev->ctrl);
@@ -2492,6 +2492,7 @@ static void nvme_reset_work(struct work_struct *work)
*/
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
+ nvme_sync_queues(&dev->ctrl);
mutex_lock(&dev->shutdown_lock);
result = nvme_pci_enable(dev);