diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 12:49:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-09 12:49:01 -0700 |
commit | 126e76ffbf78d9e948b641aadb265d16c57f5a3d (patch) | |
tree | 656e7838f0ec057936b80e15a774911df05c6005 /drivers/nvme/host | |
parent | fbd01410e89a66f346ba1b3c0161e1198449b746 (diff) | |
parent | 175206cf9ab63161dec74d9cd7f9992e062491f5 (diff) |
Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block
Pull followup block layer updates from Jens Axboe:
"I ended up splitting the main pull request for this series into two,
mainly because of clashes between NVMe fixes that went into 4.13 after
the for-4.14 branches were split off. This pull request is mostly
NVMe, but not exclusively. In detail, it contains:
- Two pull requests for NVMe changes from Christoph. Nothing new on
the feature front, basically just fixes all over the map for the
core bits, transport, rdma, etc.
- Series from Bart, cleaning up various bits in the BFQ scheduler.
- Series of bcache fixes, which has been lingering for a release or
two. Coly sent this in, but the patches come from various people in
this area.
- Set of patches for BFQ from Paolo himself, updating both
documentation and fixing some corner cases in performance.
- Series from Omar, attempting to now get the 4k loop support
correct. Our confidence level is higher this time.
- Series from Shaohua for loop as well, improving O_DIRECT
performance and fixing a use-after-free"
* 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-block: (74 commits)
bcache: initialize dirty stripes in flash_dev_run()
loop: set physical block size to logical block size
bcache: fix bch_hprint crash and improve output
bcache: Update continue_at() documentation
bcache: silence static checker warning
bcache: fix for gc and write-back race
bcache: increase the number of open buckets
bcache: Correct return value for sysfs attach errors
bcache: correct cache_dirty_target in __update_writeback_rate()
bcache: gc does not work when triggering by manual command
bcache: Don't reinvent the wheel but use existing llist API
bcache: do not subtract sectors_to_gc for bypassed IO
bcache: fix sequential large write IO bypass
bcache: Fix leak of bdev reference
block/loop: remove unused field
block/loop: fix use after free
bfq: Use icq_to_bic() consistently
bfq: Suppress compiler warnings about comparisons
bfq: Check kstrtoul() return value
bfq: Declare local functions static
...
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 339 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 23 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 145 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 17 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 9 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 564 |
6 files changed, 654 insertions, 443 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c596dd3c58b1..277a7a02cba5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static __le32 nvme_get_log_dw10(u8 lid, size_t size) +{ + return cpu_to_le32((((size / 4) - 1) << 16) | lid); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -108,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req) case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: + case NVME_SC_ACCESS_DENIED: + case NVME_SC_READ_ONLY: return BLK_STS_MEDIUM; + case NVME_SC_GUARD_CHECK: + case NVME_SC_APPTAG_CHECK: + case NVME_SC_REFTAG_CHECK: + case NVME_SC_INVALID_PI: + return BLK_STS_PROTECTION; + case NVME_SC_RESERVATION_CONFLICT: + return BLK_STS_NEXUS; default: return BLK_STS_IOERR; } @@ -162,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { enum nvme_ctrl_state old_state; + unsigned long flags; bool changed = false; - spin_lock_irq(&ctrl->lock); + spin_lock_irqsave(&ctrl->lock, flags); old_state = ctrl->state; switch (new_state) { @@ -225,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, if (changed) ctrl->state = new_state; - spin_unlock_irq(&ctrl->lock); + spin_unlock_irqrestore(&ctrl->lock, flags); return changed; } @@ -307,7 +322,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) memset(&c, 0, sizeof(c)); c.directive.opcode = nvme_admin_directive_send; - c.directive.nsid = cpu_to_le32(0xffffffff); + c.directive.nsid = cpu_to_le32(NVME_NSID_ALL); c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE; c.directive.dtype = NVME_DIR_IDENTIFY; c.directive.tdtype = NVME_DIR_STREAMS; @@ -357,7 +372,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_get_stream_params(ctrl, &s, 0xffffffff); + 
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) return ret; @@ -585,10 +600,44 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout) +static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, + unsigned len, u32 seed, bool write) +{ + struct bio_integrity_payload *bip; + int ret = -ENOMEM; + void *buf; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + goto out; + + ret = -EFAULT; + if (write && copy_from_user(buf, ubuf, len)) + goto out_free_meta; + + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); + if (IS_ERR(bip)) { + ret = PTR_ERR(bip); + goto out_free_meta; + } + + bip->bip_iter.bi_size = len; + bip->bip_iter.bi_sector = seed; + ret = bio_integrity_add_page(bio, virt_to_page(buf), len, + offset_in_page(buf)); + if (ret == len) + return buf; + ret = -ENOMEM; +out_free_meta: + kfree(buf); +out: + return ERR_PTR(ret); +} + +static int nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -610,46 +659,17 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (ret) goto out; bio = req->bio; - - if (!disk) - goto submit; bio->bi_disk = disk; - - if (meta_buffer && meta_len) { - struct bio_integrity_payload *bip; - - meta = kmalloc(meta_len, GFP_KERNEL); - if (!meta) { - ret = -ENOMEM; + if (disk && meta_buffer && meta_len) { + meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, + meta_seed, write); + if (IS_ERR(meta)) { + ret = PTR_ERR(meta); goto out_unmap; } - - if (write) { - if 
(copy_from_user(meta, meta_buffer, - meta_len)) { - ret = -EFAULT; - goto out_free_meta; - } - } - - bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); - if (IS_ERR(bip)) { - ret = PTR_ERR(bip); - goto out_free_meta; - } - - bip->bip_iter.bi_size = meta_len; - bip->bip_iter.bi_sector = meta_seed; - - ret = bio_integrity_add_page(bio, virt_to_page(meta), - meta_len, offset_in_page(meta)); - if (ret != meta_len) { - ret = -ENOMEM; - goto out_free_meta; - } } } - submit: + blk_execute_rq(req->q, disk, req, 0); if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ret = -EINTR; @@ -661,7 +681,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; } - out_free_meta: kfree(meta); out_unmap: if (bio) @@ -671,14 +690,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return ret; } -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout) -{ - return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, - result, timeout); -} - static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; @@ -768,7 +779,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } -static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) +static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, + u8 *eui64, u8 *nguid, uuid_t *uuid) { struct nvme_command c = { }; int status; @@ -784,7 +796,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) if (!data) return -ENOMEM; - status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data, + status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, NVME_IDENTIFY_DATA_SIZE); if (status) goto free_data; @@ -798,33 +810,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) switch 
(cur->nidt) { case NVME_NIDT_EUI64: if (cur->nidl != NVME_NIDT_EUI64_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n", cur->nidl); goto free_data; } len = NVME_NIDT_EUI64_LEN; - memcpy(ns->eui, data + pos + sizeof(*cur), len); + memcpy(eui64, data + pos + sizeof(*cur), len); break; case NVME_NIDT_NGUID: if (cur->nidl != NVME_NIDT_NGUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_NGUID_LEN; - memcpy(ns->nguid, data + pos + sizeof(*cur), len); + memcpy(nguid, data + pos + sizeof(*cur), len); break; case NVME_NIDT_UUID: if (cur->nidl != NVME_NIDT_UUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_UUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_UUID_LEN; - uuid_copy(&ns->uuid, data + pos + sizeof(*cur)); + uuid_copy(uuid, data + pos + sizeof(*cur)); break; default: /* Skip unnkown types */ @@ -849,9 +861,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } -static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, - struct nvme_id_ns **id) +static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, + unsigned nsid) { + struct nvme_id_ns *id; struct nvme_command c = { }; int error; @@ -860,15 +873,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, c.identify.nsid = cpu_to_le32(nsid); c.identify.cns = NVME_ID_CNS_NS; - *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); - if (!*id) - return -ENOMEM; + id = kmalloc(sizeof(*id), GFP_KERNEL); + if (!id) + return NULL; - error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, - sizeof(struct nvme_id_ns)); - if (error) - kfree(*id); - return error; + error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + if (error) { + 
dev_warn(ctrl->device, "Identify namespace failed\n"); + kfree(id); + return NULL; + } + + return id; } static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, @@ -963,7 +979,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - return __nvme_submit_user_cmd(ns->queue, &c, + return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, metadata, meta_len, io.slba, NULL, 0); } @@ -1001,7 +1017,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - &cmd.result, timeout); + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + 0, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) return -EFAULT; @@ -1159,32 +1176,21 @@ static void nvme_config_discard(struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } -static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) +static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, + struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid) { - if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { - dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__); - return -ENODEV; - } - - if ((*id)->ncap == 0) { - kfree(*id); - return -ENODEV; - } - - if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) - memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); - if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) - memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid)); - if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) { + if (ctrl->vs >= NVME_VS(1, 1, 0)) + memcpy(eui64, id->eui64, sizeof(id->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0)) + memcpy(nguid, id->nguid, sizeof(id->nguid)); + if (ctrl->vs >= NVME_VS(1, 3, 0)) { /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if 
(nvme_identify_ns_descs(ns, ns->ns_id)) - dev_warn(ns->ctrl->device, + if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid)) + dev_warn(ctrl->device, "%s: Identify Descriptors failed\n", __func__); } - - return 0; } static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) @@ -1225,22 +1231,38 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) static int nvme_revalidate_disk(struct gendisk *disk) { struct nvme_ns *ns = disk->private_data; - struct nvme_id_ns *id = NULL; - int ret; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_id_ns *id; + u8 eui64[8] = { 0 }, nguid[16] = { 0 }; + uuid_t uuid = uuid_null; + int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { set_capacity(disk, 0); return -ENODEV; } - ret = nvme_revalidate_ns(ns, &id); - if (ret) - return ret; + id = nvme_identify_ns(ctrl, ns->ns_id); + if (!id) + return -ENODEV; - __nvme_revalidate_disk(disk, id); - kfree(id); + if (id->ncap == 0) { + ret = -ENODEV; + goto out; + } - return 0; + nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid); + if (!uuid_equal(&ns->uuid, &uuid) || + memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) || + memcmp(&ns->eui, &eui64, sizeof(ns->eui))) { + dev_err(ctrl->device, + "identifiers changed for nsid %d\n", ns->ns_id); + ret = -ENODEV; + } + +out: + kfree(id); + return ret; } static char nvme_pr_type(enum pr_type type) @@ -1440,7 +1462,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) ctrl->ctrl_config = NVME_CC_CSS_NVM; ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; - ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; + ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ctrl->ctrl_config |= NVME_CC_ENABLE; @@ -1453,7 +1475,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) { - unsigned long timeout = jiffies + (shutdown_timeout * HZ); + unsigned long timeout = jiffies + 
(ctrl->shutdown_timeout * HZ); u32 csts; int ret; @@ -1502,6 +1524,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } +static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) +{ + __le64 ts; + int ret; + + if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP)) + return 0; + + ts = cpu_to_le64(ktime_to_ms(ktime_get_real())); + ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts), + NULL); + if (ret) + dev_warn_once(ctrl->device, + "could not set timestamp (%d)\n", ret); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -1804,6 +1843,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); + if (id->rtd3e) { + /* us -> s */ + u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000; + + ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time, + shutdown_timeout, 60); + + if (ctrl->shutdown_timeout != shutdown_timeout) + dev_warn(ctrl->device, + "Shutdown timeout set to %u seconds\n", + ctrl->shutdown_timeout); + } else + ctrl->shutdown_timeout = shutdown_timeout; + ctrl->npss = id->npss; ctrl->apsta = id->apsta; prev_apst_enabled = ctrl->apst_enabled; @@ -1856,6 +1909,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; + + ret = nvme_configure_timestamp(ctrl); + if (ret < 0) + return ret; ret = nvme_configure_directives(ctrl); if (ret < 0) @@ -2311,9 +2368,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); - if (nvme_revalidate_ns(ns, &id)) + id = nvme_identify_ns(ctrl, nsid); + if (!id) goto out_free_queue; + if (id->ncap == 0) + goto out_free_id; + + nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); + if (nvme_nvm_ns_supported(ns, id) && nvme_nvm_register(ns, disk_name, node)) { dev_warn(ctrl->device, "%s: LightNVM init failure\n", 
__func__); @@ -2534,6 +2597,71 @@ static void nvme_async_event_work(struct work_struct *work) spin_unlock_irq(&ctrl->lock); } +static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) +{ + + u32 csts; + + if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) + return false; + + if (csts == ~0) + return false; + + return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP)); +} + +static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) +{ + struct nvme_command c = { }; + struct nvme_fw_slot_info_log *log; + + log = kmalloc(sizeof(*log), GFP_KERNEL); + if (!log) + return; + + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(NVME_NSID_ALL); + c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log)); + + if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log))) + dev_warn(ctrl->device, + "Get FW SLOT INFO log error\n"); + kfree(log); +} + +static void nvme_fw_act_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = container_of(work, + struct nvme_ctrl, fw_act_work); + unsigned long fw_act_timeout; + + if (ctrl->mtfa) + fw_act_timeout = jiffies + + msecs_to_jiffies(ctrl->mtfa * 100); + else + fw_act_timeout = jiffies + + msecs_to_jiffies(admin_timeout * 1000); + + nvme_stop_queues(ctrl); + while (nvme_ctrl_pp_status(ctrl)) { + if (time_after(jiffies, fw_act_timeout)) { + dev_warn(ctrl->device, + "Fw activation timeout, reset controller\n"); + nvme_reset_ctrl(ctrl); + break; + } + msleep(100); + } + + if (ctrl->state != NVME_CTRL_LIVE) + return; + + nvme_start_queues(ctrl); + /* read FW slot informationi to clear the AER*/ + nvme_get_fw_slot_info(ctrl); +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res) { @@ -2560,6 +2688,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, dev_info(ctrl->device, "rescanning\n"); nvme_queue_scan(ctrl); break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + schedule_work(&ctrl->fw_act_work); + break; default: 
dev_warn(ctrl->device, "async event result %08x\n", result); } @@ -2607,6 +2738,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); + cancel_work_sync(&ctrl->fw_act_work); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -2670,6 +2802,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->quirks = quirks; INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); + INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); ret = nvme_set_instance(ctrl); if (ret) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5f5cd306f76d..47307752dc65 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -22,7 +22,7 @@ #include "fabrics.h" static LIST_HEAD(nvmf_transports); -static DEFINE_MUTEX(nvmf_transports_mutex); +static DECLARE_RWSEM(nvmf_transports_rwsem); static LIST_HEAD(nvmf_hosts); static DEFINE_MUTEX(nvmf_hosts_mutex); @@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void) kref_init(&host->ref); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); @@ -495,9 +495,9 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops) if (!ops->create_ctrl) return -EINVAL; - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_add_tail(&ops->entry, &nvmf_transports); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); return 0; } @@ -514,9 +514,9 @@ EXPORT_SYMBOL_GPL(nvmf_register_transport); */ void nvmf_unregister_transport(struct nvmf_transport_ops *ops) { - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_del(&ops->entry); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); } 
EXPORT_SYMBOL_GPL(nvmf_unregister_transport); @@ -525,7 +525,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( { struct nvmf_transport_ops *ops; - lockdep_assert_held(&nvmf_transports_mutex); + lockdep_assert_held(&nvmf_transports_rwsem); list_for_each_entry(ops, &nvmf_transports, entry) { if (strcmp(ops->name, opts->transport) == 0) @@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, goto out; } if (uuid_parse(p, &hostid)) { + pr_err("Invalid hostid %s\n", p); ret = -EINVAL; goto out; } @@ -850,7 +851,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_free_opts; opts->mask &= ~NVMF_REQUIRED_OPTS; - mutex_lock(&nvmf_transports_mutex); + down_read(&nvmf_transports_rwsem); ops = nvmf_lookup_transport(opts); if (!ops) { pr_info("no handler found for transport %s.\n", @@ -877,16 +878,16 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subnqn); - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); ctrl->ops->delete_ctrl(ctrl); return ERR_PTR(-EINVAL); } - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); return ctrl; out_unlock: - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); out_free_opts: nvmf_free_options(opts); return ERR_PTR(ret); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1438be649866..d2e882c0f496 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void +nvme_fc_free_lport(struct kref *ref) +{ + struct nvme_fc_lport *lport = + container_of(ref, struct nvme_fc_lport, ref); + unsigned long flags; + + WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&lport->endp_list)); + + /* 
remove from transport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&lport->port_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->localport_delete(&lport->localport); + + ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_destroy(&lport->endp_cnt); + + put_device(lport->dev); + + kfree(lport); +} + +static void +nvme_fc_lport_put(struct nvme_fc_lport *lport) +{ + kref_put(&lport->ref, nvme_fc_free_lport); +} + +static int +nvme_fc_lport_get(struct nvme_fc_lport *lport) +{ + return kref_get_unless_zero(&lport->ref); +} + + +static struct nvme_fc_lport * +nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo) +{ + struct nvme_fc_lport *lport; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + if (lport->localport.node_name != pinfo->node_name || + lport->localport.port_name != pinfo->port_name) + continue; + + if (lport->localport.port_state != FC_OBJSTATE_DELETED) { + lport = ERR_PTR(-EEXIST); + goto out_done; + } + + if (!nvme_fc_lport_get(lport)) { + /* + * fails if ref cnt already 0. If so, + * act as if lport already deleted + */ + lport = NULL; + goto out_done; + } + + /* resume the lport */ + + lport->localport.port_role = pinfo->port_role; + lport->localport.port_id = pinfo->port_id; + lport->localport.port_state = FC_OBJSTATE_ONLINE; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; + } + + lport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; +} /** * nvme_fc_register_localport - transport entry point called by an @@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } + /* + * look to see if there is already a localport that had been + * deregistered and in the process of waiting for all the + * references to fully be removed. 
If the references haven't + * expired, we can simply re-enable the localport. Remoteports + * and controller reconnections should resume naturally. + */ + newrec = nvme_fc_attach_to_unreg_lport(pinfo); + + /* found an lport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_reghost_failed; + + /* found existing lport, which was resumed */ + } else if (newrec) { + *portptr = &newrec->localport; + return 0; + } + + /* nothing found - allocate a new localport struct */ + newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), GFP_KERNEL); if (!newrec) { @@ -310,44 +416,6 @@ out_reghost_failed: } EXPORT_SYMBOL_GPL(nvme_fc_register_localport); -static void -nvme_fc_free_lport(struct kref *ref) -{ - struct nvme_fc_lport *lport = - container_of(ref, struct nvme_fc_lport, ref); - unsigned long flags; - - WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&lport->endp_list)); - - /* remove from transport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&lport->port_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->localport_delete(&lport->localport); - - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); - ida_destroy(&lport->endp_cnt); - - put_device(lport->dev); - - kfree(lport); -} - -static void -nvme_fc_lport_put(struct nvme_fc_lport *lport) -{ - kref_put(&lport->ref, nvme_fc_free_lport); -} - -static int -nvme_fc_lport_get(struct nvme_fc_lport *lport) -{ - return kref_get_unless_zero(&lport->ref); -} - /** * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously @@ -2731,6 +2799,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) goto out_free_queues; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = 
blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8f2a168ddc01..a19a587d60ed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -125,6 +125,7 @@ struct nvme_ctrl { struct kref kref; int instance; struct blk_mq_tag_set *tagset; + struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; struct mutex namespaces_mutex; struct device *device; /* char device */ @@ -142,6 +143,7 @@ struct nvme_ctrl { u16 cntlid; u32 ctrl_config; + u16 mtfa; u32 queue_count; u64 cap; @@ -160,6 +162,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + unsigned int shutdown_timeout; unsigned int kato; bool subsystem; unsigned long quirks; @@ -167,6 +170,7 @@ struct nvme_ctrl { struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + struct work_struct fw_act_work; /* Power saving configuration */ u64 ps_max_latency_us; @@ -207,13 +211,9 @@ struct nvme_ns { bool ext; u8 pi_type; unsigned long flags; - u16 noiob; - #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 - - u64 mode_select_num_blocks; - u32 mode_select_block_len; + u16 noiob; }; struct nvme_ctrl_ops { @@ -314,13 +314,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags); -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout); -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void 
nvme_stop_keep_alive(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ea892e732268..198245faba6b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -556,8 +556,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) int nprps, i; length -= (page_size - offset); - if (length <= 0) + if (length <= 0) { + iod->first_dma = 0; return BLK_STS_OK; + } dma_len -= (page_size - offset); if (dma_len) { @@ -667,7 +669,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) goto out_unmap; - if (rq_data_dir(req)) + if (req_op(req) == REQ_OP_WRITE) nvme_dif_remap(req, nvme_dif_prep); if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) @@ -695,7 +697,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) if (iod->nents) { dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); if (blk_integrity_rq(req)) { - if (!rq_data_dir(req)) + if (req_op(req) == REQ_OP_READ) nvme_dif_remap(req, nvme_dif_complete); dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); } @@ -1377,6 +1379,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->admin_tagset)) return -ENOMEM; + dev->ctrl.admin_tagset = &dev->admin_tagset; dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bf42d31484d4..58983000964b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -37,8 +37,6 @@ #define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ -#define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ - #define NVME_RDMA_MAX_SEGMENTS 256 #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 @@ -152,6 +150,9 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); 
+static const struct blk_mq_ops nvme_rdma_mq_ops; +static const struct blk_mq_ops nvme_rdma_admin_mq_ops; + /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) { @@ -500,7 +501,7 @@ out_put_dev: return ret; } -static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, +static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, int idx, size_t queue_size) { struct nvme_rdma_queue *queue; @@ -558,54 +559,74 @@ out_destroy_cm_id: static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) + return; + rdma_disconnect(queue->cm_id); ib_drain_qp(queue->qp); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) { + if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) + return; + nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } -static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) +static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) { - if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) - return; - nvme_rdma_stop_queue(queue); - nvme_rdma_free_queue(queue); + int i; + + for (i = 1; i < ctrl->ctrl.queue_count; i++) + nvme_rdma_free_queue(&ctrl->queues[i]); } -static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) { int i; for (i = 1; i < ctrl->ctrl.queue_count; i++) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_stop_queue(&ctrl->queues[i]); } -static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) +{ + int ret; + + if (idx) + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); + else + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + + if (!ret) + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); + else + dev_info(ctrl->ctrl.device, + "failed to connect queue: %d ret=%d\n", idx, ret); 
+ return ret; +} + +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) { int i, ret = 0; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to connect i/o queue: %d\n", ret); - goto out_free_queues; - } - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); + ret = nvme_rdma_start_queue(ctrl, i); + if (ret) + goto out_stop_queues; } return 0; -out_free_queues: - nvme_rdma_free_io_queues(ctrl); +out_stop_queues: + for (i--; i >= 1; i--) + nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } -static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; struct ib_device *ibdev = ctrl->device->dev; @@ -634,32 +655,230 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) "creating %d I/O queues.\n", nr_io_queues); for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, - ctrl->ctrl.opts->queue_size); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to initialize i/o queue: %d\n", ret); + ret = nvme_rdma_alloc_queue(ctrl, i, + ctrl->ctrl.sqsize + 1); + if (ret) goto out_free_queues; - } } return 0; out_free_queues: for (i--; i >= 1; i--) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_free_queue(&ctrl->queues[i]); return ret; } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set = admin ? 
+ &ctrl->admin_tag_set : &ctrl->tag_set; + + blk_mq_free_tag_set(set); + nvme_rdma_dev_put(ctrl->device); +} + +static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, + bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set; + int ret; + + if (admin) { + set = &ctrl->admin_tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_admin_mq_ops; + set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; + set->reserved_tags = 2; /* connect + keep-alive */ + set->numa_node = NUMA_NO_NODE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = ADMIN_TIMEOUT; + } else { + set = &ctrl->tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_mq_ops; + set->queue_depth = nctrl->opts->queue_size; + set->reserved_tags = 1; /* fabric connect */ + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = nctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + } + + ret = blk_mq_alloc_tag_set(set); + if (ret) + goto out; + + /* + * We need a reference on the device as long as the tag_set is alive, + * as the MRs in the request structures need a valid ib_device. 
+ */ + ret = nvme_rdma_dev_get(ctrl->device); + if (!ret) { + ret = -EINVAL; + goto out_free_tagset; + } + + return set; + +out_free_tagset: + blk_mq_free_tag_set(set); +out: + return ERR_PTR(ret); +} + +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool remove) { nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_stop_queue(&ctrl->queues[0]); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.admin_q); + nvme_rdma_free_tagset(&ctrl->ctrl, true); + } + nvme_rdma_free_queue(&ctrl->queues[0]); +} + +static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool new) +{ + int error; + + error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + if (error) + return error; + + ctrl->device = ctrl->queues[0].device; + + ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ctrl->device->dev->attrs.max_fast_reg_page_list_len); + + if (new) { + ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); + if (IS_ERR(ctrl->ctrl.admin_tagset)) + goto out_free_queue; + + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + error = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_tagset; + } + } else { + error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, + nvme_rdma_reinit_request); + if (error) + goto out_free_queue; + } + + error = nvme_rdma_start_queue(ctrl, 0); + if (error) + goto out_cleanup_queue; + + error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP, + &ctrl->ctrl.cap); + if (error) { + dev_err(ctrl->ctrl.device, + "prop_get NVME_REG_CAP failed\n"); + goto out_cleanup_queue; + } + + ctrl->ctrl.sqsize = + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); + + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 
+ if (error) + goto out_cleanup_queue; + + ctrl->ctrl.max_hw_sectors = + (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); + + error = nvme_init_identify(&ctrl->ctrl); + if (error) + goto out_cleanup_queue; + + error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, + &ctrl->async_event_sqe, sizeof(struct nvme_command), + DMA_TO_DEVICE); + if (error) + goto out_cleanup_queue; + + return 0; + +out_cleanup_queue: + if (new) + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_tagset: + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, true); +out_free_queue: + nvme_rdma_free_queue(&ctrl->queues[0]); + return error; +} + +static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, + bool remove) +{ + nvme_rdma_stop_io_queues(ctrl); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + nvme_rdma_free_tagset(&ctrl->ctrl, false); + } + nvme_rdma_free_io_queues(ctrl); +} + +static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) +{ + int ret; + + ret = nvme_rdma_alloc_io_queues(ctrl); + if (ret) + return ret; + + if (new) { + ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); + if (IS_ERR(ctrl->ctrl.tagset)) + goto out_free_io_queues; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + } else { + ret = blk_mq_reinit_tagset(&ctrl->tag_set, + nvme_rdma_reinit_request); + if (ret) + goto out_free_io_queues; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); + } + + ret = nvme_rdma_start_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; + + return 0; + +out_cleanup_connect_q: + if (new) + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, false); +out_free_io_queues: + nvme_rdma_free_io_queues(ctrl); + return ret; } static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -708,47 +927,18 @@ static void 
nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->ctrl.queue_count > 1) { - nvme_rdma_free_io_queues(ctrl); - - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - } - - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - - ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - - ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (ret) - goto requeue; - - ret = nvmf_connect_admin_queue(&ctrl->ctrl); - if (ret) - goto requeue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + if (ctrl->ctrl.queue_count > 1) + nvme_rdma_destroy_io_queues(ctrl, false); - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + nvme_rdma_destroy_admin_queue(ctrl, false); + ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) goto requeue; if (ctrl->ctrl.queue_count > 1) { - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - goto requeue; - - ret = nvme_rdma_connect_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) goto requeue; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -771,16 +961,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); - int i; nvme_stop_ctrl(&ctrl->ctrl); - for (i = 0; i < ctrl->ctrl.queue_count; i++) - clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); - - if (ctrl->ctrl.queue_count > 1) + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + } blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); /* We must take care of fastfail/requeue all our inflight requests */ if (ctrl->ctrl.queue_count > 1) @@ -865,7 +1054,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if 
(req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); - if (res < 0) { + if (unlikely(res < 0)) { dev_err(ctrl->ctrl.device, "Queueing INV WR for rkey %#x failed (%d)\n", req->mr->rkey, res); @@ -934,7 +1123,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, * the block virtual boundary. */ nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); - if (nr < count) { + if (unlikely(nr < count)) { if (nr < 0) return nr; return -EINVAL; @@ -1070,7 +1259,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, first = &wr; ret = ib_post_send(queue->qp, first, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1096,7 +1285,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, wr.num_sge = 1; ret = ib_post_recv(queue->qp, &wr, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1456,7 +1645,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); err = nvme_rdma_map_data(queue, rq, c); - if (err < 0) { + if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", err); nvme_cleanup_cmd(rq); @@ -1470,7 +1659,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? 
&req->reg_wr.wr : NULL, flush); - if (err) { + if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; } @@ -1538,98 +1727,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .timeout = nvme_rdma_timeout, }; -static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) -{ - int error; - - error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (error) - return error; - - ctrl->device = ctrl->queues[0].device; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - error = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_queue; - - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); - - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_put_dev; - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; - } - - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, - &ctrl->ctrl.cap); - if (error) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), 
ctrl->ctrl.sqsize); - - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); - if (error) - goto out_cleanup_queue; - - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9); - - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, - &ctrl->async_event_sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - if (error) - goto out_cleanup_queue; - - return 0; - -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - /* disconnect and drain the queue before freeing the tagset */ - nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); -out_free_queue: - nvme_rdma_free_queue(&ctrl->queues[0]); - return error; -} - -static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->reconnect_work); @@ -1638,33 +1736,26 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_free_io_queues(ctrl); + nvme_rdma_destroy_io_queues(ctrl, shutdown); } - if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags)) + if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); + else + nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, shutdown); } -static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) { - nvme_stop_ctrl(&ctrl->ctrl); nvme_remove_namespaces(&ctrl->ctrl); - if (shutdown) - 
nvme_rdma_shutdown_ctrl(ctrl); - + nvme_rdma_shutdown_ctrl(ctrl, true); nvme_uninit_ctrl(&ctrl->ctrl); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); - } - nvme_put_ctrl(&ctrl->ctrl); } @@ -1673,7 +1764,8 @@ static void nvme_rdma_del_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - __nvme_rdma_remove_ctrl(ctrl, true); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_rdma_remove_ctrl(ctrl); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1705,14 +1797,6 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) return ret; } -static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -{ - struct nvme_rdma_ctrl *ctrl = container_of(work, - struct nvme_rdma_ctrl, delete_work); - - __nvme_rdma_remove_ctrl(ctrl, false); -} - static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = @@ -1721,31 +1805,16 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) bool changed; nvme_stop_ctrl(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); + nvme_rdma_shutdown_ctrl(ctrl, false); - ret = nvme_rdma_configure_admin_queue(ctrl); - if (ret) { - /* ctrl is already shutdown, just remove the ctrl */ - INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work); - goto del_dead_ctrl; - } + ret = nvme_rdma_configure_admin_queue(ctrl, false); + if (ret) + goto out_fail; if (ctrl->ctrl.queue_count > 1) { - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); - if (ret) - goto del_dead_ctrl; - - ret = nvme_rdma_init_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) - goto del_dead_ctrl; - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto del_dead_ctrl; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); + goto out_fail; } changed = nvme_change_ctrl_state(&ctrl->ctrl, 
NVME_CTRL_LIVE); @@ -1755,10 +1824,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; -del_dead_ctrl: - /* Deleting this dead controller... */ +out_fail: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work)); + nvme_rdma_remove_ctrl(ctrl); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { @@ -1774,62 +1842,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .get_address = nvmf_get_address, }; -static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) -{ - int ret; - - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - return ret; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - ret = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_io_queues; - - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - goto out_put_dev; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto out_cleanup_connect_q; - - return 0; - -out_cleanup_connect_q: - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); 
-out_free_io_queues: - nvme_rdma_free_io_queues(ctrl); - return ret; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -1887,7 +1899,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (!ctrl->queues) goto out_uninit_ctrl; - ret = nvme_rdma_configure_admin_queue(ctrl); + ret = nvme_rdma_configure_admin_queue(ctrl, true); if (ret) goto out_kfree_queues; @@ -1922,7 +1934,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, } if (opts->nr_io_queues) { - ret = nvme_rdma_create_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, true); if (ret) goto out_remove_admin_queue; } @@ -1944,7 +1956,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, true); out_kfree_queues: kfree(ctrl->queues); out_uninit_ctrl: |