From 82654b6b8ef8b93ee87a97fc562f87f081fc2f91 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 2 Jun 2017 16:32:08 +0800 Subject: nvme: fix hang in remove path We need to start admin queues too in nvme_kill_queues() to avoid a hang in the remove path [1]. This patch is very similar to 806f026f9b901eaf (nvme: use blk_mq_start_hw_queues() in nvme_kill_queues()). [1] hang stack trace [] blk_execute_rq+0x56/0x80 [] __nvme_submit_sync_cmd+0x89/0xf0 [] nvme_set_features+0x5e/0x90 [] nvme_configure_apst+0x166/0x200 [] nvme_set_latency_tolerance+0x35/0x50 [] apply_constraint+0xb1/0xc0 [] dev_pm_qos_constraints_destroy+0xf4/0x1f0 [] dpm_sysfs_remove+0x2a/0x60 [] device_del+0x101/0x320 [] device_unregister+0x1a/0x60 [] device_destroy+0x3c/0x50 [] nvme_uninit_ctrl+0x45/0xa0 [] nvme_remove+0x78/0x110 [] pci_device_remove+0x39/0xb0 [] device_release_driver_internal+0x155/0x210 [] device_release_driver+0x12/0x20 [] nvme_remove_dead_ctrl_work+0x6b/0x70 [] process_one_work+0x18c/0x3a0 [] worker_thread+0x4e/0x3b0 [] kthread+0x109/0x140 [] ret_from_fork+0x2c/0x40 [] 0xffffffffffffffff Fixes: c5552fde102fc("nvme: Enable autonomous power state transitions") Reported-by: Rakesh Pandit Tested-by: Rakesh Pandit Reviewed-by: Sagi Grimberg Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a60926410438..0f9cc0c55e15 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2438,6 +2438,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl) struct nvme_ns *ns; mutex_lock(&ctrl->namespaces_mutex); + + /* Forcibly start all queues to avoid having stuck requests */ + blk_mq_start_hw_queues(ctrl->admin_q); + list_for_each_entry(ns, &ctrl->namespaces, list) { /* * Revalidating a dead namespace sets capacity to 0. 
This will -- cgit v1.2.3 From da87591bea92204fcb921bac927666eb7141908e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 7 Jun 2017 15:25:42 +0800 Subject: nvme: only consider exit latency when choosing useful non-op power states When an NVMe is in a non-op state, the latency is exlat. The latency will be enlat + exlat only when the NVMe tries to transit from operational state right after it begins to transit to non-operational state, which should be a rare case. Therefore, as Andy Lutomirski suggests, use exlat only when deciding which power states to transition to. Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0f9cc0c55e15..c07d8d4e18c9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * transitioning between power states. Therefore, when running * in any given state, we will enter the next lower-power * non-operational state after waiting 50 * (enlat + exlat) - * microseconds, as long as that state's total latency is under + * microseconds, as long as that state's exit latency is under * the requested maximum latency. * * We will not autonomously enter any non-operational state for @@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * lowest-power state, not the number of states. 
*/ for (state = (int)ctrl->npss; state >= 0; state--) { - u64 total_latency_us, transition_ms; + u64 total_latency_us, exit_latency_us, transition_ms; if (target) table->entries[state] = target; @@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) NVME_PS_FLAGS_NON_OP_STATE)) continue; - total_latency_us = - (u64)le32_to_cpu(ctrl->psd[state].entry_lat) + - + le32_to_cpu(ctrl->psd[state].exit_lat); - if (total_latency_us > ctrl->ps_max_latency_us) + exit_latency_us = + (u64)le32_to_cpu(ctrl->psd[state].exit_lat); + if (exit_latency_us > ctrl->ps_max_latency_us) continue; + total_latency_us = + exit_latency_us + + le32_to_cpu(ctrl->psd[state].entry_lat); + /* * This state is good. Use it as the APST idle * target for higher power states. -- cgit v1.2.3 From 9947d6a09cd71937dade2fc14640e4843ae19802 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 7 Jun 2017 15:25:43 +0800 Subject: nvme: relax APST default max latency to 100ms Christoph Hellwig suggests we should make APST work out of the box. Hence relax the default max latency so that devices are able to enter their deepest power state by default. 
Here are id-ctrl excerpts from two high latency NVMes: vid : 0x14a4 ssvid : 0x1b4b mn : CX2-GB1024-Q11 NVMe LITEON 1024GB ps 3 : mp:0.1000W non-operational enlat:5000 exlat:5000 rrt:3 rrl:3 rwt:3 rwl:3 idle_power:- active_power:- ps 4 : mp:0.0100W non-operational enlat:50000 exlat:100000 rrt:4 rrl:4 rwt:4 rwl:4 idle_power:- active_power:- vid : 0x15b7 ssvid : 0x1b4b mn : A400 NVMe SanDisk 512GB ps 3 : mp:0.0500W non-operational enlat:51000 exlat:10000 rrt:0 rrl:0 rwt:0 rwl:0 idle_power:- active_power:- ps 4 : mp:0.0055W non-operational enlat:1000000 exlat:100000 rrt:0 rrl:0 rwt:0 rwl:0 idle_power:- active_power:- Signed-off-by: Kai-Heng Feng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/host/core.c') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c07d8d4e18c9..903d5813023a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have"); static int nvme_char_major; module_param(nvme_char_major, int, 0); -static unsigned long default_ps_max_latency_us = 25000; +static unsigned long default_ps_max_latency_us = 100000; module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); -- cgit v1.2.3