From 766639cab01ed84968524ce6ad25b03ba6b15c1a Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:08:49 -0700 Subject: scsi: qedf: Add check for offload before flushing I/Os for target We need to check that a fcport is offloaded before we try to flush any requests. No doing so could lead to undefined results and most likely a crash. Fixes the oops: [ 343.971886] [0000:42:00.3]:[qedf_execute_tmf:2070]:8: wait for tm_cmpl timeout! [ 343.971933] BUG: unable to handle kernel paging request at 00000000000024a8 [ 343.971949] IP: [] qedf_flush_active_ios+0x46/0x260 [qedf] [ 343.971952] PGD 42c569067 PUD 4160fe067 PMD 0 [ 343.971954] Oops: 0000 [#1] SMP [ 343.972008] Modules linked in: qedf(OEX) qed(OEX) bnx2i cnic fuse af_packet iscsi_ibft msr xfs intel_rapl sb_edac edac_core x86_pkg_temp_thermal bnx2x geneve intel_powerclamp vxlan coretemp ipmi_ssif ipmi_devintf kvm_intel kvm libiscsi joydev irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel tg3 ip6_udp_tunnel udp_tunnel mdio libcrc32c iTCO_wdt scsi_transport_iscsi uio drbg iTCO_vendor_support iscsi_boot_sysfs dcdbas(X) ipmi_si ansi_cprng aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper ptp pps_core pcspkr libphy lpc_ich mfd_core cryptd fjes wmi ipmi_msghandler button crc8 libfcoe libfc scsi_transport_fc mei_me mei shpchp processor acpi_pad btrfs xor hid_generic usbhid raid6_pq sd_mod sr_mod cdrom mgag200 crc32c_intel i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt [ 343.972020] fb_sys_fops ttm ahci ehci_pci libahci ehci_hcd drm libata usbcore megaraid_sas usb_common sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 [last unloaded: qedf] [ 343.972022] Supported: Yes, External [ 343.972026] CPU: 30 PID: 12777 Comm: sg_reset Tainted: G W OE X 4.4.73-5-default #1 [ 343.972027] Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.1.3 11/20/2013 [ 343.972029] task: ffff88018dfc0e80 ti: ffff88042bd7c000 task.ti: ffff88042bd7c000 [ 343.972036] RIP: 0010:[] [] qedf_flush_active_ios+0x46/0x260 [qedf] [ 343.972038] RSP: 0018:ffff88042bd7fbe0 EFLAGS: 00010286 [ 343.972039] RAX: 0000000000000000 RBX: ffff88042ce37800 RCX: 0000000000000400 [ 343.972040] RDX: 000000000000060e RSI: ffffffffa06be830 RDI: ffff8807e5072cc0 [ 343.972041] RBP: 0000000000001000 R08: ffffffffa06bff4d R09: ffff88018dd84580 [ 343.972042] R10: 000000000000018b R11: 0000000000000002 R12: 0000000000002003 [ 343.972043] R13: 0000000000000000 R14: 0000000000000000 R15: ffff8807e5072cc0 [ 343.972046] FS: 00007fc1c8809700(0000) GS:ffff88042fbc0000(0000) knlGS:0000000000000000 [ 343.972048] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 343.972049] CR2: 00000000000024a8 CR3: 00000004236ec000 CR4: 00000000001406e0 [ 343.972050] Stack: [ 343.972053] 504c78750607e154 ffffffff810a7d10 ffff88042ce37800 0000000000000010 [ 343.972055] 0000000000002003 ffff8807ff480c48 ffff8807e5072cc0 ffffc90004ec4ff8 [ 343.972057] ffffffffa06b9b86 ffff880800000010 0000000000000282 ffff88042ce37800 [ 343.972058] Call Trace: [ 343.972094] [] qedf_initiate_tmf+0x346/0x3e0 [qedf] [ 343.972120] [] scsi_try_bus_device_reset+0x26/0x40 [scsi_mod] [ 343.972133] [] scsi_ioctl_reset+0x13e/0x260 [scsi_mod] [ 343.972145] [] scsi_ioctl+0x136/0x3d0 [scsi_mod] [ 343.972154] [] blkdev_ioctl+0x6bb/0x950 [ 343.972164] [] block_ioctl+0x3d/0x40 [ 343.972170] [] do_vfs_ioctl+0x2cd/0x4a0 [ 343.972186] [] SyS_ioctl+0x74/0x80 [ 343.972193] [] entry_SYSCALL_64_fastpath+0x12/0x6d [ 343.975285] DWARF2 unwinder stuck at entry_SYSCALL_64_fastpath+0x12/0x6d Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 50a50c4249d0..94d6455c9ddc 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1414,6 +1414,12 @@ void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) if (!fcport) return; + /* Check that fcport is still offloaded */ + if (!test_bit(QEDF_RPORT_SESSION_READY, &fcport->flags)) { + QEDF_ERR(NULL, "fcport is no longer offloaded.\n"); + return; + } + qedf = fcport->qedf; cmd_mgr = qedf->cmd_mgr; -- cgit v1.2.3 From a8f192bce17430106157100746699bfabbd6e892 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:08:54 -0700 Subject: scsi: qedf: Return request as DID_NO_CONNECT if MSI-X is not enabled Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 94d6455c9ddc..78cbd126c7e5 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -931,6 +931,15 @@ qedf_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc_cmd) return 0; } + if (!qedf->pdev->msix_enabled) { + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, + "Completing sc_cmd=%p DID_NO_CONNECT as MSI-X is not enabled.\n", + sc_cmd); + sc_cmd->result = DID_NO_CONNECT << 16; + sc_cmd->scsi_done(sc_cmd); + return 0; + } + rval = fc_remote_port_chkready(rport); if (rval) { sc_cmd->result = rval; -- cgit v1.2.3 From 8025c84208ef53a8a57f04d3d44a091e272510d3 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:08:56 -0700 Subject: scsi: qedf: Add task id to kref_get_unless_zero() debug messages when flushing requests Helps to corroborate which requests we can't get reference on and if it's real bug or not. Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 78cbd126c7e5..589414f06376 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1445,8 +1445,8 @@ void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) rc = kref_get_unless_zero(&io_req->refcount); if (!rc) { QEDF_ERR(&(qedf->dbg_ctx), - "Could not get kref for io_req=0x%p.\n", - io_req); + "Could not get kref for ELS io_req=0x%p xid=0x%x.\n", + io_req, io_req->xid); continue; } qedf_flush_els_req(qedf, io_req); @@ -1472,7 +1472,7 @@ void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) rc = kref_get_unless_zero(&io_req->refcount); if (!rc) { QEDF_ERR(&(qedf->dbg_ctx), "Could not get kref for " - "io_req=0x%p\n", io_req); + "io_req=0x%p xid=0x%x\n", io_req, io_req->xid); continue; } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, -- cgit v1.2.3 From 92bbccdf72ae7f605dbbae050e92dda8971c592d Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:09:01 -0700 Subject: scsi: qedf: Add additional checks when restarting an rport due to ABTS timeout There are a couple of kernel cases when we restart a remote port due to ABTS timeout that we need to handle: 1. Flush any outstanding ABTS requests when flushing I/Os so that we do not hold up the eh_abort handler indefinitely causing process hangs. 2. Check if we are currently uploading a connection before issuing an ABTS. Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 589414f06376..f669df03f37d 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1457,6 +1457,31 @@ void qedf_flush_active_ios(struct qedf_rport *fcport, int lun) goto free_cmd; } + if (io_req->cmd_type == QEDF_ABTS) { + rc = kref_get_unless_zero(&io_req->refcount); + if (!rc) { + QEDF_ERR(&(qedf->dbg_ctx), + "Could not get kref for abort io_req=0x%p xid=0x%x.\n", + io_req, io_req->xid); + continue; + } + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_IO, + "Flushing abort xid=0x%x.\n", io_req->xid); + + clear_bit(QEDF_CMD_IN_ABORT, &io_req->flags); + + if (io_req->sc_cmd) { + if (io_req->return_scsi_cmd_on_abts) + qedf_scsi_done(qedf, io_req, DID_ERROR); + } + + /* Notify eh_abort handler that ABTS is complete */ + complete(&io_req->abts_done); + kref_put(&io_req->refcount, qedf_release_cmd); + + goto free_cmd; + } + if (!io_req->sc_cmd) continue; if (lun > 0) { @@ -1534,6 +1559,11 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) goto abts_err; } + if (test_bit(QEDF_RPORT_UPLOADING_CONNECTION, &fcport->flags)) { + QEDF_ERR(&qedf->dbg_ctx, "fcport is uploading.\n"); + rc = 1; + goto out; + } kref_get(&io_req->refcount); @@ -1573,6 +1603,7 @@ abts_err: * task at the firmware. */ qedf_initiate_cleanup(io_req, return_scsi_cmd_on_abts); +out: return rc; } -- cgit v1.2.3 From f3690a89f918031a5eed17da78af51c329efe93a Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:09:03 -0700 Subject: scsi: qedf: Add more defensive checks for concurrent error conditions During an uplink toggle test all error handling is done via timeout and firmware error conditions which can occur concurrently: - SCSI layer timeouts - Error detect CQEs - Firmware detected underruns - ABTS timeouts All these concurrent events require more defensive checks in the driver including: - Check both internally and externally generated aborts to make sure the xid is not already been aborted in another context or in cleanup. - Check back pointers in qedf_cmd_timeout to verify the context of the io_req, fcport and qedf_ctx - Check rport state in host reset handler to not reset the whole host if the rport is already uploaded or in the process of relogin - Check to state for an fcport before initiating a middle path ELS request Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index f669df03f37d..07925f8e65bf 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -23,12 +23,31 @@ static void qedf_cmd_timeout(struct work_struct *work) struct qedf_ioreq *io_req = container_of(work, struct qedf_ioreq, timeout_work.work); - struct qedf_ctx *qedf = io_req->fcport->qedf; - struct qedf_rport *fcport = io_req->fcport; + struct qedf_ctx *qedf; + struct qedf_rport *fcport; u8 op = 0; + if (io_req == NULL) { + QEDF_INFO(NULL, QEDF_LOG_IO, "io_req is NULL.\n"); + return; + } + + fcport = io_req->fcport; + if (io_req->fcport == NULL) { + QEDF_INFO(NULL, QEDF_LOG_IO, "fcport is NULL.\n"); + return; + } + + qedf = fcport->qedf; + switch (io_req->cmd_type) { case QEDF_ABTS: + if (qedf == NULL) { + QEDF_INFO(NULL, QEDF_LOG_IO, "qedf is NULL for xid=0x%x.\n", + io_req->xid); + return; + } + QEDF_ERR((&qedf->dbg_ctx), "ABTS timeout, xid=0x%x.\n", io_req->xid); /* Cleanup timed out ABTS */ @@ -1565,6 +1584,16 @@ int qedf_initiate_abts(struct qedf_ioreq *io_req, bool return_scsi_cmd_on_abts) goto out; } + if (!test_bit(QEDF_CMD_OUTSTANDING, &io_req->flags) || + test_bit(QEDF_CMD_IN_CLEANUP, &io_req->flags) || + test_bit(QEDF_CMD_IN_ABORT, &io_req->flags)) { + QEDF_ERR(&(qedf->dbg_ctx), "io_req xid=0x%x already in " + "cleanup or abort processing or already " + "completed.\n", io_req->xid); + rc = 1; + goto out; + } + kref_get(&io_req->refcount); xid = io_req->xid; -- cgit v1.2.3 From 5d1c8b5ba074a45abd545d3e1087b3864931f341 Mon Sep 17 00:00:00 2001 From: Chad Dupuis Date: Wed, 25 Apr 2018 06:09:04 -0700 Subject: scsi: qedf: Update copyright for 2018 Signed-off-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/scsi/qedf/qedf_io.c') diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 07925f8e65bf..690df97432d2 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1,6 +1,6 @@ /* * QLogic FCoE Offload Driver - * Copyright (c) 2016-2017 Cavium Inc. + * Copyright (c) 2016-2018 Cavium Inc. * * This software is available under the terms of the GNU General Public License * (GPL) Version 2, available from the file COPYING in the main directory of -- cgit v1.2.3