diff options
author | Kashyap, Desai <kashyap.desai@lsi.com> | 2011-06-14 10:56:43 +0530 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2011-06-29 16:15:51 -0500 |
commit | f93213de5c28d4aeda51b3c03daf7e27f6dd2b7a (patch) | |
tree | 424e6e1e95065387f38b8160febf8866c72c549a /drivers/scsi/mpt2sas/mpt2sas_scsih.c | |
parent | a3e1e55e4b5ca4d49618c592b4d10814e21e12a7 (diff) |
[SCSI] mpt2sas: fix broadcast AEN and task management issue
Properly handling of target reset in multi-initiator environment
Clean up in broadcast change handling:
(1) Need to look at the status of each task management request, and retry
the TM when there are failures.
(2) Need quiescence IO so the driver doesn't take on more IO request while
it's in the middle of sending TM request to firmware
(3) Add support to keep track of how many pending broadcast AEN events
are received while the broadcast handling is active, then loop back at
the end of this routine if there were any events received.
Clean up in mpt2sas_scsih_issue_tm routine:
(1) Make sure proper status is returned when host reset fails
(2) Clean up sanity checks near end of routine, insuring all outstanding
IOs were completed.
Signed-off-by: Kashyap Desai <kashyap.desai@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi/mpt2sas/mpt2sas_scsih.c')
-rw-r--r-- | drivers/scsi/mpt2sas/mpt2sas_scsih.c | 241 |
1 files changed, 188 insertions, 53 deletions
diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c index f86f2b1418e7..c20f590c77b3 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c +++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c @@ -2240,6 +2240,8 @@ mpt2sas_scsih_clear_tm_flag(struct MPT2SAS_ADAPTER *ioc, u16 handle) * @type: MPI2_SCSITASKMGMT_TASKTYPE__XXX (defined in mpi2_init.h) * @smid_task: smid assigned to the task * @timeout: timeout in seconds + * @serial_number: the serial_number from scmd + * @m_type: TM_MUTEX_ON or TM_MUTEX_OFF * Context: user * * A generic API for sending task management requests to firmware. @@ -2251,17 +2253,18 @@ mpt2sas_scsih_clear_tm_flag(struct MPT2SAS_ADAPTER *ioc, u16 handle) int mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, uint id, uint lun, u8 type, u16 smid_task, ulong timeout, - struct scsi_cmnd *scmd) + unsigned long serial_number, enum mutex_type m_type) { Mpi2SCSITaskManagementRequest_t *mpi_request; Mpi2SCSITaskManagementReply_t *mpi_reply; u16 smid = 0; u32 ioc_state; unsigned long timeleft; - struct scsi_cmnd *scmd_lookup; + struct scsiio_tracker *scsi_lookup = NULL; int rc; - mutex_lock(&ioc->tm_cmds.mutex); + if (m_type == TM_MUTEX_ON) + mutex_lock(&ioc->tm_cmds.mutex); if (ioc->tm_cmds.status != MPT2_CMD_NOT_USED) { printk(MPT2SAS_INFO_FMT "%s: tm_cmd busy!!!\n", __func__, ioc->name); @@ -2281,18 +2284,18 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, if (ioc_state & MPI2_DOORBELL_USED) { dhsprintk(ioc, printk(MPT2SAS_INFO_FMT "unexpected doorbell " "active!\n", ioc->name)); - mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, + rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); - rc = SUCCESS; + rc = (!rc) ? SUCCESS : FAILED; goto err_out; } if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { mpt2sas_base_fault_info(ioc, ioc_state & MPI2_DOORBELL_DATA_MASK); - mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, + rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); - rc = SUCCESS; + rc = (!rc) ? SUCCESS : FAILED; goto err_out; } @@ -2304,6 +2307,9 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, goto err_out; } + if (type == MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK) + scsi_lookup = &ioc->scsi_lookup[smid_task - 1]; + dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "sending tm: handle(0x%04x)," " task_type(0x%02x), smid(%d)\n", ioc->name, handle, type, smid_task)); @@ -2311,6 +2317,7 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, mpi_request = mpt2sas_base_get_msg_frame(ioc, smid); ioc->tm_cmds.smid = smid; memset(mpi_request, 0, sizeof(Mpi2SCSITaskManagementRequest_t)); + memset(ioc->tm_cmds.reply, 0, sizeof(Mpi2SCSITaskManagementReply_t)); mpi_request->Function = MPI2_FUNCTION_SCSI_TASK_MGMT; mpi_request->DevHandle = cpu_to_le16(handle); mpi_request->TaskType = type; @@ -2326,9 +2333,9 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, _debug_dump_mf(mpi_request, sizeof(Mpi2SCSITaskManagementRequest_t)/4); if (!(ioc->tm_cmds.status & MPT2_CMD_RESET)) { - mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, + rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); - rc = SUCCESS; + rc = (!rc) ? SUCCESS : FAILED; ioc->tm_cmds.status = MPT2_CMD_NOT_USED; mpt2sas_scsih_clear_tm_flag(ioc, handle); goto err_out; @@ -2350,20 +2357,12 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, } } - /* sanity check: - * Check to see the commands were terminated. - * This is only needed for eh callbacks, hence the scmd check. - */ - rc = FAILED; - if (scmd == NULL) - goto bypass_sanity_checks; switch (type) { case MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK: - scmd_lookup = _scsih_scsi_lookup_get(ioc, smid_task); - if (scmd_lookup) - rc = FAILED; - else - rc = SUCCESS; + rc = SUCCESS; + if (scsi_lookup->scmd == NULL) + break; + rc = FAILED; break; case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET: @@ -2373,24 +2372,31 @@ mpt2sas_scsih_issue_tm(struct MPT2SAS_ADAPTER *ioc, u16 handle, uint channel, rc = SUCCESS; break; + case MPI2_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET: case MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET: if (_scsih_scsi_lookup_find_by_lun(ioc, id, lun, channel)) rc = FAILED; else rc = SUCCESS; break; + case MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK: + rc = SUCCESS; + break; + default: + rc = FAILED; + break; } - bypass_sanity_checks: - mpt2sas_scsih_clear_tm_flag(ioc, handle); ioc->tm_cmds.status = MPT2_CMD_NOT_USED; - mutex_unlock(&ioc->tm_cmds.mutex); + if (m_type == TM_MUTEX_ON) + mutex_unlock(&ioc->tm_cmds.mutex); return rc; err_out: - mutex_unlock(&ioc->tm_cmds.mutex); + if (m_type == TM_MUTEX_ON) + mutex_unlock(&ioc->tm_cmds.mutex); return rc; } @@ -2500,7 +2506,8 @@ _scsih_abort(struct scsi_cmnd *scmd) handle = sas_device_priv_data->sas_target->handle; r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel, scmd->device->id, scmd->device->lun, - MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30, scmd); + MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30, + scmd->serial_number, TM_MUTEX_ON); out: sdev_printk(KERN_INFO, scmd->device, "task abort: %s scmd(%p)\n", @@ -2561,7 +2568,8 @@ _scsih_dev_reset(struct scsi_cmnd *scmd) r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel, scmd->device->id, scmd->device->lun, - MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, scmd); + MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET, 0, 30, 0, + TM_MUTEX_ON); out: sdev_printk(KERN_INFO, scmd->device, "device reset: %s scmd(%p)\n", @@ -2621,7 +2629,7 @@ _scsih_target_reset(struct scsi_cmnd *scmd) r = mpt2sas_scsih_issue_tm(ioc, handle, scmd->device->channel, scmd->device->id, 0, MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET, 0, - 30, scmd); + 30, 0, TM_MUTEX_ON); out: starget_printk(KERN_INFO, starget, "target reset: %s scmd(%p)\n", @@ -2754,6 +2762,31 @@ _scsih_fw_event_cleanup_queue(struct MPT2SAS_ADAPTER *ioc) } /** + * _scsih_ublock_io_all_device - unblock every device + * @ioc: per adapter object + * + * change the device state from block to running + */ +static void +_scsih_ublock_io_all_device(struct MPT2SAS_ADAPTER *ioc) +{ + struct MPT2SAS_DEVICE *sas_device_priv_data; + struct scsi_device *sdev; + + shost_for_each_device(sdev, ioc->shost) { + sas_device_priv_data = sdev->hostdata; + if (!sas_device_priv_data) + continue; + if (!sas_device_priv_data->block) + continue; + sas_device_priv_data->block = 0; + dewtprintk(ioc, sdev_printk(KERN_INFO, sdev, "device_running, " + "handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle)); + scsi_internal_device_unblock(sdev); + } +} +/** * _scsih_ublock_io_device - set the device state to SDEV_RUNNING * @ioc: per adapter object * @handle: device handle @@ -2783,6 +2816,34 @@ _scsih_ublock_io_device(struct MPT2SAS_ADAPTER *ioc, u16 handle) } /** + * _scsih_block_io_all_device - set the device state to SDEV_BLOCK + * @ioc: per adapter object + * @handle: device handle + * + * During device pull we need to appropiately set the sdev state. + */ +static void +_scsih_block_io_all_device(struct MPT2SAS_ADAPTER *ioc) +{ + struct MPT2SAS_DEVICE *sas_device_priv_data; + struct scsi_device *sdev; + + shost_for_each_device(sdev, ioc->shost) { + sas_device_priv_data = sdev->hostdata; + if (!sas_device_priv_data) + continue; + if (sas_device_priv_data->block) + continue; + sas_device_priv_data->block = 1; + dewtprintk(ioc, sdev_printk(KERN_INFO, sdev, "device_blocked, " + "handle(0x%04x)\n", + sas_device_priv_data->sas_target->handle)); + scsi_internal_device_block(sdev); + } +} + + +/** * _scsih_block_io_device - set the device state to SDEV_BLOCK * @ioc: per adapter object * @handle: device handle @@ -5384,9 +5445,10 @@ _scsih_sas_device_status_change_event_debug(struct MPT2SAS_ADAPTER *ioc, break; } printk(MPT2SAS_INFO_FMT "device status change: (%s)\n" - "\thandle(0x%04x), sas address(0x%016llx)", ioc->name, - reason_str, le16_to_cpu(event_data->DevHandle), - (unsigned long long)le64_to_cpu(event_data->SASAddress)); + "\thandle(0x%04x), sas address(0x%016llx), tag(%d)", + ioc->name, reason_str, le16_to_cpu(event_data->DevHandle), + (unsigned long long)le64_to_cpu(event_data->SASAddress), + le16_to_cpu(event_data->TaskTag)); if (event_data->ReasonCode == MPI2_EVENT_SAS_DEV_STAT_RC_SMART_DATA) printk(MPT2SAS_INFO_FMT ", ASC(0x%x), ASCQ(0x%x)\n", ioc->name, event_data->ASC, event_data->ASCQ); @@ -5526,25 +5588,38 @@ _scsih_sas_broadcast_primative_event(struct MPT2SAS_ADAPTER *ioc, u32 termination_count; u32 query_count; Mpi2SCSITaskManagementReply_t *mpi_reply; -#ifdef CONFIG_SCSI_MPT2SAS_LOGGING Mpi2EventDataSasBroadcastPrimitive_t *event_data = fw_event->event_data; -#endif u16 ioc_status; unsigned long flags; int r; + u8 max_retries = 0; + u8 task_abort_retries; - dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "broadcast primitive: " - "phy number(%d), width(%d)\n", ioc->name, event_data->PhyNum, - event_data->PortWidth)); - dtmprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: enter\n", ioc->name, - __func__)); + mutex_lock(&ioc->tm_cmds.mutex); + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: enter: phy number(%d), " + "width(%d)\n", ioc->name, __func__, event_data->PhyNum, + event_data->PortWidth)); + + _scsih_block_io_all_device(ioc); spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); - ioc->broadcast_aen_busy = 0; + mpi_reply = ioc->tm_cmds.reply; +broadcast_aen_retry: + + /* sanity checks for retrying this loop */ + if (max_retries++ == 5) { + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: giving up\n", + ioc->name, __func__)); + goto out; + } else if (max_retries > 1) + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: %d retry\n", + ioc->name, __func__, max_retries - 1)); + termination_count = 0; query_count = 0; - mpi_reply = ioc->tm_cmds.reply; for (smid = 1; smid <= ioc->scsiio_depth; smid++) { + if (ioc->ioc_reset_in_progress_status) + goto out; scmd = _scsih_scsi_lookup_get(ioc, smid); if (!scmd) continue; @@ -5565,34 +5640,90 @@ _scsih_sas_broadcast_primative_event(struct MPT2SAS_ADAPTER *ioc, lun = sas_device_priv_data->lun; query_count++; + if (ioc->ioc_reset_in_progress_status) + goto out; + spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags); - mpt2sas_scsih_issue_tm(ioc, handle, 0, 0, lun, - MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30, NULL); - ioc->tm_cmds.status = MPT2_CMD_NOT_USED; + r = mpt2sas_scsih_issue_tm(ioc, handle, 0, 0, lun, + MPI2_SCSITASKMGMT_TASKTYPE_QUERY_TASK, smid, 30, 0, + TM_MUTEX_OFF); + if (r == FAILED) { + sdev_printk(KERN_WARNING, sdev, + "mpt2sas_scsih_issue_tm: FAILED when sending " + "QUERY_TASK: scmd(%p)\n", scmd); + spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); + goto broadcast_aen_retry; + } ioc_status = le16_to_cpu(mpi_reply->IOCStatus) & MPI2_IOCSTATUS_MASK; - if ((ioc_status == MPI2_IOCSTATUS_SUCCESS) && - (mpi_reply->ResponseCode == + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { + sdev_printk(KERN_WARNING, sdev, "query task: FAILED " + "with IOCSTATUS(0x%04x), scmd(%p)\n", ioc_status, + scmd); + spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); + goto broadcast_aen_retry; + } + + /* see if IO is still owned by IOC and target */ + if (mpi_reply->ResponseCode == MPI2_SCSITASKMGMT_RSP_TM_SUCCEEDED || mpi_reply->ResponseCode == - MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC)) { + MPI2_SCSITASKMGMT_RSP_IO_QUEUED_ON_IOC) { spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); continue; } + task_abort_retries = 0; + tm_retry: + if (task_abort_retries++ == 60) { + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT + "%s: ABORT_TASK: giving up\n", ioc->name, + __func__)); + spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); + goto broadcast_aen_retry; + } + + if (ioc->ioc_reset_in_progress_status) + goto out_no_lock; + r = mpt2sas_scsih_issue_tm(ioc, handle, sdev->channel, sdev->id, sdev->lun, MPI2_SCSITASKMGMT_TASKTYPE_ABORT_TASK, smid, 30, - scmd); - if (r == FAILED) - sdev_printk(KERN_WARNING, sdev, "task abort: FAILED " + scmd->serial_number, TM_MUTEX_OFF); + if (r == FAILED) { + sdev_printk(KERN_WARNING, sdev, + "mpt2sas_scsih_issue_tm: ABORT_TASK: FAILED : " "scmd(%p)\n", scmd); + goto tm_retry; + } + + if (task_abort_retries > 1) + sdev_printk(KERN_WARNING, sdev, + "mpt2sas_scsih_issue_tm: ABORT_TASK: RETRIES (%d):" + " scmd(%p)\n", + task_abort_retries - 1, scmd); + termination_count += le32_to_cpu(mpi_reply->TerminationCount); spin_lock_irqsave(&ioc->scsi_lookup_lock, flags); } + + if (ioc->broadcast_aen_pending) { + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: loop back due to" + " pending AEN\n", ioc->name, __func__)); + ioc->broadcast_aen_pending = 0; + goto broadcast_aen_retry; + } + + out: spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags); + out_no_lock: - dtmprintk(ioc, printk(MPT2SAS_INFO_FMT + dewtprintk(ioc, printk(MPT2SAS_INFO_FMT "%s - exit, query_count = %d termination_count = %d\n", ioc->name, __func__, query_count, termination_count)); + + ioc->broadcast_aen_busy = 0; + if (!ioc->ioc_reset_in_progress_status) + _scsih_ublock_io_all_device(ioc); + mutex_unlock(&ioc->tm_cmds.mutex); } /** @@ -6866,10 +6997,14 @@ mpt2sas_scsih_event_callback(struct MPT2SAS_ADAPTER *ioc, u8 msix_index, mpi_reply->EventData; if (baen_data->Primitive != - MPI2_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT || - ioc->broadcast_aen_busy) + MPI2_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT) return 1; - ioc->broadcast_aen_busy = 1; + + if (ioc->broadcast_aen_busy) { + ioc->broadcast_aen_pending++; + return 1; + } else + ioc->broadcast_aen_busy = 1; break; } |