From 42e33148df38c60b99d984b76b302c64397ebe4c Mon Sep 17 00:00:00 2001 From: "James.Smart@Emulex.Com" Date: Thu, 15 Dec 2005 09:56:22 -0500 Subject: [SCSI] fix for fc transport recursion problem. In the scenario that a link was broken, the devloss timer for each rport was expire at roughly the same time, causing lots of "delete" workqueue items being queued. Depth is dependent upon the number of rports that were on the link. The rport target remove calls were calling flush_scheduled_work(), which would interrupt the stream, and start the next workqueue item, which did the same thing, and so on until recursion depth was large. This fix stops the recursion in the initial delete path, and pushes it off to a host-level work item that reaps the dead rports. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_fc.c | 59 +++++++++++++++++++++++++++++++++++++--- include/scsi/scsi_transport_fc.h | 11 ++++++++ 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 6cd5931d9a54..2a1a99a2ef56 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -105,6 +105,7 @@ static struct { { FC_PORTSTATE_LINKDOWN, "Linkdown" }, { FC_PORTSTATE_ERROR, "Error" }, { FC_PORTSTATE_LOOPBACK, "Loopback" }, + { FC_PORTSTATE_DELETED, "Deleted" }, }; fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) #define FC_PORTSTATE_MAX_NAMELEN 20 @@ -211,6 +212,7 @@ fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names) #define FC_MGMTSRVR_PORTID 0x00000a +static void fc_shost_remove_rports(void *data); static void fc_timeout_deleted_rport(void *data); static void fc_scsi_scan_rport(void *data); static void fc_rport_terminate(struct fc_rport *rport); @@ -318,6 +320,8 @@ static int fc_host_setup(struct transport_container *tc, struct device *dev, fc_host_next_rport_number(shost) = 0; fc_host_next_target_id(shost) = 0; + fc_host_flags(shost) = 0; + INIT_WORK(&fc_host_rport_del_work(shost), fc_shost_remove_rports, shost); return 0; } @@ -387,6 +391,7 @@ show_fc_rport_##field (struct class_device *cdev, char *buf) \ struct fc_internal *i = to_fc_internal(shost->transportt); \ if ((i->f->get_rport_##field) && \ !((rport->port_state == FC_PORTSTATE_BLOCKED) || \ + (rport->port_state == FC_PORTSTATE_DELETED) || \ (rport->port_state == FC_PORTSTATE_NOTPRESENT))) \ i->f->get_rport_##field(rport); \ return snprintf(buf, sz, format_string, cast rport->field); \ @@ -402,6 +407,7 @@ store_fc_rport_##field(struct class_device *cdev, const char *buf, \ struct Scsi_Host *shost = rport_to_shost(rport); \ struct fc_internal *i = to_fc_internal(shost->transportt); \ if ((rport->port_state == FC_PORTSTATE_BLOCKED) || \ + (rport->port_state == FC_PORTSTATE_DELETED) || \ (rport->port_state == FC_PORTSTATE_NOTPRESENT)) \ return -EBUSY; \ val = simple_strtoul(buf, NULL, 0); \ @@ -519,6 +525,7 @@ store_fc_rport_dev_loss_tmo(struct class_device *cdev, const char *buf, struct Scsi_Host *shost = rport_to_shost(rport); struct fc_internal *i = to_fc_internal(shost->transportt); if ((rport->port_state == FC_PORTSTATE_BLOCKED) || + (rport->port_state == FC_PORTSTATE_DELETED) || (rport->port_state == FC_PORTSTATE_NOTPRESENT)) return -EBUSY; val = simple_strtoul(buf, NULL, 0); @@ -1769,7 +1776,7 @@ fc_timeout_deleted_rport(void *data) rport->maxframe_size = -1; rport->supported_classes = FC_COS_UNSPECIFIED; rport->roles = FC_RPORT_ROLE_UNKNOWN; - rport->port_state = FC_PORTSTATE_NOTPRESENT; + rport->port_state = FC_PORTSTATE_DELETED; /* remove the identifiers that aren't used in the consisting binding */ switch (fc_host_tgtid_bind_type(shost)) { @@ -1789,14 +1796,23 @@ fc_timeout_deleted_rport(void *data) break; } - spin_unlock_irqrestore(shost->host_lock, flags); - /* * As this only occurs if the remote port (scsi target) * went away and didn't come back - we'll remove * all attached scsi devices. + * + * We'll schedule the shost work item to perform the actual removal + * to avoid recursion in the different flush calls if we perform + * the removal in each target - and there are lots of targets + * whose timeouts fire at the same time. */ - fc_rport_tgt_remove(rport); + + if ( !(fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED)) { + fc_host_flags(shost) |= FC_SHOST_RPORT_DEL_SCHEDULED; + scsi_queue_work(shost, &fc_host_rport_del_work(shost)); + } + + spin_unlock_irqrestore(shost->host_lock, flags); } /** @@ -1818,6 +1834,41 @@ fc_scsi_scan_rport(void *data) } +/** + * fc_shost_remove_rports - called to remove all rports that are marked + * as in a deleted (not connected) state. + * + * @data: shost whose rports are to be looked at + **/ +static void +fc_shost_remove_rports(void *data) +{ + struct Scsi_Host *shost = (struct Scsi_Host *)data; + struct fc_rport *rport, *next_rport; + unsigned long flags; + + spin_lock_irqsave(shost->host_lock, flags); + while (fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED) { + + fc_host_flags(shost) &= ~FC_SHOST_RPORT_DEL_SCHEDULED; + +restart_search: + list_for_each_entry_safe(rport, next_rport, + &fc_host_rport_bindings(shost), peers) { + if (rport->port_state == FC_PORTSTATE_DELETED) { + rport->port_state = FC_PORTSTATE_NOTPRESENT; + spin_unlock_irqrestore(shost->host_lock, flags); + fc_rport_tgt_remove(rport); + spin_lock_irqsave(shost->host_lock, flags); + goto restart_search; + } + } + + } + spin_unlock_irqrestore(shost->host_lock, flags); +} + + MODULE_AUTHOR("Martin Hicks"); MODULE_DESCRIPTION("FC Transport Attributes"); MODULE_LICENSE("GPL"); diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index fac547d32a98..394f14a5b7cb 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -79,6 +79,7 @@ enum fc_port_state { FC_PORTSTATE_LINKDOWN, FC_PORTSTATE_ERROR, FC_PORTSTATE_LOOPBACK, + FC_PORTSTATE_DELETED, }; @@ -325,8 +326,14 @@ struct fc_host_attrs { struct list_head rport_bindings; u32 next_rport_number; u32 next_target_id; + u8 flags; + struct work_struct rport_del_work; }; +/* values for struct fc_host_attrs "flags" field: */ +#define FC_SHOST_RPORT_DEL_SCHEDULED 0x01 + + #define fc_host_node_name(x) \ (((struct fc_host_attrs *)(x)->shost_data)->node_name) #define fc_host_port_name(x) \ @@ -365,6 +372,10 @@ struct fc_host_attrs { (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) #define fc_host_next_target_id(x) \ (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) +#define fc_host_flags(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->flags) +#define fc_host_rport_del_work(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->rport_del_work) /* The functions by which the transport class and the driver communicate */ -- cgit v1.2.3 From 863a930a40eb7f2d18534c2c166b22582f5c6cfd Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 15 Dec 2005 20:01:43 -0800 Subject: [SCSI] fix scsi_reap_target() device_del from atomic context scsi_reap_target() was desgined to be called from any context. However it must do a device_del() of the target device, which may only be called from user context. Thus we have to reimplement scsi_reap_target() via a workqueue. Signed-off-by: James Bottomley --- drivers/scsi/scsi_scan.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 94e5167f260d..e36c21e06d31 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -400,6 +400,35 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, return found_target; } +struct work_queue_wrapper { + struct work_struct work; + struct scsi_target *starget; +}; + +static void scsi_target_reap_work(void *data) { + struct work_queue_wrapper *wqw = (struct work_queue_wrapper *)data; + struct scsi_target *starget = wqw->starget; + struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); + unsigned long flags; + + kfree(wqw); + + spin_lock_irqsave(shost->host_lock, flags); + + if (--starget->reap_ref == 0 && list_empty(&starget->devices)) { + list_del_init(&starget->siblings); + spin_unlock_irqrestore(shost->host_lock, flags); + device_del(&starget->dev); + transport_unregister_device(&starget->dev); + put_device(&starget->dev); + return; + + } + spin_unlock_irqrestore(shost->host_lock, flags); + + return; +} + /** * scsi_target_reap - check to see if target is in use and destroy if not * @@ -411,19 +440,18 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, */ void scsi_target_reap(struct scsi_target *starget) { - struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); - unsigned long flags; - spin_lock_irqsave(shost->host_lock, flags); + struct work_queue_wrapper *wqw = + kzalloc(sizeof(struct work_queue_wrapper), GFP_ATOMIC); - if (--starget->reap_ref == 0 && list_empty(&starget->devices)) { - list_del_init(&starget->siblings); - spin_unlock_irqrestore(shost->host_lock, flags); - device_del(&starget->dev); - transport_unregister_device(&starget->dev); - put_device(&starget->dev); + if (!wqw) { + starget_printk(KERN_ERR, starget, + "Failed to allocate memory in scsi_reap_target()\n"); return; } - spin_unlock_irqrestore(shost->host_lock, flags); + + INIT_WORK(&wqw->work, scsi_target_reap_work, wqw); + wqw->starget = starget; + schedule_work(&wqw->work); } /** -- cgit v1.2.3