From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 19 Apr 2011 13:32:46 +0200
Subject: block: get rid of QUEUE_FLAG_REENTER

We are currently using this flag to check whether it's safe to call
into ->request_fn(). If it is set, we punt to kblockd. But we get a
lot of false positives and excessive punts to kblockd, which hurts
performance.

The only real abuser of this infrastructure is SCSI. So export the
async queue run and convert SCSI over to use that. There's room for
improvement in that SCSI need not always use the async call, but this
fixes our performance issue and they can fix that up in due time.

Signed-off-by: Jens Axboe
---
 drivers/scsi/scsi_lib.c          | 17 +----------------
 drivers/scsi/scsi_transport_fc.c | 19 ++++---------------
 2 files changed, 5 insertions(+), 31 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ab55c2fa7ce2..e9901b8f8443 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q)
 	list_splice_init(&shost->starved_list, &starved_list);

 	while (!list_empty(&starved_list)) {
-		int flagset;
-
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q)
 			continue;
 		}

-		spin_unlock(shost->host_lock);
-
-		spin_lock(sdev->request_queue->queue_lock);
-		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-				!test_bit(QUEUE_FLAG_REENTER,
-					&sdev->request_queue->queue_flags);
-		if (flagset)
-			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
-		__blk_run_queue(sdev->request_queue);
-		if (flagset)
-			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
-		spin_unlock(sdev->request_queue->queue_lock);
-
-		spin_lock(shost->host_lock);
+		blk_run_queue_async(sdev->request_queue);
 	}
 	/* put any unprocessed entries back */
 	list_splice(&starved_list, &shost->starved_list);
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 28c33506e4ad..815069d13f9b 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3816,28 +3816,17 @@ fail_host_msg:
 static void
 fc_bsg_goose_queue(struct fc_rport *rport)
 {
-	int flagset;
-	unsigned long flags;
-
 	if (!rport->rqst_q)
 		return;

+	/*
+	 * This get/put dance makes no sense
+	 */
 	get_device(&rport->dev);
-
-	spin_lock_irqsave(rport->rqst_q->queue_lock, flags);
-	flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) &&
-		!test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags);
-	if (flagset)
-		queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q);
-	__blk_run_queue(rport->rqst_q);
-	if (flagset)
-		queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q);
-	spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags);
-
+	blk_run_queue_async(rport->rqst_q);
 	put_device(&rport->dev);
 }

-
 /**
  * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD
  * @q:	rport request queue
--
cgit v1.2.3
From a1f74ae82d133ebb2aabb19d181944b4e83e9960 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg
Date: Tue, 5 Apr 2011 12:45:59 -0400
Subject: [SCSI] mpt2sas: prevent heap overflows and unchecked reads

At two points in handling device ioctls via /dev/mpt2ctl, user-supplied
length values are used to copy data from userspace into heap buffers
without bounds checking, allowing controllable heap corruption and
subsequent privilege escalation.

Additionally, user-supplied values are used to determine the size of a
copy_to_user() as well as the offset into the buffer to be read, with no
bounds checking, allowing users to read arbitrary kernel memory.

Signed-off-by: Dan Rosenberg
Cc: stable@kernel.org
Acked-by: Eric Moore
Signed-off-by: James Bottomley
---
 drivers/scsi/mpt2sas/mpt2sas_ctl.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/mpt2sas/mpt2sas_ctl.c b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
index 1c6d2b405eef..d72f1f2b1392 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_ctl.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_ctl.c
@@ -688,6 +688,13 @@ _ctl_do_mpt_command(struct MPT2SAS_ADAPTER *ioc,
 		goto out;
 	}

+	/* Check for overflow and wraparound */
+	if (karg.data_sge_offset * 4 > ioc->request_sz ||
+	    karg.data_sge_offset > (UINT_MAX / 4)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/* copy in request message frame from user */
 	if (copy_from_user(mpi_request, mf, karg.data_sge_offset*4)) {
 		printk(KERN_ERR "failure at %s:%d/%s()!\n", __FILE__, __LINE__,
@@ -1963,7 +1970,7 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 	Mpi2DiagBufferPostReply_t *mpi_reply;
 	int rc, i;
 	u8 buffer_type;
-	unsigned long timeleft;
+	unsigned long timeleft, request_size, copy_size;
 	u16 smid;
 	u16 ioc_status;
 	u8 issue_reset = 0;
@@ -1999,6 +2006,8 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 		return -ENOMEM;
 	}

+	request_size = ioc->diag_buffer_sz[buffer_type];
+
 	if ((karg.starting_offset % 4) || (karg.bytes_to_read % 4)) {
 		printk(MPT2SAS_ERR_FMT "%s: either the starting_offset "
 		    "or bytes_to_read are not 4 byte aligned\n", ioc->name,
@@ -2006,13 +2015,23 @@ _ctl_diag_read_buffer(void __user *arg, enum block_state state)
 		return -EINVAL;
 	}

+	if (karg.starting_offset > request_size)
+		return -EINVAL;
+
 	diag_data = (void *)(request_data + karg.starting_offset);
 	dctlprintk(ioc, printk(MPT2SAS_INFO_FMT "%s: diag_buffer(%p), "
 	    "offset(%d), sz(%d)\n", ioc->name, __func__,
 	    diag_data, karg.starting_offset, karg.bytes_to_read));

+	/* Truncate data on requests that are too large */
+	if ((diag_data + karg.bytes_to_read < diag_data) ||
+	    (diag_data + karg.bytes_to_read > request_data + request_size))
+		copy_size = request_size - karg.starting_offset;
+	else
+		copy_size = karg.bytes_to_read;
+
 	if (copy_to_user((void __user *)uarg->diagnostic_data,
-	    diag_data, karg.bytes_to_read)) {
+	    diag_data, copy_size)) {
 		printk(MPT2SAS_ERR_FMT "%s: Unable to write "
 		    "mpt_diag_read_buffer_t data @ %p\n", ioc->name,
 		    __func__, diag_data);
--
cgit v1.2.3
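The data_sge_offset check added above has to catch two problems at once: an
offset whose 4-byte expansion exceeds the request frame, and an offset so
large that the multiplication by 4 wraps around. The general idiom is to
compare against UINT_MAX / 4 before trusting the product. A small,
self-contained sketch of that idiom follows (names are illustrative, not
taken from the driver):

#include <limits.h>
#include <stdbool.h>

/*
 * Validate a user-supplied count of 4-byte units against a buffer size.
 * Returns true only if count * 4 cannot wrap and fits within buf_sz.
 */
static bool sge_offset_ok(unsigned int count, unsigned int buf_sz)
{
	if (count > UINT_MAX / 4)	/* the multiplication would wrap */
		return false;
	if (count * 4 > buf_sz)		/* the copy would overrun the frame */
		return false;
	return true;
}

Checking the wrap condition before forming the product means the size
comparison is only ever made on a value that is known not to have
overflowed.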
From 0b8393578c70bc1f09790eeae7d918f38da2e010 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Fri, 8 Apr 2011 15:05:36 -0400
Subject: [SCSI] scsi_dh: fix reference counting in scsi_dh_activate error path

Commit db422318cbca55168cf965f655471dbf8be82433 ([SCSI] scsi_dh:
propagate SCSI device deletion) introduced a regression where the device
reference is not dropped prior to scsi_dh_activate's early return from
the error path.

Signed-off-by: Mike Snitzer
Cc: stable@kernel.org # 2.6.38
Reviewed-by: Mike Christie
Signed-off-by: James Bottomley
---
 drivers/scsi/device_handler/scsi_dh.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c
index 564e6ecd17c2..0119b8147797 100644
--- a/drivers/scsi/device_handler/scsi_dh.c
+++ b/drivers/scsi/device_handler/scsi_dh.c
@@ -394,12 +394,14 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
 	unsigned long flags;
 	struct scsi_device *sdev;
 	struct scsi_device_handler *scsi_dh = NULL;
+	struct device *dev = NULL;

 	spin_lock_irqsave(q->queue_lock, flags);
 	sdev = q->queuedata;
 	if (sdev && sdev->scsi_dh_data)
 		scsi_dh = sdev->scsi_dh_data->scsi_dh;
-	if (!scsi_dh || !get_device(&sdev->sdev_gendev) ||
+	dev = get_device(&sdev->sdev_gendev);
+	if (!scsi_dh || !dev ||
 	    sdev->sdev_state == SDEV_CANCEL || sdev->sdev_state == SDEV_DEL)
 		err = SCSI_DH_NOSYS;
@@ -410,12 +412,13 @@ int scsi_dh_activate(struct request_queue *q, activate_complete fn, void *data)
 	if (err) {
 		if (fn)
 			fn(data, err);
-		return err;
+		goto out;
 	}

 	if (scsi_dh->activate)
 		err = scsi_dh->activate(sdev, fn, data);
-	put_device(&sdev->sdev_gendev);
+out:
+	put_device(dev);
 	return err;
 }
 EXPORT_SYMBOL_GPL(scsi_dh_activate);
--
cgit v1.2.3
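The regression fixed above came from an early return that skipped the
put_device() balancing the earlier get_device(). Capturing the result of
get_device() in a local variable and funnelling every exit through a single
label, as the patch does, keeps the pairing easy to audit. A generic sketch
of that error-path shape follows; prepare() and activate() are hypothetical
helpers, not functions from scsi_dh:

#include <linux/device.h>
#include <scsi/scsi_device.h>

static int prepare(struct scsi_device *sdev);	/* hypothetical helper */
static int activate(struct scsi_device *sdev);	/* hypothetical helper */

static int do_activate(struct scsi_device *sdev)
{
	struct device *dev;
	int err;

	dev = get_device(&sdev->sdev_gendev);	/* take our own reference */
	if (!dev)
		return -ENODEV;

	err = prepare(sdev);
	if (err)
		goto out;	/* error path must still drop the reference */

	err = activate(sdev);
out:
	put_device(dev);	/* exactly one put on every exit path */
	return err;
}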
From 86cbfb5607d4b81b1a993ff689bbd2addd5d3a9b Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Fri, 22 Apr 2011 10:39:59 -0500
Subject: [SCSI] put stricter guards on queue dead checks

SCSI uses request_queue->queuedata == NULL as a signal that the queue
is dying. We set this state in the sdev release function. However, this
allows a small window where we release the last reference but haven't
quite got to this stage yet, and so something will try to take a
reference in scsi_request_fn and oops. It's very rare, but we had a
report here, so we're pushing this as a bug fix.

The actual fix is to set request_queue->queuedata to NULL in
scsi_remove_device() before we drop the reference. This causes correct
automatic rejects from scsi_request_fn as people who hold additional
references try to submit work, and prevents anything from getting a new
reference to the sdev that way.

Cc: stable@kernel.org
Signed-off-by: James Bottomley
---
 drivers/scsi/scsi_sysfs.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index e44ff64233fd..e63912510fb9 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -322,14 +322,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 		kfree(evt);
 	}

-	if (sdev->request_queue) {
-		sdev->request_queue->queuedata = NULL;
-		/* user context needed to free queue */
-		scsi_free_queue(sdev->request_queue);
-		/* temporary expedient, try to catch use of queue lock
-		 * after free of sdev */
-		sdev->request_queue = NULL;
-	}
+	/* NULL queue means the device can't be used */
+	sdev->request_queue = NULL;

 	scsi_target_reap(scsi_target(sdev));

@@ -937,6 +931,12 @@ void __scsi_remove_device(struct scsi_device *sdev)
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
+
+	/* cause the request function to reject all I/O requests */
+	sdev->request_queue->queuedata = NULL;
+
+	/* Freeing the queue signals to block that we're done */
+	scsi_free_queue(sdev->request_queue);
 	put_device(dev);
 }
--
cgit v1.2.3
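The ordering here is the point: __scsi_remove_device() clears queuedata and
frees the queue while holders of extra references may still submit work, so
the request function has to treat a NULL queuedata as "device gone". A
simplified sketch of what that rejection looks like on the request side
(this is not the real scsi_request_fn, only the shape of the check):

#include <linux/blkdev.h>
#include <scsi/scsi_device.h>

static void example_request_fn(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct request *req;

	if (!sdev) {
		/* teardown already cleared queuedata: fail all pending I/O */
		while ((req = blk_fetch_request(q)) != NULL)
			__blk_end_request_all(req, -ENODEV);
		return;
	}

	/* ... normal dispatch path ... */
}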
From 5f6279da3760ce48f478f2856aacebe0c59a39f3 Mon Sep 17 00:00:00 2001
From: Dan Rosenberg
Date: Tue, 5 Apr 2011 13:27:31 -0400
Subject: [SCSI] pmcraid: reject negative request size

There's a code path in pmcraid that can be reached via device ioctl that
causes all sorts of ugliness, including heap corruption or triggering the
OOM killer due to consecutive allocation of large numbers of pages.

Not especially relevant from a security perspective, since users must
have CAP_SYS_ADMIN to open the character device.

First, the user can call pmcraid_chr_ioctl() with a type
PMCRAID_PASSTHROUGH_IOCTL. A pmcraid_passthrough_ioctl_buffer is copied
in, and the request_size variable is set to
buffer->ioarcb.data_transfer_length, which is an arbitrary 32-bit signed
value provided by the user.

If a negative value is provided here, bad things can happen. For example,
pmcraid_build_passthrough_ioadls() is called with this request_size,
which immediately calls pmcraid_alloc_sglist() with a negative size. The
resulting math on allocating a scatter list can result in an overflow in
the kzalloc() call (if num_elem is 0, the sglist will be smaller than
expected), or, if num_elem is unexpectedly large, the subsequent loop
will call alloc_pages() repeatedly, a high number of pages will be
allocated, and the OOM killer might be invoked.

Prevent this value from being negative in pmcraid_ioctl_passthrough().

Signed-off-by: Dan Rosenberg
Cc: stable@kernel.org
Cc: Anil Ravindranath
Signed-off-by: James Bottomley
---
 drivers/scsi/pmcraid.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 96d5ad0c1e42..7f636b118287 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -3814,6 +3814,9 @@ static long pmcraid_ioctl_passthrough(
 			rc = -EFAULT;
 			goto out_free_buffer;
 		}
+	} else if (request_size < 0) {
+		rc = -EINVAL;
+		goto out_free_buffer;
 	}

 	/* check if we have any additional command parameters */
--
cgit v1.2.3

From c055f5b2614b4f758ae6cc86733f31fa4c2c5844 Mon Sep 17 00:00:00 2001
From: James Bottomley
Date: Sun, 1 May 2011 09:42:07 -0500
Subject: [SCSI] fix oops in scsi_run_queue()

The recent commit closing the race window in device teardown:

commit 86cbfb5607d4b81b1a993ff689bbd2addd5d3a9b
Author: James Bottomley
Date:   Fri Apr 22 10:39:59 2011 -0500

    [SCSI] put stricter guards on queue dead checks

is causing a potential NULL deref in scsi_run_queue() because the
q->queuedata may already be NULL by the time this function is called.
Since we shouldn't be running a queue that is being torn down, simply
add a NULL check in scsi_run_queue() to forestall this.

Tested-by: Jim Schutt
Cc: stable@kernel.org
Signed-off-by: James Bottomley
---
 drivers/scsi/scsi_lib.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e9901b8f8443..0bac91e72370 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -400,10 +400,15 @@ static inline int scsi_host_is_busy(struct Scsi_Host *shost)
 static void scsi_run_queue(struct request_queue *q)
 {
 	struct scsi_device *sdev = q->queuedata;
-	struct Scsi_Host *shost = sdev->host;
+	struct Scsi_Host *shost;
 	LIST_HEAD(starved_list);
 	unsigned long flags;

+	/* if the device is dead, sdev will be NULL, so no queue to run */
+	if (!sdev)
+		return;
+
+	shost = sdev->host;
 	if (scsi_target(sdev)->single_lun)
 		scsi_single_lun_run(sdev);

--
cgit v1.2.3
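On the pmcraid change two patches above, the underlying hazard is the usual
signed/unsigned conversion: a negative length that survives to an allocator
or copy is converted to an enormous unsigned size. A tiny stand-alone
illustration in plain C (not driver code):

#include <stdio.h>
#include <stdlib.h>

static void *alloc_request_buffer(int request_size)
{
	if (request_size < 0)	/* the pmcraid fix, in miniature */
		return NULL;

	/*
	 * Without the check, -1 would convert to SIZE_MAX here, and any
	 * element-count math built on top of it can overflow or drive the
	 * allocator into repeated huge allocations.
	 */
	return malloc((size_t)request_size);
}

int main(void)
{
	printf("%p\n", alloc_request_buffer(-1));	/* rejected: null pointer */
	return 0;
}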
From 9937a5e2f32892db0dbeefc2b3bc74b3ae3ea9c7 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 17 May 2011 11:04:44 +0200
Subject: scsi: remove performance regression due to async queue run

Commit c21e6beb removed our queue request_fn re-enter protection, and
defaulted to always running the queues from kblockd to be safe. This was
a known potential slowdown, but should be safe.

Unfortunately this is causing big performance regressions for some, so
we need to improve this logic. Looking into the details of the re-enter,
the real issue is on requeue of requests.

Requeue of requests upon seeing a BUSY condition from the device ends
up re-running the queue, causing traces like this:

scsi_request_fn()
	scsi_dispatch_cmd()
		scsi_queue_insert()
			__scsi_queue_insert()
				scsi_run_queue()
					scsi_request_fn()
						...

potentially causing the issue we want to avoid. So special case the
requeue re-run of the queue, but improve it to offload the entire run
of local queue and starved queue from a single workqueue callback. This
is a lot better than potentially kicking off a workqueue run for each
device seen.

This also fixes the issue of the local device going into recursion,
since the above mentioned commit never moved that queue run out of line.

Signed-off-by: Jens Axboe
---
 drivers/scsi/scsi_lib.c  | 20 ++++++++++++++++----
 drivers/scsi/scsi_scan.c |  2 ++
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e9901b8f8443..01e4e51c4b68 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -74,8 +74,6 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY	3

-static void scsi_run_queue(struct request_queue *q);
-
 /*
  * Function:	scsi_unprep_request()
  *
@@ -161,7 +159,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	blk_requeue_request(q, cmd->request);
 	spin_unlock_irqrestore(q->queue_lock, flags);

-	scsi_run_queue(q);
+	kblockd_schedule_work(q, &device->requeue_work);

 	return 0;
 }
@@ -433,7 +431,11 @@ static void scsi_run_queue(struct request_queue *q)
 			continue;
 		}

-		blk_run_queue_async(sdev->request_queue);
+		spin_unlock(shost->host_lock);
+		spin_lock(sdev->request_queue->queue_lock);
+		__blk_run_queue(sdev->request_queue);
+		spin_unlock(sdev->request_queue->queue_lock);
+		spin_lock(shost->host_lock);
 	}
 	/* put any unprocessed entries back */
 	list_splice(&starved_list, &shost->starved_list);
@@ -442,6 +444,16 @@ static void scsi_run_queue(struct request_queue *q)
 	blk_run_queue(q);
 }

+void scsi_requeue_run_queue(struct work_struct *work)
+{
+	struct scsi_device *sdev;
+	struct request_queue *q;
+
+	sdev = container_of(work, struct scsi_device, requeue_work);
+	q = sdev->request_queue;
+	scsi_run_queue(q);
+}
+
 /*
  * Function:	scsi_requeue_command()
  *
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 087821fac8fe..58584dc0724a 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -242,6 +242,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	int display_failure_msg = 1, ret;
 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
 	extern void scsi_evt_thread(struct work_struct *work);
+	extern void scsi_requeue_run_queue(struct work_struct *work);

 	sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size,
 		       GFP_ATOMIC);
@@ -264,6 +265,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	INIT_LIST_HEAD(&sdev->event_list);
 	spin_lock_init(&sdev->list_lock);
 	INIT_WORK(&sdev->event_work, scsi_evt_thread);
+	INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue);
 	sdev->sdev_gendev.parent = get_device(&starget->dev);
 	sdev->sdev_target = starget;
--
cgit v1.2.3
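The requeue path now defers to a per-device work item: __scsi_queue_insert()
schedules sdev->requeue_work on kblockd, and the handler uses container_of()
to get back from the work_struct to its scsi_device before running the queue
from a safe context. The INIT_WORK()/container_of() pairing is the standard
way to attach deferred work to an object; a stripped-down sketch with a
hypothetical my_device type:

#include <linux/blkdev.h>
#include <linux/workqueue.h>

struct my_device {
	struct request_queue *queue;
	struct work_struct requeue_work;	/* one work item per device */
};

static void run_queue(struct request_queue *q);	/* hypothetical helper */

static void my_requeue_fn(struct work_struct *work)
{
	/* recover the owning object from the embedded work_struct */
	struct my_device *dev = container_of(work, struct my_device, requeue_work);

	run_queue(dev->queue);
}

static void my_device_init(struct my_device *dev)
{
	INIT_WORK(&dev->requeue_work, my_requeue_fn);
}

/* Called from contexts that must not re-enter the request function: */
static void my_kick_queue(struct my_device *dev)
{
	schedule_work(&dev->requeue_work);	/* or queue_work() on a dedicated workqueue */
}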
From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001
From: Grant Likely
Date: Wed, 18 May 2011 11:19:24 -0600
Subject: drivercore: revert addition of of_match to struct device

Commit b826291c, "drivercore/dt: add a match table pointer to struct
device" added an of_match pointer to struct device to cache the
of_match_table entry discovered at driver match time. This was unsafe
because matching is not an atomic operation with probing a driver. If
two or more drivers are attempted to be matched to a device at the same
time, then the cached matching entry pointer could get overwritten.

This patch reverts the of_match cache pointer and reworks all users to
call of_match_device() directly instead.

Signed-off-by: Grant Likely
---
 drivers/scsi/qlogicpti.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'drivers/scsi')

diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c
index e2d45c91b8e8..9689d41c7888 100644
--- a/drivers/scsi/qlogicpti.c
+++ b/drivers/scsi/qlogicpti.c
@@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = {
 	.use_clustering		= ENABLE_CLUSTERING,
 };

+static const struct of_device_id qpti_match[];
 static int __devinit qpti_sbus_probe(struct platform_device *op)
 {
+	const struct of_device_id *match;
 	struct scsi_host_template *tpnt;
 	struct device_node *dp = op->dev.of_node;
 	struct Scsi_Host *host;
@@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op)
 	static int nqptis;
 	const char *fcode;

-	if (!op->dev.of_match)
+	match = of_match_device(qpti_match, &op->dev);
+	if (!match)
 		return -EINVAL;
-	tpnt = op->dev.of_match->data;
+	tpnt = match->data;

 	/* Sometimes Antares cards come up not completely
 	 * setup, and we get a report of a zero IRQ.
--
cgit v1.2.3
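With dev->of_match gone, a probe routine that wants the matched table entry
calls of_match_device() itself, against the same table used for binding,
exactly as the qlogicpti conversion does. A condensed sketch of that shape
for a hypothetical platform driver (the mydrv names are made up; error
handling trimmed):

#include <linux/of_device.h>
#include <linux/platform_device.h>

struct mydrv_cfg {				/* hypothetical per-match data */
	int fifo_depth;
};

static struct mydrv_cfg mydrv_default_cfg = { .fifo_depth = 64 };

/* forward declaration, the same trick the qlogicpti patch uses */
static const struct of_device_id mydrv_match[];

static int mydrv_probe(struct platform_device *op)
{
	const struct of_device_id *match;
	const struct mydrv_cfg *cfg;

	/* re-run matching instead of trusting a cached dev->of_match */
	match = of_match_device(mydrv_match, &op->dev);
	if (!match)
		return -EINVAL;
	cfg = match->data;

	/* ... normal device setup using cfg->fifo_depth ... */
	return 0;
}

static const struct of_device_id mydrv_match[] = {
	{ .compatible = "vendor,mydev", .data = &mydrv_default_cfg },
	{ /* sentinel */ },
};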