author		Linus Torvalds <torvalds@linux-foundation.org>	2019-06-02 09:27:44 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2019-06-02 09:27:44 -0700
commit		9221dced3069cc9ae2986ba1191b02dae560df28 (patch)
tree		8697b07351f04ad3f0bdf7aba7640d79b920b614
parent		1975b337ce26b53814f1b5c55b260649a7115393 (diff)
parent		61939b12dc24d0ac958020f261046c35a16e0c48 (diff)
Merge tag 'for-linus-20190601' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
- A set of patches fixing code comments / kerneldoc (Bart)
- Don't allow loop file change for exclusive open (Jan; see the exclusive-open demo after the diff)
- Fix revalidate of hidden genhd (Jan)
- Init queue failure memory free fix (Jes; see the unwind sketch after the diffstat)
- Improve rq limits failure print (John)
- Fixup for queue removal/addition (Ming; see the enter/exit sketch after the shortlog)
- Missed error propagation for io_uring buffer registration (Pavel)
* tag 'for-linus-20190601' of git://git.kernel.dk/linux-block:
block: print offending values when cloned rq limits are exceeded
blk-mq: Document the blk_mq_hw_queue_to_node() arguments
blk-mq: Fix spelling in a source code comment
block: Fix bsg_setup_queue() kernel-doc header
block: Fix rq_qos_wait() kernel-doc header
block: Fix blk_mq_*_map_queues() kernel-doc headers
block: Fix throtl_pending_timer_fn() kernel-doc header
block: Convert blk_invalidate_devt() header into a non-kernel-doc header
block/partitions/ldm: Convert a kernel-doc header into a non-kernel-doc header
blk-mq: Fix memory leak in error handling
block: don't protect generic_make_request_checks with blk_queue_enter
block: move blk_exit_queue into __blk_release_queue
block: Don't revalidate bdev of hidden gendisk
loop: Don't change loop device under exclusive opener
io_uring: Fix __io_uring_register() false success
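The queue removal/addition fixup reworks generic_make_request() so that each dispatched bio takes a reference on its own queue and drops it as soon as ->make_request_fn() returns, instead of carrying one long-lived reference across queue switches. Below is a minimal userspace sketch of that enter/exit pattern; it is not the kernel's blk_queue_enter()/blk_queue_exit() (which use percpu refcounts and may block or fail with REQ_NOWAIT semantics), and all names here are illustrative.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for a request queue's lifetime state. */
struct queue {
        atomic_int refs;    /* submitters currently inside the queue */
        atomic_bool dying;  /* set once teardown has started */
};

/* Take a reference unless the queue is going away (cf. blk_queue_enter). */
static bool queue_enter(struct queue *q)
{
        atomic_fetch_add(&q->refs, 1);
        if (atomic_load(&q->dying)) {
                /* Teardown raced with us: back out and fail the enter. */
                atomic_fetch_sub(&q->refs, 1);
                return false;
        }
        return true;
}

/* Drop the reference taken by queue_enter (cf. blk_queue_exit). */
static void queue_exit(struct queue *q)
{
        atomic_fetch_sub(&q->refs, 1);
}

/* One submission: enter/exit brackets exactly one dispatch. */
static void submit(struct queue *q, const char *bio)
{
        if (!queue_enter(q)) {
                printf("%s: failed, queue dying\n", bio);
                return;
        }
        printf("%s: dispatched\n", bio);
        queue_exit(q);
}

int main(void)
{
        struct queue q;

        atomic_init(&q.refs, 0);
        atomic_init(&q.dying, false);

        submit(&q, "bio0");           /* succeeds */
        atomic_store(&q.dying, true); /* begin teardown */
        submit(&q, "bio1");           /* fails: queue is dying */
        return 0;
}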
-rw-r--r--  block/blk-core.c        |  81
-rw-r--r--  block/blk-mq-cpumap.c   |  10
-rw-r--r--  block/blk-mq-pci.c      |   2
-rw-r--r--  block/blk-mq-rdma.c     |   4
-rw-r--r--  block/blk-mq-virtio.c   |   4
-rw-r--r--  block/blk-mq.c          |   5
-rw-r--r--  block/blk-rq-qos.c      |   7
-rw-r--r--  block/blk-sysfs.c       |  47
-rw-r--r--  block/blk-throttle.c    |   2
-rw-r--r--  block/blk.h             |   1
-rw-r--r--  block/bsg-lib.c         |   1
-rw-r--r--  block/genhd.c           |   4
-rw-r--r--  block/partitions/ldm.c  |   2
-rw-r--r--  drivers/block/loop.c    |  18
-rw-r--r--  fs/block_dev.c          |  25
-rw-r--r--  fs/io_uring.c           |   2
16 files changed, 102 insertions(+), 113 deletions(-)
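The blk-mq init-failure fix in the diff below adds an err_poll label so that a failed blk_mq_alloc_ctxs() frees the poll-statistics callback instead of leaking it. Here is a self-contained sketch of that goto-unwind idiom; the struct and allocation steps are invented for illustration and do not mirror blk_mq_init_allocated_queue() itself.

#include <stdio.h>
#include <stdlib.h>

struct ctx { int *poll_cb; int *ctxs; int *hctxs; };

static struct ctx *ctx_init(void)
{
        struct ctx *c = calloc(1, sizeof(*c));
        if (!c)
                return NULL;

        c->poll_cb = malloc(sizeof(int));
        if (!c->poll_cb)
                goto err_exit;

        c->ctxs = malloc(sizeof(int));
        if (!c->ctxs)
                goto err_poll;  /* must free poll_cb, not skip it */

        c->hctxs = malloc(sizeof(int));
        if (!c->hctxs)
                goto err_ctxs;

        return c;

err_ctxs:
        free(c->ctxs);
err_poll:
        free(c->poll_cb);       /* the step the original error path missed */
err_exit:
        free(c);
        return NULL;
}

int main(void)
{
        struct ctx *c = ctx_init();

        printf("init %s\n", c ? "succeeded" : "failed");
        if (c) {
                free(c->hctxs);
                free(c->ctxs);
                free(c->poll_cb);
                free(c);
        }
        return 0;
}

The pattern keeps one label per allocation, ordered so a jump to any label releases exactly what was acquired before the failure; the bug class fixed here is a goto that lands past one of those frees.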
diff --git a/block/blk-core.c b/block/blk-core.c
index 1bf83a0df0f6..ee1b35fe8572 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -282,35 +282,6 @@ void blk_set_queue_dying(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
-/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
-void blk_exit_queue(struct request_queue *q)
-{
-        /*
-         * Since the I/O scheduler exit code may access cgroup information,
-         * perform I/O scheduler exit before disassociating from the block
-         * cgroup controller.
-         */
-        if (q->elevator) {
-                ioc_clear_queue(q);
-                elevator_exit(q, q->elevator);
-                q->elevator = NULL;
-        }
-
-        /*
-         * Remove all references to @q from the block cgroup controller before
-         * restoring @q->queue_lock to avoid that restoring this pointer causes
-         * e.g. blkcg_print_blkgs() to crash.
-         */
-        blkcg_exit_queue(q);
-
-        /*
-         * Since the cgroup code may dereference the @q->backing_dev_info
-         * pointer, only decrease its reference count after having removed the
-         * association with the block cgroup controller.
-         */
-        bdi_put(q->backing_dev_info);
-}
-
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -346,14 +317,6 @@ void blk_cleanup_queue(struct request_queue *q)
         del_timer_sync(&q->backing_dev_info->laptop_mode_wb_timer);
         blk_sync_queue(q);
 
-        /*
-         * I/O scheduler exit is only safe after the sysfs scheduler attribute
-         * has been removed.
-         */
-        WARN_ON_ONCE(q->kobj.state_in_sysfs);
-
-        blk_exit_queue(q);
-
         if (queue_is_mq(q))
                 blk_mq_exit_queue(q);
 
@@ -994,22 +957,8 @@ blk_qc_t generic_make_request(struct bio *bio)
          * yet.
          */
         struct bio_list bio_list_on_stack[2];
-        blk_mq_req_flags_t flags = 0;
-        struct request_queue *q = bio->bi_disk->queue;
         blk_qc_t ret = BLK_QC_T_NONE;
 
-        if (bio->bi_opf & REQ_NOWAIT)
-                flags = BLK_MQ_REQ_NOWAIT;
-        if (bio_flagged(bio, BIO_QUEUE_ENTERED))
-                blk_queue_enter_live(q);
-        else if (blk_queue_enter(q, flags) < 0) {
-                if (!blk_queue_dying(q) && (bio->bi_opf & REQ_NOWAIT))
-                        bio_wouldblock_error(bio);
-                else
-                        bio_io_error(bio);
-                return ret;
-        }
-
         if (!generic_make_request_checks(bio))
                 goto out;
 
@@ -1046,22 +995,11 @@ blk_qc_t generic_make_request(struct bio *bio)
         bio_list_init(&bio_list_on_stack[0]);
         current->bio_list = bio_list_on_stack;
         do {
-                bool enter_succeeded = true;
-
-                if (unlikely(q != bio->bi_disk->queue)) {
-                        if (q)
-                                blk_queue_exit(q);
-                        q = bio->bi_disk->queue;
-                        flags = 0;
-                        if (bio->bi_opf & REQ_NOWAIT)
-                                flags = BLK_MQ_REQ_NOWAIT;
-                        if (blk_queue_enter(q, flags) < 0) {
-                                enter_succeeded = false;
-                                q = NULL;
-                        }
-                }
+                struct request_queue *q = bio->bi_disk->queue;
+                blk_mq_req_flags_t flags = bio->bi_opf & REQ_NOWAIT ?
+                        BLK_MQ_REQ_NOWAIT : 0;
 
-                if (enter_succeeded) {
+                if (likely(blk_queue_enter(q, flags) == 0)) {
                         struct bio_list lower, same;
 
                         /* Create a fresh bio_list for all subordinate requests */
@@ -1069,6 +1007,8 @@ blk_qc_t generic_make_request(struct bio *bio)
                         bio_list_init(&bio_list_on_stack[0]);
                         ret = q->make_request_fn(q, bio);
 
+                        blk_queue_exit(q);
+
                         /* sort new bios into those for a lower level
                          * and those for the same level
                          */
@@ -1095,8 +1035,6 @@ blk_qc_t generic_make_request(struct bio *bio)
         current->bio_list = NULL; /* deactivate */
 
 out:
-        if (q)
-                blk_queue_exit(q);
         return ret;
 }
 EXPORT_SYMBOL(generic_make_request);
@@ -1200,7 +1138,9 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
                                       struct request *rq)
 {
         if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
-                printk(KERN_ERR "%s: over max size limit.\n", __func__);
+                printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
+                        __func__, blk_rq_sectors(rq),
+                        blk_queue_get_max_sectors(q, req_op(rq)));
                 return -EIO;
         }
 
@@ -1212,7 +1152,8 @@ static int blk_cloned_rq_check_limits(struct request_queue *q,
          */
         blk_recalc_rq_segments(rq);
         if (rq->nr_phys_segments > queue_max_segments(q)) {
-                printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+                printk(KERN_ERR "%s: over max segments limit. (%hu > %hu)\n",
+                        __func__, rq->nr_phys_segments, queue_max_segments(q));
                 return -EIO;
         }
 
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 48bebf00a5f3..f945621a0e8f 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -42,8 +42,8 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
                 /*
                  * First do sequential mapping between CPUs and queues.
                  * In case we still have CPUs to map, and we have some number of
-                 * threads per cores then map sibling threads to the same queue for
-                 * performace optimizations.
+                 * threads per cores then map sibling threads to the same queue
+                 * for performance optimizations.
                  */
                 if (cpu < nr_queues) {
                         map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
@@ -60,7 +60,11 @@ int blk_mq_map_queues(struct blk_mq_queue_map *qmap)
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);
 
-/*
+/**
+ * blk_mq_hw_queue_to_node - Look up the memory node for a hardware queue index
+ * @qmap: CPU to hardware queue map.
+ * @index: hardware queue index.
+ *
  * We have no quick way of doing reverse lookups. This is only used at
  * queue init time, so runtime isn't important.
  */
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index ad4545a2a98b..b595a94c4d16 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -13,7 +13,7 @@
 
 /**
  * blk_mq_pci_map_queues - provide a default queue mapping for PCI device
- * @set: tagset to provide the mapping for
+ * @qmap: CPU to hardware queue map.
  * @pdev: PCI device associated with @set.
  * @offset: Offset to use for the pci irq vector
  *
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c
index cc921e6ba709..14f968e58b8f 100644
--- a/block/blk-mq-rdma.c
+++ b/block/blk-mq-rdma.c
@@ -8,8 +8,8 @@
 
 /**
  * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
- * @set: tagset to provide the mapping for
- * @dev: rdma device associated with @set.
+ * @map: CPU to hardware queue map.
+ * @dev: rdma device to provide a mapping for.
  * @first_vec: first interrupt vectors to use for queues (usually 0)
 *
 * This function assumes the rdma device @dev has at least as many available
diff --git a/block/blk-mq-virtio.c b/block/blk-mq-virtio.c
index 75a52c18a8f6..488341628256 100644
--- a/block/blk-mq-virtio.c
+++ b/block/blk-mq-virtio.c
@@ -11,8 +11,8 @@
 
 /**
  * blk_mq_virtio_map_queues - provide a default queue mapping for virtio device
- * @set: tagset to provide the mapping for
- * @vdev: virtio device associated with @set.
+ * @qmap: CPU to hardware queue map.
+ * @vdev: virtio device to provide a mapping for.
  * @first_vec: first interrupt vectors to use for queues (usually 0)
 *
 * This function assumes the virtio device @vdev has at least as many available
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 32b8ad3d341b..ce0f5f4ede70 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2865,7 +2865,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
                 goto err_exit;
 
         if (blk_mq_alloc_ctxs(q))
-                goto err_exit;
+                goto err_poll;
 
         /* init q->mq_kobj and sw queues' kobjects */
         blk_mq_sysfs_init(q);
@@ -2929,6 +2929,9 @@ err_hctxs:
         kfree(q->queue_hw_ctx);
 err_sys_init:
         blk_mq_sysfs_deinit(q);
+err_poll:
+        blk_stat_free_callback(q->poll_cb);
+        q->poll_cb = NULL;
 err_exit:
         q->mq_ops = NULL;
         return ERR_PTR(-ENOMEM);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 3f55b56f24bc..659ccb8b693f 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -209,9 +209,10 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 
 /**
  * rq_qos_wait - throttle on a rqw if we need to
- * @private_data - caller provided specific data
- * @acquire_inflight_cb - inc the rqw->inflight counter if we can
- * @cleanup_cb - the callback to cleanup in case we race with a waker
+ * @rqw: rqw to throttle on
+ * @private_data: caller provided specific data
+ * @acquire_inflight_cb: inc the rqw->inflight counter if we can
+ * @cleanup_cb: the callback to cleanup in case we race with a waker
 *
 * This provides a uniform place for the rq_qos users to do their throttling.
 * Since you can end up with a lot of things sleeping at once, this manages the
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a16a02c52a85..75b5281cc577 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -840,6 +840,36 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
         kmem_cache_free(blk_requestq_cachep, q);
 }
 
+/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
+static void blk_exit_queue(struct request_queue *q)
+{
+        /*
+         * Since the I/O scheduler exit code may access cgroup information,
+         * perform I/O scheduler exit before disassociating from the block
+         * cgroup controller.
+         */
+        if (q->elevator) {
+                ioc_clear_queue(q);
+                elevator_exit(q, q->elevator);
+                q->elevator = NULL;
+        }
+
+        /*
+         * Remove all references to @q from the block cgroup controller before
+         * restoring @q->queue_lock to avoid that restoring this pointer causes
+         * e.g. blkcg_print_blkgs() to crash.
+         */
+        blkcg_exit_queue(q);
+
+        /*
+         * Since the cgroup code may dereference the @q->backing_dev_info
+         * pointer, only decrease its reference count after having removed the
+         * association with the block cgroup controller.
+         */
+        bdi_put(q->backing_dev_info);
+}
+
+
 /**
  * __blk_release_queue - release a request queue
  * @work: pointer to the release_work member of the request queue to be released
@@ -860,23 +890,10 @@ static void __blk_release_queue(struct work_struct *work)
                 blk_stat_remove_callback(q, q->poll_cb);
         blk_stat_free_callback(q->poll_cb);
 
-        if (!blk_queue_dead(q)) {
-                /*
-                 * Last reference was dropped without having called
-                 * blk_cleanup_queue().
-                 */
-                WARN_ONCE(blk_queue_init_done(q),
-                          "request queue %p has been registered but blk_cleanup_queue() has not been called for that queue\n",
-                          q);
-                blk_exit_queue(q);
-        }
-
-        WARN(blk_queue_root_blkg(q),
-             "request queue %p is being released but it has not yet been removed from the blkcg controller\n",
-             q);
-
         blk_free_queue_stats(q->stats);
 
+        blk_exit_queue(q);
+
         blk_queue_free_zone_bitmaps(q);
 
         if (queue_is_mq(q))
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1b97a73d2fb1..9ea7c0ecad10 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1220,7 +1220,7 @@ static bool throtl_can_upgrade(struct throtl_data *td,
                 struct throtl_grp *this_tg);
 /**
  * throtl_pending_timer_fn - timer function for service_queue->pending_timer
- * @arg: the throtl_service_queue being serviced
+ * @t: the pending_timer member of the throtl_service_queue being serviced
 *
 * This timer is armed when a child throtl_grp with active bio's become
 * pending and queued on the service_queue's pending_tree and expires when
diff --git a/block/blk.h b/block/blk.h
index e27fd1512e4b..91b3581b7c7a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -50,7 +50,6 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
                 int node, int cmd_size, gfp_t flags);
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
-void blk_exit_queue(struct request_queue *q);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                 struct bio *bio);
 void blk_freeze_queue(struct request_queue *q);
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index b898a1cdf872..785dd58947f1 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -354,6 +354,7 @@ static const struct blk_mq_ops bsg_mq_ops = {
  * @dev: device to attach bsg device to
  * @name: device to give bsg device
  * @job_fn: bsg job handler
+ * @timeout: timeout handler function pointer
  * @dd_job_size: size of LLD data needed for each job
  */
 struct request_queue *bsg_setup_queue(struct device *dev, const char *name,
diff --git a/block/genhd.c b/block/genhd.c
index ad6826628e79..24654e1d83e6 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -532,8 +532,8 @@ void blk_free_devt(dev_t devt)
         }
 }
 
-/**
- * We invalidate devt by assigning NULL pointer for devt in idr.
+/*
+ * We invalidate devt by assigning NULL pointer for devt in idr.
  */
 void blk_invalidate_devt(dev_t devt)
 {
diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c
index 6db573f33219..fe5d970e2e60 100644
--- a/block/partitions/ldm.c
+++ b/block/partitions/ldm.c
@@ -19,7 +19,7 @@
 #include "check.h"
 #include "msdos.h"
 
-/**
+/*
  * ldm_debug/info/error/crit - Output an error message
  * @f: A printf format string containing the message
  * @...: Variables to substitute into @f
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 102d79575895..f11b7dc16e9d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -945,9 +945,20 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
         if (!file)
                 goto out;
 
+        /*
+         * If we don't hold exclusive handle for the device, upgrade to it
+         * here to avoid changing device under exclusive owner.
+         */
+        if (!(mode & FMODE_EXCL)) {
+                bdgrab(bdev);
+                error = blkdev_get(bdev, mode | FMODE_EXCL, loop_set_fd);
+                if (error)
+                        goto out_putf;
+        }
+
         error = mutex_lock_killable(&loop_ctl_mutex);
         if (error)
-                goto out_putf;
+                goto out_bdev;
 
         error = -EBUSY;
         if (lo->lo_state != Lo_unbound)
@@ -1012,10 +1023,15 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
         mutex_unlock(&loop_ctl_mutex);
         if (partscan)
                 loop_reread_partitions(lo, bdev);
+        if (!(mode & FMODE_EXCL))
+                blkdev_put(bdev, mode | FMODE_EXCL);
         return 0;
 
 out_unlock:
         mutex_unlock(&loop_ctl_mutex);
+out_bdev:
+        if (!(mode & FMODE_EXCL))
+                blkdev_put(bdev, mode | FMODE_EXCL);
 out_putf:
         fput(file);
 out:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index e6886c93c89d..749f5984425d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1406,20 +1406,27 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
  */
 int revalidate_disk(struct gendisk *disk)
 {
-        struct block_device *bdev;
         int ret = 0;
 
         if (disk->fops->revalidate_disk)
                 ret = disk->fops->revalidate_disk(disk);
 
-        bdev = bdget_disk(disk, 0);
-        if (!bdev)
-                return ret;
-        mutex_lock(&bdev->bd_mutex);
-        check_disk_size_change(disk, bdev, ret == 0);
-        bdev->bd_invalidated = 0;
-        mutex_unlock(&bdev->bd_mutex);
-        bdput(bdev);
+        /*
+         * Hidden disks don't have associated bdev so there's no point in
+         * revalidating it.
+         */
+        if (!(disk->flags & GENHD_FL_HIDDEN)) {
+                struct block_device *bdev = bdget_disk(disk, 0);
+
+                if (!bdev)
+                        return ret;
+
+                mutex_lock(&bdev->bd_mutex);
+                check_disk_size_change(disk, bdev, ret == 0);
+                bdev->bd_invalidated = 0;
+                mutex_unlock(&bdev->bd_mutex);
+                bdput(bdev);
+        }
         return ret;
 }
 EXPORT_SYMBOL(revalidate_disk);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 310f8d17c53e..0fbb486a320e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2616,7 +2616,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 
                 ret = io_copy_iov(ctx, &iov, arg, i);
                 if (ret)
-                        break;
+                        goto err;
 
                 /*
                  * Don't impose further limits on the size and buffer
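The loop fix above makes loop_set_fd() take an exclusive claim on the block device (blkdev_get() with FMODE_EXCL) before changing the backing file. The userspace-visible counterpart is exclusive open: on Linux, opening a block device with O_EXCL (no O_CREAT needed) fails with EBUSY while the device is mounted or held by another exclusive opener. A small demo follows; /dev/loop0 is just an example path.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const char *dev = "/dev/loop0";  /* example device */

        /* On Linux, O_EXCL on a block device requests exclusive access. */
        int fd = open(dev, O_RDWR | O_EXCL);
        if (fd < 0) {
                /* EBUSY: the device is mounted or exclusively held. */
                fprintf(stderr, "open %s: %s\n", dev, strerror(errno));
                return 1;
        }

        /* While fd is held, other exclusive openers of dev get EBUSY. */
        printf("holding %s exclusively; press Enter to release\n", dev);
        getchar();

        close(fd);
        return 0;
}

If the device is mounted, the open fails with EBUSY immediately; otherwise the process holds the exclusive claim until it closes the descriptor or exits.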