From 82ed4db499b8598f16f8871261bff088d6b0597f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 27 Jan 2017 09:46:29 +0100 Subject: block: split scsi_request out of struct request And require all drivers that want to support BLOCK_PC to allocate it as the first thing of their private data. To support this the legacy IDE and BSG code is switched to set cmd_size on their queues to let the block layer allocate the additional space. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/nfsd/blocklayout.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 0780ff864539..930d98caab5b 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "blocklayoutxdr.h" #include "pnfs.h" @@ -213,6 +214,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, { struct request_queue *q = bdev->bd_disk->queue; struct request *rq; + struct scsi_request *req; size_t bufflen = 252, len, id_len; u8 *buf, *d, type, assoc; int error; @@ -226,18 +228,19 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, error = -ENOMEM; goto out_free_buf; } - blk_rq_set_block_pc(rq); + req = scsi_req(rq); + scsi_req_init(rq); error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL); if (error) goto out_put_request; - rq->cmd[0] = INQUIRY; - rq->cmd[1] = 1; - rq->cmd[2] = 0x83; - rq->cmd[3] = bufflen >> 8; - rq->cmd[4] = bufflen & 0xff; - rq->cmd_len = COMMAND_SIZE(INQUIRY); + req->cmd[0] = INQUIRY; + req->cmd[1] = 1; + req->cmd[2] = 0x83; + req->cmd[3] = bufflen >> 8; + req->cmd[4] = bufflen & 0xff; + req->cmd_len = COMMAND_SIZE(INQUIRY); error = blk_execute_rq(rq->q, NULL, rq, 1); if (error) { -- cgit v1.2.3 From 72148aecf4fd9218e2db1333534e8403fdd66eb9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 28 Jan 2017 09:32:51 +0100 Subject: block: make scsi_request and scsi ioctl support optional We only need this code to support scsi, ide, cciss and virtio. And at least for virtio it's a deprecated feature to start with. This should shrink the kernel size for embedded device that only use, say eMMC a bit. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/nfsd/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 47febcf99185..20b1c17320d5 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -104,6 +104,7 @@ config NFSD_SCSILAYOUT depends on NFSD_V4 && BLOCK select NFSD_PNFS select EXPORTFS_BLOCK_OPS + select BLK_SCSI_REQUEST help This option enables support for the exporting pNFS SCSI layouts in the kernel's NFS server. The pNFS SCSI layout enables NFS -- cgit v1.2.3 From aebf526b53aea164508730427597d45f3e06b376 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jan 2017 16:57:31 +0100 Subject: block: fold cmd_type into the REQ_OP_ space Instead of keeping two levels of indirection for requests types, fold it all into the operations. The little caveat here is that previously cmd_type only applied to struct request, while the request and bio op fields were set to plain REQ_OP_READ/WRITE even for passthrough operations. Instead this patch adds new REQ_OP_* for SCSI passthrough and driver private requests, althought it has to add two for each so that we can communicate the data in/out nature of the request. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/nfsd/blocklayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 930d98caab5b..a06115e31612 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -223,7 +223,7 @@ static int nfsd4_scsi_identify_device(struct block_device *bdev, if (!buf) return -ENOMEM; - rq = blk_get_request(q, READ, GFP_KERNEL); + rq = blk_get_request(q, REQ_OP_SCSI_IN, GFP_KERNEL); if (IS_ERR(rq)) { error = -ENOMEM; goto out_free_buf; -- cgit v1.2.3 From f44f1ab5a2dcd4e16eab850fd08e40ff2d0c28d4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:49 +0100 Subject: block: Unhash block device inodes on gendisk destruction Currently, block device inodes stay around after corresponding gendisk hash died until memory reclaim finds them and frees them. Since we will make block device inode pin the bdi, we want to free the block device inode as soon as the device goes away so that bdi does not stay around unnecessarily. Furthermore we need to avoid issues when new device with the same major,minor pair gets created since reusing the bdi structure would be rather difficult in this case. Unhashing block device inode on gendisk destruction nicely deals with these problems. Once last block device inode reference is dropped (which may be directly in del_gendisk()), the inode gets evicted. Furthermore if the major,minor pair gets reallocated, we are guaranteed to get new block device inode even if old block device inode is not yet evicted and thus we avoid issues with possible reuse of bdi. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/block_dev.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 5db5d1340d69..ed6a34be7a1e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -954,6 +954,21 @@ static int bdev_set(struct inode *inode, void *data) static LIST_HEAD(all_bdevs); +/* + * If there is a bdev inode for this device, unhash it so that it gets evicted + * as soon as last inode reference is dropped. + */ +void bdev_unhash_inode(dev_t dev) +{ + struct inode *inode; + + inode = ilookup5(blockdev_superblock, hash(dev), bdev_test, &dev); + if (inode) { + remove_inode_hash(inode); + iput(inode); + } +} + struct block_device *bdget(dev_t dev) { struct block_device *bdev; -- cgit v1.2.3 From dc3b17cc8bf21307c7e076e7c778d5db756f7871 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:50 +0100 Subject: block: Use pointer to backing_dev_info from request_queue We will want to have struct backing_dev_info allocated separately from struct request_queue. As the first step add pointer to backing_dev_info to request_queue and convert all users touching it. No functional changes in this patch. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/gfs2/ops_fstype.c | 2 +- fs/nilfs2/super.c | 2 +- fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index a34308df927f..c0e5b9a8bf5f 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1226,7 +1226,7 @@ static int set_gfs2_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 12eeae62a2b1..e1872f36147f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1068,7 +1068,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_time_gran = 1; sb->s_max_links = NILFS_LINK_MAX; - sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info; + sb->s_bdi = bdev_get_queue(sb->s_bdev)->backing_dev_info; err = load_nilfs(nilfs, sb); if (err) diff --git a/fs/super.c b/fs/super.c index 1709ed029a2c..ea662b0e5e78 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1047,7 +1047,7 @@ static int set_bdev_super(struct super_block *s, void *data) * We set the bdi here to the queue backing, file systems can * overwrite this in ->fill_super() */ - s->s_bdi = &bdev_get_queue(s->s_bdev)->backing_dev_info; + s->s_bdi = bdev_get_queue(s->s_bdev)->backing_dev_info; return 0; } -- cgit v1.2.3 From b1d2dc5659b41741f5a29b2ade76ffb4e5bb13d8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:52 +0100 Subject: block: Make blk_get_backing_dev_info() safe without open bdev Currenly blk_get_backing_dev_info() is not safe to be called when the block device is not open as bdev->bd_disk is NULL in that case. However inode_to_bdi() uses this function and may be call called from flusher worker or other writeback related functions without bdev being open which leads to crashes such as: [113031.075540] Unable to handle kernel paging request for data at address 0x00000000 [113031.075614] Faulting instruction address: 0xc0000000003692e0 0:mon> t [c0000000fb65f900] c00000000036cb6c writeback_sb_inodes+0x30c/0x590 [c0000000fb65fa10] c00000000036ced4 __writeback_inodes_wb+0xe4/0x150 [c0000000fb65fa70] c00000000036d33c wb_writeback+0x30c/0x450 [c0000000fb65fb40] c00000000036e198 wb_workfn+0x268/0x580 [c0000000fb65fc50] c0000000000f3470 process_one_work+0x1e0/0x590 [c0000000fb65fce0] c0000000000f38c8 worker_thread+0xa8/0x660 [c0000000fb65fd80] c0000000000fc4b0 kthread+0x110/0x130 [c0000000fb65fe30] c0000000000098f0 ret_from_kernel_thread+0x5c/0x6c Signed-off-by: Jens Axboe --- fs/block_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index ed6a34be7a1e..601b71b76d7f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -884,6 +884,8 @@ static void bdev_evict_inode(struct inode *inode) spin_lock(&bdev_lock); list_del_init(&bdev->bd_list); spin_unlock(&bdev_lock); + if (bdev->bd_bdi != &noop_backing_dev_info) + bdi_put(bdev->bd_bdi); } static const struct super_operations bdev_sops = { @@ -986,6 +988,7 @@ struct block_device *bdget(dev_t dev) bdev->bd_contains = NULL; bdev->bd_super = NULL; bdev->bd_inode = inode; + bdev->bd_bdi = &noop_backing_dev_info; bdev->bd_block_size = (1 << inode->i_blkbits); bdev->bd_part_count = 0; bdev->bd_invalidated = 0; @@ -1542,6 +1545,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = disk; bdev->bd_queue = disk->queue; bdev->bd_contains = bdev; + if (bdev->bd_bdi == &noop_backing_dev_info) + bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); if (!partno) { ret = -ENXIO; @@ -1637,6 +1642,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdev->bd_disk = NULL; bdev->bd_part = NULL; bdev->bd_queue = NULL; + bdi_put(bdev->bd_bdi); + bdev->bd_bdi = &noop_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; -- cgit v1.2.3 From efa7c9f97e3ef624e9a398bf69c15f58eea9f0e8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 2 Feb 2017 15:56:53 +0100 Subject: block: Get rid of blk_get_backing_dev_info() blk_get_backing_dev_info() is now a simple dereference. Remove that function and simplify some code around that. Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 2 +- fs/xfs/xfs_buf.c | 3 +-- fs/xfs/xfs_buf.h | 1 - 4 files changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 18004169552c..37a31b12bb0c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1800,7 +1800,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) list_for_each_entry_rcu(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; - bdi = blk_get_backing_dev_info(device->bdev); + bdi = device->bdev->bd_bdi; if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3c3c69c0eee4..b2e70073a10d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -366,7 +366,7 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) */ blk_start_plug(&plug); - bdi = blk_get_backing_dev_info(device->bdev); + bdi = device->bdev->bd_bdi; limit = btrfs_async_submit_limit(fs_info); limit = limit * 2 / 3; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 7f0a01f7b592..8bbec20aa138 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -757,7 +757,7 @@ xfs_buf_readahead_map( int nmaps, const struct xfs_buf_ops *ops) { - if (bdi_read_congested(target->bt_bdi)) + if (bdi_read_congested(target->bt_bdev->bd_bdi)) return; xfs_buf_read_map(target, map, nmaps, @@ -1790,7 +1790,6 @@ xfs_alloc_buftarg( btp->bt_mount = mp; btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; - btp->bt_bdi = blk_get_backing_dev_info(bdev); if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8a9d3a9599f0..3c867e5a63e1 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -109,7 +109,6 @@ typedef unsigned int xfs_buf_flags_t; typedef struct xfs_buftarg { dev_t bt_dev; struct block_device *bt_bdev; - struct backing_dev_info *bt_bdi; struct xfs_mount *bt_mount; unsigned int bt_meta_sectorsize; size_t bt_meta_sectormask; -- cgit v1.2.3 From a7c5437b0bbec5165df6eb1efc5e37dc40b9e05d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 31 Jan 2017 14:53:17 -0800 Subject: debugfs: add debugfs_lookup() We don't always have easy access to the dentry of a file or directory we created in debugfs. Add a helper which allows us to get a dentry we previously created. The motivation for this change is a problem with blktrace and the blk-mq debugfs entries introduced in 07e4fead45e6 ("blk-mq: create debugfs directory tree"). Namely, in some cases, the directory that blktrace needs to create may already exist, but in other cases, it may not. We _could_ rely on a bunch of implied knowledge to decide whether to create the directory or not, but it's much cleaner on our end to just look it up. Signed-off-by: Omar Sandoval Acked-by: Greg Kroah-Hartman Signed-off-by: Jens Axboe --- fs/debugfs/inode.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'fs') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index f17fcf89e18e..7fb1732a3630 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -248,6 +248,42 @@ static struct file_system_type debug_fs_type = { }; MODULE_ALIAS_FS("debugfs"); +/** + * debugfs_lookup() - look up an existing debugfs file + * @name: a pointer to a string containing the name of the file to look up. + * @parent: a pointer to the parent dentry of the file. + * + * This function will return a pointer to a dentry if it succeeds. If the file + * doesn't exist or an error occurs, %NULL will be returned. The returned + * dentry must be passed to dput() when it is no longer needed. + * + * If debugfs is not enabled in the kernel, the value -%ENODEV will be + * returned. + */ +struct dentry *debugfs_lookup(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + + if (IS_ERR(parent)) + return NULL; + + if (!parent) + parent = debugfs_mount->mnt_root; + + inode_lock(d_inode(parent)); + dentry = lookup_one_len(name, parent, strlen(name)); + inode_unlock(d_inode(parent)); + + if (IS_ERR(dentry)) + return NULL; + if (!d_really_is_positive(dentry)) { + dput(dentry); + return NULL; + } + return dentry; +} +EXPORT_SYMBOL_GPL(debugfs_lookup); + static struct dentry *start_creating(const char *name, struct dentry *parent) { struct dentry *dentry; -- cgit v1.2.3