From 373282e7ab6840cd583a223fa90628f2d8293c26 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 4 Jan 2019 12:06:37 -0500 Subject: null_blk: add zoned config support information If the kernel is built without CONFIG_BLK_DEV_ZONED, a modprobe of the null_blk driver with zoned=1 fails with 'Invalid argument'. This can be confusing to users, prompting a search as to why the parameter is invalid. To assist in that search, add a bit more information to the failure, additionally adding to the documentation that CONFIG_BLK_DEV_ZONED is needed for zoned=1. Reviewed-by: Bart Van Assche Signed-off-by: John Pittman Added null_blk prefix to error message. Signed-off-by: Jens Axboe --- drivers/block/null_blk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index b3df2793e7cd..34b22d6523ba 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -97,6 +97,7 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); #else static inline int null_zone_init(struct nullb_device *dev) { + pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; } static inline void null_zone_exit(struct nullb_device *dev) {} -- cgit v1.2.3 From 750afb08ca71310fcf0c4e2cb1565c63b8235b60 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Fri, 4 Jan 2019 09:23:09 +0100 Subject: cross-tree: phase out dma_zalloc_coherent() We already need to zero out memory for dma_alloc_coherent(), as such using dma_zalloc_coherent() is superfluous. Phase it out.
This change was generated with the following Coccinelle SmPL patch: @ replace_dma_zalloc_coherent @ expression dev, size, data, handle, flags; @@ -dma_zalloc_coherent(dev, size, handle, flags) +dma_alloc_coherent(dev, size, handle, flags) Suggested-by: Christoph Hellwig Signed-off-by: Luis Chamberlain [hch: re-ran the script on the latest tree] Signed-off-by: Christoph Hellwig --- drivers/block/skd_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a10d5736d8f7..ab893a7571a2 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -2641,8 +2641,8 @@ static int skd_cons_skcomp(struct skd_device *skdev) "comp pci_alloc, total bytes %zd entries %d\n", SKD_SKCOMP_SIZE, SKD_N_COMPLETION_ENTRY); - skcomp = dma_zalloc_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, - &skdev->cq_dma_address, GFP_KERNEL); + skcomp = dma_alloc_coherent(&skdev->pdev->dev, SKD_SKCOMP_SIZE, + &skdev->cq_dma_address, GFP_KERNEL); if (skcomp == NULL) { rc = -ENOMEM; -- cgit v1.2.3 From 1d69a3f8ae77e3dbfdc1356225cce5ea9c366aec Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Tue, 8 Jan 2019 15:22:53 -0800 Subject: zram: idle writeback fixes and cleanup This patch includes some fixes and cleanup for idle-page writeback. 1. writeback_limit interface Now writeback_limit interface is rather confusing. For example, once writeback limit budget is exhausted, admin can see 0 from /sys/block/zramX/writeback_limit which is same semantic with disable writeback_limit at this moment. IOW, admin cannot tell that zero came from disable writeback limit or exhausted writeback limit. To make the interface clear, let's separate enable of writeback limit to another knob - /sys/block/zram0/writeback_limit_enable * before: while true : # to re-enable writeback limit once previous one is used up echo 0 > /sys/block/zram0/writeback_limit echo $((200<<20)) > /sys/block/zram0/writeback_limit .. ..
# used up the writeback limit budget * new # To enable writeback limit, from the beginning, admin should # enable it. echo $((200<<20)) > /sys/block/zram0/writeback_limit echo 1 > /sys/block/zram0/writeback_limit_enable while true : echo $((200<<20)) > /sys/block/zram0/writeback_limit .. .. # used up the writeback limit budget It's much more straightforward. 2. fix condition check idle/huge writeback mode check The mode in writeback_store is not bit operation any more so no need to use bit operations. Furthermore, current condition check is broken in that it writes back every page regardless of huge/idle. 3. clean up idle_store No need to use goto. [minchan@kernel.org: missed spin_lock_init] Link: http://lkml.kernel.org/r/20190103001601.GA255139@google.com Link: http://lkml.kernel.org/r/20181224033529.19450-1-minchan@kernel.org Signed-off-by: Minchan Kim Suggested-by: John Dias Cc: Sergey Senozhatsky Cc: John Dias Cc: Srinivas Paladugu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 90 +++++++++++++++++++++++++++++++------------ drivers/block/zram/zram_drv.h | 5 ++- 2 files changed, 69 insertions(+), 26 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 33c5cc879f24..04ca65912638 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -316,11 +316,9 @@ static ssize_t idle_store(struct device *dev, * See the comment in writeback_store.
*/ zram_slot_lock(zram, index); - if (!zram_allocated(zram, index) || - zram_test_flag(zram, index, ZRAM_UNDER_WB)) - goto next; - zram_set_flag(zram, index, ZRAM_IDLE); -next: + if (zram_allocated(zram, index) && + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) + zram_set_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); } @@ -330,6 +328,41 @@ next: } #ifdef CONFIG_ZRAM_WRITEBACK +static ssize_t writeback_limit_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + u64 val; + ssize_t ret = -EINVAL; + + if (kstrtoull(buf, 10, &val)) + return ret; + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + zram->wb_limit_enable = val; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + ret = len; + + return ret; +} + +static ssize_t writeback_limit_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + bool val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + spin_lock(&zram->wb_limit_lock); + val = zram->wb_limit_enable; + spin_unlock(&zram->wb_limit_lock); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%d\n", val); +} + static ssize_t writeback_limit_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -341,9 +374,9 @@ static ssize_t writeback_limit_store(struct device *dev, return ret; down_read(&zram->init_lock); - atomic64_set(&zram->stats.bd_wb_limit, val); - if (val == 0) - zram->stop_writeback = false; + spin_lock(&zram->wb_limit_lock); + zram->bd_wb_limit = val; + spin_unlock(&zram->wb_limit_lock); up_read(&zram->init_lock); ret = len; @@ -357,7 +390,9 @@ static ssize_t writeback_limit_show(struct device *dev, struct zram *zram = dev_to_zram(dev); down_read(&zram->init_lock); - val = atomic64_read(&zram->stats.bd_wb_limit); + spin_lock(&zram->wb_limit_lock); + val = zram->bd_wb_limit; + spin_unlock(&zram->wb_limit_lock); 
up_read(&zram->init_lock); return scnprintf(buf, PAGE_SIZE, "%llu\n", val); @@ -588,8 +623,8 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, return 1; } -#define HUGE_WRITEBACK 0x1 -#define IDLE_WRITEBACK 0x2 +#define HUGE_WRITEBACK 1 +#define IDLE_WRITEBACK 2 static ssize_t writeback_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -602,7 +637,7 @@ static ssize_t writeback_store(struct device *dev, struct page *page; ssize_t ret, sz; char mode_buf[8]; - unsigned long mode = -1UL; + int mode = -1; unsigned long blk_idx = 0; sz = strscpy(mode_buf, buf, sizeof(mode_buf)); @@ -618,7 +653,7 @@ static ssize_t writeback_store(struct device *dev, else if (!strcmp(mode_buf, "huge")) mode = HUGE_WRITEBACK; - if (mode == -1UL) + if (mode == -1) return -EINVAL; down_read(&zram->init_lock); @@ -645,10 +680,13 @@ static ssize_t writeback_store(struct device *dev, bvec.bv_len = PAGE_SIZE; bvec.bv_offset = 0; - if (zram->stop_writeback) { + spin_lock(&zram->wb_limit_lock); + if (zram->wb_limit_enable && !zram->bd_wb_limit) { + spin_unlock(&zram->wb_limit_lock); ret = -EIO; break; } + spin_unlock(&zram->wb_limit_lock); if (!blk_idx) { blk_idx = alloc_block_bdev(zram); @@ -667,10 +705,11 @@ static ssize_t writeback_store(struct device *dev, zram_test_flag(zram, index, ZRAM_UNDER_WB)) goto next; - if ((mode & IDLE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_IDLE)) && - (mode & HUGE_WRITEBACK && - !zram_test_flag(zram, index, ZRAM_HUGE))) + if (mode == IDLE_WRITEBACK && + !zram_test_flag(zram, index, ZRAM_IDLE)) + goto next; + if (mode == HUGE_WRITEBACK && + !zram_test_flag(zram, index, ZRAM_HUGE)) goto next; /* * Clearing ZRAM_UNDER_WB is duty of caller. 
@@ -732,11 +771,10 @@ static ssize_t writeback_store(struct device *dev, zram_set_element(zram, index, blk_idx); blk_idx = 0; atomic64_inc(&zram->stats.pages_stored); - if (atomic64_add_unless(&zram->stats.bd_wb_limit, - -1 << (PAGE_SHIFT - 12), 0)) { - if (atomic64_read(&zram->stats.bd_wb_limit) == 0) - zram->stop_writeback = true; - } + spin_lock(&zram->wb_limit_lock); + if (zram->wb_limit_enable && zram->bd_wb_limit > 0) + zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12); + spin_unlock(&zram->wb_limit_lock); next: zram_slot_unlock(zram, index); } @@ -1812,6 +1850,7 @@ static DEVICE_ATTR_RW(comp_algorithm); static DEVICE_ATTR_RW(backing_dev); static DEVICE_ATTR_WO(writeback); static DEVICE_ATTR_RW(writeback_limit); +static DEVICE_ATTR_RW(writeback_limit_enable); #endif static struct attribute *zram_disk_attrs[] = { @@ -1828,6 +1867,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_backing_dev.attr, &dev_attr_writeback.attr, &dev_attr_writeback_limit.attr, + &dev_attr_writeback_limit_enable.attr, #endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, @@ -1867,7 +1907,9 @@ static int zram_add(void) device_id = ret; init_rwsem(&zram->init_lock); - +#ifdef CONFIG_ZRAM_WRITEBACK + spin_lock_init(&zram->wb_limit_lock); +#endif queue = blk_alloc_queue(GFP_KERNEL); if (!queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 4bd3afd15e83..f2fd46daa760 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -86,7 +86,6 @@ struct zram_stats { atomic64_t bd_count; /* no. of pages in backing device */ atomic64_t bd_reads; /* no. of reads from backing device */ atomic64_t bd_writes; /* no. 
of writes from backing device */ - atomic64_t bd_wb_limit; /* writeback limit of backing device */ #endif }; @@ -114,8 +113,10 @@ struct zram { */ bool claim; /* Protected by bdev->bd_mutex */ struct file *backing_dev; - bool stop_writeback; #ifdef CONFIG_ZRAM_WRITEBACK + spinlock_t wb_limit_lock; + bool wb_limit_enable; + u64 bd_wb_limit; struct block_device *bdev; unsigned int old_block_size; unsigned long *bitmap; -- cgit v1.2.3 From 5db470e229e22b7eda6e23b5566e532c96fb5bc3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 9 Jan 2019 19:17:14 -0800 Subject: loop: drop caches if offset or block_size are changed If we don't drop caches used in old offset or block_size, we can get old data from new offset/block_size, which gives unexpected data to user. For example, Martijn found a loopback bug in the below scenario. 1) LOOP_SET_FD loads first two pages on loop file 2) LOOP_SET_STATUS64 changes the offset on the loop file 3) mount is failed due to the cached pages having wrong superblock Cc: Jens Axboe Cc: linux-block@vger.kernel.org Reported-by: Martijn Coenen Reviewed-by: Bart Van Assche Signed-off-by: Jaegeuk Kim Signed-off-by: Jens Axboe --- drivers/block/loop.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8a0720d3653..cf5538942834 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1190,6 +1190,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; } + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1218,6 +1224,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + /* 
kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1443,22 +1457,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, -- cgit v1.2.3 From 85f5a4d666fd9be73856ed16bb36c5af5b406b29 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 8 Jan 2019 19:47:38 +0100 Subject: rbd: don't return 0 on unmap if RBD_DEV_FLAG_REMOVING is set There is a window between when RBD_DEV_FLAG_REMOVING is set and when the device is removed from rbd_dev_list. During this window, we set "already" and return 0. 
Returning 0 from write(2) can confuse userspace tools because 0 indicates that nothing was written. In particular, "rbd unmap" will retry the write multiple times a second: 10:28:05.463299 write(4, "0", 1) = 0 10:28:05.463509 write(4, "0", 1) = 0 10:28:05.463720 write(4, "0", 1) = 0 10:28:05.463942 write(4, "0", 1) = 0 10:28:05.464155 write(4, "0", 1) = 0 Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Tested-by: Dongsheng Yang --- drivers/block/rbd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e5140bbf241..1e92b61d0bd5 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -5986,7 +5986,6 @@ static ssize_t do_rbd_remove(struct bus_type *bus, struct list_head *tmp; int dev_id; char opt_buf[6]; - bool already = false; bool force = false; int ret; @@ -6019,13 +6018,13 @@ static ssize_t do_rbd_remove(struct bus_type *bus, spin_lock_irq(&rbd_dev->lock); if (rbd_dev->open_count && !force) ret = -EBUSY; - else - already = test_and_set_bit(RBD_DEV_FLAG_REMOVING, - &rbd_dev->flags); + else if (test_and_set_bit(RBD_DEV_FLAG_REMOVING, + &rbd_dev->flags)) + ret = -EINPROGRESS; spin_unlock_irq(&rbd_dev->lock); } spin_unlock(&rbd_dev_list_lock); - if (ret < 0 || already) + if (ret) return ret; if (force) { -- cgit v1.2.3 From c8a83a6b54d0ca078de036aafb3f6af58c1dc5eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Jan 2019 09:48:09 +0100 Subject: nbd: Use set_blocksize() to set device blocksize NBD can update block device block size implicitly through bd_set_size(). Make it explicitly set blocksize with set_blocksize() as this behavior of bd_set_size() is going away.
CC: Josef Bacik Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 08696f5f00bb..7c9a949e876b 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -288,9 +288,10 @@ static void nbd_size_update(struct nbd_device *nbd) blk_queue_physical_block_size(nbd->disk->queue, config->blksize); set_capacity(nbd->disk, config->bytesize >> 9); if (bdev) { - if (bdev->bd_disk) + if (bdev->bd_disk) { bd_set_size(bdev, config->bytesize); - else + set_blocksize(bdev, config->blksize); + } else bdev->bd_invalidated = 1; bdput(bdev); } -- cgit v1.2.3