diff options
author | Christoph Hellwig <hch@lst.de> | 2020-11-26 18:43:37 +0100 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2020-12-01 14:53:40 -0700 |
commit | a782483cc1f875355690625d8253a232f2581418 (patch) | |
tree | 492d3bf67cdd2535afd826dde93aa35dcfb42180 /block | |
parent | e6cb53827ed60019bbbc5cf189dd204b3b0e8121 (diff) |
block: remove the nr_sects field in struct hd_struct
Now that the hd_struct always has a block device attached to it, there is
no need for having two size fields that just get out of sync.
Additionally the field in hd_struct did not use proper serialization,
possibly allowing for torn writes. By only using the block_device field
this problem also gets fixed.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Acked-by: Coly Li <colyli@suse.de> [bcache]
Acked-by: Chao Yu <yuchao0@huawei.com> [f2fs]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r-- | block/bio.c | 4 | ||||
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk.h | 53 | ||||
-rw-r--r-- | block/genhd.c | 59 | ||||
-rw-r--r-- | block/partitions/core.c | 17 |
5 files changed, 49 insertions, 86 deletions
diff --git a/block/bio.c b/block/bio.c index fa01bef35bb1..669bb47a3198 100644 --- a/block/bio.c +++ b/block/bio.c @@ -613,8 +613,8 @@ void guard_bio_eod(struct bio *bio) rcu_read_lock(); part = __disk_get_part(bio->bi_disk, bio->bi_partno); if (part) - maxsector = part_nr_sects_read(part); - else + maxsector = bdev_nr_sectors(part->bdev); + else maxsector = get_capacity(bio->bi_disk); rcu_read_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index 2db8bda43b6e..988f45094a38 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -755,7 +755,7 @@ static inline int blk_partition_remap(struct bio *bio) goto out; if (bio_sectors(bio)) { - if (bio_check_eod(bio, part_nr_sects_read(p))) + if (bio_check_eod(bio, bdev_nr_sectors(p->bdev))) goto out; bio->bi_iter.bi_sector += p->start_sect; trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), diff --git a/block/blk.h b/block/blk.h index c4839abcfa27..09cee7024fb4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -387,59 +387,6 @@ static inline void hd_free_part(struct hd_struct *part) percpu_ref_exit(&part->ref); } -/* - * Any access of part->nr_sects which is not protected by partition - * bd_mutex or gendisk bdev bd_mutex, should be done using this - * accessor function. - * - * Code written along the lines of i_size_read() and i_size_write(). - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption - * on. 
- */ -static inline sector_t part_nr_sects_read(struct hd_struct *part) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - sector_t nr_sects; - unsigned seq; - do { - seq = read_seqcount_begin(&part->nr_sects_seq); - nr_sects = part->nr_sects; - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); - return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - sector_t nr_sects; - - preempt_disable(); - nr_sects = part->nr_sects; - preempt_enable(); - return nr_sects; -#else - return part->nr_sects; -#endif -} - -/* - * Should be called with mutex lock held (typically bd_mutex) of partition - * to provide mutual exlusion among writers otherwise seqcount might be - * left in wrong state leaving the readers spinning infinitely. - */ -static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) -{ -#if BITS_PER_LONG==32 && defined(CONFIG_SMP) - preempt_disable(); - write_seqcount_begin(&part->nr_sects_seq); - part->nr_sects = size; - write_seqcount_end(&part->nr_sects_seq); - preempt_enable(); -#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) - preempt_disable(); - part->nr_sects = size; - preempt_enable(); -#else - part->nr_sects = size; -#endif -} - int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); diff --git a/block/genhd.c b/block/genhd.c index bf8fa82f135f..c65f485b9db5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -40,6 +40,16 @@ static void disk_add_events(struct gendisk *disk); static void disk_del_events(struct gendisk *disk); static void disk_release_events(struct gendisk *disk); +void set_capacity(struct gendisk *disk, sector_t sectors) +{ + struct block_device *bdev = disk->part0.bdev; + + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} +EXPORT_SYMBOL(set_capacity); + /* * Set disk capacity and notify if the size is 
not currently zero and will not * be set to zero. Returns true if a uevent was sent, otherwise false. @@ -47,18 +57,30 @@ static void disk_release_events(struct gendisk *disk); bool set_capacity_and_notify(struct gendisk *disk, sector_t size) { sector_t capacity = get_capacity(disk); + char *envp[] = { "RESIZE=1", NULL }; set_capacity(disk, size); - revalidate_disk_size(disk, true); - if (capacity != size && capacity != 0 && size != 0) { - char *envp[] = { "RESIZE=1", NULL }; + /* + * Only print a message and send a uevent if the gendisk is user visible + * and alive. This avoids spamming the log and udev when setting the + * initial capacity during probing. + */ + if (size == capacity || + (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) + return false; - kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); - return true; - } + pr_info("%s: detected capacity change from %lld to %lld\n", + disk->disk_name, size, capacity); - return false; + /* + * Historically we did not send a uevent for changes to/from an empty + * device. + */ + if (!capacity || !size) + return false; + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } EXPORT_SYMBOL_GPL(set_capacity_and_notify); @@ -247,7 +269,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!part_nr_sects_read(part) && + if (!bdev_nr_sectors(part->bdev) && !(piter->flags & DISK_PITER_INCL_EMPTY) && !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && piter->idx == 0)) @@ -284,7 +306,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit); static inline int sector_in_part(struct hd_struct *part, sector_t sector) { return part->start_sect <= sector && - sector < part->start_sect + part_nr_sects_read(part); + sector < part->start_sect + bdev_nr_sectors(part->bdev); } /** @@ -986,8 +1008,8 @@ void __init printk_all_partitions(void) printk("%s%s %10llu %s %s", is_part0 ? 
"" : " ", bdevt_str(part_devt(part), devt_buf), - (unsigned long long)part_nr_sects_read(part) >> 1 - , disk_name(disk, part->partno, name_buf), + bdev_nr_sectors(part->bdev) >> 1, + disk_name(disk, part->partno, name_buf), part->info ? part->info->uuid : ""); if (is_part0) { if (dev->parent && dev->parent->driver) @@ -1079,7 +1101,7 @@ static int show_partition(struct seq_file *seqf, void *v) while ((part = disk_part_iter_next(&piter))) seq_printf(seqf, "%4d %7d %10llu %s\n", MAJOR(part_devt(part)), MINOR(part_devt(part)), - (unsigned long long)part_nr_sects_read(part) >> 1, + bdev_nr_sectors(part->bdev) >> 1, disk_name(sgp, part->partno, buf)); disk_part_iter_exit(&piter); @@ -1161,8 +1183,7 @@ ssize_t part_size_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%llu\n", - (unsigned long long)part_nr_sects_read(p)); + return sprintf(buf, "%llu\n", bdev_nr_sectors(p->bdev)); } ssize_t part_stat_show(struct device *dev, @@ -1618,16 +1639,6 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) ptbl = rcu_dereference_protected(disk->part_tbl, 1); rcu_assign_pointer(ptbl->part[0], &disk->part0); - /* - * set_capacity() and get_capacity() currently don't use - * seqcounter to read/update the part0->nr_sects. Still init - * the counter as we can read the sectors in IO submission - * patch using seqence counters. - * - * TODO: Ideally set_capacity() and get_capacity() should be - * converted to make use of bd_mutex and sequence counters. 
- */ - hd_sects_seq_init(&disk->part0); if (hd_ref_init(&disk->part0)) goto out_free_bdstats; diff --git a/block/partitions/core.c b/block/partitions/core.c index 696bd9ff63c6..bcfa8215bd5e 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -85,6 +85,13 @@ static int (*check_part[])(struct parsed_partitions *) = { NULL }; +static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) +{ + spin_lock(&bdev->bd_size_lock); + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + spin_unlock(&bdev->bd_size_lock); +} + static struct parsed_partitions *allocate_partitions(struct gendisk *hd) { struct parsed_partitions *state; @@ -295,7 +302,7 @@ static void hd_struct_free_work(struct work_struct *work) put_device(disk_to_dev(disk)); part->start_sect = 0; - part->nr_sects = 0; + bdev_set_nr_sectors(part->bdev, 0); part_stat_set_all(part, 0); put_device(part_to_dev(part)); } @@ -412,11 +419,10 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, goto out_free_stats; p->bdev = bdev; - hd_sects_seq_init(p); pdev = part_to_dev(p); p->start_sect = start; - p->nr_sects = len; + bdev_set_nr_sectors(bdev, len); p->partno = partno; p->policy = get_disk_ro(disk); @@ -509,7 +515,7 @@ static bool partition_overlaps(struct gendisk *disk, sector_t start, disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); while ((part = disk_part_iter_next(&piter))) { if (part->partno == skip_partno || - start >= part->start_sect + part->nr_sects || + start >= part->start_sect + bdev_nr_sectors(part->bdev) || start + length <= part->start_sect) continue; overlap = true; @@ -600,8 +606,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno, if (partition_overlaps(bdev->bd_disk, start, length, partno)) goto out_unlock; - part_nr_sects_write(part, length); - bd_set_nr_sectors(bdevp, length); + bdev_set_nr_sectors(bdevp, length); ret = 0; out_unlock: |