From bc3d5717d242a37d2e9ea85d7e7b2e3569324d24 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:16 +0200 Subject: dm zoned: add 'status' callback Add callback to supply information for 'dmsetup status' and 'dmsetup table'. The output for 'dmsetup status' is 0 zoned zones / random / sequential where is the number of unmapped (ie free) random zones, the total number of random zones, the number of unmapped sequential zones, and the total number of sequential zones. Signed-off-by: Hannes Reinecke Reviewed-by: Bob Liu Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 369de15c4e80..c8787560fa9f 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -202,6 +202,11 @@ sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_blocks_shift; } +unsigned int dmz_nr_zones(struct dmz_metadata *zmd) +{ + return zmd->dev->nr_zones; +} + unsigned int dmz_nr_chunks(struct dmz_metadata *zmd) { return zmd->nr_chunks; @@ -217,6 +222,16 @@ unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd) return atomic_read(&zmd->unmap_nr_rnd); } +unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd) +{ + return zmd->nr_seq; +} + +unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) +{ + return atomic_read(&zmd->unmap_nr_seq); +} + /* * Lock/unlock mapping table. * The map lock also protects all the zone lists. -- cgit v1.2.3 From b71228739851a9b384a59ba0467259eba508b408 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:18 +0200 Subject: dm zoned: store zone id within the zone structure and kill dmz_id() Instead of calculating the zone index by the offset within the zone array store the index within the structure itself. With that the helper dmz_id() is pointless and can be replaced with accessing the ->id value directly. Signed-off-by: Hannes Reinecke Reviewed-by: Bob Liu Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index c8787560fa9f..1993eeb26bc1 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -187,19 +187,14 @@ struct dmz_metadata { /* * Various accessors */ -unsigned int dmz_id(struct dmz_metadata *zmd, struct dm_zone *zone) -{ - return ((unsigned int)(zone - zmd->zones)); -} - sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_sectors_shift; + return (sector_t)zone->id << zmd->dev->zone_nr_sectors_shift; } sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)dmz_id(zmd, zone) << zmd->dev->zone_nr_blocks_shift; + return (sector_t)zone->id << zmd->dev->zone_nr_blocks_shift; } unsigned int dmz_nr_zones(struct dmz_metadata *zmd) @@ -1119,6 +1114,7 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) INIT_LIST_HEAD(&zone->link); atomic_set(&zone->refcount, 0); + zone->id = idx; zone->chunk = DMZ_MAP_UNMAPPED; switch (blkz->type) { @@ -1246,7 +1242,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) ret = -EIO; if (ret < 0) { dmz_dev_err(zmd->dev, "Get zone %u report failed", - dmz_id(zmd, zone)); + zone->id); dmz_check_bdev(zmd->dev); return ret; } @@ -1270,7 +1266,7 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, return ret; dmz_dev_warn(zmd->dev, "Processing zone %u write error (zone wp %u/%u)", - dmz_id(zmd, zone), zone->wp_block, wp); + zone->id, zone->wp_block, wp); if (zone->wp_block < wp) { dmz_invalidate_blocks(zmd, zone, zone->wp_block, @@ -1309,7 +1305,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) dev->zone_nr_sectors, GFP_NOIO); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", - dmz_id(zmd, zone), ret); + zone->id, ret); return ret; } } @@ -1757,8 +1753,7 @@ again: } /* Update the chunk mapping */ - dmz_set_chunk_mapping(zmd, dzone->chunk, dmz_id(zmd, dzone), - dmz_id(zmd, bzone)); + dmz_set_chunk_mapping(zmd, dzone->chunk, dzone->id, bzone->id); set_bit(DMZ_BUF, &bzone->flags); bzone->chunk = dzone->chunk; @@ -1810,7 +1805,7 @@ again: atomic_dec(&zmd->unmap_nr_seq); if (dmz_is_offline(zone)) { - dmz_dev_warn(zmd->dev, "Zone %u is offline", dmz_id(zmd, zone)); + dmz_dev_warn(zmd->dev, "Zone %u is offline", zone->id); zone = NULL; goto again; } @@ -1852,7 +1847,7 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone, unsigned int chunk) { /* Set the chunk mapping */ - dmz_set_chunk_mapping(zmd, chunk, dmz_id(zmd, dzone), + dmz_set_chunk_mapping(zmd, chunk, dzone->id, DMZ_MAP_UNMAPPED); dzone->chunk = chunk; if (dmz_is_rnd(dzone)) @@ -1880,7 +1875,7 @@ void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone) * Unmapping the chunk buffer zone: clear only * the chunk buffer mapping */ - dzone_id = dmz_id(zmd, zone->bzone); + dzone_id = zone->bzone->id; zone->bzone->bzone = NULL; zone->bzone = NULL; @@ -1942,7 +1937,7 @@ static struct dmz_mblock *dmz_get_bitmap(struct dmz_metadata *zmd, sector_t chunk_block) { sector_t bitmap_block = 1 + zmd->nr_map_blocks + - (sector_t)(dmz_id(zmd, zone) * zmd->zone_nr_bitmap_blocks) + + (sector_t)(zone->id * zmd->zone_nr_bitmap_blocks) + (chunk_block >> DMZ_BLOCK_SHIFT_BITS); return dmz_get_mblock(zmd, bitmap_block); @@ -2022,7 +2017,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, unsigned int n = 0; dmz_dev_debug(zmd->dev, "=> VALIDATE zone %u, block %llu, %u blocks", - dmz_id(zmd, zone), (unsigned long long)chunk_block, + zone->id, (unsigned long long)chunk_block, nr_blocks); WARN_ON(chunk_block + nr_blocks > zone_nr_blocks); @@ -2052,7 +2047,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, zone->weight += n; else { dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be <= %u", - dmz_id(zmd, zone), zone->weight, + zone->id, zone->weight, zone_nr_blocks - n); zone->weight = zone_nr_blocks; } @@ -2102,7 +2097,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, unsigned int n = 0; dmz_dev_debug(zmd->dev, "=> INVALIDATE zone %u, block %llu, %u blocks", - dmz_id(zmd, zone), (u64)chunk_block, nr_blocks); + zone->id, (u64)chunk_block, nr_blocks); WARN_ON(chunk_block + nr_blocks > zmd->dev->zone_nr_blocks); @@ -2132,7 +2127,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, zone->weight -= n; else { dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be >= %u", - dmz_id(zmd, zone), zone->weight, n); + zone->id, zone->weight, n); zone->weight = 0; } @@ -2378,7 +2373,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) { struct dmz_metadata *zmd; - unsigned int i, zid; + unsigned int i; struct dm_zone *zone; int ret; @@ -2419,9 +2414,8 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) goto err; /* Set metadata zones starting from sb_zone */ - zid = dmz_id(zmd, zmd->sb_zone); for (i = 0; i < zmd->nr_meta_zones << 1; i++) { - zone = dmz_get(zmd, zid + i); + zone = dmz_get(zmd, zmd->sb_zone->id + i); if (!dmz_is_rnd(zone)) goto err; set_bit(DMZ_META, &zone->flags); -- cgit v1.2.3 From 735bd7e4cd16270b7b67cb82ff4ba2811bfd8d7b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:19 +0200 Subject: dm zoned: use array for superblock zones Instead of storing just the first superblock zone and calculate the secondary relative to that we should be using an array for holding the superblock zones. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 1993eeb26bc1..900b1c1224f5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -124,6 +124,7 @@ struct dmz_sb { sector_t block; struct dmz_mblock *mblk; struct dmz_super *sb; + struct dm_zone *zone; }; /* @@ -150,7 +151,6 @@ struct dmz_metadata { /* Zone information array */ struct dm_zone *zones; - struct dm_zone *sb_zone; struct dmz_sb sb[2]; unsigned int mblk_primary; u64 sb_gen; @@ -839,8 +839,9 @@ err: /* * Check super block. */ -static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_super *sb) +static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) { + struct dmz_super *sb = zmd->sb[set].sb; unsigned int nr_meta_zones, nr_data_zones; struct dmz_dev *dev = zmd->dev; u32 crc, stored_crc; @@ -932,16 +933,20 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) /* Bad first super block: search for the second one */ zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks; + zmd->sb[1].zone = zmd->sb[0].zone + 1; for (i = 0; i < zmd->nr_rnd_zones - 1; i++) { if (dmz_read_sb(zmd, 1) != 0) break; - if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) + if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) { + zmd->sb[1].zone += i; return 0; + } zmd->sb[1].block += zone_nr_blocks; } dmz_free_mblock(zmd, mblk); zmd->sb[1].mblk = NULL; + zmd->sb[1].zone = NULL; return -EIO; } @@ -985,11 +990,9 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set) dmz_dev_warn(zmd->dev, "Metadata set %u invalid: recovering", dst_set); if (dst_set == 0) - zmd->sb[0].block = dmz_start_block(zmd, zmd->sb_zone); - else { - zmd->sb[1].block = zmd->sb[0].block + - (zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift); - } + zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); + else + zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); page = alloc_page(GFP_NOIO); if (!page) @@ -1033,21 +1036,27 @@ static int dmz_load_sb(struct dmz_metadata *zmd) u64 sb_gen[2] = {0, 0}; int ret; + if (!zmd->sb[0].zone) { + dmz_dev_err(zmd->dev, "Primary super block zone not set"); + return -ENXIO; + } + /* Read and check the primary super block */ - zmd->sb[0].block = dmz_start_block(zmd, zmd->sb_zone); + zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); ret = dmz_get_sb(zmd, 0); if (ret) { dmz_dev_err(zmd->dev, "Read primary super block failed"); return ret; } - ret = dmz_check_sb(zmd, zmd->sb[0].sb); + ret = dmz_check_sb(zmd, 0); /* Read and check secondary super block */ if (ret == 0) { sb_good[0] = true; - zmd->sb[1].block = zmd->sb[0].block + - (zmd->nr_meta_zones << zmd->dev->zone_nr_blocks_shift); + if (!zmd->sb[1].zone) + zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones; + zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); ret = dmz_get_sb(zmd, 1); } else ret = dmz_lookup_secondary_sb(zmd); @@ -1057,7 +1066,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) return ret; } - ret = dmz_check_sb(zmd, zmd->sb[1].sb); + ret = dmz_check_sb(zmd, 1); if (ret == 0) sb_good[1] = true; @@ -1142,9 +1151,9 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) zmd->nr_useable_zones++; if (dmz_is_rnd(zone)) { zmd->nr_rnd_zones++; - if (!zmd->sb_zone) { + if (!zmd->sb[0].zone) { /* Super block zone */ - zmd->sb_zone = zone; + zmd->sb[0].zone = zone; } } } @@ -2415,7 +2424,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) /* Set metadata zones starting from sb_zone */ for (i = 0; i < zmd->nr_meta_zones << 1; i++) { - zone = dmz_get(zmd, zmd->sb_zone->id + i); + zone = dmz_get(zmd, zmd->sb[0].zone->id + i); if (!dmz_is_rnd(zone)) goto err; set_bit(DMZ_META, &zone->flags); -- cgit v1.2.3 From bf28a3ba098676831bde49e8bc47849727d532a5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:20 +0200 Subject: dm zoned: store device in struct dmz_sb Store the device together with the superblock so that we don't have to recur to the metadata to find it. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 90 +++++++++++++++++++++++++++--------------- 1 file changed, 59 insertions(+), 31 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 900b1c1224f5..def836e12dd9 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -122,6 +122,7 @@ enum { */ struct dmz_sb { sector_t block; + struct dmz_dev *dev; struct dmz_mblock *mblk; struct dmz_super *sb; struct dm_zone *zone; @@ -197,6 +198,11 @@ sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) return (sector_t)zone->id << zmd->dev->zone_nr_blocks_shift; } +struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) +{ + return &zmd->dev[0]; +} + unsigned int dmz_nr_zones(struct dmz_metadata *zmd) { return zmd->dev->nr_zones; @@ -412,9 +418,10 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, { struct dmz_mblock *mblk, *m; sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no; + struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev; struct bio *bio; - if (dmz_bdev_is_dying(zmd->dev)) + if (dmz_bdev_is_dying(dev)) return ERR_PTR(-EIO); /* Get a new block and a BIO to read it */ @@ -450,7 +457,7 @@ static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd, /* Submit read BIO */ bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO); @@ -547,6 +554,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, sector_t mblk_no) { struct dmz_mblock *mblk; + struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev; /* Check rbtree */ spin_lock(&zmd->mblk_lock); @@ -565,7 +573,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { dmz_release_mblock(zmd, mblk); - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); return ERR_PTR(-EIO); } @@ -589,10 +597,11 @@ static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk) static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, unsigned int set) { + struct dmz_dev *dev = zmd->sb[set].dev; sector_t block = zmd->sb[set].block + mblk->no; struct bio *bio; - if (dmz_bdev_is_dying(zmd->dev)) + if (dmz_bdev_is_dying(dev)) return -EIO; bio = bio_alloc(GFP_NOIO, 1); @@ -604,7 +613,7 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, set_bit(DMZ_META_WRITING, &mblk->state); bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio->bi_private = mblk; bio->bi_end_io = dmz_mblock_bio_end_io; bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO); @@ -617,13 +626,13 @@ static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk, /* * Read/write a metadata block. */ -static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, - struct page *page) +static int dmz_rdwr_block(struct dmz_dev *dev, int op, + sector_t block, struct page *page) { struct bio *bio; int ret; - if (dmz_bdev_is_dying(zmd->dev)) + if (dmz_bdev_is_dying(dev)) return -EIO; bio = bio_alloc(GFP_NOIO, 1); @@ -631,14 +640,14 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, return -ENOMEM; bio->bi_iter.bi_sector = dmz_blk2sect(block); - bio_set_dev(bio, zmd->dev->bdev); + bio_set_dev(bio, dev->bdev); bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO); bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0); ret = submit_bio_wait(bio); bio_put(bio); if (ret) - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); return ret; } @@ -650,6 +659,7 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) sector_t block = zmd->sb[set].block; struct dmz_mblock *mblk = zmd->sb[set].mblk; struct dmz_super *sb = zmd->sb[set].sb; + struct dmz_dev *dev = zmd->sb[set].dev; u64 sb_gen = zmd->sb_gen + 1; int ret; @@ -669,9 +679,9 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) sb->crc = 0; sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE)); - ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, block, mblk->page); + ret = dmz_rdwr_block(dev, REQ_OP_WRITE, block, mblk->page); if (ret == 0) - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL); return ret; } @@ -684,6 +694,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, unsigned int set) { struct dmz_mblock *mblk; + struct dmz_dev *dev = zmd->sb[set].dev; struct blk_plug plug; int ret = 0, nr_mblks_submitted = 0; @@ -705,7 +716,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); ret = -EIO; } nr_mblks_submitted--; @@ -713,7 +724,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, /* Flush drive cache (this will also sync data) */ if (ret == 0) - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL); return ret; } @@ -750,6 +761,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) { struct dmz_mblock *mblk; struct list_head write_list; + struct dmz_dev *dev; int ret; if (WARN_ON(!zmd)) @@ -763,6 +775,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) * from modifying metadata. */ down_write(&zmd->mblk_sem); + dev = zmd->sb[zmd->mblk_primary].dev; /* * This is called from the target flush work and reclaim work. @@ -770,7 +783,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ dmz_lock_flush(zmd); - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_bdev_is_dying(dev)) { ret = -EIO; goto out; } @@ -782,7 +795,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) /* If there are no dirty metadata blocks, just flush the device cache */ if (list_empty(&write_list)) { - ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); + ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL); goto err; } @@ -831,7 +844,7 @@ err: list_splice(&write_list, &zmd->mblk_dirty_list); spin_unlock(&zmd->mblk_lock); } - if (!dmz_check_bdev(zmd->dev)) + if (!dmz_check_bdev(dev)) ret = -EIO; goto out; } @@ -842,8 +855,8 @@ err: static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) { struct dmz_super *sb = zmd->sb[set].sb; + struct dmz_dev *dev = zmd->sb[set].dev; unsigned int nr_meta_zones, nr_data_zones; - struct dmz_dev *dev = zmd->dev; u32 crc, stored_crc; u64 gen; @@ -908,8 +921,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) */ static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) { - return dmz_rdwr_block(zmd, REQ_OP_READ, zmd->sb[set].block, - zmd->sb[set].mblk->page); + return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ, + zmd->sb[set].block, zmd->sb[set].mblk->page); } /* @@ -934,6 +947,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) /* Bad first super block: search for the second one */ zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks; zmd->sb[1].zone = zmd->sb[0].zone + 1; + zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone); for (i = 0; i < zmd->nr_rnd_zones - 1; i++) { if (dmz_read_sb(zmd, 1) != 0) break; @@ -942,11 +956,13 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) return 0; } zmd->sb[1].block += zone_nr_blocks; + zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone + i); } dmz_free_mblock(zmd, mblk); zmd->sb[1].mblk = NULL; zmd->sb[1].zone = NULL; + zmd->sb[1].dev = NULL; return -EIO; } @@ -987,7 +1003,8 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set) struct page *page; int i, ret; - dmz_dev_warn(zmd->dev, "Metadata set %u invalid: recovering", dst_set); + dmz_dev_warn(zmd->sb[dst_set].dev, + "Metadata set %u invalid: recovering", dst_set); if (dst_set == 0) zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); @@ -1000,11 +1017,11 @@ static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set) /* Copy metadata blocks */ for (i = 1; i < zmd->nr_meta_blocks; i++) { - ret = dmz_rdwr_block(zmd, REQ_OP_READ, + ret = dmz_rdwr_block(zmd->sb[src_set].dev, REQ_OP_READ, zmd->sb[src_set].block + i, page); if (ret) goto out; - ret = dmz_rdwr_block(zmd, REQ_OP_WRITE, + ret = dmz_rdwr_block(zmd->sb[dst_set].dev, REQ_OP_WRITE, zmd->sb[dst_set].block + i, page); if (ret) goto out; @@ -1043,9 +1060,10 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* Read and check the primary super block */ zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); + zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone); ret = dmz_get_sb(zmd, 0); if (ret) { - dmz_dev_err(zmd->dev, "Read primary super block failed"); + dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed"); return ret; } @@ -1057,12 +1075,13 @@ static int dmz_load_sb(struct dmz_metadata *zmd) if (!zmd->sb[1].zone) zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones; zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); + zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone); ret = dmz_get_sb(zmd, 1); } else ret = dmz_lookup_secondary_sb(zmd); if (ret) { - dmz_dev_err(zmd->dev, "Read secondary super block failed"); + dmz_dev_err(zmd->sb[1].dev, "Read secondary super block failed"); return ret; } @@ -1078,17 +1097,25 @@ static int dmz_load_sb(struct dmz_metadata *zmd) if (sb_good[0]) sb_gen[0] = le64_to_cpu(zmd->sb[0].sb->gen); - else + else { ret = dmz_recover_mblocks(zmd, 0); + if (ret) { + dmz_dev_err(zmd->sb[0].dev, + "Recovery of superblock 0 failed"); + return -EIO; + } + } if (sb_good[1]) sb_gen[1] = le64_to_cpu(zmd->sb[1].sb->gen); - else + else { ret = dmz_recover_mblocks(zmd, 1); - if (ret) { - dmz_dev_err(zmd->dev, "Recovery failed"); - return -EIO; + if (ret) { + dmz_dev_err(zmd->sb[1].dev, + "Recovery of superblock 1 failed"); + return -EIO; + } } if (sb_gen[0] >= sb_gen[1]) { @@ -1099,7 +1126,8 @@ static int dmz_load_sb(struct dmz_metadata *zmd) zmd->mblk_primary = 1; } - dmz_dev_debug(zmd->dev, "Using super block %u (gen %llu)", + dmz_dev_debug(zmd->sb[zmd->mblk_primary].dev, + "Using super block %u (gen %llu)", zmd->mblk_primary, zmd->sb_gen); return 0; -- cgit v1.2.3 From 368205601375bbfb41b07ec8295eab208b6fced5 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:21 +0200 Subject: dm zoned: move fields from struct dmz_dev to dmz_metadata Move fields from the device structure into the metadata structure and provide accessor functions. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 88 ++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 28 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index def836e12dd9..b844ff02ae7b 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -138,9 +138,16 @@ struct dmz_metadata { unsigned int zone_nr_bitmap_blocks; unsigned int zone_bits_per_mblk; + sector_t zone_nr_blocks; + sector_t zone_nr_blocks_shift; + + sector_t zone_nr_sectors; + sector_t zone_nr_sectors_shift; + unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; + unsigned int nr_zones; unsigned int nr_useable_zones; unsigned int nr_meta_blocks; unsigned int nr_meta_zones; @@ -190,12 +197,12 @@ struct dmz_metadata { */ sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->dev->zone_nr_sectors_shift; + return (sector_t)zone->id << zmd->zone_nr_sectors_shift; } sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->dev->zone_nr_blocks_shift; + return (sector_t)zone->id << zmd->zone_nr_blocks_shift; } struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) @@ -203,9 +210,29 @@ struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) return &zmd->dev[0]; } +unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_blocks; +} + +unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_blocks_shift; +} + +unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_sectors; +} + +unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd) +{ + return zmd->zone_nr_sectors_shift; +} + unsigned int dmz_nr_zones(struct dmz_metadata *zmd) { - return zmd->dev->nr_zones; + return zmd->nr_zones; } unsigned int dmz_nr_chunks(struct dmz_metadata *zmd) @@ -882,8 +909,8 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) return -ENXIO; } - nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + dev->zone_nr_blocks - 1) - >> dev->zone_nr_blocks_shift; + nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) + >> zmd->zone_nr_blocks_shift; if (!nr_meta_zones || nr_meta_zones >= zmd->nr_rnd_zones) { dmz_dev_err(dev, "Invalid number of metadata blocks"); @@ -932,7 +959,7 @@ static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) */ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) { - unsigned int zone_nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int zone_nr_blocks = zmd->zone_nr_blocks; struct dmz_mblock *mblk; int i; @@ -1143,7 +1170,7 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) struct dmz_dev *dev = zmd->dev; /* Ignore the eventual last runt (smaller) zone */ - if (blkz->len != dev->zone_nr_sectors) { + if (blkz->len != zmd->zone_nr_sectors) { if (blkz->start + blkz->len == dev->capacity) return 0; return -ENXIO; @@ -1208,19 +1235,24 @@ static int dmz_init_zones(struct dmz_metadata *zmd) int ret; /* Init */ - zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; + zmd->zone_nr_sectors = dev->zone_nr_sectors; + zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors); + zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors); + zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks); + zmd->zone_bitmap_size = zmd->zone_nr_blocks >> 3; zmd->zone_nr_bitmap_blocks = max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); - zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + zmd->zone_bits_per_mblk = min_t(sector_t, zmd->zone_nr_blocks, DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ - zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); + zmd->nr_zones = dev->nr_zones; + zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); if (!zmd->zones) return -ENOMEM; dmz_dev_info(dev, "Using %zu B for zone information", - sizeof(struct dm_zone) * dev->nr_zones); + sizeof(struct dm_zone) * zmd->nr_zones); /* * Get zone information and initialize zone descriptors. At the same @@ -1339,7 +1371,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), - dev->zone_nr_sectors, GFP_NOIO); + zmd->zone_nr_sectors, GFP_NOIO); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", zone->id, ret); @@ -1393,7 +1425,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) if (dzone_id == DMZ_MAP_UNMAPPED) goto next; - if (dzone_id >= dev->nr_zones) { + if (dzone_id >= zmd->nr_zones) { dmz_dev_err(dev, "Chunk %u mapping: invalid data zone ID %u", chunk, dzone_id); return -EIO; @@ -1414,7 +1446,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) if (bzone_id == DMZ_MAP_UNMAPPED) goto next; - if (bzone_id >= dev->nr_zones) { + if (bzone_id >= zmd->nr_zones) { dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone ID %u", chunk, bzone_id); return -EIO; @@ -1446,7 +1478,7 @@ next: * fully initialized. All remaining zones are unmapped data * zones. Finish initializing those here. */ - for (i = 0; i < dev->nr_zones; i++) { + for (i = 0; i < zmd->nr_zones; i++) { dzone = dmz_get(zmd, i); if (dmz_is_meta(dzone)) continue; @@ -1990,7 +2022,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, sector_t chunk_block = 0; /* Get the zones bitmap blocks */ - while (chunk_block < zmd->dev->zone_nr_blocks) { + while (chunk_block < zmd->zone_nr_blocks) { from_mblk = dmz_get_bitmap(zmd, from_zone, chunk_block); if (IS_ERR(from_mblk)) return PTR_ERR(from_mblk); @@ -2025,7 +2057,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, int ret; /* Get the zones bitmap blocks */ - while (chunk_block < zmd->dev->zone_nr_blocks) { + while (chunk_block < zmd->zone_nr_blocks) { /* Get a valid region from the source zone */ ret = dmz_first_valid_block(zmd, from_zone, &chunk_block); if (ret <= 0) @@ -2049,7 +2081,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, sector_t chunk_block, unsigned int nr_blocks) { unsigned int count, bit, nr_bits; - unsigned int zone_nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int zone_nr_blocks = zmd->zone_nr_blocks; struct dmz_mblock *mblk; unsigned int n = 0; @@ -2136,7 +2168,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, dmz_dev_debug(zmd->dev, "=> INVALIDATE zone %u, block %llu, %u blocks", zone->id, (u64)chunk_block, nr_blocks); - WARN_ON(chunk_block + nr_blocks > zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks); while (nr_blocks) { /* Get bitmap block */ @@ -2180,7 +2212,7 @@ static int dmz_test_block(struct dmz_metadata *zmd, struct dm_zone *zone, struct dmz_mblock *mblk; int ret; - WARN_ON(chunk_block >= zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block >= zmd->zone_nr_blocks); /* Get bitmap block */ mblk = dmz_get_bitmap(zmd, zone, chunk_block); @@ -2210,7 +2242,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, unsigned long *bitmap; int n = 0; - WARN_ON(chunk_block + nr_blocks > zmd->dev->zone_nr_blocks); + WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks); while (nr_blocks) { /* Get bitmap block */ @@ -2254,7 +2286,7 @@ int dmz_block_valid(struct dmz_metadata *zmd, struct dm_zone *zone, /* The block is valid: get the number of valid blocks from block */ return dmz_to_next_set_block(zmd, zone, chunk_block, - zmd->dev->zone_nr_blocks - chunk_block, 0); + zmd->zone_nr_blocks - chunk_block, 0); } /* @@ -2270,7 +2302,7 @@ int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone, int ret; ret = dmz_to_next_set_block(zmd, zone, start_block, - zmd->dev->zone_nr_blocks - start_block, 1); + zmd->zone_nr_blocks - start_block, 1); if (ret < 0) return ret; @@ -2278,7 +2310,7 @@ int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone, *chunk_block = start_block; return dmz_to_next_set_block(zmd, zone, start_block, - zmd->dev->zone_nr_blocks - start_block, 0); + zmd->zone_nr_blocks - start_block, 0); } /* @@ -2317,7 +2349,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) struct dmz_mblock *mblk; sector_t chunk_block = 0; unsigned int bit, nr_bits; - unsigned int nr_blocks = zmd->dev->zone_nr_blocks; + unsigned int nr_blocks = zmd->zone_nr_blocks; void *bitmap; int n = 0; @@ -2488,7 +2520,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) dmz_dev_info(dev, " %llu 512-byte logical sectors", (u64)dev->capacity); dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", - dev->nr_zones, (u64)dev->zone_nr_sectors); + zmd->nr_zones, (u64)zmd->zone_nr_sectors); dmz_dev_info(dev, " %u metadata zones", zmd->nr_meta_zones * 2); dmz_dev_info(dev, " %u data zones for %u chunks", @@ -2541,7 +2573,7 @@ int dmz_resume_metadata(struct dmz_metadata *zmd) int ret; /* Check zones */ - for (i = 0; i < dev->nr_zones; i++) { + for (i = 0; i < zmd->nr_zones; i++) { zone = dmz_get(zmd, i); if (!zone) { dmz_dev_err(dev, "Unable to get zone %u", i); @@ -2569,7 +2601,7 @@ int dmz_resume_metadata(struct dmz_metadata *zmd) i, (u64)zone->wp_block, (u64)wp_block); zone->wp_block = wp_block; dmz_invalidate_blocks(zmd, zone, zone->wp_block, - dev->zone_nr_blocks - zone->wp_block); + zmd->zone_nr_blocks - zone->wp_block); } } -- cgit v1.2.3 From 2234e7321dc61f116de1dc913f3ffa7efff02068 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:22 +0200 Subject: dm zoned: introduce dmz_metadata_label() to format device name Introduce dmz_metadata_label() to format the device-mapper device name and use it instead of the device name of the underlying device. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b844ff02ae7b..7cda48683c0b 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -134,6 +134,8 @@ struct dmz_sb { struct dmz_metadata { struct dmz_dev *dev; + char devname[BDEVNAME_SIZE]; + sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; unsigned int zone_bits_per_mblk; @@ -260,6 +262,11 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) return atomic_read(&zmd->unmap_nr_seq); } +const char *dmz_metadata_label(struct dmz_metadata *zmd) +{ + return (const char *)zmd->devname; +} + /* * Lock/unlock mapping table. * The map lock also protects all the zone lists. @@ -2439,7 +2446,8 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) /* * Initialize the zoned metadata. */ -int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) +int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, + const char *devname) { struct dmz_metadata *zmd; unsigned int i; @@ -2450,6 +2458,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata) if (!zmd) return -ENOMEM; + strcpy(zmd->devname, devname); zmd->dev = dev; zmd->mblk_rbtree = RB_ROOT; init_rwsem(&zmd->mblk_sem); -- cgit v1.2.3 From d0e21ce40c7a41df43b70b863cc64395c7787abd Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:23 +0200 Subject: dm zoned: Introduce dmz_dev_is_dying() and dmz_check_dev() Introduce accessors dmz_dev_is_dying() and dmz_check_dev() to avoid having to reference the devices directly. Signed-off-by: Hannes Reinecke Reviewed-by: Bob Liu Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 7cda48683c0b..426af738f1ca 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -267,6 +267,16 @@ const char *dmz_metadata_label(struct dmz_metadata *zmd) return (const char *)zmd->devname; } +bool dmz_check_dev(struct dmz_metadata *zmd) +{ + return dmz_check_bdev(&zmd->dev[0]); +} + +bool dmz_dev_is_dying(struct dmz_metadata *zmd) +{ + return dmz_bdev_is_dying(&zmd->dev[0]); +} + /* * Lock/unlock mapping table. * The map lock also protects all the zone lists. @@ -1719,7 +1729,7 @@ again: /* Allocate a random zone */ dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!dzone) { - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_dev_is_dying(zmd)) { dzone = ERR_PTR(-EIO); goto out; } @@ -1820,7 +1830,7 @@ again: /* Allocate a random zone */ bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); if (!bzone) { - if (dmz_bdev_is_dying(zmd->dev)) { + if (dmz_dev_is_dying(zmd)) { bzone = ERR_PTR(-EIO); goto out; } -- cgit v1.2.3 From aa821c8dc0d76fa9f827becf1186bfd824f1fcfb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:26 +0200 Subject: dm zoned: use dmz_zone_to_dev() when handling metadata I/O Use accessors to retrieve the device pointer in preparation for adding an additional block device. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 426af738f1ca..312194be4cb0 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1310,6 +1310,7 @@ static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx, */ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) { + struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); unsigned int noio_flag; int ret; @@ -1320,16 +1321,16 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) * GFP_NOIO was specified. */ noio_flag = memalloc_noio_save(); - ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1, + ret = blkdev_report_zones(dev->bdev, dmz_start_sect(zmd, zone), 1, dmz_update_zone_cb, zone); memalloc_noio_restore(noio_flag); if (ret == 0) ret = -EIO; if (ret < 0) { - dmz_dev_err(zmd->dev, "Get zone %u report failed", + dmz_dev_err(dev, "Get zone %u report failed", zone->id); - dmz_check_bdev(zmd->dev); + dmz_check_bdev(dev); return ret; } @@ -1343,6 +1344,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, struct dm_zone *zone) { + struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); unsigned int wp = 0; int ret; @@ -1351,7 +1353,7 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, if (ret) return ret; - dmz_dev_warn(zmd->dev, "Processing zone %u write error (zone wp %u/%u)", + dmz_dev_warn(dev, "Processing zone %u write error (zone wp %u/%u)", zone->id, zone->wp_block, wp); if (zone->wp_block < wp) { @@ -1384,7 +1386,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) return 0; if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { - struct dmz_dev *dev = zmd->dev; + struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), -- cgit v1.2.3 From ca1a70450a969c63dd19f0a34504fa1bd227e730 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:27 +0200 Subject: dm zoned: add metadata logging functions Use the metadata label for logging and not the underlying device. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 96 +++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 39 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 312194be4cb0..0e7122867fd8 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -194,6 +194,17 @@ struct dmz_metadata { wait_queue_head_t free_wq; }; +#define dmz_zmd_info(zmd, format, args...) \ + DMINFO("(%s): " format, (zmd)->devname, ## args) + +#define dmz_zmd_err(zmd, format, args...) \ + DMERR("(%s): " format, (zmd)->devname, ## args) + +#define dmz_zmd_warn(zmd, format, args...) \ + DMWARN("(%s): " format, (zmd)->devname, ## args) + +#define dmz_zmd_debug(zmd, format, args...) \ + DMDEBUG("(%s): " format, (zmd)->devname, ## args) /* * Various accessors */ @@ -1098,7 +1109,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) int ret; if (!zmd->sb[0].zone) { - dmz_dev_err(zmd->dev, "Primary super block zone not set"); + dmz_zmd_err(zmd, "Primary super block zone not set"); return -ENXIO; } @@ -1135,7 +1146,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* Use highest generation sb first */ if (!sb_good[0] && !sb_good[1]) { - dmz_dev_err(zmd->dev, "No valid super block found"); + dmz_zmd_err(zmd, "No valid super block found"); return -EIO; } @@ -1248,7 +1259,7 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) */ static int dmz_init_zones(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; + struct dmz_dev *dev = &zmd->dev[0]; int ret; /* Init */ @@ -1268,8 +1279,8 @@ static int dmz_init_zones(struct dmz_metadata *zmd) if (!zmd->zones) return -ENOMEM; - dmz_dev_info(dev, "Using %zu B for zone information", - sizeof(struct dm_zone) * zmd->nr_zones); + DMINFO("(%s): Using %zu B for zone information", + zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); /* * Get zone information and initialize zone descriptors. At the same @@ -1412,7 +1423,6 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone); */ static int dmz_load_mapping(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; struct dm_zone *dzone, *bzone; struct dmz_mblock *dmap_mblk = NULL; struct dmz_map *dmap; @@ -1445,7 +1455,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) goto next; if (dzone_id >= zmd->nr_zones) { - dmz_dev_err(dev, "Chunk %u mapping: invalid data zone ID %u", + dmz_zmd_err(zmd, "Chunk %u mapping: invalid data zone ID %u", chunk, dzone_id); return -EIO; } @@ -1466,14 +1476,14 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) goto next; if (bzone_id >= zmd->nr_zones) { - dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone ID %u", + dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone ID %u", chunk, bzone_id); return -EIO; } bzone = dmz_get(zmd, bzone_id); if (!dmz_is_rnd(bzone)) { - dmz_dev_err(dev, "Chunk %u mapping: invalid buffer zone %u", + dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u", chunk, bzone_id); return -EIO; } @@ -1893,7 +1903,7 @@ again: atomic_dec(&zmd->unmap_nr_seq); if (dmz_is_offline(zone)) { - dmz_dev_warn(zmd->dev, "Zone %u is offline", zone->id); + dmz_zmd_warn(zmd, "Zone %u is offline", zone->id); zone = NULL; goto again; } @@ -2104,7 +2114,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, struct dmz_mblock *mblk; unsigned int n = 0; - dmz_dev_debug(zmd->dev, "=> VALIDATE zone %u, block %llu, %u blocks", + dmz_zmd_debug(zmd, "=> VALIDATE zone %u, block %llu, %u blocks", zone->id, (unsigned long long)chunk_block, nr_blocks); @@ -2134,7 +2144,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, if (likely(zone->weight + n <= zone_nr_blocks)) zone->weight += n; else { - dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be <= %u", + dmz_zmd_warn(zmd, "Zone %u: weight %u should be <= %u", zone->id, zone->weight, zone_nr_blocks - n); zone->weight = zone_nr_blocks; @@ -2184,7 +2194,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, struct dmz_mblock *mblk; unsigned int n = 0; - dmz_dev_debug(zmd->dev, "=> INVALIDATE zone %u, block %llu, %u blocks", + dmz_zmd_debug(zmd, "=> INVALIDATE zone %u, block %llu, %u blocks", zone->id, (u64)chunk_block, nr_blocks); WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks); @@ -2214,7 +2224,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, if (zone->weight >= n) zone->weight -= n; else { - dmz_dev_warn(zmd->dev, "Zone %u: weight %u should be >= %u", + dmz_zmd_warn(zmd, "Zone %u: weight %u should be >= %u", zone->id, zone->weight, n); zone->weight = 0; } @@ -2424,7 +2434,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) while (!list_empty(&zmd->mblk_dirty_list)) { mblk = list_first_entry(&zmd->mblk_dirty_list, struct dmz_mblock, link); - dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)", + dmz_zmd_warn(zmd, "mblock %llu still in dirty list (ref %u)", (u64)mblk->no, mblk->ref); list_del_init(&mblk->link); rb_erase(&mblk->node, &zmd->mblk_rbtree); @@ -2442,7 +2452,7 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) /* Sanity checks: the mblock rbtree should now be empty */ root = &zmd->mblk_rbtree; rbtree_postorder_for_each_entry_safe(mblk, next, root, node) { - dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree", + dmz_zmd_warn(zmd, "mblock %llu ref %u still in rbtree", (u64)mblk->no, mblk->ref); mblk->ref = 0; dmz_free_mblock(zmd, mblk); @@ -2455,6 +2465,19 @@ static void dmz_cleanup_metadata(struct dmz_metadata *zmd) mutex_destroy(&zmd->map_lock); } +static void dmz_print_dev(struct dmz_metadata *zmd, int num) +{ + struct dmz_dev *dev = &zmd->dev[num]; + + dmz_dev_info(dev, "Host-%s zoned block device", + bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? + "aware" : "managed"); + dmz_dev_info(dev, " %llu 512-byte logical sectors", + (u64)dev->capacity); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", + dev->nr_zones, (u64)zmd->zone_nr_sectors); +} + /* * Initialize the zoned metadata. */ @@ -2531,34 +2554,31 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, /* Metadata cache shrinker */ ret = register_shrinker(&zmd->mblk_shrinker); if (ret) { - dmz_dev_err(dev, "Register metadata cache shrinker failed"); + dmz_zmd_err(zmd, "Register metadata cache shrinker failed"); goto err; } - dmz_dev_info(dev, "Host-%s zoned block device", - bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? - "aware" : "managed"); - dmz_dev_info(dev, " %llu 512-byte logical sectors", - (u64)dev->capacity); - dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", DMZ_META_VER); + dmz_print_dev(zmd, 0); + + dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors", zmd->nr_zones, (u64)zmd->zone_nr_sectors); - dmz_dev_info(dev, " %u metadata zones", + dmz_zmd_info(zmd, " %u metadata zones", zmd->nr_meta_zones * 2); - dmz_dev_info(dev, " %u data zones for %u chunks", + dmz_zmd_info(zmd, " %u data zones for %u chunks", zmd->nr_data_zones, zmd->nr_chunks); - dmz_dev_info(dev, " %u random zones (%u unmapped)", + dmz_zmd_info(zmd, " %u random zones (%u unmapped)", zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); - dmz_dev_info(dev, " %u sequential zones (%u unmapped)", + dmz_zmd_info(zmd, " %u sequential zones (%u unmapped)", zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq)); - dmz_dev_info(dev, " %u reserved sequential data zones", + dmz_zmd_info(zmd, " %u reserved sequential data zones", zmd->nr_reserved_seq); - - dmz_dev_debug(dev, "Format:"); - dmz_dev_debug(dev, "%u metadata blocks per set (%u max cache)", + dmz_zmd_debug(zmd, "Format:"); + dmz_zmd_debug(zmd, "%u metadata blocks per set (%u max cache)", zmd->nr_meta_blocks, zmd->max_nr_mblks); - dmz_dev_debug(dev, " %u data zone mapping blocks", + dmz_zmd_debug(zmd, " %u data zone mapping blocks", zmd->nr_map_blocks); - dmz_dev_debug(dev, " %u bitmap blocks", + dmz_zmd_debug(zmd, " %u bitmap blocks", zmd->nr_bitmap_blocks); *metadata = zmd; @@ -2587,7 +2607,6 @@ void dmz_dtr_metadata(struct dmz_metadata *zmd) */ int dmz_resume_metadata(struct dmz_metadata *zmd) { - struct dmz_dev *dev = zmd->dev; struct dm_zone *zone; sector_t wp_block; unsigned int i; @@ -2597,20 +2616,19 @@ int dmz_resume_metadata(struct dmz_metadata *zmd) for (i = 0; i < zmd->nr_zones; i++) { zone = dmz_get(zmd, i); if (!zone) { - dmz_dev_err(dev, "Unable to get zone %u", i); + dmz_zmd_err(zmd, "Unable to get zone %u", i); return -EIO; } - wp_block = zone->wp_block; ret = dmz_update_zone(zmd, zone); if (ret) { - dmz_dev_err(dev, "Broken zone %u", i); + dmz_zmd_err(zmd, "Broken zone %u", i); return ret; } if (dmz_is_offline(zone)) { - dmz_dev_warn(dev, "Zone %u is offline", i); + dmz_zmd_warn(zmd, "Zone %u is offline", i); continue; } @@ -2618,7 +2636,7 @@ int dmz_resume_metadata(struct dmz_metadata *zmd) if (!dmz_is_seq(zone)) zone->wp_block = 0; else if (zone->wp_block != wp_block) { - dmz_dev_err(dev, "Zone %u: Invalid wp (%llu / %llu)", + dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)", i, (u64)zone->wp_block, (u64)wp_block); zone->wp_block = wp_block; dmz_invalidate_blocks(zmd, zone, zone->wp_block, -- cgit v1.2.3 From ae3c1f1171467f83849c7e8c5e0e632c5078ca2f Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:28 +0200 Subject: dm zoned: Reduce logging output on startup dm-zoned is becoming quite chatty during startup; reduce the noise by moving some information to 'debug' level. Suggested-by: Mike Snitzer Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 0e7122867fd8..1290728c197e 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1279,8 +1279,8 @@ static int dmz_init_zones(struct dmz_metadata *zmd) if (!zmd->zones) return -ENOMEM; - DMINFO("(%s): Using %zu B for zone information", - zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); + DMDEBUG("(%s): Using %zu B for zone information", + zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); /* * Get zone information and initialize zone descriptors. At the same @@ -2563,16 +2563,16 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors", zmd->nr_zones, (u64)zmd->zone_nr_sectors); - dmz_zmd_info(zmd, " %u metadata zones", - zmd->nr_meta_zones * 2); - dmz_zmd_info(zmd, " %u data zones for %u chunks", - zmd->nr_data_zones, zmd->nr_chunks); - dmz_zmd_info(zmd, " %u random zones (%u unmapped)", - zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); - dmz_zmd_info(zmd, " %u sequential zones (%u unmapped)", - zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq)); - dmz_zmd_info(zmd, " %u reserved sequential data zones", - zmd->nr_reserved_seq); + dmz_zmd_debug(zmd, " %u metadata zones", + zmd->nr_meta_zones * 2); + dmz_zmd_debug(zmd, " %u data zones for %u chunks", + zmd->nr_data_zones, zmd->nr_chunks); + dmz_zmd_debug(zmd, " %u random zones (%u unmapped)", + zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); + dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)", + zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq)); + dmz_zmd_debug(zmd, " %u reserved sequential data zones", + zmd->nr_reserved_seq); dmz_zmd_debug(zmd, "Format:"); dmz_zmd_debug(zmd, "%u metadata blocks per set (%u max cache)", zmd->nr_meta_blocks, zmd->max_nr_mblks); -- cgit v1.2.3 From dc076c838f65723325001c977b39e55fc6ba0fa7 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:29 +0200 Subject: dm zoned: ignore metadata zone in dmz_alloc_zone() When looking up zones in dmz_alloc_zone() we need to ignore metadata zones so as not to accidentally overwrite metadata. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Bob Liu Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 1290728c197e..939deed1606a 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1907,7 +1907,13 @@ again: zone = NULL; goto again; } + if (dmz_is_meta(zone)) { + struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); + dmz_dev_warn(dev, "Zone %u has metadata", zone->id); + zone = NULL; + goto again; + } return zone; } -- cgit v1.2.3 From bd5c40313a1467e4683d92456fc5219d94823f24 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 11 May 2020 10:24:30 +0200 Subject: dm zoned: metadata version 2 Implement handling for metadata version 2. The new metadata adds a label and UUID for the device mapper device, and additional UUID for the underlying block devices. It also allows for an additional regular drive to be used for emulating random access zones. The emulated zones will be placed logically in front of the zones from the zoned block device, causing the superblocks and metadata to be stored on that device. The first zone of the original zoned device will be used to hold another, tertiary copy of the metadata; this copy carries a generation number of 0 and is never updated; it's just used for identification. Signed-off-by: Hannes Reinecke Reviewed-by: Bob Liu Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 310 ++++++++++++++++++++++++++++++++++------- 1 file changed, 261 insertions(+), 49 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 939deed1606a..fba690dd37f5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -16,7 +16,7 @@ /* * Metadata version. */ -#define DMZ_META_VER 1 +#define DMZ_META_VER 2 /* * On-disk super block magic. @@ -69,8 +69,17 @@ struct dmz_super { /* Checksum */ __le32 crc; /* 48 */ + /* DM-Zoned label */ + u8 dmz_label[32]; /* 80 */ + + /* DM-Zoned UUID */ + u8 dmz_uuid[16]; /* 96 */ + + /* Device UUID */ + u8 dev_uuid[16]; /* 112 */ + /* Padding to full 512B sector */ - u8 reserved[464]; /* 512 */ + u8 reserved[400]; /* 512 */ }; /* @@ -133,8 +142,11 @@ struct dmz_sb { */ struct dmz_metadata { struct dmz_dev *dev; + unsigned int nr_devs; char devname[BDEVNAME_SIZE]; + char label[BDEVNAME_SIZE]; + uuid_t uuid; sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; @@ -161,8 +173,9 @@ struct dmz_metadata { /* Zone information array */ struct dm_zone *zones; - struct dmz_sb sb[2]; + struct dmz_sb sb[3]; unsigned int mblk_primary; + unsigned int sb_version; u64 sb_gen; unsigned int min_nr_mblks; unsigned int max_nr_mblks; @@ -195,31 +208,56 @@ struct dmz_metadata { }; #define dmz_zmd_info(zmd, format, args...) \ - DMINFO("(%s): " format, (zmd)->devname, ## args) + DMINFO("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_err(zmd, format, args...) \ - DMERR("(%s): " format, (zmd)->devname, ## args) + DMERR("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_warn(zmd, format, args...) \ - DMWARN("(%s): " format, (zmd)->devname, ## args) + DMWARN("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_debug(zmd, format, args...) \ - DMDEBUG("(%s): " format, (zmd)->devname, ## args) + DMDEBUG("(%s): " format, (zmd)->label, ## args) /* * Various accessors */ +static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone) +{ + unsigned int zone_id; + + if (WARN_ON(!zone)) + return 0; + + zone_id = zone->id; + if (zmd->nr_devs > 1 && + (zone_id >= zmd->dev[1].zone_offset)) + zone_id -= zmd->dev[1].zone_offset; + return zone_id; +} + sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->zone_nr_sectors_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_sectors_shift; } sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->zone_nr_blocks_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_blocks_shift; } struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) { + if (WARN_ON(!zone)) + return &zmd->dev[0]; + + if (zmd->nr_devs > 1 && + zone->id >= zmd->dev[1].zone_offset) + return &zmd->dev[1]; + return &zmd->dev[0]; } @@ -275,17 +313,29 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) const char *dmz_metadata_label(struct dmz_metadata *zmd) { - return (const char *)zmd->devname; + return (const char *)zmd->label; } bool dmz_check_dev(struct dmz_metadata *zmd) { - return dmz_check_bdev(&zmd->dev[0]); + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (!dmz_check_bdev(&zmd->dev[i])) + return false; + } + return true; } bool dmz_dev_is_dying(struct dmz_metadata *zmd) { - return dmz_bdev_is_dying(&zmd->dev[0]); + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (dmz_bdev_is_dying(&zmd->dev[i])) + return true; + } + return false; } /* @@ -687,6 +737,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, struct bio *bio; int ret; + if (WARN_ON(!dev)) + return -EIO; + if (dmz_bdev_is_dying(dev)) return -EIO; @@ -711,19 +764,32 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, */ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) { - sector_t block = zmd->sb[set].block; struct dmz_mblock *mblk = zmd->sb[set].mblk; struct dmz_super *sb = zmd->sb[set].sb; struct dmz_dev *dev = zmd->sb[set].dev; + sector_t sb_block; u64 sb_gen = zmd->sb_gen + 1; int ret; sb->magic = cpu_to_le32(DMZ_MAGIC); - sb->version = cpu_to_le32(DMZ_META_VER); + + sb->version = cpu_to_le32(zmd->sb_version); + if (zmd->sb_version > 1) { + BUILD_BUG_ON(UUID_SIZE != 16); + export_uuid(sb->dmz_uuid, &zmd->uuid); + memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE); + export_uuid(sb->dev_uuid, &dev->uuid); + } sb->gen = cpu_to_le64(sb_gen); - sb->sb_block = cpu_to_le64(block); + /* + * The metadata always references the absolute block address, + * ie relative to the entire block range, not the per-device + * block address. + */ + sb_block = zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift; + sb->sb_block = cpu_to_le64(sb_block); sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks); sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq); sb->nr_chunks = cpu_to_le32(zmd->nr_chunks); @@ -734,7 +800,8 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) sb->crc = 0; sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE)); - ret = dmz_rdwr_block(dev, REQ_OP_WRITE, block, mblk->page); + ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block, + mblk->page); if (ret == 0) ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL); @@ -915,6 +982,23 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) u32 crc, stored_crc; u64 gen; + if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { + dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", + DMZ_MAGIC, le32_to_cpu(sb->magic)); + return -ENXIO; + } + + zmd->sb_version = le32_to_cpu(sb->version); + if (zmd->sb_version > DMZ_META_VER) { + dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", + DMZ_META_VER, zmd->sb_version); + return -EINVAL; + } + if ((zmd->sb_version < 1) && (set == 2)) { + dmz_dev_err(dev, "Tertiary superblocks are not supported"); + return -EINVAL; + } + gen = le64_to_cpu(sb->gen); stored_crc = le32_to_cpu(sb->crc); sb->crc = 0; @@ -925,16 +1009,45 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) return -ENXIO; } - if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { - dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", - DMZ_MAGIC, le32_to_cpu(sb->magic)); - return -ENXIO; - } + if (zmd->sb_version > 1) { + uuid_t sb_uuid; + + import_uuid(&sb_uuid, sb->dmz_uuid); + if (uuid_is_null(&sb_uuid)) { + dmz_dev_err(dev, "NULL DM-Zoned uuid"); + return -ENXIO; + } else if (uuid_is_null(&zmd->uuid)) { + uuid_copy(&zmd->uuid, &sb_uuid); + } else if (!uuid_equal(&zmd->uuid, &sb_uuid)) { + dmz_dev_err(dev, "mismatching DM-Zoned uuid, " + "is %pUl expected %pUl", + &sb_uuid, &zmd->uuid); + return -ENXIO; + } + if (!strlen(zmd->label)) + memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE); + else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) { + dmz_dev_err(dev, "mismatching DM-Zoned label, " + "is %s expected %s", + sb->dmz_label, zmd->label); + return -ENXIO; + } + import_uuid(&dev->uuid, sb->dev_uuid); + if (uuid_is_null(&dev->uuid)) { + dmz_dev_err(dev, "NULL device uuid"); + return -ENXIO; + } - if (le32_to_cpu(sb->version) != DMZ_META_VER) { - dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", - DMZ_META_VER, le32_to_cpu(sb->version)); - return -ENXIO; + if (set == 2) { + /* + * Generation number should be 0, but it doesn't + * really matter if it isn't. + */ + if (gen != 0) + dmz_dev_warn(dev, "Invalid generation %llu", + gen); + return 0; + } } nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) @@ -1185,21 +1298,38 @@ static int dmz_load_sb(struct dmz_metadata *zmd) "Using super block %u (gen %llu)", zmd->mblk_primary, zmd->sb_gen); + if ((zmd->sb_version > 1) && zmd->sb[2].zone) { + zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone); + zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone); + ret = dmz_get_sb(zmd, 2); + if (ret) { + dmz_dev_err(zmd->sb[2].dev, + "Read tertiary super block failed"); + return ret; + } + ret = dmz_check_sb(zmd, 2); + if (ret == -EINVAL) + return ret; + } return 0; } /* * Initialize a zone descriptor. */ -static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) +static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) { struct dmz_metadata *zmd = data; + struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0]; + int idx = num + dev->zone_offset; struct dm_zone *zone = &zmd->zones[idx]; - struct dmz_dev *dev = zmd->dev; - /* Ignore the eventual last runt (smaller) zone */ if (blkz->len != zmd->zone_nr_sectors) { - if (blkz->start + blkz->len == dev->capacity) + if (zmd->sb_version > 1) { + /* Ignore the eventual runt (smaller) zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + return 0; + } else if (blkz->start + blkz->len == dev->capacity) return 0; return -ENXIO; } @@ -1234,16 +1364,45 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) zmd->nr_useable_zones++; if (dmz_is_rnd(zone)) { zmd->nr_rnd_zones++; - if (!zmd->sb[0].zone) { - /* Super block zone */ + if (zmd->nr_devs == 1 && !zmd->sb[0].zone) { + /* Primary super block zone */ zmd->sb[0].zone = zone; } } + if (zmd->nr_devs > 1 && !zmd->sb[2].zone) { + /* Tertiary superblock zone */ + zmd->sb[2].zone = zone; + } } return 0; } +static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) +{ + int idx; + sector_t zone_offset = 0; + + for(idx = 0; idx < dev->nr_zones; idx++) { + struct dm_zone *zone = &zmd->zones[idx]; + + INIT_LIST_HEAD(&zone->link); + atomic_set(&zone->refcount, 0); + zone->id = idx; + zone->chunk = DMZ_MAP_UNMAPPED; + set_bit(DMZ_RND, &zone->flags); + zone->wp_block = 0; + zmd->nr_rnd_zones++; + zmd->nr_useable_zones++; + if (dev->capacity - zone_offset < zmd->zone_nr_sectors) { + /* Disable runt zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + break; + } + zone_offset += zmd->zone_nr_sectors; + } +} + /* * Free zones descriptors. */ @@ -1259,11 +1418,11 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) */ static int dmz_init_zones(struct dmz_metadata *zmd) { - struct dmz_dev *dev = &zmd->dev[0]; - int ret; + int i, ret; + struct dmz_dev *zoned_dev = &zmd->dev[0]; /* Init */ - zmd->zone_nr_sectors = dev->zone_nr_sectors; + zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors; zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors); zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors); zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks); @@ -1274,7 +1433,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd) DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ - zmd->nr_zones = dev->nr_zones; + zmd->nr_zones = 0; + for (i = 0; i < zmd->nr_devs; i++) + zmd->nr_zones += zmd->dev[i].nr_zones; + + if (!zmd->nr_zones) { + DMERR("(%s): No zones found", zmd->devname); + return -ENXIO; + } zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); if (!zmd->zones) return -ENOMEM; @@ -1282,14 +1448,27 @@ static int dmz_init_zones(struct dmz_metadata *zmd) DMDEBUG("(%s): Using %zu B for zone information", zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); + if (zmd->nr_devs > 1) { + dmz_emulate_zones(zmd, &zmd->dev[0]); + /* + * Primary superblock zone is always at zone 0 when multiple + * drives are present. + */ + zmd->sb[0].zone = &zmd->zones[0]; + + zoned_dev = &zmd->dev[1]; + } + /* * Get zone information and initialize zone descriptors. At the same * time, determine where the super block should be: first block of the * first randomly writable zone. */ - ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone, - zmd); + ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES, + dmz_init_zone, zmd); if (ret < 0) { + DMDEBUG("(%s): Failed to report zones, error %d", + zmd->devname, ret); dmz_drop_zones(zmd); return ret; } @@ -1325,6 +1504,9 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) unsigned int noio_flag; int ret; + if (dev->flags & DMZ_BDEV_REGULAR) + return 0; + /* * Get zone information from disk. Since blkdev_report_zones() uses * GFP_KERNEL by default for memory allocations, set the per-task @@ -2475,19 +2657,34 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num) { struct dmz_dev *dev = &zmd->dev[num]; - dmz_dev_info(dev, "Host-%s zoned block device", - bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? - "aware" : "managed"); - dmz_dev_info(dev, " %llu 512-byte logical sectors", - (u64)dev->capacity); - dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", - dev->nr_zones, (u64)zmd->zone_nr_sectors); + if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) + dmz_dev_info(dev, "Regular block device"); + else + dmz_dev_info(dev, "Host-%s zoned block device", + bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? + "aware" : "managed"); + if (zmd->sb_version > 1) { + sector_t sector_offset = + dev->zone_offset << zmd->zone_nr_sectors_shift; + + dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)", + (u64)dev->capacity, (u64)sector_offset); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)", + dev->nr_zones, (u64)zmd->zone_nr_sectors, + (u64)dev->zone_offset); + } else { + dmz_dev_info(dev, " %llu 512-byte logical sectors", + (u64)dev->capacity); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", + dev->nr_zones, (u64)zmd->zone_nr_sectors); + } } /* * Initialize the zoned metadata. */ -int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, +int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, + struct dmz_metadata **metadata, const char *devname) { struct dmz_metadata *zmd; @@ -2501,6 +2698,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, strcpy(zmd->devname, devname); zmd->dev = dev; + zmd->nr_devs = num_dev; zmd->mblk_rbtree = RB_ROOT; init_rwsem(&zmd->mblk_sem); mutex_init(&zmd->mblk_flush_lock); @@ -2535,11 +2733,24 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, /* Set metadata zones starting from sb_zone */ for (i = 0; i < zmd->nr_meta_zones << 1; i++) { zone = dmz_get(zmd, zmd->sb[0].zone->id + i); - if (!dmz_is_rnd(zone)) + if (!dmz_is_rnd(zone)) { + dmz_zmd_err(zmd, + "metadata zone %d is not random", i); + ret = -ENXIO; goto err; + } + set_bit(DMZ_META, &zone->flags); + } + if (zmd->sb[2].zone) { + zone = dmz_get(zmd, zmd->sb[2].zone->id); + if (!zone) { + dmz_zmd_err(zmd, + "Tertiary metadata zone not present"); + ret = -ENXIO; + goto err; + } set_bit(DMZ_META, &zone->flags); } - /* Load mapping table */ ret = dmz_load_mapping(zmd); if (ret) @@ -2564,8 +2775,9 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, goto err; } - dmz_zmd_info(zmd, "DM-Zoned metadata version %d", DMZ_META_VER); - dmz_print_dev(zmd, 0); + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); + for (i = 0; i < zmd->nr_devs; i++) + dmz_print_dev(zmd, i); dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors", zmd->nr_zones, (u64)zmd->zone_nr_sectors); -- cgit v1.2.3 From 489dc0f06a5837f87482c0ce61d830d24e17082e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 19 May 2020 10:14:19 +0200 Subject: dm zoned: return NULL if dmz_get_zone_for_reclaim() fails to find a zone The only case where dmz_get_zone_for_reclaim() cannot return a zone is if the respective lists are empty. So we should just return a simple NULL value here as we really don't have an error code which would make sense. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index fba690dd37f5..fa7bcb28e952 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1845,7 +1845,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) return dzone; } - return ERR_PTR(-EBUSY); + return NULL; } /* @@ -1865,7 +1865,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) return zone; } - return ERR_PTR(-EBUSY); + return NULL; } /* -- cgit v1.2.3 From 34f5affd04c4a16d9df19c369bcec6e873e57ffe Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 19 May 2020 10:14:20 +0200 Subject: dm zoned: separate random and cache zones Instead of lumping emulated zones together with random zones we should be handling them as separate 'cache' zones. This improves code readability and allows an easier implementation of different cache policies. Also add additional allocation flags, to separate the type (cache, random, or sequential) from the purpose (eg reclaim). Also switch the allocation policy to not use random zones as buffer zones if cache zones are present. This avoids a performance drop when all cache zones are used. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 123 ++++++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 31 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index fa7bcb28e952..6c009a8b36a4 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -166,6 +166,7 @@ struct dmz_metadata { unsigned int nr_meta_blocks; unsigned int nr_meta_zones; unsigned int nr_data_zones; + unsigned int nr_cache_zones; unsigned int nr_rnd_zones; unsigned int nr_reserved_seq; unsigned int nr_chunks; @@ -196,6 +197,11 @@ struct dmz_metadata { struct list_head unmap_rnd_list; struct list_head map_rnd_list; + unsigned int nr_cache; + atomic_t unmap_nr_cache; + struct list_head unmap_cache_list; + struct list_head map_cache_list; + unsigned int nr_seq; atomic_t unmap_nr_seq; struct list_head unmap_seq_list; @@ -301,6 +307,16 @@ unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd) return atomic_read(&zmd->unmap_nr_rnd); } +unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd) +{ + return zmd->nr_cache; +} + +unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd) +{ + return atomic_read(&zmd->unmap_nr_cache); +} + unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd) { return zmd->nr_seq; @@ -1390,9 +1406,9 @@ static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) atomic_set(&zone->refcount, 0); zone->id = idx; zone->chunk = DMZ_MAP_UNMAPPED; - set_bit(DMZ_RND, &zone->flags); + set_bit(DMZ_CACHE, &zone->flags); zone->wp_block = 0; - zmd->nr_rnd_zones++; + zmd->nr_cache_zones++; zmd->nr_useable_zones++; if (dev->capacity - zone_offset < zmd->zone_nr_sectors) { /* Disable runt zone */ @@ -1647,7 +1663,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) dzone->chunk = chunk; dmz_get_zone_weight(zmd, dzone); - if (dmz_is_rnd(dzone)) + if (dmz_is_cache(dzone)) + list_add_tail(&dzone->link, &zmd->map_cache_list); + else if (dmz_is_rnd(dzone)) list_add_tail(&dzone->link, &zmd->map_rnd_list); else list_add_tail(&dzone->link, &zmd->map_seq_list); @@ -1664,7 +1682,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) } bzone = dmz_get(zmd, bzone_id); - if (!dmz_is_rnd(bzone)) { + if (!dmz_is_rnd(bzone) && !dmz_is_cache(bzone)) { dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u", chunk, bzone_id); return -EIO; @@ -1676,7 +1694,10 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) bzone->bzone = dzone; dzone->bzone = bzone; dmz_get_zone_weight(zmd, bzone); - list_add_tail(&bzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(bzone)) + list_add_tail(&bzone->link, &zmd->map_cache_list); + else + list_add_tail(&bzone->link, &zmd->map_rnd_list); next: chunk++; e++; @@ -1693,8 +1714,12 @@ next: dzone = dmz_get(zmd, i); if (dmz_is_meta(dzone)) continue; + if (dmz_is_offline(dzone)) + continue; - if (dmz_is_rnd(dzone)) + if (dmz_is_cache(dzone)) + zmd->nr_cache++; + else if (dmz_is_rnd(dzone)) zmd->nr_rnd++; else zmd->nr_seq++; @@ -1707,7 +1732,10 @@ next: /* Unmapped data zone */ set_bit(DMZ_DATA, &dzone->flags); dzone->chunk = DMZ_MAP_UNMAPPED; - if (dmz_is_rnd(dzone)) { + if (dmz_is_cache(dzone)) { + list_add_tail(&dzone->link, &zmd->unmap_cache_list); + atomic_inc(&zmd->unmap_nr_cache); + } else if (dmz_is_rnd(dzone)) { list_add_tail(&dzone->link, &zmd->unmap_rnd_list); atomic_inc(&zmd->unmap_nr_rnd); } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) { @@ -1751,6 +1779,9 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (dmz_is_seq(zone)) { /* LRU rotate sequential zone */ list_add_tail(&zone->link, &zmd->map_seq_list); + } else if (dmz_is_cache(zone)) { + /* LRU rotate cache zone */ + list_add_tail(&zone->link, &zmd->map_cache_list); } else { /* LRU rotate random zone */ list_add_tail(&zone->link, &zmd->map_rnd_list); @@ -1826,17 +1857,19 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone) } /* - * Select a random write zone for reclaim. + * Select a cache or random write zone for reclaim. */ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) { struct dm_zone *dzone = NULL; struct dm_zone *zone; + struct list_head *zone_list = &zmd->map_rnd_list; - if (list_empty(&zmd->map_rnd_list)) - return ERR_PTR(-EBUSY); + /* If we have cache zones select from the cache zone list */ + if (zmd->nr_cache) + zone_list = &zmd->map_cache_list; - list_for_each_entry(zone, &zmd->map_rnd_list, link) { + list_for_each_entry(zone, zone_list, link) { if (dmz_is_buf(zone)) dzone = zone->bzone; else @@ -1855,9 +1888,6 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) { struct dm_zone *zone; - if (list_empty(&zmd->map_seq_list)) - return ERR_PTR(-EBUSY); - list_for_each_entry(zone, &zmd->map_seq_list, link) { if (!zone->bzone) continue; @@ -1907,6 +1937,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu unsigned int dzone_id; struct dm_zone *dzone = NULL; int ret = 0; + int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND; dmz_lock_map(zmd); again: @@ -1921,7 +1952,7 @@ again: goto out; /* Allocate a random zone */ - dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + dzone = dmz_alloc_zone(zmd, alloc_flags); if (!dzone) { if (dmz_dev_is_dying(zmd)) { dzone = ERR_PTR(-EIO); @@ -2014,6 +2045,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd, struct dm_zone *dzone) { struct dm_zone *bzone; + int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND; dmz_lock_map(zmd); again: @@ -2022,7 +2054,7 @@ again: goto out; /* Allocate a random zone */ - bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND); + bzone = dmz_alloc_zone(zmd, alloc_flags); if (!bzone) { if (dmz_dev_is_dying(zmd)) { bzone = ERR_PTR(-EIO); @@ -2039,7 +2071,10 @@ again: bzone->chunk = dzone->chunk; bzone->bzone = dzone; dzone->bzone = bzone; - list_add_tail(&bzone->link, &zmd->map_rnd_list); + if (dmz_is_cache(bzone)) + list_add_tail(&bzone->link, &zmd->map_cache_list); + else + list_add_tail(&bzone->link, &zmd->map_rnd_list); out: dmz_unlock_map(zmd); @@ -2055,31 +2090,46 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags) struct list_head *list; struct dm_zone *zone; - if (flags & DMZ_ALLOC_RND) + if (flags & DMZ_ALLOC_CACHE) + list = &zmd->unmap_cache_list; + else if (flags & DMZ_ALLOC_RND) list = &zmd->unmap_rnd_list; else list = &zmd->unmap_seq_list; + again: if (list_empty(list)) { /* - * No free zone: if this is for reclaim, allow using the - * reserved sequential zones. + * No free zone: return NULL if this is for not reclaim. */ - if (!(flags & DMZ_ALLOC_RECLAIM) || - list_empty(&zmd->reserved_seq_zones_list)) + if (!(flags & DMZ_ALLOC_RECLAIM)) return NULL; - - zone = list_first_entry(&zmd->reserved_seq_zones_list, - struct dm_zone, link); - list_del_init(&zone->link); - atomic_dec(&zmd->nr_reserved_seq_zones); + /* + * Use sequential write zones if we started off with random + * zones and the list is empty + */ + if (list == &zmd->unmap_rnd_list) { + list = &zmd->unmap_seq_list; + goto again; + } + /* + * Fallback to the reserved sequential zones + */ + zone = list_first_entry_or_null(&zmd->reserved_seq_zones_list, + struct dm_zone, link); + if (zone) { + list_del_init(&zone->link); + atomic_dec(&zmd->nr_reserved_seq_zones); + } return zone; } zone = list_first_entry(list, struct dm_zone, link); list_del_init(&zone->link); - if (dmz_is_rnd(zone)) + if (dmz_is_cache(zone)) + atomic_dec(&zmd->unmap_nr_cache); + else if (dmz_is_rnd(zone)) atomic_dec(&zmd->unmap_nr_rnd); else atomic_dec(&zmd->unmap_nr_seq); @@ -2110,7 +2160,10 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone) dmz_reset_zone(zmd, zone); /* Return the zone to its type unmap list */ - if (dmz_is_rnd(zone)) { + if (dmz_is_cache(zone)) { + list_add_tail(&zone->link, &zmd->unmap_cache_list); + atomic_inc(&zmd->unmap_nr_cache); + } else if (dmz_is_rnd(zone)) { list_add_tail(&zone->link, &zmd->unmap_rnd_list); atomic_inc(&zmd->unmap_nr_rnd); } else if (atomic_read(&zmd->nr_reserved_seq_zones) < @@ -2136,7 +2189,9 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone, dmz_set_chunk_mapping(zmd, chunk, dzone->id, DMZ_MAP_UNMAPPED); dzone->chunk = chunk; - if (dmz_is_rnd(dzone)) + if (dmz_is_cache(dzone)) + list_add_tail(&dzone->link, &zmd->map_cache_list); + else if (dmz_is_rnd(dzone)) list_add_tail(&dzone->link, &zmd->map_rnd_list); else list_add_tail(&dzone->link, &zmd->map_seq_list); @@ -2711,6 +2766,10 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, INIT_LIST_HEAD(&zmd->unmap_rnd_list); INIT_LIST_HEAD(&zmd->map_rnd_list); + atomic_set(&zmd->unmap_nr_cache, 0); + INIT_LIST_HEAD(&zmd->unmap_cache_list); + INIT_LIST_HEAD(&zmd->map_cache_list); + atomic_set(&zmd->unmap_nr_seq, 0); INIT_LIST_HEAD(&zmd->unmap_seq_list); INIT_LIST_HEAD(&zmd->map_seq_list); @@ -2733,7 +2792,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, /* Set metadata zones starting from sb_zone */ for (i = 0; i < zmd->nr_meta_zones << 1; i++) { zone = dmz_get(zmd, zmd->sb[0].zone->id + i); - if (!dmz_is_rnd(zone)) { + if (!dmz_is_rnd(zone) && !dmz_is_cache(zone)) { dmz_zmd_err(zmd, "metadata zone %d is not random", i); ret = -ENXIO; @@ -2785,6 +2844,8 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, zmd->nr_meta_zones * 2); dmz_zmd_debug(zmd, " %u data zones for %u chunks", zmd->nr_data_zones, zmd->nr_chunks); + dmz_zmd_debug(zmd, " %u cache zones (%u unmapped)", + zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache)); dmz_zmd_debug(zmd, " %u random zones (%u unmapped)", zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)", -- cgit v1.2.3 From 90a9b8693f1b84a695864f2b416cba9bde107268 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 19 May 2020 10:14:21 +0200 Subject: dm zoned: reclaim random zones when idle When the system is idle we should be starting reclaiming random zones, too. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 6c009a8b36a4..b5fd67eff046 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1859,15 +1859,20 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone) /* * Select a cache or random write zone for reclaim. */ -static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd) +static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, + bool idle) { struct dm_zone *dzone = NULL; struct dm_zone *zone; struct list_head *zone_list = &zmd->map_rnd_list; /* If we have cache zones select from the cache zone list */ - if (zmd->nr_cache) + if (zmd->nr_cache) { zone_list = &zmd->map_cache_list; + /* Try to relaim random zones, too, when idle */ + if (idle && list_empty(zone_list)) + zone_list = &zmd->map_rnd_list; + } list_for_each_entry(zone, zone_list, link) { if (dmz_is_buf(zone)) @@ -1901,7 +1906,7 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) /* * Select a zone for reclaim. */ -struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd) +struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle) { struct dm_zone *zone; @@ -1917,7 +1922,7 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd) if (list_empty(&zmd->reserved_seq_zones_list)) zone = dmz_get_seq_zone_for_reclaim(zmd); else - zone = dmz_get_rnd_zone_for_reclaim(zmd); + zone = dmz_get_rnd_zone_for_reclaim(zmd, idle); dmz_unlock_map(zmd); return zone; -- cgit v1.2.3 From a16b7dee302d2040d9e1fedff2161d1aceda0e8c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 19 May 2020 10:14:23 +0200 Subject: dm zoned: terminate reclaim on congestion When dmz_get_chunk_mapping() selects a zone which is under reclaim we should terminate the reclaim copy process. Since we're changing the zone itself, reclaim needs to run afterwards again anyway. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b5fd67eff046..db0dc2b5d44d 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1851,7 +1851,9 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone) { dmz_unlock_map(zmd); dmz_unlock_metadata(zmd); + set_bit(DMZ_RECLAIM_TERMINATE, &zone->flags); wait_on_bit_timeout(&zone->flags, DMZ_RECLAIM, TASK_UNINTERRUPTIBLE, HZ); + clear_bit(DMZ_RECLAIM_TERMINATE, &zone->flags); dmz_lock_metadata(zmd); dmz_lock_map(zmd); } -- cgit v1.2.3 From b4756d43a1dd2cfb778eb3cef3ba2efd2dcd5263 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 22 May 2020 10:58:53 +0200 Subject: dm zoned: remove leftover hunk for switching to sequential zones Remove a leftover hunk to switch from random zones to sequential zones when selecting a reclaim zone; the logic has moved into the caller and this hunk is now pointless. Fixes: 34f5affd04c4 ("dm zoned: separate random and cache zones") Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index db0dc2b5d44d..4a2e351365c5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -2111,14 +2111,6 @@ again: */ if (!(flags & DMZ_ALLOC_RECLAIM)) return NULL; - /* - * Use sequential write zones if we started off with random - * zones and the list is empty - */ - if (list == &zmd->unmap_rnd_list) { - list = &zmd->unmap_seq_list; - goto again; - } /* * Fallback to the reserved sequential zones */ -- cgit v1.2.3 From 35d0c96e422a484bbc5d4921fa20dcc880bfba2c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:42 +0200 Subject: dm zoned: add debugging message for reading superblocks Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 4a2e351365c5..ef1524d5928a 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1105,6 +1105,10 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) */ static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) { + dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu", + set, zmd->sb[set].dev->name, + zmd->sb[set].block); + return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ, zmd->sb[set].block, zmd->sb[set].mblk->page); } -- cgit v1.2.3 From 1565929b870fe166c5a57a85d6cb5a2bfe1e6c84 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:43 +0200 Subject: dm zoned: avoid unnecessary device recalulation for secondary superblock The secondary superblock must reside on the same device as the primary superblock, so there is no need to re-calculate the device. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index ef1524d5928a..043ed882970a 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1135,7 +1135,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) /* Bad first super block: search for the second one */ zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks; zmd->sb[1].zone = zmd->sb[0].zone + 1; - zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone); + zmd->sb[1].dev = zmd->sb[0].dev; for (i = 0; i < zmd->nr_rnd_zones - 1; i++) { if (dmz_read_sb(zmd, 1) != 0) break; @@ -1144,7 +1144,6 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) return 0; } zmd->sb[1].block += zone_nr_blocks; - zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone + i); } dmz_free_mblock(zmd, mblk); @@ -1263,7 +1262,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) if (!zmd->sb[1].zone) zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones; zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); - zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone); + zmd->sb[1].dev = zmd->sb[0].dev; ret = dmz_get_sb(zmd, 1); } else ret = dmz_lookup_secondary_sb(zmd); -- cgit v1.2.3 From aec67b4ffa4bea4a02063d9a0f379e5795d6f5dc Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:45 +0200 Subject: dm zoned: add a 'reserved' zone flag Instead of counting the number of reserved zones in dmz_free_zone(), mark the zone as 'reserved' during allocation and simplify dmz_free_zone(). Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 043ed882970a..0982ab1758a6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1743,6 +1743,7 @@ next: atomic_inc(&zmd->unmap_nr_rnd); } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) { list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list); + set_bit(DMZ_RESERVED, &dzone->flags); atomic_inc(&zmd->nr_reserved_seq_zones); zmd->nr_seq--; } else { @@ -2168,8 +2169,7 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone) } else if (dmz_is_rnd(zone)) { list_add_tail(&zone->link, &zmd->unmap_rnd_list); atomic_inc(&zmd->unmap_nr_rnd); - } else if (atomic_read(&zmd->nr_reserved_seq_zones) < - zmd->nr_reserved_seq) { + } else if (dmz_is_reserved(zone)) { list_add_tail(&zone->link, &zmd->reserved_seq_zones_list); atomic_inc(&zmd->nr_reserved_seq_zones); } else { -- cgit v1.2.3 From a92fbc446d1a93950b7e25bec6ad75dd26f01ba8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:46 +0200 Subject: dm zoned: convert to xarray The zones array is getting really large, and large arrays tend to wreak havoc with the CPU caches. So convert it to xarray to become more cache friendly. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Colin Ian King # fix leak in dmz_insert Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 122 ++++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 32 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 0982ab1758a6..b235283a8846 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -172,7 +172,7 @@ struct dmz_metadata { unsigned int nr_chunks; /* Zone information array */ - struct dm_zone *zones; + struct xarray zones; struct dmz_sb sb[3]; unsigned int mblk_primary; @@ -327,6 +327,32 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) return atomic_read(&zmd->unmap_nr_seq); } +static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) +{ + return xa_load(&zmd->zones, zone_id); +} + +static struct dm_zone *dmz_insert(struct dmz_metadata *zmd, + unsigned int zone_id) +{ + struct dm_zone *zone = kzalloc(sizeof(struct dm_zone), GFP_KERNEL); + + if (!zone) + return ERR_PTR(-ENOMEM); + + if (xa_insert(&zmd->zones, zone_id, zone, GFP_KERNEL)) { + kfree(zone); + return ERR_PTR(-EBUSY); + } + + INIT_LIST_HEAD(&zone->link); + atomic_set(&zone->refcount, 0); + zone->id = zone_id; + zone->chunk = DMZ_MAP_UNMAPPED; + + return zone; +} + const char *dmz_metadata_label(struct dmz_metadata *zmd) { return (const char *)zmd->label; @@ -1122,6 +1148,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) { unsigned int zone_nr_blocks = zmd->zone_nr_blocks; struct dmz_mblock *mblk; + unsigned int zone_id = zmd->sb[0].zone->id; int i; /* Allocate a block */ @@ -1134,16 +1161,15 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) /* Bad first super block: search for the second one */ zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks; - zmd->sb[1].zone = zmd->sb[0].zone + 1; + zmd->sb[1].zone = dmz_get(zmd, zone_id + 1); zmd->sb[1].dev = zmd->sb[0].dev; - for (i = 0; i < zmd->nr_rnd_zones - 1; i++) { + for (i = 1; i < zmd->nr_rnd_zones; i++) { if (dmz_read_sb(zmd, 1) != 0) break; - if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) { - zmd->sb[1].zone += i; + if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) return 0; - } zmd->sb[1].block += zone_nr_blocks; + zmd->sb[1].zone = dmz_get(zmd, zone_id + i); } dmz_free_mblock(zmd, mblk); @@ -1259,8 +1285,12 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* Read and check secondary super block */ if (ret == 0) { sb_good[0] = true; - if (!zmd->sb[1].zone) - zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones; + if (!zmd->sb[1].zone) { + unsigned int zone_id = + zmd->sb[0].zone->id + zmd->nr_meta_zones; + + zmd->sb[1].zone = dmz_get(zmd, zone_id); + } zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); zmd->sb[1].dev = zmd->sb[0].dev; ret = dmz_get_sb(zmd, 1); @@ -1341,7 +1371,11 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) struct dmz_metadata *zmd = data; struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0]; int idx = num + dev->zone_offset; - struct dm_zone *zone = &zmd->zones[idx]; + struct dm_zone *zone; + + zone = dmz_insert(zmd, idx); + if (IS_ERR(zone)) + return PTR_ERR(zone); if (blkz->len != zmd->zone_nr_sectors) { if (zmd->sb_version > 1) { @@ -1353,11 +1387,6 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) return -ENXIO; } - INIT_LIST_HEAD(&zone->link); - atomic_set(&zone->refcount, 0); - zone->id = idx; - zone->chunk = DMZ_MAP_UNMAPPED; - switch (blkz->type) { case BLK_ZONE_TYPE_CONVENTIONAL: set_bit(DMZ_RND, &zone->flags); @@ -1397,18 +1426,17 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) return 0; } -static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) +static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) { int idx; sector_t zone_offset = 0; for(idx = 0; idx < dev->nr_zones; idx++) { - struct dm_zone *zone = &zmd->zones[idx]; + struct dm_zone *zone; - INIT_LIST_HEAD(&zone->link); - atomic_set(&zone->refcount, 0); - zone->id = idx; - zone->chunk = DMZ_MAP_UNMAPPED; + zone = dmz_insert(zmd, idx); + if (IS_ERR(zone)) + return PTR_ERR(zone); set_bit(DMZ_CACHE, &zone->flags); zone->wp_block = 0; zmd->nr_cache_zones++; @@ -1420,6 +1448,7 @@ static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) } zone_offset += zmd->zone_nr_sectors; } + return 0; } /* @@ -1427,8 +1456,15 @@ static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) */ static void dmz_drop_zones(struct dmz_metadata *zmd) { - kfree(zmd->zones); - zmd->zones = NULL; + int idx; + + for(idx = 0; idx < zmd->nr_zones; idx++) { + struct dm_zone *zone = xa_load(&zmd->zones, idx); + + kfree(zone); + xa_erase(&zmd->zones, idx); + } + xa_destroy(&zmd->zones); } /* @@ -1460,20 +1496,25 @@ static int dmz_init_zones(struct dmz_metadata *zmd) DMERR("(%s): No zones found", zmd->devname); return -ENXIO; } - zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); - if (!zmd->zones) - return -ENOMEM; + xa_init(&zmd->zones); DMDEBUG("(%s): Using %zu B for zone information", zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); if (zmd->nr_devs > 1) { - dmz_emulate_zones(zmd, &zmd->dev[0]); + ret = dmz_emulate_zones(zmd, &zmd->dev[0]); + if (ret < 0) { + DMDEBUG("(%s): Failed to emulate zones, error %d", + zmd->devname, ret); + dmz_drop_zones(zmd); + return ret; + } + /* * Primary superblock zone is always at zone 0 when multiple * drives are present. */ - zmd->sb[0].zone = &zmd->zones[0]; + zmd->sb[0].zone = dmz_get(zmd, 0); zoned_dev = &zmd->dev[1]; } @@ -1576,11 +1617,6 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, return 0; } -static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) -{ - return &zmd->zones[zone_id]; -} - /* * Reset a zone write pointer. */ @@ -1662,6 +1698,11 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) } dzone = dmz_get(zmd, dzone_id); + if (!dzone) { + dmz_zmd_err(zmd, "Chunk %u mapping: data zone %u not present", + chunk, dzone_id); + return -EIO; + } set_bit(DMZ_DATA, &dzone->flags); dzone->chunk = chunk; dmz_get_zone_weight(zmd, dzone); @@ -1685,6 +1726,11 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) } bzone = dmz_get(zmd, bzone_id); + if (!bzone) { + dmz_zmd_err(zmd, "Chunk %u mapping: buffer zone %u not present", + chunk, bzone_id); + return -EIO; + } if (!dmz_is_rnd(bzone) && !dmz_is_cache(bzone)) { dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u", chunk, bzone_id); @@ -1715,6 +1761,8 @@ next: */ for (i = 0; i < zmd->nr_zones; i++) { dzone = dmz_get(zmd, i); + if (!dzone) + continue; if (dmz_is_meta(dzone)) continue; if (dmz_is_offline(dzone)) @@ -1978,6 +2026,10 @@ again: } else { /* The chunk is already mapped: get the mapping zone */ dzone = dmz_get(zmd, dzone_id); + if (!dzone) { + dzone = ERR_PTR(-EIO); + goto out; + } if (dzone->chunk != chunk) { dzone = ERR_PTR(-EIO); goto out; @@ -2794,6 +2846,12 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, /* Set metadata zones starting from sb_zone */ for (i = 0; i < zmd->nr_meta_zones << 1; i++) { zone = dmz_get(zmd, zmd->sb[0].zone->id + i); + if (!zone) { + dmz_zmd_err(zmd, + "metadata zone %u not present", i); + ret = -ENXIO; + goto err; + } if (!dmz_is_rnd(zone) && !dmz_is_cache(zone)) { dmz_zmd_err(zmd, "metadata zone %d is not random", i); -- cgit v1.2.3 From 5d2c74f3ddc010b5812e556715f7605201eff101 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:47 +0200 Subject: dm zoned: allocate temporary superblock for tertiary devices Checking the tertiary superblock just consists of validating UUIDs, crcs, and the generation number; it doesn't have contents which would be required during the actual operation. So allocate a temporary superblock when checking tertiary devices to avoid having to store it together with the 'real' superblocks. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 109 +++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 48 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b235283a8846..525ac0d80287 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -174,7 +174,7 @@ struct dmz_metadata { /* Zone information array */ struct xarray zones; - struct dmz_sb sb[3]; + struct dmz_sb sb[2]; unsigned int mblk_primary; unsigned int sb_version; u64 sb_gen; @@ -1016,10 +1016,11 @@ err: /* * Check super block. */ -static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) +static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, + bool tertiary) { - struct dmz_super *sb = zmd->sb[set].sb; - struct dmz_dev *dev = zmd->sb[set].dev; + struct dmz_super *sb = dsb->sb; + struct dmz_dev *dev = dsb->dev; unsigned int nr_meta_zones, nr_data_zones; u32 crc, stored_crc; u64 gen; @@ -1036,7 +1037,7 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) DMZ_META_VER, zmd->sb_version); return -EINVAL; } - if ((zmd->sb_version < 1) && (set == 2)) { + if (zmd->sb_version < 2 && tertiary) { dmz_dev_err(dev, "Tertiary superblocks are not supported"); return -EINVAL; } @@ -1080,7 +1081,7 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) return -ENXIO; } - if (set == 2) { + if (tertiary) { /* * Generation number should be 0, but it doesn't * really matter if it isn't. @@ -1129,14 +1130,13 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) /* * Read the first or second super block from disk. */ -static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set) +static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set) { dmz_zmd_debug(zmd, "read superblock set %d dev %s block %llu", - set, zmd->sb[set].dev->name, - zmd->sb[set].block); + set, sb->dev->name, sb->block); - return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ, - zmd->sb[set].block, zmd->sb[set].mblk->page); + return dmz_rdwr_block(sb->dev, REQ_OP_READ, + sb->block, sb->mblk->page); } /* @@ -1164,7 +1164,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) zmd->sb[1].zone = dmz_get(zmd, zone_id + 1); zmd->sb[1].dev = zmd->sb[0].dev; for (i = 1; i < zmd->nr_rnd_zones; i++) { - if (dmz_read_sb(zmd, 1) != 0) + if (dmz_read_sb(zmd, &zmd->sb[1], 1) != 0) break; if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) return 0; @@ -1181,9 +1181,9 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd) } /* - * Read the first or second super block from disk. + * Read a super block from disk. */ -static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set) +static int dmz_get_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set) { struct dmz_mblock *mblk; int ret; @@ -1193,14 +1193,14 @@ static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set) if (!mblk) return -ENOMEM; - zmd->sb[set].mblk = mblk; - zmd->sb[set].sb = mblk->data; + sb->mblk = mblk; + sb->sb = mblk->data; /* Read super block */ - ret = dmz_read_sb(zmd, set); + ret = dmz_read_sb(zmd, sb, set); if (ret) { dmz_free_mblock(zmd, mblk); - zmd->sb[set].mblk = NULL; + sb->mblk = NULL; return ret; } @@ -1274,13 +1274,13 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* Read and check the primary super block */ zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone); - ret = dmz_get_sb(zmd, 0); + ret = dmz_get_sb(zmd, &zmd->sb[0], 0); if (ret) { dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed"); return ret; } - ret = dmz_check_sb(zmd, 0); + ret = dmz_check_sb(zmd, &zmd->sb[0], false); /* Read and check secondary super block */ if (ret == 0) { @@ -1293,7 +1293,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) } zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone); zmd->sb[1].dev = zmd->sb[0].dev; - ret = dmz_get_sb(zmd, 1); + ret = dmz_get_sb(zmd, &zmd->sb[1], 1); } else ret = dmz_lookup_secondary_sb(zmd); @@ -1302,7 +1302,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) return ret; } - ret = dmz_check_sb(zmd, 1); + ret = dmz_check_sb(zmd, &zmd->sb[1], false); if (ret == 0) sb_good[1] = true; @@ -1347,20 +1347,40 @@ static int dmz_load_sb(struct dmz_metadata *zmd) "Using super block %u (gen %llu)", zmd->mblk_primary, zmd->sb_gen); - if ((zmd->sb_version > 1) && zmd->sb[2].zone) { - zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone); - zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone); - ret = dmz_get_sb(zmd, 2); - if (ret) { - dmz_dev_err(zmd->sb[2].dev, - "Read tertiary super block failed"); - return ret; + if (zmd->sb_version > 1) { + int i; + struct dmz_sb *sb; + + sb = kzalloc(sizeof(struct dmz_sb), GFP_KERNEL); + if (!sb) + return -ENOMEM; + for (i = 1; i < zmd->nr_devs; i++) { + sb->block = 0; + sb->zone = dmz_get(zmd, zmd->dev[i].zone_offset); + sb->dev = &zmd->dev[i]; + if (!dmz_is_meta(sb->zone)) { + dmz_dev_err(sb->dev, + "Tertiary super block zone %u not marked as metadata zone", + sb->zone->id); + ret = -EINVAL; + goto out_kfree; + } + ret = dmz_get_sb(zmd, sb, i + 1); + if (ret) { + dmz_dev_err(sb->dev, + "Read tertiary super block failed"); + dmz_free_mblock(zmd, sb->mblk); + goto out_kfree; + } + ret = dmz_check_sb(zmd, sb, true); + dmz_free_mblock(zmd, sb->mblk); + if (ret == -EINVAL) + goto out_kfree; } - ret = dmz_check_sb(zmd, 2); - if (ret == -EINVAL) - return ret; + out_kfree: + kfree(sb); } - return 0; + return ret; } /* @@ -1417,12 +1437,15 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) zmd->sb[0].zone = zone; } } - if (zmd->nr_devs > 1 && !zmd->sb[2].zone) { - /* Tertiary superblock zone */ - zmd->sb[2].zone = zone; + if (zmd->nr_devs > 1 && num == 0) { + /* + * Tertiary superblock zones are always at the + * start of the zoned devices, so mark them + * as metadata zone. + */ + set_bit(DMZ_META, &zone->flags); } } - return 0; } @@ -2860,16 +2883,6 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, } set_bit(DMZ_META, &zone->flags); } - if (zmd->sb[2].zone) { - zone = dmz_get(zmd, zmd->sb[2].zone->id); - if (!zone) { - dmz_zmd_err(zmd, - "Tertiary metadata zone not present"); - ret = -ENXIO; - goto err; - } - set_bit(DMZ_META, &zone->flags); - } /* Load mapping table */ ret = dmz_load_mapping(zmd); if (ret) -- cgit v1.2.3 From 8f22272af7a72763fe9f6b710cdcc380fed80f75 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:48 +0200 Subject: dm zoned: add device pointer to struct dm_zone Add a pointer, to the containing device, within struct dm_zone and kill dmz_zone_to_dev(). Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 525ac0d80287..68d44506e6f3 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -229,16 +229,10 @@ struct dmz_metadata { */ static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone) { - unsigned int zone_id; - if (WARN_ON(!zone)) return 0; - zone_id = zone->id; - if (zmd->nr_devs > 1 && - (zone_id >= zmd->dev[1].zone_offset)) - zone_id -= zmd->dev[1].zone_offset; - return zone_id; + return zone->id - zone->dev->zone_offset; } sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) @@ -255,18 +249,6 @@ sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) return (sector_t)zone_id << zmd->zone_nr_blocks_shift; } -struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) -{ - if (WARN_ON(!zone)) - return &zmd->dev[0]; - - if (zmd->nr_devs > 1 && - zone->id >= zmd->dev[1].zone_offset) - return &zmd->dev[1]; - - return &zmd->dev[0]; -} - unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd) { return zmd->zone_nr_blocks; @@ -333,7 +315,7 @@ static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) } static struct dm_zone *dmz_insert(struct dmz_metadata *zmd, - unsigned int zone_id) + unsigned int zone_id, struct dmz_dev *dev) { struct dm_zone *zone = kzalloc(sizeof(struct dm_zone), GFP_KERNEL); @@ -349,6 +331,7 @@ static struct dm_zone *dmz_insert(struct dmz_metadata *zmd, atomic_set(&zone->refcount, 0); zone->id = zone_id; zone->chunk = DMZ_MAP_UNMAPPED; + zone->dev = dev; return zone; } @@ -1273,7 +1256,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd) /* Read and check the primary super block */ zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone); - zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone); + zmd->sb[0].dev = zmd->sb[0].zone->dev; ret = dmz_get_sb(zmd, &zmd->sb[0], 0); if (ret) { dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed"); @@ -1393,7 +1376,7 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) int idx = num + dev->zone_offset; struct dm_zone *zone; - zone = dmz_insert(zmd, idx); + zone = dmz_insert(zmd, idx, dev); if (IS_ERR(zone)) return PTR_ERR(zone); @@ -1457,7 +1440,7 @@ static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) for(idx = 0; idx < dev->nr_zones; idx++) { struct dm_zone *zone; - zone = dmz_insert(zmd, idx); + zone = dmz_insert(zmd, idx, dev); if (IS_ERR(zone)) return PTR_ERR(zone); set_bit(DMZ_CACHE, &zone->flags); @@ -1583,7 +1566,7 @@ static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx, */ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) { - struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); + struct dmz_dev *dev = zone->dev; unsigned int noio_flag; int ret; @@ -1620,7 +1603,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) static int dmz_handle_seq_write_err(struct dmz_metadata *zmd, struct dm_zone *zone) { - struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); + struct dmz_dev *dev = zone->dev; unsigned int wp = 0; int ret; @@ -1657,7 +1640,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) return 0; if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { - struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); + struct dmz_dev *dev = zone->dev; ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, dmz_start_sect(zmd, zone), @@ -2218,9 +2201,7 @@ again: goto again; } if (dmz_is_meta(zone)) { - struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone); - - dmz_dev_warn(dev, "Zone %u has metadata", zone->id); + dmz_zmd_warn(zmd, "Zone %u has metadata", zone->id); zone = NULL; goto again; } -- cgit v1.2.3 From 18979819b57ecbc598a8e27d925ab4bb9e145cf0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:49 +0200 Subject: dm zoned: add metadata pointer to struct dmz_dev Add a metadata pointer within struct dmz_dev and use it as argument for blkdev_report_zones() instead of the metadata itself. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 68d44506e6f3..71f263a78515 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1371,8 +1371,8 @@ static int dmz_load_sb(struct dmz_metadata *zmd) */ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) { - struct dmz_metadata *zmd = data; - struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0]; + struct dmz_dev *dev = data; + struct dmz_metadata *zmd = dev->metadata; int idx = num + dev->zone_offset; struct dm_zone *zone; @@ -1495,8 +1495,12 @@ static int dmz_init_zones(struct dmz_metadata *zmd) /* Allocate zone array */ zmd->nr_zones = 0; - for (i = 0; i < zmd->nr_devs; i++) - zmd->nr_zones += zmd->dev[i].nr_zones; + for (i = 0; i < zmd->nr_devs; i++) { + struct dmz_dev *dev = &zmd->dev[i]; + + dev->metadata = zmd; + zmd->nr_zones += dev->nr_zones; + } if (!zmd->nr_zones) { DMERR("(%s): No zones found", zmd->devname); @@ -1531,7 +1535,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd) * first randomly writable zone. */ ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES, - dmz_init_zone, zmd); + dmz_init_zone, zoned_dev); if (ret < 0) { DMDEBUG("(%s): Failed to report zones, error %d", zmd->devname, ret); -- cgit v1.2.3 From bd82fdabf162fec1404c4e22988b178c4f3dd23b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:51 +0200 Subject: dm zoned: move random and sequential zones into struct dmz_dev Random and sequential zones should be part of the respective device structure to make arbitration between devices possible. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 139 +++++++++++++++++++++++------------------ 1 file changed, 79 insertions(+), 60 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 71f263a78515..ce17bf3628c6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -192,21 +192,12 @@ struct dmz_metadata { /* Zone allocation management */ struct mutex map_lock; struct dmz_mblock **map_mblk; - unsigned int nr_rnd; - atomic_t unmap_nr_rnd; - struct list_head unmap_rnd_list; - struct list_head map_rnd_list; unsigned int nr_cache; atomic_t unmap_nr_cache; struct list_head unmap_cache_list; struct list_head map_cache_list; - unsigned int nr_seq; - atomic_t unmap_nr_seq; - struct list_head unmap_seq_list; - struct list_head map_seq_list; - atomic_t nr_reserved_seq_zones; struct list_head reserved_seq_zones_list; @@ -279,14 +270,14 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd) return zmd->nr_chunks; } -unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx) { - return zmd->nr_rnd; + return zmd->dev[idx].nr_rnd; } -unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx) { - return atomic_read(&zmd->unmap_nr_rnd); + return atomic_read(&zmd->dev[idx].unmap_nr_rnd); } unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd) @@ -299,14 +290,14 @@ unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd) return atomic_read(&zmd->unmap_nr_cache); } -unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx) { - return zmd->nr_seq; + return zmd->dev[idx].nr_seq; } -unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) +unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx) { - return atomic_read(&zmd->unmap_nr_seq); + return atomic_read(&zmd->dev[idx].unmap_nr_seq); } static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id) @@ -1500,6 +1491,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd) dev->metadata = zmd; zmd->nr_zones += dev->nr_zones; + + atomic_set(&dev->unmap_nr_rnd, 0); + INIT_LIST_HEAD(&dev->unmap_rnd_list); + INIT_LIST_HEAD(&dev->map_rnd_list); + + atomic_set(&dev->unmap_nr_seq, 0); + INIT_LIST_HEAD(&dev->unmap_seq_list); + INIT_LIST_HEAD(&dev->map_seq_list); } if (!zmd->nr_zones) { @@ -1720,9 +1719,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) if (dmz_is_cache(dzone)) list_add_tail(&dzone->link, &zmd->map_cache_list); else if (dmz_is_rnd(dzone)) - list_add_tail(&dzone->link, &zmd->map_rnd_list); + list_add_tail(&dzone->link, &dzone->dev->map_rnd_list); else - list_add_tail(&dzone->link, &zmd->map_seq_list); + list_add_tail(&dzone->link, &dzone->dev->map_seq_list); /* Check buffer zone */ bzone_id = le32_to_cpu(dmap[e].bzone_id); @@ -1756,7 +1755,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd) if (dmz_is_cache(bzone)) list_add_tail(&bzone->link, &zmd->map_cache_list); else - list_add_tail(&bzone->link, &zmd->map_rnd_list); + list_add_tail(&bzone->link, &bzone->dev->map_rnd_list); next: chunk++; e++; @@ -1781,9 +1780,9 @@ next: if (dmz_is_cache(dzone)) zmd->nr_cache++; else if (dmz_is_rnd(dzone)) - zmd->nr_rnd++; + dzone->dev->nr_rnd++; else - zmd->nr_seq++; + dzone->dev->nr_seq++; if (dmz_is_data(dzone)) { /* Already initialized */ @@ -1797,16 +1796,18 @@ next: list_add_tail(&dzone->link, &zmd->unmap_cache_list); atomic_inc(&zmd->unmap_nr_cache); } else if (dmz_is_rnd(dzone)) { - list_add_tail(&dzone->link, &zmd->unmap_rnd_list); - atomic_inc(&zmd->unmap_nr_rnd); + list_add_tail(&dzone->link, + &dzone->dev->unmap_rnd_list); + atomic_inc(&dzone->dev->unmap_nr_rnd); } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) { list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list); set_bit(DMZ_RESERVED, &dzone->flags); atomic_inc(&zmd->nr_reserved_seq_zones); - zmd->nr_seq--; + dzone->dev->nr_seq--; } else { - list_add_tail(&dzone->link, &zmd->unmap_seq_list); - atomic_inc(&zmd->unmap_nr_seq); + list_add_tail(&dzone->link, + &dzone->dev->unmap_seq_list); + atomic_inc(&dzone->dev->unmap_nr_seq); } } @@ -1840,13 +1841,13 @@ static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone) list_del_init(&zone->link); if (dmz_is_seq(zone)) { /* LRU rotate sequential zone */ - list_add_tail(&zone->link, &zmd->map_seq_list); + list_add_tail(&zone->link, &zone->dev->map_seq_list); } else if (dmz_is_cache(zone)) { /* LRU rotate cache zone */ list_add_tail(&zone->link, &zmd->map_cache_list); } else { /* LRU rotate random zone */ - list_add_tail(&zone->link, &zmd->map_rnd_list); + list_add_tail(&zone->link, &zone->dev->map_rnd_list); } } @@ -1928,14 +1929,24 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, { struct dm_zone *dzone = NULL; struct dm_zone *zone; - struct list_head *zone_list = &zmd->map_rnd_list; + struct list_head *zone_list; /* If we have cache zones select from the cache zone list */ if (zmd->nr_cache) { zone_list = &zmd->map_cache_list; /* Try to relaim random zones, too, when idle */ - if (idle && list_empty(zone_list)) - zone_list = &zmd->map_rnd_list; + if (idle && list_empty(zone_list)) { + int i; + + for (i = 1; i < zmd->nr_devs; i++) { + zone_list = &zmd->dev[i].map_rnd_list; + if (!list_empty(zone_list)) + break; + } + } + } else { + /* Otherwise the random zones are on the first disk */ + zone_list = &zmd->dev[0].map_rnd_list; } list_for_each_entry(zone, zone_list, link) { @@ -1956,12 +1967,17 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) { struct dm_zone *zone; + int i; - list_for_each_entry(zone, &zmd->map_seq_list, link) { - if (!zone->bzone) - continue; - if (dmz_lock_zone_reclaim(zone)) - return zone; + for (i = 0; i < zmd->nr_devs; i++) { + struct dmz_dev *dev = &zmd->dev[i]; + + list_for_each_entry(zone, &dev->map_seq_list, link) { + if (!zone->bzone) + continue; + if (dmz_lock_zone_reclaim(zone)) + return zone; + } } return NULL; @@ -2147,7 +2163,7 @@ again: if (dmz_is_cache(bzone)) list_add_tail(&bzone->link, &zmd->map_cache_list); else - list_add_tail(&bzone->link, &zmd->map_rnd_list); + list_add_tail(&bzone->link, &bzone->dev->map_rnd_list); out: dmz_unlock_map(zmd); @@ -2162,21 +2178,27 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags) { struct list_head *list; struct dm_zone *zone; + unsigned int dev_idx = 0; +again: if (flags & DMZ_ALLOC_CACHE) list = &zmd->unmap_cache_list; else if (flags & DMZ_ALLOC_RND) - list = &zmd->unmap_rnd_list; + list = &zmd->dev[dev_idx].unmap_rnd_list; else - list = &zmd->unmap_seq_list; + list = &zmd->dev[dev_idx].unmap_seq_list; -again: if (list_empty(list)) { /* * No free zone: return NULL if this is for not reclaim. */ if (!(flags & DMZ_ALLOC_RECLAIM)) return NULL; + if (dev_idx < zmd->nr_devs) { + dev_idx++; + goto again; + } + /* * Fallback to the reserved sequential zones */ @@ -2195,9 +2217,9 @@ again: if (dmz_is_cache(zone)) atomic_dec(&zmd->unmap_nr_cache); else if (dmz_is_rnd(zone)) - atomic_dec(&zmd->unmap_nr_rnd); + atomic_dec(&zone->dev->unmap_nr_rnd); else - atomic_dec(&zmd->unmap_nr_seq); + atomic_dec(&zone->dev->unmap_nr_seq); if (dmz_is_offline(zone)) { dmz_zmd_warn(zmd, "Zone %u is offline", zone->id); @@ -2227,14 +2249,14 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone) list_add_tail(&zone->link, &zmd->unmap_cache_list); atomic_inc(&zmd->unmap_nr_cache); } else if (dmz_is_rnd(zone)) { - list_add_tail(&zone->link, &zmd->unmap_rnd_list); - atomic_inc(&zmd->unmap_nr_rnd); + list_add_tail(&zone->link, &zone->dev->unmap_rnd_list); + atomic_inc(&zone->dev->unmap_nr_rnd); } else if (dmz_is_reserved(zone)) { list_add_tail(&zone->link, &zmd->reserved_seq_zones_list); atomic_inc(&zmd->nr_reserved_seq_zones); } else { - list_add_tail(&zone->link, &zmd->unmap_seq_list); - atomic_inc(&zmd->unmap_nr_seq); + list_add_tail(&zone->link, &zone->dev->unmap_seq_list); + atomic_inc(&zone->dev->unmap_nr_seq); } wake_up_all(&zmd->free_wq); @@ -2254,9 +2276,9 @@ void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone, if (dmz_is_cache(dzone)) list_add_tail(&dzone->link, &zmd->map_cache_list); else if (dmz_is_rnd(dzone)) - list_add_tail(&dzone->link, &zmd->map_rnd_list); + list_add_tail(&dzone->link, &dzone->dev->map_rnd_list); else - list_add_tail(&dzone->link, &zmd->map_seq_list); + list_add_tail(&dzone->link, &dzone->dev->map_seq_list); } /* @@ -2824,18 +2846,11 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, INIT_LIST_HEAD(&zmd->mblk_dirty_list); mutex_init(&zmd->map_lock); - atomic_set(&zmd->unmap_nr_rnd, 0); - INIT_LIST_HEAD(&zmd->unmap_rnd_list); - INIT_LIST_HEAD(&zmd->map_rnd_list); atomic_set(&zmd->unmap_nr_cache, 0); INIT_LIST_HEAD(&zmd->unmap_cache_list); INIT_LIST_HEAD(&zmd->map_cache_list); - atomic_set(&zmd->unmap_nr_seq, 0); - INIT_LIST_HEAD(&zmd->unmap_seq_list); - INIT_LIST_HEAD(&zmd->map_seq_list); - atomic_set(&zmd->nr_reserved_seq_zones, 0); INIT_LIST_HEAD(&zmd->reserved_seq_zones_list); @@ -2904,10 +2919,14 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, zmd->nr_data_zones, zmd->nr_chunks); dmz_zmd_debug(zmd, " %u cache zones (%u unmapped)", zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache)); - dmz_zmd_debug(zmd, " %u random zones (%u unmapped)", - zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd)); - dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)", - zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq)); + for (i = 0; i < zmd->nr_devs; i++) { + dmz_zmd_debug(zmd, " %u random zones (%u unmapped)", + dmz_nr_rnd_zones(zmd, i), + dmz_nr_unmap_rnd_zones(zmd, i)); + dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)", + dmz_nr_seq_zones(zmd, i), + dmz_nr_unmap_seq_zones(zmd, i)); + } dmz_zmd_debug(zmd, " %u reserved sequential data zones", zmd->nr_reserved_seq); dmz_zmd_debug(zmd, "Format:"); -- cgit v1.2.3 From 4dba12881f882b629774796bb8655f5b1415d803 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:52 +0200 Subject: dm zoned: support arbitrary number of devices Remove the hard-coded limit of two devices and support an unlimited number of additional zoned devices. Signed-off-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index ce17bf3628c6..49bafc86aa9a 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1525,7 +1525,20 @@ static int dmz_init_zones(struct dmz_metadata *zmd) */ zmd->sb[0].zone = dmz_get(zmd, 0); - zoned_dev = &zmd->dev[1]; + for (i = 1; i < zmd->nr_devs; i++) { + zoned_dev = &zmd->dev[i]; + + ret = blkdev_report_zones(zoned_dev->bdev, 0, + BLK_ALL_ZONES, + dmz_init_zone, zoned_dev); + if (ret < 0) { + DMDEBUG("(%s): Failed to report zones, error %d", + zmd->devname, ret); + dmz_drop_zones(zmd); + return ret; + } + } + return 0; } /* -- cgit v1.2.3 From 22c1ef66c4cbb82baf81a28abedfe8ad20ad9126 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:53 +0200 Subject: dm zoned: allocate zone by device index When allocating a zone, pass in an indicator on which device the zone should be allocated; this increases performance for a multi-device setup because reclaim will now allocate zones on the device for which reclaim is running. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 49bafc86aa9a..1a6cdab3e4ef 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -2050,7 +2050,7 @@ again: goto out; /* Allocate a random zone */ - dzone = dmz_alloc_zone(zmd, alloc_flags); + dzone = dmz_alloc_zone(zmd, 0, alloc_flags); if (!dzone) { if (dmz_dev_is_dying(zmd)) { dzone = ERR_PTR(-EIO); @@ -2156,7 +2156,7 @@ again: goto out; /* Allocate a random zone */ - bzone = dmz_alloc_zone(zmd, alloc_flags); + bzone = dmz_alloc_zone(zmd, 0, alloc_flags); if (!bzone) { if (dmz_dev_is_dying(zmd)) { bzone = ERR_PTR(-EIO); @@ -2187,11 +2187,12 @@ out: * Get an unmapped (free) zone. * This must be called with the mapping lock held. */ -struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags) +struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned int dev_idx, + unsigned long flags) { struct list_head *list; struct dm_zone *zone; - unsigned int dev_idx = 0; + int i = 0; again: if (flags & DMZ_ALLOC_CACHE) @@ -2207,8 +2208,12 @@ again: */ if (!(flags & DMZ_ALLOC_RECLAIM)) return NULL; - if (dev_idx < zmd->nr_devs) { - dev_idx++; + /* + * Try to allocate from other devices + */ + if (i < zmd->nr_devs) { + dev_idx = (dev_idx + 1) % zmd->nr_devs; + i++; goto again; } -- cgit v1.2.3 From 69875d443bc3bb1b2e1f77fe3da5ad5c8c729aa2 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:54 +0200 Subject: dm zoned: select reclaim zone based on device index per-device reclaim should select zones on that device only. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 50 +++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 30 deletions(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 1a6cdab3e4ef..0cb90799b8ce 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1938,7 +1938,7 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone) * Select a cache or random write zone for reclaim. */ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, - bool idle) + unsigned int idx, bool idle) { struct dm_zone *dzone = NULL; struct dm_zone *zone; @@ -1948,24 +1948,17 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, if (zmd->nr_cache) { zone_list = &zmd->map_cache_list; /* Try to relaim random zones, too, when idle */ - if (idle && list_empty(zone_list)) { - int i; - - for (i = 1; i < zmd->nr_devs; i++) { - zone_list = &zmd->dev[i].map_rnd_list; - if (!list_empty(zone_list)) - break; - } - } - } else { - /* Otherwise the random zones are on the first disk */ - zone_list = &zmd->dev[0].map_rnd_list; - } + if (idle && list_empty(zone_list)) + zone_list = &zmd->dev[idx].map_rnd_list; + } else + zone_list = &zmd->dev[idx].map_rnd_list; list_for_each_entry(zone, zone_list, link) { - if (dmz_is_buf(zone)) + if (dmz_is_buf(zone)) { dzone = zone->bzone; - else + if (dzone->dev->dev_idx != idx) + continue; + } else dzone = zone; if (dmz_lock_zone_reclaim(dzone)) return dzone; @@ -1977,20 +1970,16 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, /* * Select a buffered sequential zone for reclaim. */ -static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) +static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd, + unsigned int idx) { struct dm_zone *zone; - int i; - - for (i = 0; i < zmd->nr_devs; i++) { - struct dmz_dev *dev = &zmd->dev[i]; - list_for_each_entry(zone, &dev->map_seq_list, link) { - if (!zone->bzone) - continue; - if (dmz_lock_zone_reclaim(zone)) - return zone; - } + list_for_each_entry(zone, &zmd->dev[idx].map_seq_list, link) { + if (!zone->bzone) + continue; + if (dmz_lock_zone_reclaim(zone)) + return zone; } return NULL; @@ -1999,7 +1988,8 @@ static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd) /* * Select a zone for reclaim. */ -struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle) +struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, + unsigned int dev_idx, bool idle) { struct dm_zone *zone; @@ -2013,9 +2003,9 @@ struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle) */ dmz_lock_map(zmd); if (list_empty(&zmd->reserved_seq_zones_list)) - zone = dmz_get_seq_zone_for_reclaim(zmd); + zone = dmz_get_seq_zone_for_reclaim(zmd, dev_idx); else - zone = dmz_get_rnd_zone_for_reclaim(zmd, idle); + zone = dmz_get_rnd_zone_for_reclaim(zmd, dev_idx, idle); dmz_unlock_map(zmd); return zone; -- cgit v1.2.3 From 2094045fe5b5dda98c4ec6cb1ac7b12ba4382856 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:55 +0200 Subject: dm zoned: prefer full zones for reclaim Prefer full zones when selecting the next zone for reclaim. Signed-off-by: Hannes Reinecke Reviewed-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 0cb90799b8ce..59a34895f5a8 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1941,7 +1941,7 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, unsigned int idx, bool idle) { struct dm_zone *dzone = NULL; - struct dm_zone *zone; + struct dm_zone *zone, *last = NULL; struct list_head *zone_list; /* If we have cache zones select from the cache zone list */ @@ -1958,6 +1958,13 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd, dzone = zone->bzone; if (dzone->dev->dev_idx != idx) continue; + if (!last) { + last = dzone; + continue; + } + if (last->weight < dzone->weight) + continue; + dzone = last; } else dzone = zone; if (dmz_lock_zone_reclaim(dzone)) -- cgit v1.2.3 From 27d49ac1dd751897506ba51df7226fc0ce7ef681 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 2 Jun 2020 13:09:56 +0200 Subject: dm zoned: check superblock location When specifying several devices the superblock location must be checked to ensure the devices are specified in the correct order. Signed-off-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-metadata.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/md/dm-zoned-metadata.c') diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 59a34895f5a8..314ce31a2c43 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -997,7 +997,7 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, struct dmz_dev *dev = dsb->dev; unsigned int nr_meta_zones, nr_data_zones; u32 crc, stored_crc; - u64 gen; + u64 gen, sb_block; if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", @@ -1026,6 +1026,14 @@ static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb, return -ENXIO; } + sb_block = le64_to_cpu(sb->sb_block); + if (sb_block != (u64)dsb->zone->id << zmd->zone_nr_blocks_shift ) { + dmz_dev_err(dev, "Invalid superblock position " + "(is %llu expected %llu)", + sb_block, + (u64)dsb->zone->id << zmd->zone_nr_blocks_shift); + return -EINVAL; + } if (zmd->sb_version > 1) { uuid_t sb_uuid; -- cgit v1.2.3