Diffstat (limited to 'drivers/md/dm-thin.c')
-rw-r--r--  drivers/md/dm-thin.c  194
1 file changed, 123 insertions, 71 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 88f2f802d528..ee29037ffc2e 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -640,7 +640,9 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m)
 	 */
 	r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block);
 	if (r) {
-		DMERR_LIMIT("dm_thin_insert_block() failed");
+		DMERR_LIMIT("%s: dm_thin_insert_block() failed: error = %d",
+			    dm_device_name(pool->pool_md), r);
+		set_pool_mode(pool, PM_READ_ONLY);
 		cell_error(pool, m->cell);
 		goto out;
 	}
@@ -881,31 +883,23 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
 	}
 }
 
-static int commit(struct pool *pool)
-{
-	int r;
-
-	r = dm_pool_commit_metadata(pool->pmd);
-	if (r)
-		DMERR_LIMIT("commit failed: error = %d", r);
-
-	return r;
-}
-
 /*
  * A non-zero return indicates read_only or fail_io mode.
  * Many callers don't care about the return value.
  */
-static int commit_or_fallback(struct pool *pool)
+static int commit(struct pool *pool)
 {
 	int r;
 
 	if (get_pool_mode(pool) != PM_WRITE)
 		return -EINVAL;
 
-	r = commit(pool);
-	if (r)
+	r = dm_pool_commit_metadata(pool->pmd);
+	if (r) {
+		DMERR_LIMIT("%s: dm_pool_commit_metadata failed: error = %d",
+			    dm_device_name(pool->pool_md), r);
 		set_pool_mode(pool, PM_READ_ONLY);
+	}
 
 	return r;
 }
@@ -917,6 +911,13 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 	unsigned long flags;
 	struct pool *pool = tc->pool;
 
+	/*
+	 * Once no_free_space is set we must not allow allocation to succeed.
+	 * Otherwise it is difficult to explain, debug, test and support.
+	 */
+	if (pool->no_free_space)
+		return -ENOSPC;
+
 	r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
 	if (r)
 		return r;
@@ -931,37 +932,46 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 	}
 
 	if (!free_blocks) {
-		if (pool->no_free_space)
-			return -ENOSPC;
-		else {
-			/*
-			 * Try to commit to see if that will free up some
-			 * more space.
-			 */
-			(void) commit_or_fallback(pool);
+		/*
+		 * Try to commit to see if that will free up some
+		 * more space.
+		 */
+		r = commit(pool);
+		if (r)
+			return r;
 
-			r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
-			if (r)
-				return r;
+		r = dm_pool_get_free_block_count(pool->pmd, &free_blocks);
+		if (r)
+			return r;
 
-			/*
-			 * If we still have no space we set a flag to avoid
-			 * doing all this checking and return -ENOSPC.
-			 */
-			if (!free_blocks) {
-				DMWARN("%s: no free space available.",
-				       dm_device_name(pool->pool_md));
-				spin_lock_irqsave(&pool->lock, flags);
-				pool->no_free_space = 1;
-				spin_unlock_irqrestore(&pool->lock, flags);
-				return -ENOSPC;
-			}
+		/*
+		 * If we still have no space we set a flag to avoid
+		 * doing all this checking and return -ENOSPC. This
+		 * flag serves as a latch that disallows allocations from
+		 * this pool until the admin takes action (e.g. resize or
+		 * table reload).
+		 */
+		if (!free_blocks) {
+			DMWARN("%s: no free data space available.",
+			       dm_device_name(pool->pool_md));
+			spin_lock_irqsave(&pool->lock, flags);
+			pool->no_free_space = 1;
+			spin_unlock_irqrestore(&pool->lock, flags);
+			return -ENOSPC;
 		}
 	}
 
 	r = dm_pool_alloc_data_block(pool->pmd, result);
-	if (r)
+	if (r) {
+		if (r == -ENOSPC &&
+		    !dm_pool_get_free_metadata_block_count(pool->pmd, &free_blocks) &&
+		    !free_blocks) {
+			DMWARN("%s: no free metadata space available.",
+			       dm_device_name(pool->pool_md));
+			set_pool_mode(pool, PM_READ_ONLY);
+		}
 		return r;
+	}
 
 	return 0;
 }
@@ -1085,6 +1095,7 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 {
 	int r;
 	dm_block_t data_block;
+	struct pool *pool = tc->pool;
 
 	r = alloc_data_block(tc, &data_block);
 	switch (r) {
@@ -1094,13 +1105,14 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block,
 		break;
 
 	case -ENOSPC:
-		no_space(tc->pool, cell);
+		no_space(pool, cell);
 		break;
 
 	default:
 		DMERR_LIMIT("%s: alloc_data_block() failed: error = %d",
			    __func__, r);
-		cell_error(tc->pool, cell);
+		set_pool_mode(pool, PM_READ_ONLY);
+		cell_error(pool, cell);
 		break;
 	}
 }
@@ -1340,7 +1352,7 @@ static void process_deferred_bios(struct pool *pool)
 	if (bio_list_empty(&bios) && !need_commit_due_to_time(pool))
 		return;
 
-	if (commit_or_fallback(pool)) {
+	if (commit(pool)) {
 		while ((bio = bio_list_pop(&bios)))
 			bio_io_error(bio);
 		return;
@@ -1386,7 +1398,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
 
 	switch (mode) {
 	case PM_FAIL:
-		DMERR("switching pool to failure mode");
+		DMERR("%s: switching pool to failure mode",
+		      dm_device_name(pool->pool_md));
+		dm_pool_metadata_read_only(pool->pmd);
 		pool->process_bio = process_bio_fail;
 		pool->process_discard = process_bio_fail;
 		pool->process_prepared_mapping = process_prepared_mapping_fail;
@@ -1394,10 +1408,12 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
 		break;
 
 	case PM_READ_ONLY:
-		DMERR("switching pool to read-only mode");
+		DMERR("%s: switching pool to read-only mode",
+		      dm_device_name(pool->pool_md));
 		r = dm_pool_abort_metadata(pool->pmd);
 		if (r) {
-			DMERR("aborting transaction failed");
+			DMERR("%s: aborting transaction failed",
+			      dm_device_name(pool->pool_md));
 			set_pool_mode(pool, PM_FAIL);
 		} else {
 			dm_pool_metadata_read_only(pool->pmd);
@@ -1409,6 +1425,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
 		break;
 
 	case PM_WRITE:
+		dm_pool_metadata_read_write(pool->pmd);
 		pool->process_bio = process_bio;
 		pool->process_discard = process_discard;
 		pool->process_prepared_mapping = process_prepared_mapping;
@@ -1625,12 +1642,19 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
 	struct pool_c *pt = ti->private;
 
 	/*
-	 * We want to make sure that degraded pools are never upgraded.
+	 * We want to make sure that a pool in PM_FAIL mode is never upgraded.
 	 */
 	enum pool_mode old_mode = pool->pf.mode;
 	enum pool_mode new_mode = pt->adjusted_pf.mode;
 
-	if (old_mode > new_mode)
+	/*
+	 * If we were in PM_FAIL mode, rollback of metadata failed. We're
+	 * not going to recover without a thin_repair. So we never let the
+	 * pool move out of the old mode. On the other hand a PM_READ_ONLY
+	 * may have been due to a lack of metadata or data space, and may
+	 * now work (ie. if the underlying devices have been resized).
+	 */
+	if (old_mode == PM_FAIL)
 		new_mode = old_mode;
 
 	pool->ti = ti;
@@ -2083,6 +2107,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	 * them down to the data device. The thin device's discard
 	 * processing will cause mappings to be removed from the btree.
 	 */
+	ti->discard_zeroes_data_unsupported = true;
 	if (pf.discard_enabled && pf.discard_passdown) {
 		ti->num_discard_bios = 1;
 
@@ -2092,7 +2117,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		 * thin devices' discard limits consistent).
 		 */
 		ti->discards_supported = true;
-		ti->discard_zeroes_data_unsupported = true;
 	}
 	ti->private = pt;
 
@@ -2156,19 +2180,22 @@ static int maybe_resize_data_dev(struct dm_target *ti, bool *need_commit)
 
 	r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size);
 	if (r) {
-		DMERR("failed to retrieve data device size");
+		DMERR("%s: failed to retrieve data device size",
+		      dm_device_name(pool->pool_md));
 		return r;
 	}
 
 	if (data_size < sb_data_size) {
-		DMERR("pool target (%llu blocks) too small: expected %llu",
+		DMERR("%s: pool target (%llu blocks) too small: expected %llu",
+		      dm_device_name(pool->pool_md),
 		      (unsigned long long)data_size, sb_data_size);
 		return -EINVAL;
 
 	} else if (data_size > sb_data_size) {
 		r = dm_pool_resize_data_dev(pool->pmd, data_size);
 		if (r) {
-			DMERR("failed to resize data device");
+			DMERR("%s: failed to resize data device",
+			      dm_device_name(pool->pool_md));
 			set_pool_mode(pool, PM_READ_ONLY);
 			return r;
 		}
@@ -2192,19 +2219,22 @@ static int maybe_resize_metadata_dev(struct dm_target *ti, bool *need_commit)
 
 	r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size);
 	if (r) {
-		DMERR("failed to retrieve data device size");
+		DMERR("%s: failed to retrieve metadata device size",
+		      dm_device_name(pool->pool_md));
 		return r;
 	}
 
 	if (metadata_dev_size < sb_metadata_dev_size) {
-		DMERR("metadata device (%llu blocks) too small: expected %llu",
+		DMERR("%s: metadata device (%llu blocks) too small: expected %llu",
+		      dm_device_name(pool->pool_md),
 		      metadata_dev_size, sb_metadata_dev_size);
 		return -EINVAL;
 
 	} else if (metadata_dev_size > sb_metadata_dev_size) {
 		r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size);
 		if (r) {
-			DMERR("failed to resize metadata device");
+			DMERR("%s: failed to resize metadata device",
+			      dm_device_name(pool->pool_md));
 			return r;
 		}
 
@@ -2248,7 +2278,7 @@ static int pool_preresume(struct dm_target *ti)
 		return r;
 
 	if (need_commit1 || need_commit2)
-		(void) commit_or_fallback(pool);
+		(void) commit(pool);
 
 	return 0;
 }
@@ -2275,7 +2305,7 @@ static void pool_postsuspend(struct dm_target *ti)
 
 	cancel_delayed_work(&pool->waker);
 	flush_workqueue(pool->wq);
-	(void) commit_or_fallback(pool);
+	(void) commit(pool);
 }
 
 static int check_arg_count(unsigned argc, unsigned args_required)
@@ -2409,7 +2439,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
 	if (r)
 		return r;
 
-	(void) commit_or_fallback(pool);
+	(void) commit(pool);
 
 	r = dm_pool_reserve_metadata_snap(pool->pmd);
 	if (r)
@@ -2471,7 +2501,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv)
 		DMWARN("Unrecognised thin pool target message received: %s", argv[0]);
 
 	if (!r)
-		(void) commit_or_fallback(pool);
+		(void) commit(pool);
 
 	return r;
 }
@@ -2526,41 +2556,47 @@ static void pool_status(struct dm_target *ti, status_type_t type,
 
 		/* Commit to ensure statistics aren't out-of-date */
 		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
-			(void) commit_or_fallback(pool);
+			(void) commit(pool);
 
 		r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id);
 		if (r) {
-			DMERR("dm_pool_get_metadata_transaction_id returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_transaction_id returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata);
 		if (r) {
-			DMERR("dm_pool_get_free_metadata_block_count returned %d", r);
+			DMERR("%s: dm_pool_get_free_metadata_block_count returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_metadata_dev_size(pool->pmd, &nr_blocks_metadata);
 		if (r) {
-			DMERR("dm_pool_get_metadata_dev_size returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_dev_size returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_free_block_count(pool->pmd, &nr_free_blocks_data);
 		if (r) {
-			DMERR("dm_pool_get_free_block_count returned %d", r);
+			DMERR("%s: dm_pool_get_free_block_count returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_data_dev_size(pool->pmd, &nr_blocks_data);
 		if (r) {
-			DMERR("dm_pool_get_data_dev_size returned %d", r);
+			DMERR("%s: dm_pool_get_data_dev_size returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
 		r = dm_pool_get_metadata_snap(pool->pmd, &held_root);
 		if (r) {
-			DMERR("dm_pool_get_metadata_snap returned %d", r);
+			DMERR("%s: dm_pool_get_metadata_snap returned %d",
+			      dm_device_name(pool->pool_md), r);
 			goto err;
 		}
 
@@ -2648,17 +2684,33 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
+	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	/*
+	 * If the system-determined stacked limits are compatible with the
+	 * pool's blocksize (io_opt is a factor) do not override them.
+	 */
+	if (io_opt_sectors < pool->sectors_per_block ||
+	    do_div(io_opt_sectors, pool->sectors_per_block)) {
+		blk_limits_io_min(limits, 0);
+		blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
+	}
 
 	/*
 	 * pt->adjusted_pf is a staging area for the actual features to use.
 	 * They get transferred to the live pool in bind_control_target()
 	 * called from pool_preresume().
	 */
-	if (!pt->adjusted_pf.discard_enabled)
+	if (!pt->adjusted_pf.discard_enabled) {
+		/*
+		 * Must explicitly disallow stacking discard limits otherwise the
+		 * block layer will stack them if pool's data device has support.
+		 * QUEUE_FLAG_DISCARD wouldn't be set but there is no way for the
+		 * user to see that, so make sure to set all discard limits to 0.
+		 */
+		limits->discard_granularity = 0;
 		return;
+	}
 
 	disable_passdown_if_not_supported(pt);
 
@@ -2669,7 +2721,7 @@ static struct target_type pool_target = {
 	.name = "thin-pool",
 	.features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
		    DM_TARGET_IMMUTABLE,
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module = THIS_MODULE,
 	.ctr = pool_ctr,
 	.dtr = pool_dtr,
@@ -2794,10 +2846,10 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook);
 
 	/* In case the pool supports discards, pass them on. */
+	ti->discard_zeroes_data_unsupported = true;
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = true;
 		ti->num_discard_bios = 1;
-		ti->discard_zeroes_data_unsupported = true;
 		/* Discard bios must be split on a block boundary */
 		ti->split_discard_bios = true;
 	}
@@ -2956,7 +3008,7 @@ static int thin_iterate_devices(struct dm_target *ti,
 
 static struct target_type thin_target = {
 	.name = "thin",
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module = THIS_MODULE,
 	.ctr = thin_ctr,
 	.dtr = thin_dtr,
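Illustration only, not part of the patch: a minimal user-space C sketch of the ENOSPC latch that alloc_data_block() gains above. The names fake_pool and commit_stub and the block counts are made up for the example; the point is the shape of the logic: retry once after a commit, and if the pool is still empty, set no_free_space so every later allocation fails fast with -ENOSPC until an administrator resizes the pool or reloads the table.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_pool {
	unsigned free_blocks;	/* data blocks still unallocated */
	bool no_free_space;	/* latch: set once, cleared only by admin action */
};

/* Stand-in for dm_pool_commit_metadata(); a real commit may release blocks. */
static void commit_stub(struct fake_pool *pool)
{
	(void)pool;		/* no-op in this sketch */
}

static int alloc_data_block(struct fake_pool *pool)
{
	if (pool->no_free_space)
		return -ENOSPC;			/* latched: fail fast, no rechecking */

	if (!pool->free_blocks) {
		commit_stub(pool);		/* one retry after committing */
		if (!pool->free_blocks) {
			pool->no_free_space = true;	/* latch until the admin acts */
			return -ENOSPC;
		}
	}
	pool->free_blocks--;
	return 0;
}

int main(void)
{
	struct fake_pool pool = { .free_blocks = 2, .no_free_space = false };

	/* first two allocations succeed, the rest fail fast with -ENOSPC */
	for (int i = 0; i < 4; i++)
		printf("alloc %d -> %d\n", i, alloc_data_block(&pool));
	return 0;
}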
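Also illustration only: the reworked pool_io_hints() above now leaves the stacked io_min/io_opt limits alone when the stacked io_opt is already a whole multiple of the pool's block size, and only otherwise imposes the pool's own values. The helper below is hypothetical; in user space the kernel's do_div() remainder test reduces to a plain modulo.

#include <stdbool.h>
#include <stdint.h>

/*
 * Return true when the pool should impose its own io_min/io_opt, i.e. when
 * the stacked io_opt is smaller than the pool block size or not an exact
 * multiple of it (mirrors the io_opt_sectors/do_div() test in the patch).
 */
static bool pool_should_override_io_opt(uint64_t io_opt_sectors,
					uint64_t sectors_per_block)
{
	return io_opt_sectors < sectors_per_block ||
	       (io_opt_sectors % sectors_per_block) != 0;
}

For example, with sectors_per_block = 128, a stacked io_opt of 256 sectors is left in place, while 192 sectors would be overridden.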