diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-17 14:21:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-17 14:21:15 -0700 |
commit | dac94e29110cd606dec37673644caf2cf6fd1dde (patch) | |
tree | 0f545d215bd9aca6808db1c762bbe48271afa245 | |
parent | 243bfd2c052dafe9e59fb0170434bfe706439156 (diff) | |
parent | 7e1b9521f5a8356553f5e58b07952bf346632ea4 (diff) |
Merge tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper fixes from Mike Snitzer:
- a couple DM thin provisioning fixes
- a few request-based DM and DM multipath fixes for issues that were
made when merging Christoph's changes with Bart's changes for 4.12
- a DM bufio unsigned overflow fix
- a couple pure fixes for the DM cache target.
- various very small tweaks to the DM cache target that enable
considerable speed improvements in the face of continuous IO. Given
that the cache target was significantly reworked for 4.12 I see no
reason to sit on these advances until 4.13 considering the favorable
results associated with such minimalist tweaks.
* tag 'for-4.12/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm cache: handle kmalloc failure allocating background_tracker struct
dm bufio: make the parameter "retain_bytes" unsigned long
dm mpath: multipath_clone_and_map must not return -EIO
dm mpath: don't return -EIO from dm_report_EIO
dm rq: add a missing break to map_request
dm space map disk: fix some book keeping in the disk space map
dm thin metadata: call precommit before saving the roots
dm cache policy smq: don't do any writebacks unless IDLE
dm cache: simplify the IDLE vs BUSY state calculation
dm cache: track all IO to the cache rather than just the origin device's IO
dm cache policy smq: stop preemptively demoting blocks
dm cache policy smq: put newly promoted entries at the top of the multiqueue
dm cache policy smq: be more aggressive about triggering a writeback
dm cache policy smq: only demote entries in bottom half of the clean multiqueue
dm cache: fix incorrect 'idle_time' reset in IO tracker
-rw-r--r-- | drivers/md/dm-bufio.c | 16 | ||||
-rw-r--r-- | drivers/md/dm-cache-background-tracker.c | 5 | ||||
-rw-r--r-- | drivers/md/dm-cache-policy-smq.c | 31 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 27 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 19 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 1 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 4 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-disk.c | 15 |
8 files changed, 66 insertions, 52 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 5db11a405129..cd8139593ccd 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -218,7 +218,7 @@ static DEFINE_SPINLOCK(param_spinlock); * Buffers are freed after this timeout */ static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; -static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; +static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; static unsigned long dm_bufio_peak_allocated; static unsigned long dm_bufio_allocated_kmem_cache; @@ -1558,10 +1558,10 @@ static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp) return true; } -static unsigned get_retain_buffers(struct dm_bufio_client *c) +static unsigned long get_retain_buffers(struct dm_bufio_client *c) { - unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); - return retain_bytes / c->block_size; + unsigned long retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes); + return retain_bytes >> (c->sectors_per_block_bits + SECTOR_SHIFT); } static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, @@ -1571,7 +1571,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan, struct dm_buffer *b, *tmp; unsigned long freed = 0; unsigned long count = nr_to_scan; - unsigned retain_target = get_retain_buffers(c); + unsigned long retain_target = get_retain_buffers(c); for (l = 0; l < LIST_SIZE; l++) { list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) { @@ -1794,8 +1794,8 @@ static bool older_than(struct dm_buffer *b, unsigned long age_hz) static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) { struct dm_buffer *b, *tmp; - unsigned retain_target = get_retain_buffers(c); - unsigned count; + unsigned long retain_target = get_retain_buffers(c); + unsigned long count; LIST_HEAD(write_list); dm_bufio_lock(c); @@ -1955,7 +1955,7 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); -module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR); +module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory"); module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR); diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 9b1afdfb13f0..707233891291 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -33,6 +33,11 @@ struct background_tracker *btracker_create(unsigned max_work) { struct background_tracker *b = kmalloc(sizeof(*b), GFP_KERNEL); + if (!b) { + DMERR("couldn't create background_tracker"); + return NULL; + } + b->max_work = max_work; atomic_set(&b->pending_promotes, 0); atomic_set(&b->pending_writebacks, 0); diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 72479bd61e11..e5eb9c9b4bc8 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1120,8 +1120,6 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) * Cache entries may not be populated. So we cannot rely on the * size of the clean queue. */ - unsigned nr_clean; - if (idle) { /* * We'd like to clean everything. @@ -1129,18 +1127,16 @@ static bool clean_target_met(struct smq_policy *mq, bool idle) return q_size(&mq->dirty) == 0u; } - nr_clean = from_cblock(mq->cache_size) - q_size(&mq->dirty); - return (nr_clean + btracker_nr_writebacks_queued(mq->bg_work)) >= - percent_to_target(mq, CLEAN_TARGET); + /* + * If we're busy we don't worry about cleaning at all. + */ + return true; } -static bool free_target_met(struct smq_policy *mq, bool idle) +static bool free_target_met(struct smq_policy *mq) { unsigned nr_free; - if (!idle) - return true; - nr_free = from_cblock(mq->cache_size) - mq->cache_alloc.nr_allocated; return (nr_free + btracker_nr_demotions_queued(mq->bg_work)) >= percent_to_target(mq, FREE_TARGET); @@ -1190,9 +1186,9 @@ static void queue_demotion(struct smq_policy *mq) if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed))) return; - e = q_peek(&mq->clean, mq->clean.nr_levels, true); + e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); if (!e) { - if (!clean_target_met(mq, false)) + if (!clean_target_met(mq, true)) queue_writeback(mq); return; } @@ -1220,7 +1216,7 @@ static void queue_promotion(struct smq_policy *mq, dm_oblock_t oblock, * We always claim to be 'idle' to ensure some demotions happen * with continuous loads. */ - if (!free_target_met(mq, true)) + if (!free_target_met(mq)) queue_demotion(mq); return; } @@ -1421,14 +1417,10 @@ static int smq_get_background_work(struct dm_cache_policy *p, bool idle, spin_lock_irqsave(&mq->lock, flags); r = btracker_issue(mq->bg_work, result); if (r == -ENODATA) { - /* find some writeback work to do */ - if (mq->migrations_allowed && !free_target_met(mq, idle)) - queue_demotion(mq); - - else if (!clean_target_met(mq, idle)) + if (!clean_target_met(mq, idle)) { queue_writeback(mq); - - r = btracker_issue(mq->bg_work, result); + r = btracker_issue(mq->bg_work, result); + } } spin_unlock_irqrestore(&mq->lock, flags); @@ -1452,6 +1444,7 @@ static void __complete_background_work(struct smq_policy *mq, clear_pending(mq, e); if (success) { e->oblock = work->oblock; + e->level = NR_CACHE_LEVELS - 1; push(mq, e); // h, q, a } else { diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1db375f50a13..d682a0511381 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -94,6 +94,9 @@ static void iot_io_begin(struct io_tracker *iot, sector_t len) static void __iot_io_end(struct io_tracker *iot, sector_t len) { + if (!len) + return; + iot->in_flight -= len; if (!iot->in_flight) iot->idle_time = jiffies; @@ -474,7 +477,7 @@ struct cache { spinlock_t invalidation_lock; struct list_head invalidation_requests; - struct io_tracker origin_tracker; + struct io_tracker tracker; struct work_struct commit_ws; struct batcher committer; @@ -901,8 +904,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) static bool accountable_bio(struct cache *cache, struct bio *bio) { - return ((bio->bi_bdev == cache->origin_dev->bdev) && - bio_op(bio) != REQ_OP_DISCARD); + return bio_op(bio) != REQ_OP_DISCARD; } static void accounted_begin(struct cache *cache, struct bio *bio) @@ -912,7 +914,7 @@ static void accounted_begin(struct cache *cache, struct bio *bio) if (accountable_bio(cache, bio)) { pb->len = bio_sectors(bio); - iot_io_begin(&cache->origin_tracker, pb->len); + iot_io_begin(&cache->tracker, pb->len); } } @@ -921,7 +923,7 @@ static void accounted_complete(struct cache *cache, struct bio *bio) size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); - iot_io_end(&cache->origin_tracker, pb->len); + iot_io_end(&cache->tracker, pb->len); } static void accounted_request(struct cache *cache, struct bio *bio) @@ -1716,20 +1718,19 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock, enum busy { IDLE, - MODERATE, BUSY }; static enum busy spare_migration_bandwidth(struct cache *cache) { - bool idle = iot_idle_for(&cache->origin_tracker, HZ); + bool idle = iot_idle_for(&cache->tracker, HZ); sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) * cache->sectors_per_block; - if (current_volume <= cache->migration_threshold) - return idle ? IDLE : MODERATE; + if (idle && current_volume <= cache->migration_threshold) + return IDLE; else - return idle ? MODERATE : BUSY; + return BUSY; } static void inc_hit_counter(struct cache *cache, struct bio *bio) @@ -2045,8 +2046,6 @@ static void check_migrations(struct work_struct *ws) for (;;) { b = spare_migration_bandwidth(cache); - if (b == BUSY) - break; r = policy_get_background_work(cache->policy, b == IDLE, &op); if (r == -ENODATA) @@ -2717,7 +2716,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) batcher_init(&cache->committer, commit_op, cache, issue_op, cache, cache->wq); - iot_init(&cache->origin_tracker); + iot_init(&cache->tracker); init_rwsem(&cache->background_work_lock); prevent_background_work(cache); @@ -2941,7 +2940,7 @@ static void cache_postsuspend(struct dm_target *ti) cancel_delayed_work(&cache->waker); flush_workqueue(cache->wq); - WARN_ON(cache->origin_tracker.in_flight); + WARN_ON(cache->tracker.in_flight); /* * If it's a flush suspend there won't be any deferred bios, so this diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 926a6bcb32c8..3df056b73b66 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -447,7 +447,7 @@ failed: * it has been invoked. */ #define dm_report_EIO(m) \ -({ \ +do { \ struct mapped_device *md = dm_table_get_md((m)->ti->table); \ \ pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \ @@ -455,8 +455,7 @@ failed: test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ dm_noflush_suspending((m)->ti)); \ - -EIO; \ -}) +} while (0) /* * Map cloned requests (request-based multipath) @@ -481,7 +480,8 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_DELAY_REQUEUE; - return dm_report_EIO(m); /* Failed */ + dm_report_EIO(m); /* Failed */ + return DM_MAPIO_KILL; } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { if (pg_init_all_paths(m)) @@ -558,7 +558,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m if (!pgpath) { if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) return DM_MAPIO_REQUEUE; - return dm_report_EIO(m); + dm_report_EIO(m); + return -EIO; } mpio->pgpath = pgpath; @@ -1493,7 +1494,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, if (atomic_read(&m->nr_valid_paths) == 0 && !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { if (error == -EIO) - error = dm_report_EIO(m); + dm_report_EIO(m); /* complete with the original error */ r = DM_ENDIO_DONE; } @@ -1524,8 +1525,10 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone, fail_path(mpio->pgpath); if (atomic_read(&m->nr_valid_paths) == 0 && - !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return dm_report_EIO(m); + !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + dm_report_EIO(m); + return -EIO; + } /* Queue for the daemon to resubmit */ dm_bio_restore(get_bio_details_from_bio(clone), clone); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 2af27026aa2e..b639fa7246ee 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -507,6 +507,7 @@ static int map_request(struct dm_rq_target_io *tio) case DM_MAPIO_KILL: /* The target wants to complete the I/O */ dm_kill_unmapped_request(rq, -EIO); + break; default: DMWARN("unimplemented target map return value: %d", r); BUG(); diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 0f0251d0d337..d31d18d9727c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -484,11 +484,11 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = save_sm_roots(pmd); + r = dm_tm_pre_commit(pmd->tm); if (r < 0) return r; - r = dm_tm_pre_commit(pmd->tm); + r = save_sm_roots(pmd); if (r < 0) return r; diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index ebb280a14325..32adf6b4a9c7 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -142,10 +142,23 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) { + int r; + uint32_t old_count; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - return sm_ll_dec(&smd->ll, b, &ev); + r = sm_ll_dec(&smd->ll, b, &ev); + if (!r && (ev == SM_FREE)) { + /* + * It's only free if it's also free in the last + * transaction. + */ + r = sm_ll_lookup(&smd->old_ll, b, &old_count); + if (!r && !old_count) + smd->nr_allocated_this_transaction--; + } + + return r; } static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) |