summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-03-08 14:12:17 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-08 14:12:17 -0800
commit 80201fe175cbf7f3e372f53eba0a881a702ad926 (patch)
tree 8026c68d52763614268a9c3c80759ad386bd5967 /drivers/md
parent 4221b807d1f73c03d22543416d303b60a5d1ef31 (diff)
parent aaeee62c841cc1e48231e1d60c304d2da9c4e41c (diff)
Merge tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "Not a huge amount of changes in this round, the biggest one is that we finally have Ming's multi-page bvec support merged. Apart from that, this pull request contains: - Small series that avoids quiescing the queue for sysfs changes that match what we currently have (Aleksei) - Series of bcache fixes (via Coly) - Series of lightnvm fixes (via Mathias) - NVMe pull request from Christoph. Nothing major, just SPDX/license cleanups, RR mp policy (Hannes), and little fixes (Bart, Chaitanya). - BFQ series (Paolo) - Save blk-mq cpu -> hw queue mapping, removing a pointer indirection for the fast path (Jianchao) - fops->iopoll() added for async IO polling, this is a feature that the upcoming io_uring interface will use (Christoph, me) - Partition scan loop fixes (Dongli) - mtip32xx conversion from managed resource API (Christoph) - cdrom registration race fix (Guenter) - MD pull from Song, two minor fixes. - Various documentation fixes (Marcos) - Multi-page bvec feature. This brings a lot of nice improvements with it, like more efficient splitting, larger IOs can be supported without growing the bvec table size, and so on. 
(Ming) - Various little fixes to core and drivers" * tag 'for-5.1/block-20190302' of git://git.kernel.dk/linux-block: (117 commits) block: fix updating bio's front segment size block: Replace function name in string with __func__ nbd: propagate genlmsg_reply return code floppy: remove set but not used variable 'q' null_blk: fix checking for REQ_FUA block: fix NULL pointer dereference in register_disk fs: fix guard_bio_eod to check for real EOD errors blk-mq: use HCTX_TYPE_DEFAULT but not 0 to index blk_mq_tag_set->map block: optimize bvec iteration in bvec_iter_advance block: introduce mp_bvec_for_each_page() for iterating over page block: optimize blk_bio_segment_split for single-page bvec block: optimize __blk_segment_map_sg() for single-page bvec block: introduce bvec_nth_page() iomap: wire up the iopoll method block: add bio_set_polled() helper block: wire up block device iopoll method fs: add an iopoll method to struct file_operations loop: set GENHD_FL_NO_PART_SCAN after blkdev_reread_part() loop: do not print warn message if partition scan is successful block: bounce: make sure that bvec table is updated ...
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/bcache/btree.c 3
-rw-r--r-- drivers/md/bcache/extents.c 13
-rw-r--r-- drivers/md/bcache/request.c 7
-rw-r--r-- drivers/md/bcache/stats.c 2
-rw-r--r-- drivers/md/bcache/super.c 30
-rw-r--r-- drivers/md/bcache/sysfs.c 81
-rw-r--r-- drivers/md/bcache/sysfs.h 23
-rw-r--r-- drivers/md/bcache/util.c 6
-rw-r--r-- drivers/md/bcache/writeback.h 3
-rw-r--r-- drivers/md/dm-crypt.c 3
-rw-r--r-- drivers/md/dm-rq.c 2
-rw-r--r-- drivers/md/dm-table.c 13
-rw-r--r-- drivers/md/md-linear.c 3
-rw-r--r-- drivers/md/raid1.c 9
14 files changed, 124 insertions, 74 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 23cb1dc7296b..64def336f053 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -432,8 +432,9 @@ static void do_btree_node_write(struct btree *b)
int j;
struct bio_vec *bv;
void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bv, b->bio, j)
+ bio_for_each_segment_all(bv, b->bio, j, iter_all)
memcpy(page_address(bv->bv_page),
base + j * PAGE_SIZE, PAGE_SIZE);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 956004366699..886710043025 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -538,6 +538,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
unsigned int i, stale;
+ char buf[80];
if (!KEY_PTRS(k) ||
bch_extent_invalid(bk, k))
@@ -547,19 +548,19 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
if (!ptr_available(b->c, k, i))
return true;
- if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
- return false;
-
for (i = 0; i < KEY_PTRS(k); i++) {
stale = ptr_stale(b->c, k, i);
+ if (stale && KEY_DIRTY(k)) {
+ bch_extent_to_text(buf, sizeof(buf), k);
+ pr_info("stale dirty pointer, stale %u, key: %s",
+ stale, buf);
+ }
+
btree_bug_on(stale > BUCKET_GC_GEN_MAX, b,
"key too stale: %i, need_gc %u",
stale, b->c->need_gc);
- btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
- b, "stale dirty pointer");
-
if (stale)
return true;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15070412a32e..f101bfe8657a 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -392,10 +392,11 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
/*
* Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata (eg, for gfs2).
+ * unless the read-ahead request is for metadata
+ * (eg, for gfs2 or xfs).
*/
if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & REQ_PRIO))
+ !(bio->bi_opf & (REQ_META|REQ_PRIO)))
goto skip;
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
@@ -877,7 +878,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
}
if (!(bio->bi_opf & REQ_RAHEAD) &&
- !(bio->bi_opf & REQ_PRIO) &&
+ !(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
s->iop.c->gc_stats.in_use < CUTOFF_CACHE_READA)
reada = min_t(sector_t, dc->readahead >> 9,
get_capacity(bio->bi_disk) - bio_end_sector(bio));
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 894410f3f829..ba1c93791d8d 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -111,7 +111,7 @@ void bch_cache_accounting_clear(struct cache_accounting *acc)
{
memset(&acc->total.cache_hits,
0,
- sizeof(unsigned long) * 7);
+ sizeof(struct cache_stats));
}
void bch_cache_accounting_destroy(struct cache_accounting *acc)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 4dee119c3664..a697a3a923cd 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1615,21 +1615,21 @@ static void conditional_stop_bcache_device(struct cache_set *c,
*/
pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
d->disk->disk_name);
- /*
- * There might be a small time gap that cache set is
- * released but bcache device is not. Inside this time
- * gap, regular I/O requests will directly go into
- * backing device as no cache set attached to. This
- * behavior may also introduce potential inconsistence
- * data in writeback mode while cache is dirty.
- * Therefore before calling bcache_device_stop() due
- * to a broken cache device, dc->io_disable should be
- * explicitly set to true.
- */
- dc->io_disable = true;
- /* make others know io_disable is true earlier */
- smp_mb();
- bcache_device_stop(d);
+ /*
+ * There might be a small time gap that cache set is
+ * released but bcache device is not. Inside this time
+ * gap, regular I/O requests will directly go into
+ * backing device as no cache set attached to. This
+ * behavior may also introduce potential inconsistence
+ * data in writeback mode while cache is dirty.
+ * Therefore before calling bcache_device_stop() due
+ * to a broken cache device, dc->io_disable should be
+ * explicitly set to true.
+ */
+ dc->io_disable = true;
+ /* make others know io_disable is true earlier */
+ smp_mb();
+ bcache_device_stop(d);
} else {
/*
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 557a8a3270a1..17bae9c14ca0 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -67,6 +67,8 @@ read_attribute(written);
read_attribute(btree_written);
read_attribute(metadata_written);
read_attribute(active_journal_entries);
+read_attribute(backing_dev_name);
+read_attribute(backing_dev_uuid);
sysfs_time_stats_attribute(btree_gc, sec, ms);
sysfs_time_stats_attribute(btree_split, sec, us);
@@ -243,6 +245,19 @@ SHOW(__bch_cached_dev)
return strlen(buf);
}
+ if (attr == &sysfs_backing_dev_name) {
+ snprintf(buf, BDEVNAME_SIZE + 1, "%s", dc->backing_dev_name);
+ strcat(buf, "\n");
+ return strlen(buf);
+ }
+
+ if (attr == &sysfs_backing_dev_uuid) {
+ /* convert binary uuid into 36-byte string plus '\0' */
+ snprintf(buf, 36+1, "%pU", dc->sb.uuid);
+ strcat(buf, "\n");
+ return strlen(buf);
+ }
+
#undef var
return 0;
}
@@ -262,10 +277,10 @@ STORE(__cached_dev)
sysfs_strtoul(data_csum, dc->disk.data_csum);
d_strtoul(verify);
- d_strtoul(bypass_torture_test);
- d_strtoul(writeback_metadata);
- d_strtoul(writeback_running);
- d_strtoul(writeback_delay);
+ sysfs_strtoul_bool(bypass_torture_test, dc->bypass_torture_test);
+ sysfs_strtoul_bool(writeback_metadata, dc->writeback_metadata);
+ sysfs_strtoul_bool(writeback_running, dc->writeback_running);
+ sysfs_strtoul_clamp(writeback_delay, dc->writeback_delay, 0, UINT_MAX);
sysfs_strtoul_clamp(writeback_percent, dc->writeback_percent,
0, bch_cutoff_writeback);
@@ -287,9 +302,15 @@ STORE(__cached_dev)
sysfs_strtoul_clamp(writeback_rate_update_seconds,
dc->writeback_rate_update_seconds,
1, WRITEBACK_RATE_UPDATE_SECS_MAX);
- d_strtoul(writeback_rate_i_term_inverse);
- d_strtoul_nonzero(writeback_rate_p_term_inverse);
- d_strtoul_nonzero(writeback_rate_minimum);
+ sysfs_strtoul_clamp(writeback_rate_i_term_inverse,
+ dc->writeback_rate_i_term_inverse,
+ 1, UINT_MAX);
+ sysfs_strtoul_clamp(writeback_rate_p_term_inverse,
+ dc->writeback_rate_p_term_inverse,
+ 1, UINT_MAX);
+ sysfs_strtoul_clamp(writeback_rate_minimum,
+ dc->writeback_rate_minimum,
+ 1, UINT_MAX);
sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
@@ -299,7 +320,9 @@ STORE(__cached_dev)
dc->io_disable = v ? 1 : 0;
}
- d_strtoi_h(sequential_cutoff);
+ sysfs_strtoul_clamp(sequential_cutoff,
+ dc->sequential_cutoff,
+ 0, UINT_MAX);
d_strtoi_h(readahead);
if (attr == &sysfs_clear_stats)
@@ -452,6 +475,8 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_verify,
&sysfs_bypass_torture_test,
#endif
+ &sysfs_backing_dev_name,
+ &sysfs_backing_dev_uuid,
NULL
};
KTYPE(bch_cached_dev);
@@ -761,10 +786,12 @@ STORE(__bch_cache_set)
c->shrink.scan_objects(&c->shrink, &sc);
}
- sysfs_strtoul(congested_read_threshold_us,
- c->congested_read_threshold_us);
- sysfs_strtoul(congested_write_threshold_us,
- c->congested_write_threshold_us);
+ sysfs_strtoul_clamp(congested_read_threshold_us,
+ c->congested_read_threshold_us,
+ 0, UINT_MAX);
+ sysfs_strtoul_clamp(congested_write_threshold_us,
+ c->congested_write_threshold_us,
+ 0, UINT_MAX);
if (attr == &sysfs_errors) {
v = __sysfs_match_string(error_actions, -1, buf);
@@ -774,12 +801,20 @@ STORE(__bch_cache_set)
c->on_error = v;
}
- if (attr == &sysfs_io_error_limit)
- c->error_limit = strtoul_or_return(buf);
+ sysfs_strtoul_clamp(io_error_limit, c->error_limit, 0, UINT_MAX);
/* See count_io_errors() for why 88 */
- if (attr == &sysfs_io_error_halflife)
- c->error_decay = strtoul_or_return(buf) / 88;
+ if (attr == &sysfs_io_error_halflife) {
+ unsigned long v = 0;
+ ssize_t ret;
+
+ ret = strtoul_safe_clamp(buf, v, 0, UINT_MAX);
+ if (!ret) {
+ c->error_decay = v / 88;
+ return size;
+ }
+ return ret;
+ }
if (attr == &sysfs_io_disable) {
v = strtoul_or_return(buf);
@@ -794,13 +829,15 @@ STORE(__bch_cache_set)
}
}
- sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
- sysfs_strtoul(verify, c->verify);
- sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
+ sysfs_strtoul_clamp(journal_delay_ms,
+ c->journal_delay_ms,
+ 0, USHRT_MAX);
+ sysfs_strtoul_bool(verify, c->verify);
+ sysfs_strtoul_bool(key_merging_disabled, c->key_merging_disabled);
sysfs_strtoul(expensive_debug_checks, c->expensive_debug_checks);
- sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite);
- sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled);
- sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled);
+ sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
+ sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
+ sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
/*
* write gc_after_writeback here may overwrite an already set
* BCH_DO_AUTO_GC, it doesn't matter because this flag will be
diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h
index 3fe82425859c..215df32f567b 100644
--- a/drivers/md/bcache/sysfs.h
+++ b/drivers/md/bcache/sysfs.h
@@ -79,11 +79,28 @@ do { \
return strtoul_safe(buf, var) ?: (ssize_t) size; \
} while (0)
+#define sysfs_strtoul_bool(file, var) \
+do { \
+ if (attr == &sysfs_ ## file) { \
+ unsigned long v = strtoul_or_return(buf); \
+ \
+ var = v ? 1 : 0; \
+ return size; \
+ } \
+} while (0)
+
#define sysfs_strtoul_clamp(file, var, min, max) \
do { \
- if (attr == &sysfs_ ## file) \
- return strtoul_safe_clamp(buf, var, min, max) \
- ?: (ssize_t) size; \
+ if (attr == &sysfs_ ## file) { \
+ unsigned long v = 0; \
+ ssize_t ret; \
+ ret = strtoul_safe_clamp(buf, v, min, max); \
+ if (!ret) { \
+ var = v; \
+ return size; \
+ } \
+ return ret; \
+ } \
} while (0)
#define strtoul_or_return(cp) \
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 20eddeac1531..62fb917f7a4f 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -270,7 +270,11 @@ int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp_mask)
int i;
struct bio_vec *bv;
- bio_for_each_segment_all(bv, bio, i) {
+ /*
+ * This is called on freshly new bio, so it is safe to access the
+ * bvec table directly.
+ */
+ for (i = 0, bv = bio->bi_io_vec; i < bio->bi_vcnt; bv++, i++) {
bv->bv_page = alloc_page(gfp_mask);
if (!bv->bv_page) {
while (--bv >= bio->bi_io_vec)
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index 6a743d3bb338..4e4c6810dc3c 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -71,6 +71,9 @@ static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
in_use > bch_cutoff_writeback_sync)
return false;
+ if (bio_op(bio) == REQ_OP_DISCARD)
+ return false;
+
if (dc->partial_stripes_expensive &&
bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
bio_sectors(bio)))
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index dd538e6b2748..dd6565798778 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1447,8 +1447,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
{
unsigned int i;
struct bio_vec *bv;
+ struct bvec_iter_all iter_all;
- bio_for_each_segment_all(bv, clone, i) {
+ bio_for_each_segment_all(bv, clone, i, iter_all) {
BUG_ON(!bv->bv_page);
mempool_free(bv->bv_page, &cc->page_pool);
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a20531e5f3b4..b2c03c079a8d 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -527,7 +527,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
md->tag_set->ops = &dm_mq_ops;
md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
md->tag_set->numa_node = md->numa_node_id;
- md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE;
md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
md->tag_set->driver_data = md;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4b1be754cc41..ba9481f1bf3c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1698,14 +1698,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
return q && !blk_queue_add_random(q);
}
-static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
-{
- struct request_queue *q = bdev_get_queue(dev->bdev);
-
- return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
-}
-
static bool dm_table_all_devices_attribute(struct dm_table *t,
iterate_devices_callout_fn func)
{
@@ -1902,11 +1894,6 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (!dm_table_supports_write_zeroes(t))
q->limits.max_write_zeroes_sectors = 0;
- if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
- blk_queue_flag_clear(QUEUE_FLAG_NO_SG_MERGE, q);
- else
- blk_queue_flag_set(QUEUE_FLAG_NO_SG_MERGE, q);
-
dm_table_verify_integrity(t);
/*
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index d45c697c0ebe..5998d78aa189 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -96,8 +96,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
int i, cnt;
bool discard_supported = false;
- conf = kzalloc (sizeof (*conf) + raid_disks*sizeof(struct dev_info),
- GFP_KERNEL);
+ conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
if (!conf)
return NULL;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index fa47249fa3e4..fdf451aac369 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1603,11 +1603,9 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
return;
}
set_bit(Blocked, &rdev->flags);
- if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags))
mddev->degraded++;
- set_bit(Faulty, &rdev->flags);
- } else
- set_bit(Faulty, &rdev->flags);
+ set_bit(Faulty, &rdev->flags);
spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
@@ -2120,13 +2118,14 @@ static void process_checks(struct r1bio *r1_bio)
struct page **spages = get_resync_pages(sbio)->pages;
struct bio_vec *bi;
int page_len[RESYNC_PAGES] = { 0 };
+ struct bvec_iter_all iter_all;
if (sbio->bi_end_io != end_sync_read)
continue;
/* Now we can 'fixup' the error value */
sbio->bi_status = 0;
- bio_for_each_segment_all(bi, sbio, j)
+ bio_for_each_segment_all(bi, sbio, j, iter_all)
page_len[j] = bi->bv_len;
if (!status) {