summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorTony Lindgren <tony@atomide.com>2021-01-15 12:48:43 +0200
committerTony Lindgren <tony@atomide.com>2021-01-15 12:48:43 +0200
commit715a1284d89a740b197b3bad5eb20d36a397382f (patch)
tree07ad2960ab66e56b3d1e151036262019a18e3df1 /drivers/md
parent181739822cf6f8f4e12b173913af2967a28906c0 (diff)
parent06862d789ddde8a99c1e579e934ca17c15a84755 (diff)
Merge branch 'cpuidle-fix' into fixes
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig22
-rw-r--r--drivers/md/Makefile20
-rw-r--r--drivers/md/bcache/request.c9
-rw-r--r--drivers/md/bcache/super.c40
-rw-r--r--drivers/md/bcache/sysfs.c2
-rw-r--r--drivers/md/bcache/writeback.c9
-rw-r--r--drivers/md/dm-cache-target.c7
-rw-r--r--drivers/md/dm-core.h7
-rw-r--r--drivers/md/dm-crypt.c13
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-ioctl.c1
-rw-r--r--drivers/md/dm-ps-historical-service-time.c (renamed from drivers/md/dm-historical-service-time.c)0
-rw-r--r--drivers/md/dm-ps-io-affinity.c272
-rw-r--r--drivers/md/dm-ps-queue-length.c (renamed from drivers/md/dm-queue-length.c)0
-rw-r--r--drivers/md/dm-ps-round-robin.c (renamed from drivers/md/dm-round-robin.c)0
-rw-r--r--drivers/md/dm-ps-service-time.c (renamed from drivers/md/dm-service-time.c)0
-rw-r--r--drivers/md/dm-raid.c15
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-stripe.c2
-rw-r--r--drivers/md/dm-switch.c1
-rw-r--r--drivers/md/dm-table.c9
-rw-r--r--drivers/md/dm-unstripe.c1
-rw-r--r--drivers/md/dm-verity-target.c12
-rw-r--r--drivers/md/dm-verity-verify-sig.c9
-rw-r--r--drivers/md/dm-zero.c1
-rw-r--r--drivers/md/dm.c60
-rw-r--r--drivers/md/md-cluster.c75
-rw-r--r--drivers/md/md-linear.c6
-rw-r--r--drivers/md/md.c148
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid0.c8
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/md/raid10.c9
-rw-r--r--drivers/md/raid5.c15
34 files changed, 534 insertions, 260 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 30ba3573626c..b7e2d9666614 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -463,6 +463,15 @@ config DM_MULTIPATH_HST
If unsure, say N.
+config DM_MULTIPATH_IOA
+ tristate "I/O Path Selector based on CPU submission"
+ depends on DM_MULTIPATH
+ help
+ This path selector selects the path based on the CPU the IO is
+ executed on and the CPU to path mapping setup at path addition time.
+
+ If unsure, say N.
+
config DM_DELAY
tristate "I/O delaying target"
depends on BLK_DEV_DM
@@ -530,11 +539,22 @@ config DM_VERITY_VERIFY_ROOTHASH_SIG
bool "Verity data device root hash signature verification support"
depends on DM_VERITY
select SYSTEM_DATA_VERIFICATION
- help
+ help
Add ability for dm-verity device to be validated if the
pre-generated tree of cryptographic checksums passed has a pkcs#7
signature file that can validate the roothash of the tree.
+ By default, rely on the builtin trusted keyring.
+
+ If unsure, say N.
+
+config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
+ bool "Verity data device root hash signature verification with secondary keyring"
+ depends on DM_VERITY_VERIFY_ROOTHASH_SIG
+ depends on SECONDARY_TRUSTED_KEYRING
+ help
+ Rely on the secondary trusted keyring to verify dm-verity signatures.
+
If unsure, say N.
config DM_VERITY_FEC
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 6d3e234dc46a..ef7ddc27685c 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -7,23 +7,28 @@ dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \
dm-rq.o
dm-multipath-y += dm-path-selector.o dm-mpath.o
+dm-historical-service-time-y += dm-ps-historical-service-time.o
+dm-io-affinity-y += dm-ps-io-affinity.o
+dm-queue-length-y += dm-ps-queue-length.o
+dm-round-robin-y += dm-ps-round-robin.o
+dm-service-time-y += dm-ps-service-time.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-y += dm-raid1.o
-dm-log-userspace-y \
- += dm-log-userspace-base.o dm-log-userspace-transfer.o
+dm-log-userspace-y += dm-log-userspace-base.o dm-log-userspace-transfer.o
dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \
dm-cache-background-tracker.o
-dm-cache-smq-y += dm-cache-policy-smq.o
+dm-cache-smq-y += dm-cache-policy-smq.o
dm-ebs-y += dm-ebs-target.o
dm-era-y += dm-era-target.o
dm-clone-y += dm-clone-target.o dm-clone-metadata.o
dm-verity-y += dm-verity-target.o
+dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
+
md-mod-y += md.o md-bitmap.o
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
-dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
linear-y += md-linear.o
multipath-y += md-multipath.o
faulty-y += md-faulty.o
@@ -59,14 +64,15 @@ obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o
obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o
obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o
+obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o
obj-$(CONFIG_DM_SWITCH) += dm-switch.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
-obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/
+obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
-obj-$(CONFIG_DM_RAID) += dm-raid.o
-obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
+obj-$(CONFIG_DM_RAID) += dm-raid.o
+obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
obj-$(CONFIG_DM_CACHE) += dm-cache.o
obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 214326383145..85b1f2a9b72d 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -475,7 +475,7 @@ struct search {
unsigned int read_dirty_data:1;
unsigned int cache_missed:1;
- struct hd_struct *part;
+ struct block_device *part;
unsigned long start_time;
struct btree_op op;
@@ -1073,7 +1073,7 @@ struct detached_dev_io_private {
unsigned long start_time;
bio_end_io_t *bi_end_io;
void *bi_private;
- struct hd_struct *part;
+ struct block_device *part;
};
static void detached_dev_end_io(struct bio *bio)
@@ -1230,8 +1230,9 @@ static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
if (dc->io_disable)
return -EIO;
-
- return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg);
+ if (!dc->bdev->bd_disk->fops->ioctl)
+ return -ENOTTY;
+ return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg);
}
void bch_cached_dev_request_init(struct cached_dev *dc)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 46a00134a36a..a4752ac410dc 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1114,9 +1114,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
- struct closure cl;
-
- closure_init_stack(&cl);
BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
BUG_ON(refcount_read(&dc->count));
@@ -1130,12 +1127,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
dc->writeback_thread = NULL;
}
- memset(&dc->sb.set_uuid, 0, 16);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
-
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
mutex_lock(&bch_register_lock);
calc_cached_dev_sectors(dc->disk.c);
@@ -1408,7 +1399,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
q->limits.raid_partial_stripes_expensive;
ret = bcache_device_init(&dc->disk, block_size,
- dc->bdev->bd_part->nr_sects - dc->sb.data_offset,
+ bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
dc->bdev, &bcache_cached_ops);
if (ret)
return ret;
@@ -1447,8 +1438,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto err;
err = "error creating kobject";
- if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
- "bcache"))
+ if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache"))
goto err;
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
@@ -2342,9 +2332,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto err;
}
- if (kobject_add(&ca->kobj,
- &part_to_dev(bdev->bd_part)->kobj,
- "bcache")) {
+ if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) {
err = "error calling kobject_add";
ret = -ENOMEM;
goto out;
@@ -2383,38 +2371,38 @@ kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
-static bool bch_is_open_backing(struct block_device *bdev)
+static bool bch_is_open_backing(dev_t dev)
{
struct cache_set *c, *tc;
struct cached_dev *dc, *t;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
list_for_each_entry_safe(dc, t, &c->cached_devs, list)
- if (dc->bdev == bdev)
+ if (dc->bdev->bd_dev == dev)
return true;
list_for_each_entry_safe(dc, t, &uncached_devices, list)
- if (dc->bdev == bdev)
+ if (dc->bdev->bd_dev == dev)
return true;
return false;
}
-static bool bch_is_open_cache(struct block_device *bdev)
+static bool bch_is_open_cache(dev_t dev)
{
struct cache_set *c, *tc;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
struct cache *ca = c->cache;
- if (ca->bdev == bdev)
+ if (ca->bdev->bd_dev == dev)
return true;
}
return false;
}
-static bool bch_is_open(struct block_device *bdev)
+static bool bch_is_open(dev_t dev)
{
- return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
+ return bch_is_open_cache(dev) || bch_is_open_backing(dev);
}
struct async_reg_args {
@@ -2538,15 +2526,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
sb);
if (IS_ERR(bdev)) {
if (bdev == ERR_PTR(-EBUSY)) {
- bdev = lookup_bdev(strim(path));
+ dev_t dev;
+
mutex_lock(&bch_register_lock);
- if (!IS_ERR(bdev) && bch_is_open(bdev))
+ if (lookup_bdev(strim(path), &dev) == 0 &&
+ bch_is_open(dev))
err = "device already registered";
else
err = "device busy";
mutex_unlock(&bch_register_lock);
- if (!IS_ERR(bdev))
- bdput(bdev);
if (attr == &ksysfs_register_quiet)
goto done;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 554e3afc9b68..00a520c03f41 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -404,7 +404,7 @@ STORE(__cached_dev)
if (!env)
return -ENOMEM;
add_uevent_var(env, "DRIVER=bcache");
- add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
+ add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid);
add_uevent_var(env, "CACHED_LABEL=%s", buf);
kobject_uevent_env(&disk_to_dev(dc->disk.disk)->kobj,
KOBJ_CHANGE,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3c74996978da..a129e4d2707c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -705,6 +705,15 @@ static int bch_writeback_thread(void *arg)
* bch_cached_dev_detach().
*/
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ memset(&dc->sb.set_uuid, 0, 16);
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
+
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+
up_write(&dc->writeback_lock);
break;
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 4bc453f5bbaa..541c45027cc8 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -2840,7 +2840,6 @@ static void cache_postsuspend(struct dm_target *ti)
static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
bool dirty, uint32_t hint, bool hint_valid)
{
- int r;
struct cache *cache = context;
if (dirty) {
@@ -2849,11 +2848,7 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
} else
clear_bit(from_cblock(cblock), cache->dirty_bitset);
- r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
- if (r)
- return r;
-
- return 0;
+ return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid);
}
/*
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index d522093cb39d..086d293c2b03 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -96,19 +96,12 @@ struct mapped_device {
*/
struct workqueue_struct *wq;
- /*
- * freeze/thaw support require holding onto a super block
- */
- struct super_block *frozen_sb;
-
/* forced geometry settings */
struct hd_geometry geometry;
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
- struct block_device *bdev;
-
struct dm_stats stats;
/* for blk-mq request-based DM support */
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 392337f16ecf..5f9f9b3a226d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1090,16 +1090,16 @@ static const struct crypt_iv_operations crypt_iv_tcw_ops = {
.post = crypt_iv_tcw_post
};
-static struct crypt_iv_operations crypt_iv_random_ops = {
+static const struct crypt_iv_operations crypt_iv_random_ops = {
.generator = crypt_iv_random_gen
};
-static struct crypt_iv_operations crypt_iv_eboiv_ops = {
+static const struct crypt_iv_operations crypt_iv_eboiv_ops = {
.ctr = crypt_iv_eboiv_ctr,
.generator = crypt_iv_eboiv_gen
};
-static struct crypt_iv_operations crypt_iv_elephant_ops = {
+static const struct crypt_iv_operations crypt_iv_elephant_ops = {
.ctr = crypt_iv_elephant_ctr,
.dtr = crypt_iv_elephant_dtr,
.init = crypt_iv_elephant_init,
@@ -3166,11 +3166,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
- cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ cc->crypt_queue = alloc_workqueue("kcryptd-%s",
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
+ WQ_UNBOUND | WQ_SYSFS,
num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index cb85610527c2..55bcfb74f51f 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -86,7 +86,7 @@ static int __ebs_rw_bvec(struct ebs_c *ec, int rw, struct bio_vec *bv, struct bv
else
ba = dm_bufio_new(ec->bufio, block, &b);
- if (unlikely(IS_ERR(ba))) {
+ if (IS_ERR(ba)) {
/*
* Carry on with next buffer, if any, to issue all possible
* data but return error.
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index cd0478d44058..5e306bba4375 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1600,6 +1600,7 @@ static int target_message(struct file *filp, struct dm_ioctl *param, size_t para
if (!argc) {
DMWARN("Empty message received.");
+ r = -EINVAL;
goto out_argv;
}
diff --git a/drivers/md/dm-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index 186f91e2752c..186f91e2752c 100644
--- a/drivers/md/dm-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
new file mode 100644
index 000000000000..077655cd4fae
--- /dev/null
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Oracle Corporation
+ *
+ * Module Author: Mike Christie
+ */
+#include "dm-path-selector.h"
+
+#include <linux/device-mapper.h>
+#include <linux/module.h>
+
+#define DM_MSG_PREFIX "multipath io-affinity"
+
+struct path_info {
+ struct dm_path *path;
+ cpumask_var_t cpumask;
+ refcount_t refcount;
+ bool failed;
+};
+
+struct selector {
+ struct path_info **path_map;
+ cpumask_var_t path_mask;
+ atomic_t map_misses;
+};
+
+static void ioa_free_path(struct selector *s, unsigned int cpu)
+{
+ struct path_info *pi = s->path_map[cpu];
+
+ if (!pi)
+ return;
+
+ if (refcount_dec_and_test(&pi->refcount)) {
+ cpumask_clear_cpu(cpu, s->path_mask);
+ free_cpumask_var(pi->cpumask);
+ kfree(pi);
+
+ s->path_map[cpu] = NULL;
+ }
+}
+
+static int ioa_add_path(struct path_selector *ps, struct dm_path *path,
+ int argc, char **argv, char **error)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi = NULL;
+ unsigned int cpu;
+ int ret;
+
+ if (argc != 1) {
+ *error = "io-affinity ps: invalid number of arguments";
+ return -EINVAL;
+ }
+
+ pi = kzalloc(sizeof(*pi), GFP_KERNEL);
+ if (!pi) {
+ *error = "io-affinity ps: Error allocating path context";
+ return -ENOMEM;
+ }
+
+ pi->path = path;
+ path->pscontext = pi;
+ refcount_set(&pi->refcount, 1);
+
+ if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) {
+ *error = "io-affinity ps: Error allocating cpumask context";
+ ret = -ENOMEM;
+ goto free_pi;
+ }
+
+ ret = cpumask_parse(argv[0], pi->cpumask);
+ if (ret) {
+ *error = "io-affinity ps: invalid cpumask";
+ ret = -EINVAL;
+ goto free_mask;
+ }
+
+ for_each_cpu(cpu, pi->cpumask) {
+ if (cpu >= nr_cpu_ids) {
+ DMWARN_LIMIT("Ignoring mapping for CPU %u. Max CPU is %u",
+ cpu, nr_cpu_ids);
+ break;
+ }
+
+ if (s->path_map[cpu]) {
+ DMWARN("CPU mapping for %u exists. Ignoring.", cpu);
+ continue;
+ }
+
+ cpumask_set_cpu(cpu, s->path_mask);
+ s->path_map[cpu] = pi;
+ refcount_inc(&pi->refcount);
+ continue;
+ }
+
+ if (refcount_dec_and_test(&pi->refcount)) {
+ *error = "io-affinity ps: No new/valid CPU mapping found";
+ ret = -EINVAL;
+ goto free_mask;
+ }
+
+ return 0;
+
+free_mask:
+ free_cpumask_var(pi->cpumask);
+free_pi:
+ kfree(pi);
+ return ret;
+}
+
+static int ioa_create(struct path_selector *ps, unsigned argc, char **argv)
+{
+ struct selector *s;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+ GFP_KERNEL);
+ if (!s->path_map)
+ goto free_selector;
+
+ if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL))
+ goto free_map;
+
+ atomic_set(&s->map_misses, 0);
+ ps->context = s;
+ return 0;
+
+free_map:
+ kfree(s->path_map);
+free_selector:
+ kfree(s);
+ return -ENOMEM;
+}
+
+static void ioa_destroy(struct path_selector *ps)
+{
+ struct selector *s = ps->context;
+ unsigned cpu;
+
+ for_each_cpu(cpu, s->path_mask)
+ ioa_free_path(s, cpu);
+
+ free_cpumask_var(s->path_mask);
+ kfree(s->path_map);
+ kfree(s);
+
+ ps->context = NULL;
+}
+
+static int ioa_status(struct path_selector *ps, struct dm_path *path,
+ status_type_t type, char *result, unsigned int maxlen)
+{
+ struct selector *s = ps->context;
+ struct path_info *pi;
+ int sz = 0;
+
+ if (!path) {
+ DMEMIT("0 ");
+ return sz;
+ }
+
+ switch(type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%d ", atomic_read(&s->map_misses));
+ break;
+ case STATUSTYPE_TABLE:
+ pi = path->pscontext;
+ DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask));
+ break;
+ }
+
+ return sz;
+}
+
+static void ioa_fail_path(struct path_selector *ps, struct dm_path *p)
+{
+ struct path_info *pi = p->pscontext;
+
+ pi->failed = true;
+}
+
+static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p)
+{
+ struct path_info *pi = p->pscontext;
+
+ pi->failed = false;
+ return 0;
+}
+
+static struct dm_path *ioa_select_path(struct path_selector *ps,
+ size_t nr_bytes)
+{
+ unsigned int cpu, node;
+ struct selector *s = ps->context;
+ const struct cpumask *cpumask;
+ struct path_info *pi;
+ int i;
+
+ cpu = get_cpu();
+
+ pi = s->path_map[cpu];
+ if (pi && !pi->failed)
+ goto done;
+
+ /*
+ * Perf is not optimal, but we at least try the local node then just
+ * try not to fail.
+ */
+ if (!pi)
+ atomic_inc(&s->map_misses);
+
+ node = cpu_to_node(cpu);
+ cpumask = cpumask_of_node(node);
+ for_each_cpu(i, cpumask) {
+ pi = s->path_map[i];
+ if (pi && !pi->failed)
+ goto done;
+ }
+
+ for_each_cpu(i, s->path_mask) {
+ pi = s->path_map[i];
+ if (pi && !pi->failed)
+ goto done;
+ }
+ pi = NULL;
+
+done:
+ put_cpu();
+ return pi ? pi->path : NULL;
+}
+
+static struct path_selector_type ioa_ps = {
+ .name = "io-affinity",
+ .module = THIS_MODULE,
+ .table_args = 1,
+ .info_args = 1,
+ .create = ioa_create,
+ .destroy = ioa_destroy,
+ .status = ioa_status,
+ .add_path = ioa_add_path,
+ .fail_path = ioa_fail_path,
+ .reinstate_path = ioa_reinstate_path,
+ .select_path = ioa_select_path,
+};
+
+static int __init dm_ioa_init(void)
+{
+ int ret = dm_register_path_selector(&ioa_ps);
+
+ if (ret < 0)
+ DMERR("register failed %d", ret);
+ return ret;
+}
+
+static void __exit dm_ioa_exit(void)
+{
+ int ret = dm_unregister_path_selector(&ioa_ps);
+
+ if (ret < 0)
+ DMERR("unregister failed %d", ret);
+}
+
+module_init(dm_ioa_init);
+module_exit(dm_ioa_exit);
+
+MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on");
+MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-ps-queue-length.c
index 5fd018d18418..5fd018d18418 100644
--- a/drivers/md/dm-queue-length.c
+++ b/drivers/md/dm-ps-queue-length.c
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-ps-round-robin.c
index bdbb7e6e8212..bdbb7e6e8212 100644
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-ps-round-robin.c
diff --git a/drivers/md/dm-service-time.c b/drivers/md/dm-ps-service-time.c
index 9cfda665e9eb..9cfda665e9eb 100644
--- a/drivers/md/dm-service-time.c
+++ b/drivers/md/dm-ps-service-time.c
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index dc8568ab96f2..23c38777e8f6 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -700,8 +700,7 @@ static void rs_set_capacity(struct raid_set *rs)
{
struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
- set_capacity(gendisk, rs->md.array_sectors);
- revalidate_disk_size(gendisk, true);
+ set_capacity_and_notify(gendisk, rs->md.array_sectors);
}
/*
@@ -3730,14 +3729,12 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
/*
- * RAID10 personality requires bio splitting,
- * RAID0/1/4/5/6 don't and process large discard bios properly.
+ * RAID1 and RAID10 personalities require bio splitting,
+ * RAID0/4/5/6 don't and process large discard bios properly.
*/
- if (rs_is_raid10(rs)) {
- limits->discard_granularity = max(chunk_size_bytes,
- limits->discard_granularity);
- limits->max_discard_sectors = min_not_zero(rs->md.chunk_sectors,
- limits->max_discard_sectors);
+ if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ limits->discard_granularity = chunk_size_bytes;
+ limits->max_discard_sectors = rs->md.chunk_sectors;
}
}
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 729a72ec30cc..13b4385f4d5a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -397,7 +397,7 @@ static int map_request(struct dm_rq_target_io *tio)
}
/* The target has remapped the I/O so dispatch it */
- trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
+ trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 151d022b032d..df359d33cda8 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -496,7 +496,7 @@ static void stripe_io_hints(struct dm_target *ti,
static struct target_type stripe_target = {
.name = "striped",
.version = {1, 6, 0},
- .features = DM_TARGET_PASSES_INTEGRITY,
+ .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index bff4c7fa1cd2..262e2b0fd975 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -550,6 +550,7 @@ static int switch_iterate_devices(struct dm_target *ti,
static struct target_type switch_target = {
.name = "switch",
.version = {1, 1, 0},
+ .features = DM_TARGET_NOWAIT,
.module = THIS_MODULE,
.ctr = switch_ctr,
.dtr = switch_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 7eeb7c4169c9..188f41287f18 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -347,16 +347,9 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
dev_t dm_get_dev_t(const char *path)
{
dev_t dev;
- struct block_device *bdev;
- bdev = lookup_bdev(path);
- if (IS_ERR(bdev))
+ if (lookup_bdev(path, &dev))
dev = name_to_dev_t(path);
- else {
- dev = bdev->bd_dev;
- bdput(bdev);
- }
-
return dev;
}
EXPORT_SYMBOL_GPL(dm_get_dev_t);
diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c
index e673dacf6418..7357c1bd5863 100644
--- a/drivers/md/dm-unstripe.c
+++ b/drivers/md/dm-unstripe.c
@@ -178,6 +178,7 @@ static void unstripe_io_hints(struct dm_target *ti,
static struct target_type unstripe_target = {
.name = "unstriped",
.version = {1, 1, 0},
+ .features = DM_TARGET_NOWAIT,
.module = THIS_MODULE,
.ctr = unstripe_ctr,
.dtr = unstripe_dtr,
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index f74982dcbea0..6b8e5bdd8526 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -538,6 +538,15 @@ static int verity_verify_io(struct dm_verity_io *io)
}
/*
+ * Skip verity work in response to I/O error when system is shutting down.
+ */
+static inline bool verity_is_system_shutting_down(void)
+{
+ return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
+ || system_state == SYSTEM_RESTART;
+}
+
+/*
* End one "io" structure with a given error.
*/
static void verity_finish_io(struct dm_verity_io *io, blk_status_t status)
@@ -564,7 +573,8 @@ static void verity_end_io(struct bio *bio)
{
struct dm_verity_io *io = bio->bi_private;
- if (bio->bi_status && !verity_fec_is_enabled(io->v)) {
+ if (bio->bi_status &&
+ (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) {
verity_finish_io(io, bio->bi_status);
return;
}
diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
index 614e43db93aa..29385dc470d5 100644
--- a/drivers/md/dm-verity-verify-sig.c
+++ b/drivers/md/dm-verity-verify-sig.c
@@ -119,8 +119,13 @@ int verity_verify_root_hash(const void *root_hash, size_t root_hash_len,
}
ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data,
- sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE,
- NULL, NULL);
+ sig_len,
+#ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING
+ VERIFY_USE_SECONDARY_KEYRING,
+#else
+ NULL,
+#endif
+ VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL);
return ret;
}
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index b65ca8dcfbdc..faa1dbffc8b4 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -59,6 +59,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio)
static struct target_type zero_target = {
.name = "zero",
.version = {1, 1, 0},
+ .features = DM_TARGET_NOWAIT,
.module = THIS_MODULE,
.ctr = zero_ctr,
.map = zero_map,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4e0cbfe3f14d..b3c3c8b4cb42 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -570,7 +570,10 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
}
}
- r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
+ if (!bdev->bd_disk->fops->ioctl)
+ r = -ENOTTY;
+ else
+ r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
out:
dm_unprepare_ioctl(md, srcu_idx);
return r;
@@ -1274,8 +1277,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
break;
case DM_MAPIO_REMAPPED:
/* the bio has been remapped so dispatch it */
- trace_block_bio_remap(clone->bi_disk->queue, clone,
- bio_dev(io->orig_bio), sector);
+ trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector);
ret = submit_bio_noacct(clone);
break;
case DM_MAPIO_KILL:
@@ -1420,18 +1422,12 @@ static int __send_empty_flush(struct clone_info *ci)
*/
bio_init(&flush_bio, NULL, 0);
flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC;
+ flush_bio.bi_disk = ci->io->md->disk;
+ bio_associate_blkg(&flush_bio);
+
ci->bio = &flush_bio;
ci->sector_count = 0;
- /*
- * Empty flush uses a statically initialized bio, as the base for
- * cloning. However, blkg association requires that a bdev is
- * associated with a gendisk, which doesn't happen until the bdev is
- * opened. So, blkg association is done at issue time of the flush
- * rather than when the device is created in alloc_dev().
- */
- bio_set_dev(ci->bio, ci->io->md->bdev);
-
BUG_ON(bio_has_data(ci->bio));
while ((ti = dm_table_get_target(ci->map, target_nr++)))
__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
@@ -1590,7 +1586,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
ci.sector_count = bio_sectors(bio);
while (ci.sector_count && !error) {
error = __split_and_process_non_flush(&ci);
- if (current->bio_list && ci.sector_count && !error) {
+ if (ci.sector_count && !error) {
/*
* Remainder must be passed to submit_bio_noacct()
* so that it gets handled *after* bios already submitted
@@ -1611,12 +1607,12 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* (by eliminating DM's splitting and just using bio_split)
*/
part_stat_lock();
- __dm_part_stat_sub(&dm_disk(md)->part0,
+ __dm_part_stat_sub(dm_disk(md)->part0,
sectors[op_stat_group(bio_op(bio))], ci.sector_count);
part_stat_unlock();
bio_chain(b, bio);
- trace_block_split(md->queue, b, bio->bi_iter.bi_sector);
+ trace_block_split(b, bio->bi_iter.bi_sector);
ret = submit_bio_noacct(bio);
break;
}
@@ -1748,11 +1744,6 @@ static void cleanup_mapped_device(struct mapped_device *md)
cleanup_srcu_struct(&md->io_barrier);
- if (md->bdev) {
- bdput(md->bdev);
- md->bdev = NULL;
- }
-
mutex_destroy(&md->suspend_lock);
mutex_destroy(&md->type_lock);
mutex_destroy(&md->table_devices_lock);
@@ -1844,10 +1835,6 @@ static struct mapped_device *alloc_dev(int minor)
if (!md->wq)
goto bad;
- md->bdev = bdget_disk(md->disk, 0);
- if (!md->bdev)
- goto bad;
-
dm_stats_init(&md->stats);
/* Populate the mapping, nobody knows we exist yet */
@@ -1972,8 +1959,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
if (size != dm_get_size(md))
memset(&md->geometry, 0, sizeof(md->geometry));
- set_capacity(md->disk, size);
- bd_set_nr_sectors(md->bdev, size);
+ set_capacity_and_notify(md->disk, size);
dm_table_event_callback(t, event_callback, md);
@@ -2256,7 +2242,7 @@ EXPORT_SYMBOL_GPL(dm_put);
static bool md_in_flight_bios(struct mapped_device *md)
{
int cpu;
- struct hd_struct *part = &dm_disk(md)->part0;
+ struct block_device *part = dm_disk(md)->part0;
long sum = 0;
for_each_possible_cpu(cpu) {
@@ -2391,27 +2377,19 @@ static int lock_fs(struct mapped_device *md)
{
int r;
- WARN_ON(md->frozen_sb);
+ WARN_ON(test_bit(DMF_FROZEN, &md->flags));
- md->frozen_sb = freeze_bdev(md->bdev);
- if (IS_ERR(md->frozen_sb)) {
- r = PTR_ERR(md->frozen_sb);
- md->frozen_sb = NULL;
- return r;
- }
-
- set_bit(DMF_FROZEN, &md->flags);
-
- return 0;
+ r = freeze_bdev(md->disk->part0);
+ if (!r)
+ set_bit(DMF_FROZEN, &md->flags);
+ return r;
}
static void unlock_fs(struct mapped_device *md)
{
if (!test_bit(DMF_FROZEN, &md->flags))
return;
-
- thaw_bdev(md->bdev, md->frozen_sb);
- md->frozen_sb = NULL;
+ thaw_bdev(md->disk->part0);
clear_bit(DMF_FROZEN, &md->flags);
}
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 4aaf4820b6f6..7fbd41e156c9 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -581,8 +581,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
process_metadata_update(mddev, msg);
break;
case CHANGE_CAPACITY:
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
break;
case RESYNCING:
set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
@@ -664,9 +663,27 @@ out:
* Takes the lock on the TOKEN lock resource so no other
* node can communicate while the operation is underway.
*/
-static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked)
+static int lock_token(struct md_cluster_info *cinfo)
{
- int error, set_bit = 0;
+ int error;
+
+ error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
+ if (error) {
+ pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
+ __func__, __LINE__, error);
+ } else {
+ /* Lock the receive sequence */
+ mutex_lock(&cinfo->recv_mutex);
+ }
+ return error;
+}
+
+/* lock_comm()
+ * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel.
+ */
+static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked)
+{
+ int rv, set_bit = 0;
struct mddev *mddev = cinfo->mddev;
/*
@@ -677,34 +694,19 @@ static int lock_token(struct md_cluster_info *cinfo, bool mddev_locked)
*/
if (mddev_locked && !test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
&cinfo->state)) {
- error = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
+ rv = test_and_set_bit_lock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD,
&cinfo->state);
- WARN_ON_ONCE(error);
+ WARN_ON_ONCE(rv);
md_wakeup_thread(mddev->thread);
set_bit = 1;
}
- error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
- if (set_bit)
- clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
- if (error)
- pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
- __func__, __LINE__, error);
-
- /* Lock the receive sequence */
- mutex_lock(&cinfo->recv_mutex);
- return error;
-}
-
-/* lock_comm()
- * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel.
- */
-static int lock_comm(struct md_cluster_info *cinfo, bool mddev_locked)
-{
wait_event(cinfo->wait,
!test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state));
-
- return lock_token(cinfo, mddev_locked);
+ rv = lock_token(cinfo);
+ if (set_bit)
+ clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
+ return rv;
}
static void unlock_comm(struct md_cluster_info *cinfo)
@@ -784,9 +786,11 @@ static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg,
{
int ret;
- lock_comm(cinfo, mddev_locked);
- ret = __sendmsg(cinfo, cmsg);
- unlock_comm(cinfo);
+ ret = lock_comm(cinfo, mddev_locked);
+ if (!ret) {
+ ret = __sendmsg(cinfo, cmsg);
+ unlock_comm(cinfo);
+ }
return ret;
}
@@ -1061,7 +1065,7 @@ static int metadata_update_start(struct mddev *mddev)
return 0;
}
- ret = lock_token(cinfo, 1);
+ ret = lock_token(cinfo);
clear_bit_unlock(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
return ret;
}
@@ -1255,7 +1259,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors)
int raid_slot = -1;
md_update_sb(mddev, 1);
- lock_comm(cinfo, 1);
+ if (lock_comm(cinfo, 1)) {
+ pr_err("%s: lock_comm failed\n", __func__);
+ return;
+ }
memset(&cmsg, 0, sizeof(cmsg));
cmsg.type = cpu_to_le32(METADATA_UPDATED);
@@ -1296,13 +1303,10 @@ static void update_size(struct mddev *mddev, sector_t old_dev_sectors)
if (ret)
pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n",
__func__, __LINE__);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
} else {
/* revert to previous sectors */
ret = mddev->pers->resize(mddev, old_dev_sectors);
- if (!ret)
- revalidate_disk_size(mddev->gendisk, true);
ret = __sendmsg(cinfo, &cmsg);
if (ret)
pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
@@ -1407,7 +1411,8 @@ static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
cmsg.type = cpu_to_le32(NEWDISK);
memcpy(cmsg.uuid, uuid, 16);
cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
- lock_comm(cinfo, 1);
+ if (lock_comm(cinfo, 1))
+ return -EAGAIN;
ret = __sendmsg(cinfo, &cmsg);
if (ret) {
unlock_comm(cinfo);
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 5ab22069b5be..68cac7d19278 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -200,9 +200,8 @@ static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
"copied raid_disks doesn't match mddev->raid_disks");
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
- set_capacity(mddev->gendisk, mddev->array_sectors);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
mddev_resume(mddev);
- revalidate_disk_size(mddev->gendisk, true);
kfree_rcu(oldconf, rcu);
return 0;
}
@@ -258,8 +257,7 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
bio_endio(bio);
} else {
if (mddev->gendisk)
- trace_block_bio_remap(bio->bi_disk->queue,
- bio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_writesame(mddev, bio);
mddev_check_write_zeroes(mddev, bio);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0037c6ecab65..ca409428b4fc 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -464,7 +464,7 @@ struct md_io {
bio_end_io_t *orig_bi_end_io;
void *orig_bi_private;
unsigned long start_time;
- struct hd_struct *part;
+ struct block_device *part;
};
static void md_end_io(struct bio *bio)
@@ -639,7 +639,7 @@ static void md_submit_flush_data(struct work_struct *ws)
* could wait for this and below md_handle_request could wait for those
* bios because of suspend check
*/
- mddev->last_flush = mddev->start_flush;
+ mddev->prev_flush_start = mddev->start_flush;
mddev->flush_bio = NULL;
wake_up(&mddev->sb_wait);
@@ -660,13 +660,17 @@ static void md_submit_flush_data(struct work_struct *ws)
*/
bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
- ktime_t start = ktime_get_boottime();
+ ktime_t req_start = ktime_get_boottime();
spin_lock_irq(&mddev->lock);
+ /* flush requests wait until ongoing flush completes,
+ * hence coalescing all the pending requests.
+ */
wait_event_lock_irq(mddev->sb_wait,
!mddev->flush_bio ||
- ktime_after(mddev->last_flush, start),
+ ktime_before(req_start, mddev->prev_flush_start),
mddev->lock);
- if (!ktime_after(mddev->last_flush, start)) {
+ /* new request after previous flush is completed */
+ if (ktime_after(req_start, mddev->prev_flush_start)) {
WARN_ON(mddev->flush_bio);
mddev->flush_bio = bio;
bio = NULL;
@@ -2414,7 +2418,6 @@ EXPORT_SYMBOL(md_integrity_add_rdev);
static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
{
char b[BDEVNAME_SIZE];
- struct kobject *ko;
int err;
/* prevent duplicates */
@@ -2477,9 +2480,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
- ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
/* failure here is OK */
- err = sysfs_create_link(&rdev->kobj, ko, "block");
+ err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block");
rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
rdev->sysfs_unack_badblocks =
sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
@@ -5355,10 +5357,9 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
if (!err) {
mddev->array_sectors = sectors;
- if (mddev->pers) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (mddev->pers)
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
mddev_unlock(mddev);
return err ?: len;
@@ -5765,11 +5766,12 @@ static int md_alloc(dev_t dev, char *name)
return error;
}
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+static void md_probe(dev_t dev)
{
+ if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512)
+ return;
if (create_on_open)
md_alloc(dev, NULL);
- return NULL;
}
static int add_named_array(const char *val, const struct kernel_param *kp)
@@ -6107,8 +6109,7 @@ int do_md_run(struct mddev *mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
clear_bit(MD_NOT_READY, &mddev->flags);
mddev->changed = 1;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
@@ -6423,10 +6424,9 @@ static int do_md_stop(struct mddev *mddev, int mode,
if (rdev->raid_disk >= 0)
sysfs_unlink_rdev(mddev, rdev);
- set_capacity(disk, 0);
+ set_capacity_and_notify(disk, 0);
mutex_unlock(&mddev->open_mutex);
mddev->changed = 1;
- revalidate_disk_size(disk, true);
if (mddev->ro)
mddev->ro = 0;
@@ -6535,7 +6535,7 @@ static void autorun_devices(int part)
break;
}
- md_probe(dev, NULL, NULL);
+ md_probe(dev);
mddev = mddev_find(dev);
if (!mddev || !mddev->gendisk) {
if (mddev)
@@ -6948,8 +6948,10 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
goto busy;
kick_rdev:
- if (mddev_is_clustered(mddev))
- md_cluster_ops->remove_disk(mddev, rdev);
+ if (mddev_is_clustered(mddev)) {
+ if (md_cluster_ops->remove_disk(mddev, rdev))
+ goto busy;
+ }
md_kick_rdev_from_array(rdev);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
@@ -7257,8 +7259,8 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
if (mddev_is_clustered(mddev))
md_cluster_ops->update_size(mddev, old_dev_sectors);
else if (mddev->queue) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
}
return rv;
@@ -7278,6 +7280,7 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
return -EINVAL;
if (mddev->sync_thread ||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+ test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) ||
mddev->reshape_position != MaxSector)
return -EBUSY;
@@ -7480,7 +7483,6 @@ static inline bool md_ioctl_valid(unsigned int cmd)
{
switch (cmd) {
case ADD_NEW_DISK:
- case BLKROSET:
case GET_ARRAY_INFO:
case GET_BITMAP_FILE:
case GET_DISK_INFO:
@@ -7507,7 +7509,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
int err = 0;
void __user *argp = (void __user *)arg;
struct mddev *mddev = NULL;
- int ro;
bool did_set_md_closing = false;
if (!md_ioctl_valid(cmd))
@@ -7590,8 +7591,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
err = -EBUSY;
goto out;
}
- WARN_ON_ONCE(test_bit(MD_CLOSING, &mddev->flags));
- set_bit(MD_CLOSING, &mddev->flags);
+ if (test_and_set_bit(MD_CLOSING, &mddev->flags)) {
+ mutex_unlock(&mddev->open_mutex);
+ err = -EBUSY;
+ goto out;
+ }
did_set_md_closing = true;
mutex_unlock(&mddev->open_mutex);
sync_blockdev(bdev);
@@ -7687,35 +7691,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto unlock;
}
break;
-
- case BLKROSET:
- if (get_user(ro, (int __user *)(arg))) {
- err = -EFAULT;
- goto unlock;
- }
- err = -EINVAL;
-
- /* if the bdev is going readonly the value of mddev->ro
- * does not matter, no writes are coming
- */
- if (ro)
- goto unlock;
-
- /* are we are already prepared for writes? */
- if (mddev->ro != 1)
- goto unlock;
-
- /* transitioning to readauto need only happen for
- * arrays that call md_write_start
- */
- if (mddev->pers) {
- err = restart_array(mddev);
- if (err == 0) {
- mddev->ro = 2;
- set_disk_ro(mddev->gendisk, 0);
- }
- }
- goto unlock;
}
/*
@@ -7809,6 +7784,36 @@ static int md_compat_ioctl(struct block_device *bdev, fmode_t mode,
}
#endif /* CONFIG_COMPAT */
+static int md_set_read_only(struct block_device *bdev, bool ro)
+{
+ struct mddev *mddev = bdev->bd_disk->private_data;
+ int err;
+
+ err = mddev_lock(mddev);
+ if (err)
+ return err;
+
+ if (!mddev->raid_disks && !mddev->external) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+
+ /*
+ * Transitioning to read-auto need only happen for arrays that call
+ * md_write_start and which are not ready for writes yet.
+ */
+ if (!ro && mddev->ro == 1 && mddev->pers) {
+ err = restart_array(mddev);
+ if (err)
+ goto out_unlock;
+ mddev->ro = 2;
+ }
+
+out_unlock:
+ mddev_unlock(mddev);
+ return err;
+}
+
static int md_open(struct block_device *bdev, fmode_t mode)
{
/*
@@ -7886,6 +7891,7 @@ const struct block_device_operations md_fops =
#endif
.getgeo = md_getgeo,
.check_events = md_check_events,
+ .set_read_only = md_set_read_only,
};
static int md_thread(void *arg)
@@ -8445,7 +8451,7 @@ static int is_mddev_idle(struct mddev *mddev, int init)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev) {
struct gendisk *disk = rdev->bdev->bd_disk;
- curr_events = (int)part_stat_read_accum(&disk->part0, sectors) -
+ curr_events = (int)part_stat_read_accum(disk->part0, sectors) -
atomic_read(&disk->sync_io);
/* sync IO will cause sync_io to increase before the disk_stats
* as sync_io is counted when a request starts, and
@@ -9015,10 +9021,9 @@ void md_do_sync(struct md_thread *thread)
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
- if (!mddev_is_clustered(mddev)) {
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk_size(mddev->gendisk, true);
- }
+ if (!mddev_is_clustered(mddev))
+ set_capacity_and_notify(mddev->gendisk,
+ mddev->array_sectors);
}
spin_lock(&mddev->lock);
@@ -9547,18 +9552,15 @@ static int __init md_init(void)
if (!md_rdev_misc_wq)
goto err_rdev_misc_wq;
- if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
+ ret = __register_blkdev(MD_MAJOR, "md", md_probe);
+ if (ret < 0)
goto err_md;
- if ((ret = register_blkdev(0, "mdp")) < 0)
+ ret = __register_blkdev(0, "mdp", md_probe);
+ if (ret < 0)
goto err_mdp;
mdp_major = ret;
- blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE,
- md_probe, NULL, NULL);
- blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
- md_probe, NULL, NULL);
-
register_reboot_notifier(&md_notifier);
raid_table_header = register_sysctl_table(raid_root_table);
@@ -9642,8 +9644,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
}
}
- if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
- update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
+ if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) {
+ ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
+ if (ret)
+ pr_warn("md: updating array disks failed. %d\n", ret);
+ }
/*
* Since mddev->delta_disks has already updated in update_raid_disks,
@@ -9825,9 +9830,6 @@ static __exit void md_exit(void)
struct list_head *tmp;
int delay = 1;
- blk_unregister_region(MKDEV(MD_MAJOR,0), 512);
- blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
-
unregister_blkdev(MD_MAJOR,"md");
unregister_blkdev(mdp_major, "mdp");
unregister_reboot_notifier(&md_notifier);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bb645bc3ba6d..34070ab30a8a 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -311,7 +311,7 @@ struct mddev {
int external; /* metadata is
* managed externally */
char metadata_type[17]; /* externally set*/
- unsigned int chunk_sectors;
+ int chunk_sectors;
time64_t ctime, utime;
int level, layout;
char clevel[16];
@@ -339,7 +339,7 @@ struct mddev {
*/
sector_t reshape_position;
int delta_disks, new_level, new_layout;
- unsigned int new_chunk_sectors;
+ int new_chunk_sectors;
int reshape_backwards;
struct md_thread *thread; /* management thread */
@@ -495,9 +495,9 @@ struct mddev {
*/
struct bio *flush_bio;
atomic_t flush_pending;
- ktime_t start_flush, last_flush; /* last_flush is when the last completed
- * flush was started.
- */
+ ktime_t start_flush, prev_flush_start; /* prev_flush_start is when the previous completed
+ * flush was started.
+ */
struct work_struct flush_work;
struct work_struct event_work; /* used by dm to report failure event */
mempool_t *serial_info_pool;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 35843df15b5e..67f157f2525d 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -508,8 +508,8 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
if (mddev->gendisk)
- trace_block_bio_remap(bdev_get_queue(rdev->bdev),
- discard_bio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(discard_bio,
+ disk_devt(mddev->gendisk),
bio->bi_iter.bi_sector);
submit_bio_noacct(discard_bio);
}
@@ -581,8 +581,8 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
tmp_dev->data_offset;
if (mddev->gendisk)
- trace_block_bio_remap(bio->bi_disk->queue, bio,
- disk_devt(mddev->gendisk), bio_sector);
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
+ bio_sector);
mddev_check_writesame(mddev, bio);
mddev_check_write_zeroes(mddev, bio);
submit_bio_noacct(bio);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 960d854c07f8..c0347997f6ff 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1305,8 +1305,8 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r1_bio;
if (mddev->gendisk)
- trace_block_bio_remap(read_bio->bi_disk->queue, read_bio,
- disk_devt(mddev->gendisk), r1_bio->sector);
+ trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
+ r1_bio->sector);
submit_bio_noacct(read_bio);
}
@@ -1517,8 +1517,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&r1_bio->remaining);
if (mddev->gendisk)
- trace_block_bio_remap(mbio->bi_disk->queue,
- mbio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(mbio, disk_devt(mddev->gendisk),
r1_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_disk = (void *)conf->mirrors[i].rdev;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3b598a3cb462..c5d88ef6a45c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1128,7 +1128,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
struct md_rdev *err_rdev = NULL;
gfp_t gfp = GFP_NOIO;
- if (r10_bio->devs[slot].rdev) {
+ if (slot >= 0 && r10_bio->devs[slot].rdev) {
/*
* This is an error retry, but we cannot
* safely dereference the rdev in the r10_bio,
@@ -1201,8 +1201,7 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_private = r10_bio;
if (mddev->gendisk)
- trace_block_bio_remap(read_bio->bi_disk->queue,
- read_bio, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk),
r10_bio->sector);
submit_bio_noacct(read_bio);
return;
@@ -1251,8 +1250,7 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_private = r10_bio;
if (conf->mddev->gendisk)
- trace_block_bio_remap(mbio->bi_disk->queue,
- mbio, disk_devt(conf->mddev->gendisk),
+ trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk),
r10_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_disk = (void *)rdev;
@@ -1493,6 +1491,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors)
r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_iter.bi_sector;
r10_bio->state = 0;
+ r10_bio->read_slot = -1;
memset(r10_bio->devs, 0, sizeof(r10_bio->devs[0]) * conf->copies);
if (bio_data_dir(bio) == READ)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 39343479ac2a..3a90cc0e43ca 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1222,9 +1222,9 @@ again:
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
if (conf->mddev->gendisk)
- trace_block_bio_remap(bi->bi_disk->queue,
- bi, disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ trace_block_bio_remap(bi,
+ disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, bi);
else
@@ -1272,9 +1272,9 @@ again:
if (op == REQ_OP_DISCARD)
rbi->bi_vcnt = 0;
if (conf->mddev->gendisk)
- trace_block_bio_remap(rbi->bi_disk->queue,
- rbi, disk_devt(conf->mddev->gendisk),
- sh->dev[i].sector);
+ trace_block_bio_remap(rbi,
+ disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
if (should_defer && op_is_write(op))
bio_list_add(&pending_bios, rbi);
else
@@ -5468,8 +5468,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
spin_unlock_irq(&conf->device_lock);
if (mddev->gendisk)
- trace_block_bio_remap(align_bi->bi_disk->queue,
- align_bi, disk_devt(mddev->gendisk),
+ trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk),
raid_bio->bi_iter.bi_sector);
submit_bio_noacct(align_bi);
return 1;