summaryrefslogtreecommitdiff
path: root/drivers/md/raid5-ppl.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-07 12:41:48 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-07 12:41:48 -0700
commit3645e6d0dc80be4376f87acc9ee527768387c909 (patch)
tree78cc68cb09c9d24a6bcfaf842a3bca671ed53ee0 /drivers/md/raid5-ppl.c
parent15d8ffc96464f6571ecf22043c45fad659f11bdd (diff)
parente8a27f836f165c26f867ece7f31eb5c811692319 (diff)
Merge tag 'md/4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li: "This update mainly fixes bugs: - Make raid5 ppl support several ppl from Pawel - Several raid5-cache bug fixes from Song - Bitmap fixes from Neil and Me - One raid1/10 regression fix since 4.12 from Me - Other small fixes and cleanup" * tag 'md/4.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: md/bitmap: disable bitmap_resize for file-backed bitmaps. raid5-ppl: Recovery support for multiple partial parity logs md: Runtime support for multiple ppls md/raid0: attach correct cgroup info in bio lib/raid6: align AVX512 constants to 512 bits, not bytes raid5: remove raid5_build_block md/r5cache: call mddev_lock/unlock() in r5c_journal_mode_show md: replace seq_release_private with seq_release md: notify about new spare disk in the container md/raid1/10: reset bio allocated from mempool md/raid5: release/flush io in raid5_do_work() md/bitmap: copy correct data for bitmap super
Diffstat (limited to 'drivers/md/raid5-ppl.c')
-rw-r--r--drivers/md/raid5-ppl.c171
1 files changed, 130 insertions, 41 deletions
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 1e237c40d6fa..cd026c88f7ef 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -87,6 +87,8 @@
* The current io_unit accepting new stripes is always at the end of the list.
*/
+#define PPL_SPACE_SIZE (128 * 1024)
+
struct ppl_conf {
struct mddev *mddev;
@@ -122,6 +124,10 @@ struct ppl_log {
* always at the end of io_list */
spinlock_t io_list_lock;
struct list_head io_list; /* all io_units of this log */
+
+ sector_t next_io_sector;
+ unsigned int entry_space;
+ bool use_multippl;
};
#define PPL_IO_INLINE_BVECS 32
@@ -264,13 +270,12 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
int i;
sector_t data_sector = 0;
int data_disks = 0;
- unsigned int entry_space = (log->rdev->ppl.size << 9) - PPL_HEADER_SIZE;
struct r5conf *conf = sh->raid_conf;
pr_debug("%s: stripe: %llu\n", __func__, (unsigned long long)sh->sector);
/* check if current io_unit is full */
- if (io && (io->pp_size == entry_space ||
+ if (io && (io->pp_size == log->entry_space ||
io->entries_count == PPL_HDR_MAX_ENTRIES)) {
pr_debug("%s: add io_unit blocked by seq: %llu\n",
__func__, io->seq);
@@ -451,12 +456,25 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
pplhdr->entries_count = cpu_to_le32(io->entries_count);
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
+ /* Rewind the buffer if current PPL is larger then remaining space */
+ if (log->use_multippl &&
+ log->rdev->ppl.sector + log->rdev->ppl.size - log->next_io_sector <
+ (PPL_HEADER_SIZE + io->pp_size) >> 9)
+ log->next_io_sector = log->rdev->ppl.sector;
+
+
bio->bi_end_io = ppl_log_endio;
bio->bi_opf = REQ_OP_WRITE | REQ_FUA;
bio_set_dev(bio, log->rdev->bdev);
- bio->bi_iter.bi_sector = log->rdev->ppl.sector;
+ bio->bi_iter.bi_sector = log->next_io_sector;
bio_add_page(bio, io->header_page, PAGE_SIZE, 0);
+ pr_debug("%s: log->current_io_sector: %llu\n", __func__,
+ (unsigned long long)log->next_io_sector);
+
+ if (log->use_multippl)
+ log->next_io_sector += (PPL_HEADER_SIZE + io->pp_size) >> 9;
+
list_for_each_entry(sh, &io->stripe_list, log_list) {
/* entries for full stripe writes have no partial parity */
if (test_bit(STRIPE_FULL_WRITE, &sh->state))
@@ -813,12 +831,14 @@ out:
return ret;
}
-static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr)
+static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
+ sector_t offset)
{
struct ppl_conf *ppl_conf = log->ppl_conf;
struct md_rdev *rdev = log->rdev;
struct mddev *mddev = rdev->mddev;
- sector_t ppl_sector = rdev->ppl.sector + (PPL_HEADER_SIZE >> 9);
+ sector_t ppl_sector = rdev->ppl.sector + offset +
+ (PPL_HEADER_SIZE >> 9);
struct page *page;
int i;
int ret = 0;
@@ -902,6 +922,9 @@ static int ppl_write_empty_header(struct ppl_log *log)
return -ENOMEM;
pplhdr = page_address(page);
+ /* zero out PPL space to avoid collision with old PPLs */
+ blkdev_issue_zeroout(rdev->bdev, rdev->ppl.sector,
+ log->rdev->ppl.size, GFP_NOIO, 0);
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
@@ -922,63 +945,110 @@ static int ppl_load_distributed(struct ppl_log *log)
struct ppl_conf *ppl_conf = log->ppl_conf;
struct md_rdev *rdev = log->rdev;
struct mddev *mddev = rdev->mddev;
- struct page *page;
- struct ppl_header *pplhdr;
+ struct page *page, *page2, *tmp;
+ struct ppl_header *pplhdr = NULL, *prev_pplhdr = NULL;
u32 crc, crc_stored;
u32 signature;
- int ret = 0;
+ int ret = 0, i;
+ sector_t pplhdr_offset = 0, prev_pplhdr_offset = 0;
pr_debug("%s: disk: %d\n", __func__, rdev->raid_disk);
-
- /* read PPL header */
+ /* read PPL headers, find the recent one */
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
- PAGE_SIZE, page, REQ_OP_READ, 0, false)) {
- md_error(mddev, rdev);
- ret = -EIO;
- goto out;
+ page2 = alloc_page(GFP_KERNEL);
+ if (!page2) {
+ __free_page(page);
+ return -ENOMEM;
}
- pplhdr = page_address(page);
- /* check header validity */
- crc_stored = le32_to_cpu(pplhdr->checksum);
- pplhdr->checksum = 0;
- crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+ /* searching ppl area for latest ppl */
+ while (pplhdr_offset < rdev->ppl.size - (PPL_HEADER_SIZE >> 9)) {
+ if (!sync_page_io(rdev,
+ rdev->ppl.sector - rdev->data_offset +
+ pplhdr_offset, PAGE_SIZE, page, REQ_OP_READ,
+ 0, false)) {
+ md_error(mddev, rdev);
+ ret = -EIO;
+ /* if not able to read - don't recover any PPL */
+ pplhdr = NULL;
+ break;
+ }
+ pplhdr = page_address(page);
+
+ /* check header validity */
+ crc_stored = le32_to_cpu(pplhdr->checksum);
+ pplhdr->checksum = 0;
+ crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+
+ if (crc_stored != crc) {
+ pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
+ __func__, crc_stored, crc,
+ (unsigned long long)pplhdr_offset);
+ pplhdr = prev_pplhdr;
+ pplhdr_offset = prev_pplhdr_offset;
+ break;
+ }
- if (crc_stored != crc) {
- pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x\n",
- __func__, crc_stored, crc);
- ppl_conf->mismatch_count++;
- goto out;
- }
+ signature = le32_to_cpu(pplhdr->signature);
- signature = le32_to_cpu(pplhdr->signature);
+ if (mddev->external) {
+ /*
+ * For external metadata the header signature is set and
+ * validated in userspace.
+ */
+ ppl_conf->signature = signature;
+ } else if (ppl_conf->signature != signature) {
+ pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x (offset: %llu)\n",
+ __func__, signature, ppl_conf->signature,
+ (unsigned long long)pplhdr_offset);
+ pplhdr = prev_pplhdr;
+ pplhdr_offset = prev_pplhdr_offset;
+ break;
+ }
- if (mddev->external) {
- /*
- * For external metadata the header signature is set and
- * validated in userspace.
- */
- ppl_conf->signature = signature;
- } else if (ppl_conf->signature != signature) {
- pr_debug("%s: ppl header signature does not match: stored: 0x%x configured: 0x%x\n",
- __func__, signature, ppl_conf->signature);
- ppl_conf->mismatch_count++;
- goto out;
+ if (prev_pplhdr && le64_to_cpu(prev_pplhdr->generation) >
+ le64_to_cpu(pplhdr->generation)) {
+ /* previous was newest */
+ pplhdr = prev_pplhdr;
+ pplhdr_offset = prev_pplhdr_offset;
+ break;
+ }
+
+ prev_pplhdr_offset = pplhdr_offset;
+ prev_pplhdr = pplhdr;
+
+ tmp = page;
+ page = page2;
+ page2 = tmp;
+
+ /* calculate next potential ppl offset */
+ for (i = 0; i < le32_to_cpu(pplhdr->entries_count); i++)
+ pplhdr_offset +=
+ le32_to_cpu(pplhdr->entries[i].pp_size) >> 9;
+ pplhdr_offset += PPL_HEADER_SIZE >> 9;
}
+ /* no valid ppl found */
+ if (!pplhdr)
+ ppl_conf->mismatch_count++;
+ else
+ pr_debug("%s: latest PPL found at offset: %llu, with generation: %llu\n",
+ __func__, (unsigned long long)pplhdr_offset,
+ le64_to_cpu(pplhdr->generation));
+
/* attempt to recover from log if we are starting a dirty array */
- if (!mddev->pers && mddev->recovery_cp != MaxSector)
- ret = ppl_recover(log, pplhdr);
-out:
+ if (pplhdr && !mddev->pers && mddev->recovery_cp != MaxSector)
+ ret = ppl_recover(log, pplhdr, pplhdr_offset);
+
/* write empty header if we are starting the array */
if (!ret && !mddev->pers)
ret = ppl_write_empty_header(log);
__free_page(page);
+ __free_page(page2);
pr_debug("%s: return: %d mismatch_count: %d recovered_entries: %d\n",
__func__, ret, ppl_conf->mismatch_count,
@@ -1031,6 +1101,7 @@ static int ppl_load(struct ppl_conf *ppl_conf)
static void __ppl_exit_log(struct ppl_conf *ppl_conf)
{
clear_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
+ clear_bit(MD_HAS_MULTIPLE_PPLS, &ppl_conf->mddev->flags);
kfree(ppl_conf->child_logs);
@@ -1099,6 +1170,22 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
return 0;
}
+static void ppl_init_child_log(struct ppl_log *log, struct md_rdev *rdev)
+{
+ if ((rdev->ppl.size << 9) >= (PPL_SPACE_SIZE +
+ PPL_HEADER_SIZE) * 2) {
+ log->use_multippl = true;
+ set_bit(MD_HAS_MULTIPLE_PPLS,
+ &log->ppl_conf->mddev->flags);
+ log->entry_space = PPL_SPACE_SIZE;
+ } else {
+ log->use_multippl = false;
+ log->entry_space = (log->rdev->ppl.size << 9) -
+ PPL_HEADER_SIZE;
+ }
+ log->next_io_sector = rdev->ppl.sector;
+}
+
int ppl_init_log(struct r5conf *conf)
{
struct ppl_conf *ppl_conf;
@@ -1196,6 +1283,7 @@ int ppl_init_log(struct r5conf *conf)
q = bdev_get_queue(rdev->bdev);
if (test_bit(QUEUE_FLAG_WC, &q->queue_flags))
need_cache_flush = true;
+ ppl_init_child_log(log, rdev);
}
}
@@ -1261,6 +1349,7 @@ int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add)
if (!ret) {
log->rdev = rdev;
ret = ppl_write_empty_header(log);
+ ppl_init_child_log(log, rdev);
}
} else {
log->rdev = NULL;