/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint,
 * (or "before the sector after the target range")
 * then see if it ends after the given start.
 *
 * @first_bad and @bad_sectors are only written when an overlapping
 * range is found; they are left untouched when 0 is returned.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			sector_t *first_bad, int *bad_sectors)
{
	int hi;
	int lo;
	u64 *p = bb->page;
	int rv;
	sector_t target = s + sectors;
	unsigned seq;

	if (bb->shift > 0) {
		/* round the start down, and the end up */
		s >>= bb->shift;
		target += (1<<bb->shift) - 1;
		target >>= bb->shift;
		sectors = target - s;
	}
	/* 'target' is now the first block after the bad range */

retry:
	seq = read_seqbegin(&bb->lock);
	lo = 0;
	rv = 0;
	hi = bb->count;

	/* Binary search between lo and hi for 'target'
	 * i.e. for the last range that starts before 'target'
	 */
	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
	 * are known not to be the last range before target.
	 * VARIANT: hi-lo is the number of possible
	 * ranges, and decreases until it reaches 1
	 */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			/* This could still be the one, earlier ranges
			 * could not.
			 */
			lo = mid;
		else
			/* This and later ranges are definitely out. */
			hi = mid;
	}
	/* 'lo' might be the last that started before target, but 'hi' isn't */
	if (hi > lo) {
		/* need to check all ranges that end after 's' to see if
		 * any are unacknowledged.
		 */
		while (lo >= 0 &&
		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
			if (BB_OFFSET(p[lo]) < target) {
				/* starts before the end, and finishes after
				 * the start, so they must overlap
				 */
				if (rv != -1 && BB_ACK(p[lo]))
					rv = 1;
				else
					rv = -1;
				/* walking downwards, so the last store wins
				 * and reports the lowest overlapping range
				 */
				*first_bad = BB_OFFSET(p[lo]);
				*bad_sectors = BB_LEN(p[lo]);
			}
			lo--;
		}
	}

	/* the table changed under us; redo the whole lookup */
	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space, or badblocks are disabled)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			int acknowledged)
{
	u64 *p;
	int lo, hi;
	int rv = 0;
	unsigned long flags;

	if (bb->shift < 0)
		/* badblocks are disabled: nothing can be recorded, so
		 * report failure rather than pretending the range was set
		 */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s >>= bb->shift;
		next += (1<<bb->shift) - 1;
		next >>= bb->shift;
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts at-or-before 's' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a <= s)
			lo = mid;
		else
			hi = mid;
	}
	/* p[0] was never compared by the search; if it starts after 's'
	 * there is no range at-or-before 's' at all
	 */
	if (hi > lo && BB_OFFSET(p[lo]) > s)
		hi = lo;

	if (hi > lo) {
		/* we found a range that might merge with the start
		 * of our new range
		 */
		sector_t a = BB_OFFSET(p[lo]);
		sector_t e = a + BB_LEN(p[lo]);
		int ack = BB_ACK(p[lo]);

		if (e >= s) {
			/* Yes, we can merge with a previous range */
			if (s == a && s + sectors >= e)
				/* new range covers old */
				ack = acknowledged;
			else
				ack = ack && acknowledged;

			if (e < s + sectors)
				e = s + sectors;
			if (e - a <= BB_MAX_LEN) {
				p[lo] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				/* does not all fit in one range,
				 * make p[lo] maximal
				 */
				if (BB_LEN(p[lo]) != BB_MAX_LEN)
					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
		}
	}
	if (sectors && hi < bb->count) {
		/* 'hi' points to the first range that starts after 's'.
		 * Maybe we can merge with the start of that range
		 */
		sector_t a = BB_OFFSET(p[hi]);
		sector_t e = a + BB_LEN(p[hi]);
		int ack = BB_ACK(p[hi]);

		if (a <= s + sectors) {
			/* merging is possible */
			if (e <= s + sectors) {
				/* full overlap */
				e = s + sectors;
				ack = acknowledged;
			} else
				ack = ack && acknowledged;

			a = s;
			if (e - a <= BB_MAX_LEN) {
				p[hi] = BB_MAKE(a, e-a, ack);
				s = e;
			} else {
				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
				s = a + BB_MAX_LEN;
			}
			sectors = e - s;
			lo = hi;
			hi++;
		}
	}
	if (sectors == 0 && hi < bb->count) {
		/* we might be able to combine lo and hi */
		/* Note: 's' is at the end of 'lo' */
		sector_t a = BB_OFFSET(p[hi]);
		int lolen = BB_LEN(p[lo]);
		int hilen = BB_LEN(p[hi]);
		int newlen = lolen + hilen - (s - a);

		if (s >= a && newlen < BB_MAX_LEN) {
			/* yes, we can combine them */
			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
			/* close the gap left by the absorbed 'hi' entry */
			memmove(p + hi, p + hi + 1,
				(bb->count - hi - 1) * sizeof(*p));
			bb->count--;
		}
	}
	while (sectors) {
		/* didn't merge (it all).
		 * Need to add a range just before 'hi'
		 */
		if (bb->count >= MAX_BADBLOCKS) {
			/* No room for more */
			rv = 1;
			break;
		} else {
			int this_sectors = sectors;

			/* open a slot at 'hi' for the new entry */
			memmove(p + hi + 1, p + hi,
				(bb->count - hi) * sizeof(*p));
			bb->count++;

			if (this_sectors > BB_MAX_LEN)
				this_sectors = BB_MAX_LEN;
			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
			sectors -= this_sectors;
			s += this_sectors;
		}
	}

	bb->changed = 1;
	if (!acknowledged)
		bb->unacked_exist = 1;
	write_sequnlock_irqrestore(&bb->lock, flags);

	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: a range had to be split but the table is already full;
 *	     the table is left unmodified
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	u64 *p;
	int lo, hi;
	sector_t target = s + sectors;
	int rv = 0;

	if (bb->shift > 0) {
		/* When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		s += (1<<bb->shift) - 1;
		s >>= bb->shift;
		target >>= bb->shift;
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	p = bb->page;
	lo = 0;
	hi = bb->count;
	/* Find the last range that starts before 'target' */
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a < target)
			lo = mid;
		else
			hi = mid;
	}
	if (hi > lo) {
		/* p[lo] is the last range that could overlap the
		 * current range.  Earlier ranges could also overlap,
		 * but only this one can overlap the end of the range.
		 *
		 * The search never compares p[0], so when lo == 0 the
		 * entry may start at-or-after 'target' and not overlap
		 * at all; the explicit start check guards against
		 * rewriting or discarding such an entry.
		 */
		if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
		    (BB_OFFSET(p[lo]) < target)) {
			/* Partial overlap, leave the tail of this range */
			int ack = BB_ACK(p[lo]);
			sector_t a = BB_OFFSET(p[lo]);
			sector_t end = a + BB_LEN(p[lo]);

			if (a < s) {
				/* we need to split this range */
				if (bb->count >= MAX_BADBLOCKS) {
					rv = -ENOSPC;
					goto out;
				}
				memmove(p+lo+1, p+lo,
					(bb->count - lo) * sizeof(*p));
				bb->count++;
				p[lo] = BB_MAKE(a, s-a, ack);
				lo++;
			}
			p[lo] = BB_MAKE(target, end - target, ack);
			/* there is no longer an overlap */
			hi = lo;
			lo--;
		}
		while (lo >= 0 &&
		       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
		       (BB_OFFSET(p[lo]) < target)) {
			/* This range does overlap */
			if (BB_OFFSET(p[lo]) < s) {
				/* Keep the early parts of this range. */
				int ack = BB_ACK(p[lo]);
				sector_t start = BB_OFFSET(p[lo]);

				p[lo] = BB_MAKE(start, s - start, ack);
				/* now low doesn't overlap, so.. */
				break;
			}
			lo--;
		}
		/* 'lo' is strictly before, 'hi' is strictly after,
		 * anything between needs to be discarded
		 */
		if (hi - lo > 1) {
			memmove(p+lo+1, p+hi,
				(bb->count - hi) * sizeof(*p));
			bb->count -= (hi - lo - 1);
		}
	}

	bb->changed = 1;
out:
	write_sequnlock_irq(&bb->lock);
	return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	/* re-check ->changed now that the lock is held */
	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count ; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @unack:	if non-zero, list only the unacknowledged ranges
 *
 * Each listed range is printed as "<start> <length>\n", with both values
 * shifted left by @bb->shift.  Nothing is printed when badblocks are
 * disabled (@bb->shift < 0).
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		/* only unacknowledged entries were asked for */
		if (unack && ack)
			continue;

		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	/* an unacknowledged-only listing that printed nothing means
	 * no unacknowledged entries were seen
	 */
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	if non-zero, record the range as unacknowledged
 *
 * The buffer must contain "<sector> <length>", optionally followed by a
 * newline, with @length greater than zero.
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		/* fallthrough */
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);

/*
 * Common initialisation: reset the table, allocate one zeroed page for it
 * (device-managed when @dev is given, plain kzalloc otherwise) and set up
 * the seqlock.  On allocation failure badblocks are marked disabled
 * (shift = -1) and -ENOMEM is returned.
 */
static int __badblocks_init(struct device *dev, struct badblocks *bb,
		int enable)
{
	bb->dev = dev;
	bb->count = 0;
	if (enable)
		bb->shift = 0;
	else
		bb->shift = -1;
	if (dev)
		bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
	else
		bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!bb->page) {
		bb->shift = -1;
		return -ENOMEM;
	}
	seqlock_init(&bb->lock);

	return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 * @enable:	whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
	return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

/**
 * devm_init_badblocks() - initialize a badblocks structure for a device
 * @dev:	device passed to devm_kzalloc() for the table allocation
 * @bb:		the badblocks structure that holds all badblock information
 *
 * Badblocks accounting is always enabled by this variant.
 *
 * Return:
 *  0: success
 *  -EINVAL: @bb is NULL
 *  -ENOMEM: the table could not be allocated
 */
int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 *
 * Frees the table page with the allocator matching how it was obtained
 * (devm_kfree() when @bb->dev is set, kfree() otherwise) and clears the
 * pointer.  A NULL @bb is ignored.
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);