linux/block/badblocks.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Bad block management
   4 *
   5 * - Heavily based on MD badblocks code from Neil Brown
   6 *
   7 * Copyright (c) 2015, Intel Corporation.
   8 */
   9
  10#include <linux/badblocks.h>
  11#include <linux/seqlock.h>
  12#include <linux/device.h>
  13#include <linux/kernel.h>
  14#include <linux/module.h>
  15#include <linux/stddef.h>
  16#include <linux/types.h>
  17#include <linux/slab.h>
  18
  19/**
  20 * badblocks_check() - check a given range for bad sectors
  21 * @bb:         the badblocks structure that holds all badblock information
  22 * @s:          sector (start) at which to check for badblocks
  23 * @sectors:    number of sectors to check for badblocks
  24 * @first_bad:  pointer to store location of the first badblock
  25 * @bad_sectors: pointer to store number of badblocks after @first_bad
  26 *
  27 * We can record which blocks on each device are 'bad' and so just
  28 * fail those blocks, or that stripe, rather than the whole device.
  29 * Entries in the bad-block table are 64bits wide.  This comprises:
  30 * Length of bad-range, in sectors: 0-511 for lengths 1-512
  31 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
  32 *  A 'shift' can be set so that larger blocks are tracked and
  33 *  consequently larger devices can be covered.
  34 * 'Acknowledged' flag - 1 bit. - the most significant bit.
  35 *
  36 * Locking of the bad-block table uses a seqlock so badblocks_check
  37 * might need to retry if it is very unlucky.
  38 * We will sometimes want to check for bad blocks in a bi_end_io function,
  39 * so we use the write_seqlock_irq variant.
  40 *
  41 * When looking for a bad block we specify a range and want to
  42 * know if any block in the range is bad.  So we binary-search
  43 * to the last range that starts at-or-before the given endpoint,
  44 * (or "before the sector after the target range")
  45 * then see if it ends after the given start.
  46 *
  47 * Return:
  48 *  0: there are no known bad blocks in the range
  49 *  1: there are known bad block which are all acknowledged
  50 * -1: there are bad blocks which have not yet been acknowledged in metadata.
  51 * plus the start/length of the first bad section we overlap.
  52 */
  53int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
  54                        sector_t *first_bad, int *bad_sectors)
  55{
  56        int hi;
  57        int lo;
  58        u64 *p = bb->page;
  59        int rv;
  60        sector_t target = s + sectors;
  61        unsigned seq;
  62
  63        if (bb->shift > 0) {
  64                /* round the start down, and the end up */
  65                s >>= bb->shift;
  66                target += (1<<bb->shift) - 1;
  67                target >>= bb->shift;
  68                sectors = target - s;
  69        }
  70        /* 'target' is now the first block after the bad range */
  71
  72retry:
  73        seq = read_seqbegin(&bb->lock);
  74        lo = 0;
  75        rv = 0;
  76        hi = bb->count;
  77
  78        /* Binary search between lo and hi for 'target'
  79         * i.e. for the last range that starts before 'target'
  80         */
  81        /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
  82         * are known not to be the last range before target.
  83         * VARIANT: hi-lo is the number of possible
  84         * ranges, and decreases until it reaches 1
  85         */
  86        while (hi - lo > 1) {
  87                int mid = (lo + hi) / 2;
  88                sector_t a = BB_OFFSET(p[mid]);
  89
  90                if (a < target)
  91                        /* This could still be the one, earlier ranges
  92                         * could not.
  93                         */
  94                        lo = mid;
  95                else
  96                        /* This and later ranges are definitely out. */
  97                        hi = mid;
  98        }
  99        /* 'lo' might be the last that started before target, but 'hi' isn't */
 100        if (hi > lo) {
 101                /* need to check all range that end after 's' to see if
 102                 * any are unacknowledged.
 103                 */
 104                while (lo >= 0 &&
 105                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
 106                        if (BB_OFFSET(p[lo]) < target) {
 107                                /* starts before the end, and finishes after
 108                                 * the start, so they must overlap
 109                                 */
 110                                if (rv != -1 && BB_ACK(p[lo]))
 111                                        rv = 1;
 112                                else
 113                                        rv = -1;
 114                                *first_bad = BB_OFFSET(p[lo]);
 115                                *bad_sectors = BB_LEN(p[lo]);
 116                        }
 117                        lo--;
 118                }
 119        }
 120
 121        if (read_seqretry(&bb->lock, seq))
 122                goto retry;
 123
 124        return rv;
 125}
 126EXPORT_SYMBOL_GPL(badblocks_check);
 127
 128static void badblocks_update_acked(struct badblocks *bb)
 129{
 130        u64 *p = bb->page;
 131        int i;
 132        bool unacked = false;
 133
 134        if (!bb->unacked_exist)
 135                return;
 136
 137        for (i = 0; i < bb->count ; i++) {
 138                if (!BB_ACK(p[i])) {
 139                        unacked = true;
 140                        break;
 141                }
 142        }
 143
 144        if (!unacked)
 145                bb->unacked_exist = 0;
 146}
 147
 148/**
 149 * badblocks_set() - Add a range of bad blocks to the table.
 150 * @bb:         the badblocks structure that holds all badblock information
 151 * @s:          first sector to mark as bad
 152 * @sectors:    number of sectors to mark as bad
 153 * @acknowledged: weather to mark the bad sectors as acknowledged
 154 *
 155 * This might extend the table, or might contract it if two adjacent ranges
 156 * can be merged. We binary-search to find the 'insertion' point, then
 157 * decide how best to handle it.
 158 *
 159 * Return:
 160 *  0: success
 161 *  1: failed to set badblocks (out of space)
 162 */
 163int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
 164                        int acknowledged)
 165{
 166        u64 *p;
 167        int lo, hi;
 168        int rv = 0;
 169        unsigned long flags;
 170
 171        if (bb->shift < 0)
 172                /* badblocks are disabled */
 173                return 1;
 174
 175        if (bb->shift) {
 176                /* round the start down, and the end up */
 177                sector_t next = s + sectors;
 178
 179                s >>= bb->shift;
 180                next += (1<<bb->shift) - 1;
 181                next >>= bb->shift;
 182                sectors = next - s;
 183        }
 184
 185        write_seqlock_irqsave(&bb->lock, flags);
 186
 187        p = bb->page;
 188        lo = 0;
 189        hi = bb->count;
 190        /* Find the last range that starts at-or-before 's' */
 191        while (hi - lo > 1) {
 192                int mid = (lo + hi) / 2;
 193                sector_t a = BB_OFFSET(p[mid]);
 194
 195                if (a <= s)
 196                        lo = mid;
 197                else
 198                        hi = mid;
 199        }
 200        if (hi > lo && BB_OFFSET(p[lo]) > s)
 201                hi = lo;
 202
 203        if (hi > lo) {
 204                /* we found a range that might merge with the start
 205                 * of our new range
 206                 */
 207                sector_t a = BB_OFFSET(p[lo]);
 208                sector_t e = a + BB_LEN(p[lo]);
 209                int ack = BB_ACK(p[lo]);
 210
 211                if (e >= s) {
 212                        /* Yes, we can merge with a previous range */
 213                        if (s == a && s + sectors >= e)
 214                                /* new range covers old */
 215                                ack = acknowledged;
 216                        else
 217                                ack = ack && acknowledged;
 218
 219                        if (e < s + sectors)
 220                                e = s + sectors;
 221                        if (e - a <= BB_MAX_LEN) {
 222                                p[lo] = BB_MAKE(a, e-a, ack);
 223                                s = e;
 224                        } else {
 225                                /* does not all fit in one range,
 226                                 * make p[lo] maximal
 227                                 */
 228                                if (BB_LEN(p[lo]) != BB_MAX_LEN)
 229                                        p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
 230                                s = a + BB_MAX_LEN;
 231                        }
 232                        sectors = e - s;
 233                }
 234        }
 235        if (sectors && hi < bb->count) {
 236                /* 'hi' points to the first range that starts after 's'.
 237                 * Maybe we can merge with the start of that range
 238                 */
 239                sector_t a = BB_OFFSET(p[hi]);
 240                sector_t e = a + BB_LEN(p[hi]);
 241                int ack = BB_ACK(p[hi]);
 242
 243                if (a <= s + sectors) {
 244                        /* merging is possible */
 245                        if (e <= s + sectors) {
 246                                /* full overlap */
 247                                e = s + sectors;
 248                                ack = acknowledged;
 249                        } else
 250                                ack = ack && acknowledged;
 251
 252                        a = s;
 253                        if (e - a <= BB_MAX_LEN) {
 254                                p[hi] = BB_MAKE(a, e-a, ack);
 255                                s = e;
 256                        } else {
 257                                p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
 258                                s = a + BB_MAX_LEN;
 259                        }
 260                        sectors = e - s;
 261                        lo = hi;
 262                        hi++;
 263                }
 264        }
 265        if (sectors == 0 && hi < bb->count) {
 266                /* we might be able to combine lo and hi */
 267                /* Note: 's' is at the end of 'lo' */
 268                sector_t a = BB_OFFSET(p[hi]);
 269                int lolen = BB_LEN(p[lo]);
 270                int hilen = BB_LEN(p[hi]);
 271                int newlen = lolen + hilen - (s - a);
 272
 273                if (s >= a && newlen < BB_MAX_LEN) {
 274                        /* yes, we can combine them */
 275                        int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
 276
 277                        p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
 278                        memmove(p + hi, p + hi + 1,
 279                                (bb->count - hi - 1) * 8);
 280                        bb->count--;
 281                }
 282        }
 283        while (sectors) {
 284                /* didn't merge (it all).
 285                 * Need to add a range just before 'hi'
 286                 */
 287                if (bb->count >= MAX_BADBLOCKS) {
 288                        /* No room for more */
 289                        rv = 1;
 290                        break;
 291                } else {
 292                        int this_sectors = sectors;
 293
 294                        memmove(p + hi + 1, p + hi,
 295                                (bb->count - hi) * 8);
 296                        bb->count++;
 297
 298                        if (this_sectors > BB_MAX_LEN)
 299                                this_sectors = BB_MAX_LEN;
 300                        p[hi] = BB_MAKE(s, this_sectors, acknowledged);
 301                        sectors -= this_sectors;
 302                        s += this_sectors;
 303                }
 304        }
 305
 306        bb->changed = 1;
 307        if (!acknowledged)
 308                bb->unacked_exist = 1;
 309        else
 310                badblocks_update_acked(bb);
 311        write_sequnlock_irqrestore(&bb->lock, flags);
 312
 313        return rv;
 314}
 315EXPORT_SYMBOL_GPL(badblocks_set);
 316
 317/**
 318 * badblocks_clear() - Remove a range of bad blocks to the table.
 319 * @bb:         the badblocks structure that holds all badblock information
 320 * @s:          first sector to mark as bad
 321 * @sectors:    number of sectors to mark as bad
 322 *
 323 * This may involve extending the table if we spilt a region,
 324 * but it must not fail.  So if the table becomes full, we just
 325 * drop the remove request.
 326 *
 327 * Return:
 328 *  0: success
 329 *  1: failed to clear badblocks
 330 */
 331int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
 332{
 333        u64 *p;
 334        int lo, hi;
 335        sector_t target = s + sectors;
 336        int rv = 0;
 337
 338        if (bb->shift > 0) {
 339                /* When clearing we round the start up and the end down.
 340                 * This should not matter as the shift should align with
 341                 * the block size and no rounding should ever be needed.
 342                 * However it is better the think a block is bad when it
 343                 * isn't than to think a block is not bad when it is.
 344                 */
 345                s += (1<<bb->shift) - 1;
 346                s >>= bb->shift;
 347                target >>= bb->shift;
 348                sectors = target - s;
 349        }
 350
 351        write_seqlock_irq(&bb->lock);
 352
 353        p = bb->page;
 354        lo = 0;
 355        hi = bb->count;
 356        /* Find the last range that starts before 'target' */
 357        while (hi - lo > 1) {
 358                int mid = (lo + hi) / 2;
 359                sector_t a = BB_OFFSET(p[mid]);
 360
 361                if (a < target)
 362                        lo = mid;
 363                else
 364                        hi = mid;
 365        }
 366        if (hi > lo) {
 367                /* p[lo] is the last range that could overlap the
 368                 * current range.  Earlier ranges could also overlap,
 369                 * but only this one can overlap the end of the range.
 370                 */
 371                if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
 372                    (BB_OFFSET(p[lo]) < target)) {
 373                        /* Partial overlap, leave the tail of this range */
 374                        int ack = BB_ACK(p[lo]);
 375                        sector_t a = BB_OFFSET(p[lo]);
 376                        sector_t end = a + BB_LEN(p[lo]);
 377
 378                        if (a < s) {
 379                                /* we need to split this range */
 380                                if (bb->count >= MAX_BADBLOCKS) {
 381                                        rv = -ENOSPC;
 382                                        goto out;
 383                                }
 384                                memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
 385                                bb->count++;
 386                                p[lo] = BB_MAKE(a, s-a, ack);
 387                                lo++;
 388                        }
 389                        p[lo] = BB_MAKE(target, end - target, ack);
 390                        /* there is no longer an overlap */
 391                        hi = lo;
 392                        lo--;
 393                }
 394                while (lo >= 0 &&
 395                       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
 396                       (BB_OFFSET(p[lo]) < target)) {
 397                        /* This range does overlap */
 398                        if (BB_OFFSET(p[lo]) < s) {
 399                                /* Keep the early parts of this range. */
 400                                int ack = BB_ACK(p[lo]);
 401                                sector_t start = BB_OFFSET(p[lo]);
 402
 403                                p[lo] = BB_MAKE(start, s - start, ack);
 404                                /* now low doesn't overlap, so.. */
 405                                break;
 406                        }
 407                        lo--;
 408                }
 409                /* 'lo' is strictly before, 'hi' is strictly after,
 410                 * anything between needs to be discarded
 411                 */
 412                if (hi - lo > 1) {
 413                        memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
 414                        bb->count -= (hi - lo - 1);
 415                }
 416        }
 417
 418        badblocks_update_acked(bb);
 419        bb->changed = 1;
 420out:
 421        write_sequnlock_irq(&bb->lock);
 422        return rv;
 423}
 424EXPORT_SYMBOL_GPL(badblocks_clear);
 425
 426/**
 427 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 428 * @bb:         the badblocks structure that holds all badblock information
 429 *
 430 * This only succeeds if ->changed is clear.  It is used by
 431 * in-kernel metadata updates
 432 */
 433void ack_all_badblocks(struct badblocks *bb)
 434{
 435        if (bb->page == NULL || bb->changed)
 436                /* no point even trying */
 437                return;
 438        write_seqlock_irq(&bb->lock);
 439
 440        if (bb->changed == 0 && bb->unacked_exist) {
 441                u64 *p = bb->page;
 442                int i;
 443
 444                for (i = 0; i < bb->count ; i++) {
 445                        if (!BB_ACK(p[i])) {
 446                                sector_t start = BB_OFFSET(p[i]);
 447                                int len = BB_LEN(p[i]);
 448
 449                                p[i] = BB_MAKE(start, len, 1);
 450                        }
 451                }
 452                bb->unacked_exist = 0;
 453        }
 454        write_sequnlock_irq(&bb->lock);
 455}
 456EXPORT_SYMBOL_GPL(ack_all_badblocks);
 457
 458/**
 459 * badblocks_show() - sysfs access to bad-blocks list
 460 * @bb:         the badblocks structure that holds all badblock information
 461 * @page:       buffer received from sysfs
 462 * @unack:      weather to show unacknowledged badblocks
 463 *
 464 * Return:
 465 *  Length of returned data
 466 */
 467ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
 468{
 469        size_t len;
 470        int i;
 471        u64 *p = bb->page;
 472        unsigned seq;
 473
 474        if (bb->shift < 0)
 475                return 0;
 476
 477retry:
 478        seq = read_seqbegin(&bb->lock);
 479
 480        len = 0;
 481        i = 0;
 482
 483        while (len < PAGE_SIZE && i < bb->count) {
 484                sector_t s = BB_OFFSET(p[i]);
 485                unsigned int length = BB_LEN(p[i]);
 486                int ack = BB_ACK(p[i]);
 487
 488                i++;
 489
 490                if (unack && ack)
 491                        continue;
 492
 493                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
 494                                (unsigned long long)s << bb->shift,
 495                                length << bb->shift);
 496        }
 497        if (unack && len == 0)
 498                bb->unacked_exist = 0;
 499
 500        if (read_seqretry(&bb->lock, seq))
 501                goto retry;
 502
 503        return len;
 504}
 505EXPORT_SYMBOL_GPL(badblocks_show);
 506
 507/**
 508 * badblocks_store() - sysfs access to bad-blocks list
 509 * @bb:         the badblocks structure that holds all badblock information
 510 * @page:       buffer received from sysfs
 511 * @len:        length of data received from sysfs
 512 * @unack:      weather to show unacknowledged badblocks
 513 *
 514 * Return:
 515 *  Length of the buffer processed or -ve error.
 516 */
 517ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
 518                        int unack)
 519{
 520        unsigned long long sector;
 521        int length;
 522        char newline;
 523
 524        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
 525        case 3:
 526                if (newline != '\n')
 527                        return -EINVAL;
 528                /* fall through */
 529        case 2:
 530                if (length <= 0)
 531                        return -EINVAL;
 532                break;
 533        default:
 534                return -EINVAL;
 535        }
 536
 537        if (badblocks_set(bb, sector, length, !unack))
 538                return -ENOSPC;
 539        else
 540                return len;
 541}
 542EXPORT_SYMBOL_GPL(badblocks_store);
 543
 544static int __badblocks_init(struct device *dev, struct badblocks *bb,
 545                int enable)
 546{
 547        bb->dev = dev;
 548        bb->count = 0;
 549        if (enable)
 550                bb->shift = 0;
 551        else
 552                bb->shift = -1;
 553        if (dev)
 554                bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
 555        else
 556                bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 557        if (!bb->page) {
 558                bb->shift = -1;
 559                return -ENOMEM;
 560        }
 561        seqlock_init(&bb->lock);
 562
 563        return 0;
 564}
 565
 566/**
 567 * badblocks_init() - initialize the badblocks structure
 568 * @bb:         the badblocks structure that holds all badblock information
 569 * @enable:     weather to enable badblocks accounting
 570 *
 571 * Return:
 572 *  0: success
 573 *  -ve errno: on error
 574 */
 575int badblocks_init(struct badblocks *bb, int enable)
 576{
 577        return __badblocks_init(NULL, bb, enable);
 578}
 579EXPORT_SYMBOL_GPL(badblocks_init);
 580
 581int devm_init_badblocks(struct device *dev, struct badblocks *bb)
 582{
 583        if (!bb)
 584                return -EINVAL;
 585        return __badblocks_init(dev, bb, 1);
 586}
 587EXPORT_SYMBOL_GPL(devm_init_badblocks);
 588
 589/**
 590 * badblocks_exit() - free the badblocks structure
 591 * @bb:         the badblocks structure that holds all badblock information
 592 */
 593void badblocks_exit(struct badblocks *bb)
 594{
 595        if (!bb)
 596                return;
 597        if (bb->dev)
 598                devm_kfree(bb->dev, bb->page);
 599        else
 600                kfree(bb->page);
 601        bb->page = NULL;
 602}
 603EXPORT_SYMBOL_GPL(badblocks_exit);
 604