linux/block/badblocks.c
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>

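/*
 * For reference, the entry encoding described in the badblocks_check()
 * comment below corresponds to the BB_* macros in <linux/badblocks.h>
 * (see that header for the authoritative definitions):
 *
 *      bit  63     'acknowledged' flag
 *      bits 62..9  start of bad range, in (shifted) sectors
 *      bits  8..0  length of bad range minus one (so 1-512 sectors)
 *
 * So, for example, BB_MAKE(4096, 8, 1) packs an acknowledged 8-sector
 * range starting at sector 4096, and BB_OFFSET()/BB_LEN()/BB_ACK()
 * recover the three fields from such an entry.
 */
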
/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          sector (start) at which to check for badblocks
 * @sectors:    number of sectors to check for badblocks
 * @first_bad:  pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64 bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (i.e. before the first sector past the target range),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata.
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
                        sector_t *first_bad, int *bad_sectors)
{
        int hi;
        int lo;
        u64 *p = bb->page;
        int rv;
        sector_t target = s + sectors;
        unsigned seq;

        if (bb->shift > 0) {
                /* round the start down, and the end up */
                s >>= bb->shift;
                target += (1<<bb->shift) - 1;
                target >>= bb->shift;
                sectors = target - s;
        }
        /* 'target' is now the first block after the bad range */

retry:
        seq = read_seqbegin(&bb->lock);
        lo = 0;
        rv = 0;
        hi = bb->count;

        /* Binary search between lo and hi for 'target'
         * i.e. for the last range that starts before 'target'
         */
        /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
         * are known not to be the last range before target.
         * VARIANT: hi-lo is the number of possible
         * ranges, and decreases until it reaches 1
         */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        /* This could still be the one, earlier ranges
                         * could not.
                         */
                        lo = mid;
                else
                        /* This and later ranges are definitely out. */
                        hi = mid;
        }
        /* 'lo' might be the last that started before target, but 'hi' isn't */
        if (hi > lo) {
                /* need to check all ranges that end after 's' to see if
                 * any are unacknowledged.
                 */
                while (lo >= 0 &&
                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
                        if (BB_OFFSET(p[lo]) < target) {
                                /* starts before the end, and finishes after
                                 * the start, so they must overlap
                                 */
                                if (rv != -1 && BB_ACK(p[lo]))
                                        rv = 1;
                                else
                                        rv = -1;
                                *first_bad = BB_OFFSET(p[lo]);
                                *bad_sectors = BB_LEN(p[lo]);
                        }
                        lo--;
                }
        }

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_check);
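
/*
 * Illustrative sketch (not part of this file): a caller that refuses
 * to issue I/O overlapping any known-bad range, treating both the
 * acknowledged (1) and unacknowledged (-1) results as errors.  The
 * 'bb', 'sector' and 'nr_sectors' names are assumed from surrounding
 * driver code.
 *
 *      sector_t first_bad;
 *      int bad_sectors;
 *
 *      if (badblocks_check(bb, sector, nr_sectors,
 *                          &first_bad, &bad_sectors))
 *              return -EIO;
 */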

/*
 * badblocks_update_acked() - clear bb->unacked_exist if no entry in the
 * table remains unacknowledged.  Called with bb->lock held for writing.
 */
static void badblocks_update_acked(struct badblocks *bb)
{
        u64 *p = bb->page;
        int i;
        bool unacked = false;

        if (!bb->unacked_exist)
                return;

        for (i = 0; i < bb->count; i++) {
                if (!BB_ACK(p[i])) {
                        unacked = true;
                        break;
                }
        }

        if (!unacked)
                bb->unacked_exist = 0;
}

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          first sector to mark as bad
 * @sectors:    number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
                        int acknowledged)
{
        u64 *p;
        int lo, hi;
        int rv = 0;
        unsigned long flags;

        if (bb->shift < 0)
                /* badblocks are disabled */
                return 0;

        if (bb->shift) {
                /* round the start down, and the end up */
                sector_t next = s + sectors;

                s >>= bb->shift;
                next += (1<<bb->shift) - 1;
                next >>= bb->shift;
                sectors = next - s;
        }

        write_seqlock_irqsave(&bb->lock, flags);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts at-or-before 's' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a <= s)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo && BB_OFFSET(p[lo]) > s)
                hi = lo;

        if (hi > lo) {
                /* we found a range that might merge with the start
                 * of our new range
                 */
                sector_t a = BB_OFFSET(p[lo]);
                sector_t e = a + BB_LEN(p[lo]);
                int ack = BB_ACK(p[lo]);

                if (e >= s) {
                        /* Yes, we can merge with a previous range */
                        if (s == a && s + sectors >= e)
                                /* new range covers old */
                                ack = acknowledged;
                        else
                                ack = ack && acknowledged;

                        if (e < s + sectors)
                                e = s + sectors;
                        if (e - a <= BB_MAX_LEN) {
                                p[lo] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                /* does not all fit in one range,
                                 * make p[lo] maximal
                                 */
                                if (BB_LEN(p[lo]) != BB_MAX_LEN)
                                        p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                }
        }
        if (sectors && hi < bb->count) {
                /* 'hi' points to the first range that starts after 's'.
                 * Maybe we can merge with the start of that range
                 */
                sector_t a = BB_OFFSET(p[hi]);
                sector_t e = a + BB_LEN(p[hi]);
                int ack = BB_ACK(p[hi]);

                if (a <= s + sectors) {
                        /* merging is possible */
                        if (e <= s + sectors) {
                                /* full overlap */
                                e = s + sectors;
                                ack = acknowledged;
                        } else
                                ack = ack && acknowledged;

                        a = s;
                        if (e - a <= BB_MAX_LEN) {
                                p[hi] = BB_MAKE(a, e-a, ack);
                                s = e;
                        } else {
                                p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
                                s = a + BB_MAX_LEN;
                        }
                        sectors = e - s;
                        lo = hi;
                        hi++;
                }
        }
        if (sectors == 0 && hi < bb->count) {
                /* we might be able to combine lo and hi */
                /* Note: 's' is at the end of 'lo' */
                sector_t a = BB_OFFSET(p[hi]);
                int lolen = BB_LEN(p[lo]);
                int hilen = BB_LEN(p[hi]);
                int newlen = lolen + hilen - (s - a);

                if (s >= a && newlen < BB_MAX_LEN) {
                        /* yes, we can combine them */
                        int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);

                        p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
                        memmove(p + hi, p + hi + 1,
                                (bb->count - hi - 1) * 8);
                        bb->count--;
                }
        }
        while (sectors) {
                /* didn't merge (it all).
                 * Need to add a range just before 'hi'
                 */
                if (bb->count >= MAX_BADBLOCKS) {
                        /* No room for more */
                        rv = 1;
                        break;
                } else {
                        int this_sectors = sectors;

                        memmove(p + hi + 1, p + hi,
                                (bb->count - hi) * 8);
                        bb->count++;

                        if (this_sectors > BB_MAX_LEN)
                                this_sectors = BB_MAX_LEN;
                        p[hi] = BB_MAKE(s, this_sectors, acknowledged);
                        sectors -= this_sectors;
                        s += this_sectors;
                }
        }

        bb->changed = 1;
        if (!acknowledged)
                bb->unacked_exist = 1;
        else
                badblocks_update_acked(bb);
        write_sequnlock_irqrestore(&bb->lock, flags);

        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_set);
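
/*
 * Illustrative sketch (not part of this file): recording a media error
 * discovered during I/O.  Passing acknowledged == 0 keeps the range
 * unacknowledged until on-media metadata is updated, which mirrors how
 * MD uses this interface; 'bb', 'sector' and 'nr_sectors' are assumed
 * from surrounding driver code.
 *
 *      if (badblocks_set(bb, sector, nr_sectors, 0))
 *              pr_warn("badblocks table full, cannot record error\n");
 */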

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:         the badblocks structure that holds all badblock information
 * @s:          first sector to mark as good
 * @sectors:    number of sectors to mark as good
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  -ENOSPC: failed to clear badblocks (table full, could not split a range)
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
        u64 *p;
        int lo, hi;
        sector_t target = s + sectors;
        int rv = 0;

        if (bb->shift > 0) {
                /* When clearing we round the start up and the end down.
                 * This should not matter as the shift should align with
                 * the block size and no rounding should ever be needed.
                 * However it is better to think a block is bad when it
                 * isn't than to think a block is not bad when it is.
                 */
                s += (1<<bb->shift) - 1;
                s >>= bb->shift;
                target >>= bb->shift;
                sectors = target - s;
        }

        write_seqlock_irq(&bb->lock);

        p = bb->page;
        lo = 0;
        hi = bb->count;
        /* Find the last range that starts before 'target' */
        while (hi - lo > 1) {
                int mid = (lo + hi) / 2;
                sector_t a = BB_OFFSET(p[mid]);

                if (a < target)
                        lo = mid;
                else
                        hi = mid;
        }
        if (hi > lo) {
                /* p[lo] is the last range that could overlap the
                 * current range.  Earlier ranges could also overlap,
                 * but only this one can overlap the end of the range.
                 */
                if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) &&
                    (BB_OFFSET(p[lo]) < target)) {
                        /* Partial overlap, leave the tail of this range */
                        int ack = BB_ACK(p[lo]);
                        sector_t a = BB_OFFSET(p[lo]);
                        sector_t end = a + BB_LEN(p[lo]);

                        if (a < s) {
                                /* we need to split this range */
                                if (bb->count >= MAX_BADBLOCKS) {
                                        rv = -ENOSPC;
                                        goto out;
                                }
                                memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
                                bb->count++;
                                p[lo] = BB_MAKE(a, s-a, ack);
                                lo++;
                        }
                        p[lo] = BB_MAKE(target, end - target, ack);
                        /* there is no longer an overlap */
                        hi = lo;
                        lo--;
                }
                while (lo >= 0 &&
                       (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) &&
                       (BB_OFFSET(p[lo]) < target)) {
                        /* This range does overlap */
                        if (BB_OFFSET(p[lo]) < s) {
                                /* Keep the early parts of this range. */
                                int ack = BB_ACK(p[lo]);
                                sector_t start = BB_OFFSET(p[lo]);

                                p[lo] = BB_MAKE(start, s - start, ack);
                                /* now p[lo] doesn't overlap, so.. */
                                break;
                        }
                        lo--;
                }
                /* 'lo' is strictly before, 'hi' is strictly after,
                 * anything between needs to be discarded
                 */
                if (hi - lo > 1) {
                        memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
                        bb->count -= (hi - lo - 1);
                }
        }

        badblocks_update_acked(bb);
        bb->changed = 1;
out:
        write_sequnlock_irq(&bb->lock);
        return rv;
}
EXPORT_SYMBOL_GPL(badblocks_clear);
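
/*
 * Illustrative sketch (not part of this file): clearing a range after
 * the affected sectors have been successfully rewritten, e.g. from a
 * driver's write-completion path.  The caller shown is hypothetical.
 *
 *      if (badblocks_clear(bb, sector, nr_sectors))
 *              pr_warn("could not clear badblocks: table full\n");
 */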

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:         the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
        if (bb->page == NULL || bb->changed)
                /* no point even trying */
                return;
        write_seqlock_irq(&bb->lock);

        if (bb->changed == 0 && bb->unacked_exist) {
                u64 *p = bb->page;
                int i;

                for (i = 0; i < bb->count; i++) {
                        if (!BB_ACK(p[i])) {
                                sector_t start = BB_OFFSET(p[i]);
                                int len = BB_LEN(p[i]);

                                p[i] = BB_MAKE(start, len, 1);
                        }
                }
                bb->unacked_exist = 0;
        }
        write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);
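
/*
 * Illustrative sketch (not part of this file), modelled on how MD
 * drives this interface: once the on-media badblocks log has been
 * written back, the metadata writer clears ->changed and only then
 * acknowledges the in-memory entries.
 *
 *      bb->changed = 0;
 *      ack_all_badblocks(bb);
 */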

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:         the badblocks structure that holds all badblock information
 * @page:       buffer to fill with the badblocks list for sysfs
 * @unack:      whether to show only unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
        size_t len;
        int i;
        u64 *p = bb->page;
        unsigned seq;

        if (bb->shift < 0)
                return 0;

retry:
        seq = read_seqbegin(&bb->lock);

        len = 0;
        i = 0;

        while (len < PAGE_SIZE && i < bb->count) {
                sector_t s = BB_OFFSET(p[i]);
                unsigned int length = BB_LEN(p[i]);
                int ack = BB_ACK(p[i]);

                i++;

                if (unack && ack)
                        continue;

                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
                                (unsigned long long)s << bb->shift,
                                length << bb->shift);
        }
        if (unack && len == 0)
                bb->unacked_exist = 0;

        if (read_seqretry(&bb->lock, seq))
                goto retry;

        return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:         the badblocks structure that holds all badblock information
 * @page:       buffer received from sysfs
 * @len:        length of data received from sysfs
 * @unack:      whether to add the blocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
                        int unack)
{
        unsigned long long sector;
        int length;
        char newline;

        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
        case 3:
                if (newline != '\n')
                        return -EINVAL;
                /* fall through */
        case 2:
                if (length <= 0)
                        return -EINVAL;
                break;
        default:
                return -EINVAL;
        }

        if (badblocks_set(bb, sector, length, !unack))
                return -ENOSPC;
        else
                return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
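
/*
 * Illustrative sketch (not part of this file): how a driver might wire
 * these helpers to a sysfs attribute.  The device-to-badblocks lookup,
 * to_badblocks(), is a hypothetical helper.
 *
 *      static ssize_t bad_blocks_show(struct device *dev,
 *                                     struct device_attribute *attr,
 *                                     char *page)
 *      {
 *              return badblocks_show(to_badblocks(dev), page, 0);
 *      }
 *
 *      static ssize_t bad_blocks_store(struct device *dev,
 *                                      struct device_attribute *attr,
 *                                      const char *page, size_t len)
 *      {
 *              return badblocks_store(to_badblocks(dev), page, len, 0);
 *      }
 *      static DEVICE_ATTR_RW(bad_blocks);
 */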

static int __badblocks_init(struct device *dev, struct badblocks *bb,
                int enable)
{
        bb->dev = dev;
        bb->count = 0;
        if (enable)
                bb->shift = 0;
        else
                bb->shift = -1;
        if (dev)
                bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
        else
                bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!bb->page) {
                bb->shift = -1;
                return -ENOMEM;
        }
        seqlock_init(&bb->lock);

        return 0;
}

/**
 * badblocks_init() - initialize the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 * @enable:     whether to enable badblocks accounting
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int badblocks_init(struct badblocks *bb, int enable)
{
        return __badblocks_init(NULL, bb, enable);
}
EXPORT_SYMBOL_GPL(badblocks_init);

/**
 * devm_init_badblocks() - device-managed badblocks initialization
 * @dev:        the device that owns the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 *
 * The table page is allocated with devm_kzalloc() and is freed
 * automatically when @dev is detached.
 *
 * Return:
 *  0: success
 *  -ve errno: on error
 */
int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
        if (!bb)
                return -EINVAL;
        return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);
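
/*
 * Illustrative sketch (not part of this file): lifetime management.
 * With a struct device, the devm variant above frees the table
 * automatically on driver detach; without one, pair badblocks_init()
 * with badblocks_exit() (defined below).  The surrounding driver code
 * is assumed.
 *
 *      struct badblocks bb;
 *      int ret;
 *
 *      ret = badblocks_init(&bb, 1);   (1 == enable accounting)
 *      if (ret)
 *              return ret;
 *      ...
 *      badblocks_exit(&bb);
 */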

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:         the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
        if (!bb)
                return;
        if (bb->dev)
                devm_kfree(bb->dev, bb->page);
        else
                kfree(bb->page);
        bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);