linux/drivers/md/dm-stripe.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2001-2003 Sistina Software (UK) Limited.
   3 *
   4 * This file is released under the GPL.
   5 */
   6
   7#include "dm.h"
   8#include <linux/device-mapper.h>
   9
  10#include <linux/module.h>
  11#include <linux/init.h>
  12#include <linux/blkdev.h>
  13#include <linux/bio.h>
  14#include <linux/dax.h>
  15#include <linux/slab.h>
  16#include <linux/log2.h>
  17
  18#define DM_MSG_PREFIX "striped"
  19#define DM_IO_ERROR_THRESHOLD 15
  20
  21struct stripe {
  22        struct dm_dev *dev;
  23        sector_t physical_start;
  24
  25        atomic_t error_count;
  26};
  27
  28struct stripe_c {
  29        uint32_t stripes;
  30        int stripes_shift;
  31
  32        /* The size of this target / num. stripes */
  33        sector_t stripe_width;
  34
  35        uint32_t chunk_size;
  36        int chunk_size_shift;
  37
  38        /* Needed for handling events */
  39        struct dm_target *ti;
  40
  41        /* Work struct used for triggering events*/
  42        struct work_struct trigger_event;
  43
  44        struct stripe stripe[];
  45};
  46
  47/*
  48 * An event is triggered whenever a drive
  49 * drops out of a stripe volume.
  50 */
  51static void trigger_event(struct work_struct *work)
  52{
  53        struct stripe_c *sc = container_of(work, struct stripe_c,
  54                                           trigger_event);
  55        dm_table_event(sc->ti->table);
  56}
  57
  58/*
  59 * Parse a single <dev> <sector> pair
  60 */
  61static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
  62                      unsigned int stripe, char **argv)
  63{
  64        unsigned long long start;
  65        char dummy;
  66        int ret;
  67
  68        if (sscanf(argv[1], "%llu%c", &start, &dummy) != 1)
  69                return -EINVAL;
  70
  71        ret = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table),
  72                            &sc->stripe[stripe].dev);
  73        if (ret)
  74                return ret;
  75
  76        sc->stripe[stripe].physical_start = start;
  77
  78        return 0;
  79}
  80
  81/*
  82 * Construct a striped mapping.
  83 * <number of stripes> <chunk size> [<dev_path> <offset>]+
  84 */
  85static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
  86{
  87        struct stripe_c *sc;
  88        sector_t width, tmp_len;
  89        uint32_t stripes;
  90        uint32_t chunk_size;
  91        int r;
  92        unsigned int i;
  93
  94        if (argc < 2) {
  95                ti->error = "Not enough arguments";
  96                return -EINVAL;
  97        }
  98
  99        if (kstrtouint(argv[0], 10, &stripes) || !stripes) {
 100                ti->error = "Invalid stripe count";
 101                return -EINVAL;
 102        }
 103
 104        if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) {
 105                ti->error = "Invalid chunk_size";
 106                return -EINVAL;
 107        }
 108
 109        width = ti->len;
 110        if (sector_div(width, stripes)) {
 111                ti->error = "Target length not divisible by "
 112                    "number of stripes";
 113                return -EINVAL;
 114        }
 115
 116        tmp_len = width;
 117        if (sector_div(tmp_len, chunk_size)) {
 118                ti->error = "Target length not divisible by "
 119                    "chunk size";
 120                return -EINVAL;
 121        }
 122
 123        /*
 124         * Do we have enough arguments for that many stripes ?
 125         */
 126        if (argc != (2 + 2 * stripes)) {
 127                ti->error = "Not enough destinations "
 128                        "specified";
 129                return -EINVAL;
 130        }
 131
 132        sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
 133        if (!sc) {
 134                ti->error = "Memory allocation for striped context "
 135                    "failed";
 136                return -ENOMEM;
 137        }
 138
 139        INIT_WORK(&sc->trigger_event, trigger_event);
 140
 141        /* Set pointer to dm target; used in trigger_event */
 142        sc->ti = ti;
 143        sc->stripes = stripes;
 144        sc->stripe_width = width;
 145
 146        if (stripes & (stripes - 1))
 147                sc->stripes_shift = -1;
 148        else
 149                sc->stripes_shift = __ffs(stripes);
 150
 151        r = dm_set_target_max_io_len(ti, chunk_size);
 152        if (r) {
 153                kfree(sc);
 154                return r;
 155        }
 156
 157        ti->num_flush_bios = stripes;
 158        ti->num_discard_bios = stripes;
 159        ti->num_secure_erase_bios = stripes;
 160        ti->num_write_same_bios = stripes;
 161        ti->num_write_zeroes_bios = stripes;
 162
 163        sc->chunk_size = chunk_size;
 164        if (chunk_size & (chunk_size - 1))
 165                sc->chunk_size_shift = -1;
 166        else
 167                sc->chunk_size_shift = __ffs(chunk_size);
 168
 169        /*
 170         * Get the stripe destinations.
 171         */
 172        for (i = 0; i < stripes; i++) {
 173                argv += 2;
 174
 175                r = get_stripe(ti, sc, i, argv);
 176                if (r < 0) {
 177                        ti->error = "Couldn't parse stripe destination";
 178                        while (i--)
 179                                dm_put_device(ti, sc->stripe[i].dev);
 180                        kfree(sc);
 181                        return r;
 182                }
 183                atomic_set(&(sc->stripe[i].error_count), 0);
 184        }
 185
 186        ti->private = sc;
 187
 188        return 0;
 189}
 190
 191static void stripe_dtr(struct dm_target *ti)
 192{
 193        unsigned int i;
 194        struct stripe_c *sc = (struct stripe_c *) ti->private;
 195
 196        for (i = 0; i < sc->stripes; i++)
 197                dm_put_device(ti, sc->stripe[i].dev);
 198
 199        flush_work(&sc->trigger_event);
 200        kfree(sc);
 201}
 202
 203static void stripe_map_sector(struct stripe_c *sc, sector_t sector,
 204                              uint32_t *stripe, sector_t *result)
 205{
 206        sector_t chunk = dm_target_offset(sc->ti, sector);
 207        sector_t chunk_offset;
 208
 209        if (sc->chunk_size_shift < 0)
 210                chunk_offset = sector_div(chunk, sc->chunk_size);
 211        else {
 212                chunk_offset = chunk & (sc->chunk_size - 1);
 213                chunk >>= sc->chunk_size_shift;
 214        }
 215
 216        if (sc->stripes_shift < 0)
 217                *stripe = sector_div(chunk, sc->stripes);
 218        else {
 219                *stripe = chunk & (sc->stripes - 1);
 220                chunk >>= sc->stripes_shift;
 221        }
 222
 223        if (sc->chunk_size_shift < 0)
 224                chunk *= sc->chunk_size;
 225        else
 226                chunk <<= sc->chunk_size_shift;
 227
 228        *result = chunk + chunk_offset;
 229}
 230
 231static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
 232                                    uint32_t target_stripe, sector_t *result)
 233{
 234        uint32_t stripe;
 235
 236        stripe_map_sector(sc, sector, &stripe, result);
 237        if (stripe == target_stripe)
 238                return;
 239
 240        /* round down */
 241        sector = *result;
 242        if (sc->chunk_size_shift < 0)
 243                *result -= sector_div(sector, sc->chunk_size);
 244        else
 245                *result = sector & ~(sector_t)(sc->chunk_size - 1);
 246
 247        if (target_stripe < stripe)
 248                *result += sc->chunk_size;              /* next chunk */
 249}
 250
 251static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
 252                            uint32_t target_stripe)
 253{
 254        sector_t begin, end;
 255
 256        stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
 257                                target_stripe, &begin);
 258        stripe_map_range_sector(sc, bio_end_sector(bio),
 259                                target_stripe, &end);
 260        if (begin < end) {
 261                bio_set_dev(bio, sc->stripe[target_stripe].dev->bdev);
 262                bio->bi_iter.bi_sector = begin +
 263                        sc->stripe[target_stripe].physical_start;
 264                bio->bi_iter.bi_size = to_bytes(end - begin);
 265                return DM_MAPIO_REMAPPED;
 266        } else {
 267                /* The range doesn't map to the target stripe */
 268                bio_endio(bio);
 269                return DM_MAPIO_SUBMITTED;
 270        }
 271}
 272
 273static int stripe_map(struct dm_target *ti, struct bio *bio)
 274{
 275        struct stripe_c *sc = ti->private;
 276        uint32_t stripe;
 277        unsigned target_bio_nr;
 278
 279        if (bio->bi_opf & REQ_PREFLUSH) {
 280                target_bio_nr = dm_bio_get_target_bio_nr(bio);
 281                BUG_ON(target_bio_nr >= sc->stripes);
 282                bio_set_dev(bio, sc->stripe[target_bio_nr].dev->bdev);
 283                return DM_MAPIO_REMAPPED;
 284        }
 285        if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
 286            unlikely(bio_op(bio) == REQ_OP_SECURE_ERASE) ||
 287            unlikely(bio_op(bio) == REQ_OP_WRITE_ZEROES) ||
 288            unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
 289                target_bio_nr = dm_bio_get_target_bio_nr(bio);
 290                BUG_ON(target_bio_nr >= sc->stripes);
 291                return stripe_map_range(sc, bio, target_bio_nr);
 292        }
 293
 294        stripe_map_sector(sc, bio->bi_iter.bi_sector,
 295                          &stripe, &bio->bi_iter.bi_sector);
 296
 297        bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
 298        bio_set_dev(bio, sc->stripe[stripe].dev->bdev);
 299
 300        return DM_MAPIO_REMAPPED;
 301}
 302
 303#if IS_ENABLED(CONFIG_DAX_DRIVER)
 304static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 305                long nr_pages, void **kaddr, pfn_t *pfn)
 306{
 307        sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
 308        struct stripe_c *sc = ti->private;
 309        struct dax_device *dax_dev;
 310        struct block_device *bdev;
 311        uint32_t stripe;
 312        long ret;
 313
 314        stripe_map_sector(sc, sector, &stripe, &dev_sector);
 315        dev_sector += sc->stripe[stripe].physical_start;
 316        dax_dev = sc->stripe[stripe].dev->dax_dev;
 317        bdev = sc->stripe[stripe].dev->bdev;
 318
 319        ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff);
 320        if (ret)
 321                return ret;
 322        return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
 323}
 324
 325static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 326                void *addr, size_t bytes, struct iov_iter *i)
 327{
 328        sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
 329        struct stripe_c *sc = ti->private;
 330        struct dax_device *dax_dev;
 331        struct block_device *bdev;
 332        uint32_t stripe;
 333
 334        stripe_map_sector(sc, sector, &stripe, &dev_sector);
 335        dev_sector += sc->stripe[stripe].physical_start;
 336        dax_dev = sc->stripe[stripe].dev->dax_dev;
 337        bdev = sc->stripe[stripe].dev->bdev;
 338
 339        if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
 340                return 0;
 341        return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 342}
 343
 344static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff,
 345                void *addr, size_t bytes, struct iov_iter *i)
 346{
 347        sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
 348        struct stripe_c *sc = ti->private;
 349        struct dax_device *dax_dev;
 350        struct block_device *bdev;
 351        uint32_t stripe;
 352
 353        stripe_map_sector(sc, sector, &stripe, &dev_sector);
 354        dev_sector += sc->stripe[stripe].physical_start;
 355        dax_dev = sc->stripe[stripe].dev->dax_dev;
 356        bdev = sc->stripe[stripe].dev->bdev;
 357
 358        if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
 359                return 0;
 360        return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i);
 361}
 362
 363static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff,
 364                                      size_t nr_pages)
 365{
 366        int ret;
 367        sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
 368        struct stripe_c *sc = ti->private;
 369        struct dax_device *dax_dev;
 370        struct block_device *bdev;
 371        uint32_t stripe;
 372
 373        stripe_map_sector(sc, sector, &stripe, &dev_sector);
 374        dev_sector += sc->stripe[stripe].physical_start;
 375        dax_dev = sc->stripe[stripe].dev->dax_dev;
 376        bdev = sc->stripe[stripe].dev->bdev;
 377
 378        ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff);
 379        if (ret)
 380                return ret;
 381        return dax_zero_page_range(dax_dev, pgoff, nr_pages);
 382}
 383
 384#else
 385#define stripe_dax_direct_access NULL
 386#define stripe_dax_copy_from_iter NULL
 387#define stripe_dax_copy_to_iter NULL
 388#define stripe_dax_zero_page_range NULL
 389#endif
 390
 391/*
 392 * Stripe status:
 393 *
 394 * INFO
 395 * #stripes [stripe_name <stripe_name>] [group word count]
 396 * [error count 'A|D' <error count 'A|D'>]
 397 *
 398 * TABLE
 399 * #stripes [stripe chunk size]
 400 * [stripe_name physical_start <stripe_name physical_start>]
 401 *
 402 */
 403
 404static void stripe_status(struct dm_target *ti, status_type_t type,
 405                          unsigned status_flags, char *result, unsigned maxlen)
 406{
 407        struct stripe_c *sc = (struct stripe_c *) ti->private;
 408        unsigned int sz = 0;
 409        unsigned int i;
 410
 411        switch (type) {
 412        case STATUSTYPE_INFO:
 413                DMEMIT("%d ", sc->stripes);
 414                for (i = 0; i < sc->stripes; i++)  {
 415                        DMEMIT("%s ", sc->stripe[i].dev->name);
 416                }
 417                DMEMIT("1 ");
 418                for (i = 0; i < sc->stripes; i++) {
 419                        DMEMIT("%c", atomic_read(&(sc->stripe[i].error_count)) ?
 420                               'D' : 'A');
 421                }
 422                break;
 423
 424        case STATUSTYPE_TABLE:
 425                DMEMIT("%d %llu", sc->stripes,
 426                        (unsigned long long)sc->chunk_size);
 427                for (i = 0; i < sc->stripes; i++)
 428                        DMEMIT(" %s %llu", sc->stripe[i].dev->name,
 429                            (unsigned long long)sc->stripe[i].physical_start);
 430                break;
 431
 432        case STATUSTYPE_IMA:
 433                DMEMIT_TARGET_NAME_VERSION(ti->type);
 434                DMEMIT(",stripes=%d,chunk_size=%llu", sc->stripes,
 435                       (unsigned long long)sc->chunk_size);
 436
 437                for (i = 0; i < sc->stripes; i++) {
 438                        DMEMIT(",stripe_%d_device_name=%s", i, sc->stripe[i].dev->name);
 439                        DMEMIT(",stripe_%d_physical_start=%llu", i,
 440                               (unsigned long long)sc->stripe[i].physical_start);
 441                        DMEMIT(",stripe_%d_status=%c", i,
 442                               atomic_read(&(sc->stripe[i].error_count)) ? 'D' : 'A');
 443                }
 444                DMEMIT(";");
 445                break;
 446        }
 447}
 448
 449static int stripe_end_io(struct dm_target *ti, struct bio *bio,
 450                blk_status_t *error)
 451{
 452        unsigned i;
 453        char major_minor[16];
 454        struct stripe_c *sc = ti->private;
 455
 456        if (!*error)
 457                return DM_ENDIO_DONE; /* I/O complete */
 458
 459        if (bio->bi_opf & REQ_RAHEAD)
 460                return DM_ENDIO_DONE;
 461
 462        if (*error == BLK_STS_NOTSUPP)
 463                return DM_ENDIO_DONE;
 464
 465        memset(major_minor, 0, sizeof(major_minor));
 466        sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));
 467
 468        /*
 469         * Test to see which stripe drive triggered the event
 470         * and increment error count for all stripes on that device.
 471         * If the error count for a given device exceeds the threshold
 472         * value we will no longer trigger any further events.
 473         */
 474        for (i = 0; i < sc->stripes; i++)
 475                if (!strcmp(sc->stripe[i].dev->name, major_minor)) {
 476                        atomic_inc(&(sc->stripe[i].error_count));
 477                        if (atomic_read(&(sc->stripe[i].error_count)) <
 478                            DM_IO_ERROR_THRESHOLD)
 479                                schedule_work(&sc->trigger_event);
 480                }
 481
 482        return DM_ENDIO_DONE;
 483}
 484
 485static int stripe_iterate_devices(struct dm_target *ti,
 486                                  iterate_devices_callout_fn fn, void *data)
 487{
 488        struct stripe_c *sc = ti->private;
 489        int ret = 0;
 490        unsigned i = 0;
 491
 492        do {
 493                ret = fn(ti, sc->stripe[i].dev,
 494                         sc->stripe[i].physical_start,
 495                         sc->stripe_width, data);
 496        } while (!ret && ++i < sc->stripes);
 497
 498        return ret;
 499}
 500
 501static void stripe_io_hints(struct dm_target *ti,
 502                            struct queue_limits *limits)
 503{
 504        struct stripe_c *sc = ti->private;
 505        unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT;
 506
 507        blk_limits_io_min(limits, chunk_size);
 508        blk_limits_io_opt(limits, chunk_size * sc->stripes);
 509}
 510
 511static struct target_type stripe_target = {
 512        .name   = "striped",
 513        .version = {1, 6, 0},
 514        .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
 515        .module = THIS_MODULE,
 516        .ctr    = stripe_ctr,
 517        .dtr    = stripe_dtr,
 518        .map    = stripe_map,
 519        .end_io = stripe_end_io,
 520        .status = stripe_status,
 521        .iterate_devices = stripe_iterate_devices,
 522        .io_hints = stripe_io_hints,
 523        .direct_access = stripe_dax_direct_access,
 524        .dax_copy_from_iter = stripe_dax_copy_from_iter,
 525        .dax_copy_to_iter = stripe_dax_copy_to_iter,
 526        .dax_zero_page_range = stripe_dax_zero_page_range,
 527};
 528
 529int __init dm_stripe_init(void)
 530{
 531        int r;
 532
 533        r = dm_register_target(&stripe_target);
 534        if (r < 0)
 535                DMWARN("target registration failed");
 536
 537        return r;
 538}
 539
 540void dm_stripe_exit(void)
 541{
 542        dm_unregister_target(&stripe_target);
 543}
 544