linux/drivers/md/dm-table.c
   1/*
   2 * Copyright (C) 2001 Sistina Software (UK) Limited.
   3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
   4 *
   5 * This file is released under the GPL.
   6 */
   7
   8#include "dm.h"
   9
  10#include <linux/module.h>
  11#include <linux/vmalloc.h>
  12#include <linux/blkdev.h>
  13#include <linux/namei.h>
  14#include <linux/ctype.h>
  15#include <linux/slab.h>
  16#include <linux/interrupt.h>
  17#include <linux/mutex.h>
  18#include <linux/delay.h>
  19#include <asm/atomic.h>
  20
  21#define DM_MSG_PREFIX "table"
  22
  23#define MAX_DEPTH 16
  24#define NODE_SIZE L1_CACHE_BYTES
  25#define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t))
  26#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
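
/*
 * Worked example of the sizing above (illustrative; the real values depend
 * on the architecture): with L1_CACHE_BYTES == 64 and an 8-byte sector_t,
 * NODE_SIZE = 64, KEYS_PER_NODE = 64 / 8 = 8 and CHILDREN_PER_NODE = 9,
 * i.e. each btree node fills one cache line and fans out nine ways.
 */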
  27
  28/*
   29 * The table always has exactly one reference, from either mapped_device->map
   30 * or hash_cell->new_map. This reference is not counted in table->holders.
  31 * A pair of dm_create_table/dm_destroy_table functions is used for table
  32 * creation/destruction.
  33 *
   34 * Temporary references from other code increase table->holders. A pair
  35 * of dm_table_get/dm_table_put functions is used to manipulate it.
  36 *
  37 * When the table is about to be destroyed, we wait for table->holders to
  38 * drop to zero.
  39 */
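
/*
 * Sketch of the temporary-reference convention described above
 * (illustrative only; "t" is a table pointer the caller obtained
 * elsewhere):
 *
 *        dm_table_get(t);
 *        ... inspect the table, e.g. dm_table_get_mode(t) ...
 *        dm_table_put(t);
 *
 * Holding such a reference only delays destruction; it does not keep the
 * table bound to a mapped_device.
 */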
  40
  41struct dm_table {
  42        struct mapped_device *md;
  43        atomic_t holders;
  44        unsigned type;
  45
  46        /* btree table */
  47        unsigned int depth;
  48        unsigned int counts[MAX_DEPTH]; /* in nodes */
  49        sector_t *index[MAX_DEPTH];
  50
  51        unsigned int num_targets;
  52        unsigned int num_allocated;
  53        sector_t *highs;
  54        struct dm_target *targets;
  55
  56        /*
  57         * Indicates the rw permissions for the new logical
  58         * device.  This should be a combination of FMODE_READ
  59         * and FMODE_WRITE.
  60         */
  61        fmode_t mode;
  62
  63        /* a list of devices used by this table */
  64        struct list_head devices;
  65
  66        /* events get handed up using this callback */
  67        void (*event_fn)(void *);
  68        void *event_context;
  69
  70        struct dm_md_mempools *mempools;
  71};
  72
  73/*
   74 * Similar to ceiling(log_base(n))
  75 */
  76static unsigned int int_log(unsigned int n, unsigned int base)
  77{
  78        int result = 0;
  79
  80        while (n > 1) {
  81                n = dm_div_up(n, base);
  82                result++;
  83        }
  84
  85        return result;
  86}
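
/*
 * For example, int_log(1000, 9) == 4: dm_div_up() takes 1000 -> 112 ->
 * 13 -> 2 -> 1 in four rounds, matching ceil(log9(1000)).
 */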
  87
  88/*
   89 * Calculate the index of the child node for the n'th node's k'th key.
  90 */
  91static inline unsigned int get_child(unsigned int n, unsigned int k)
  92{
  93        return (n * CHILDREN_PER_NODE) + k;
  94}
  95
  96/*
  97 * Return the n'th node of level l from table t.
  98 */
  99static inline sector_t *get_node(struct dm_table *t,
 100                                 unsigned int l, unsigned int n)
 101{
 102        return t->index[l] + (n * KEYS_PER_NODE);
 103}
 104
 105/*
  106 * Return the highest key that you could look up from the n'th
 107 * node on level l of the btree.
 108 */
 109static sector_t high(struct dm_table *t, unsigned int l, unsigned int n)
 110{
 111        for (; l < t->depth - 1; l++)
 112                n = get_child(n, CHILDREN_PER_NODE - 1);
 113
 114        if (n >= t->counts[l])
 115                return (sector_t) - 1;
 116
 117        return get_node(t, l, n)[KEYS_PER_NODE - 1];
 118}
 119
 120/*
 121 * Fills in a level of the btree based on the highs of the level
 122 * below it.
 123 */
 124static int setup_btree_index(unsigned int l, struct dm_table *t)
 125{
 126        unsigned int n, k;
 127        sector_t *node;
 128
 129        for (n = 0U; n < t->counts[l]; n++) {
 130                node = get_node(t, l, n);
 131
 132                for (k = 0U; k < KEYS_PER_NODE; k++)
 133                        node[k] = high(t, l + 1, get_child(n, k));
 134        }
 135
 136        return 0;
 137}
 138
 139void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size)
 140{
 141        unsigned long size;
 142        void *addr;
 143
 144        /*
 145         * Check that we're not going to overflow.
 146         */
 147        if (nmemb > (ULONG_MAX / elem_size))
 148                return NULL;
 149
 150        size = nmemb * elem_size;
 151        addr = vmalloc(size);
 152        if (addr)
 153                memset(addr, 0, size);
 154
 155        return addr;
 156}
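
/*
 * The overflow check above matters on 32-bit machines: for example
 * dm_vcalloc(0x20000000, 16) returns NULL instead of letting
 * nmemb * elem_size wrap around and silently allocating far too little.
 */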
 157
 158/*
  159 * highs and targets are managed as dynamic arrays during a
 160 * table load.
 161 */
 162static int alloc_targets(struct dm_table *t, unsigned int num)
 163{
 164        sector_t *n_highs;
 165        struct dm_target *n_targets;
 166        int n = t->num_targets;
 167
 168        /*
 169         * Allocate both the target array and offset array at once.
 170         * Append an empty entry to catch sectors beyond the end of
 171         * the device.
 172         */
 173        n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) +
 174                                          sizeof(sector_t));
 175        if (!n_highs)
 176                return -ENOMEM;
 177
 178        n_targets = (struct dm_target *) (n_highs + num);
 179
 180        if (n) {
 181                memcpy(n_highs, t->highs, sizeof(*n_highs) * n);
 182                memcpy(n_targets, t->targets, sizeof(*n_targets) * n);
 183        }
 184
 185        memset(n_highs + n, -1, sizeof(*n_highs) * (num - n));
 186        vfree(t->highs);
 187
 188        t->num_allocated = num;
 189        t->highs = n_highs;
 190        t->targets = n_targets;
 191
 192        return 0;
 193}
 194
 195int dm_table_create(struct dm_table **result, fmode_t mode,
 196                    unsigned num_targets, struct mapped_device *md)
 197{
 198        struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL);
 199
 200        if (!t)
 201                return -ENOMEM;
 202
 203        INIT_LIST_HEAD(&t->devices);
 204        atomic_set(&t->holders, 0);
 205
 206        if (!num_targets)
 207                num_targets = KEYS_PER_NODE;
 208
 209        num_targets = dm_round_up(num_targets, KEYS_PER_NODE);
 210
 211        if (alloc_targets(t, num_targets)) {
 212                kfree(t);
 213                t = NULL;
 214                return -ENOMEM;
 215        }
 216
 217        t->mode = mode;
 218        t->md = md;
 219        *result = t;
 220        return 0;
 221}
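
/*
 * Typical load-time sequence (illustrative sketch; error handling is
 * omitted, "md" and "dev_size" come from the caller, and params must be
 * writable because dm_split_args() edits it in place):
 *
 *        struct dm_table *t;
 *        char params[] = "/dev/sdb 0";
 *
 *        dm_table_create(&t, FMODE_READ | FMODE_WRITE, 1, md);
 *        dm_table_add_target(t, "linear", 0, dev_size, params);
 *        dm_table_complete(t);
 *
 * dm_table_destroy() undoes all of the above once the table is no longer
 * referenced.
 */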
 222
 223static void free_devices(struct list_head *devices)
 224{
 225        struct list_head *tmp, *next;
 226
 227        list_for_each_safe(tmp, next, devices) {
 228                struct dm_dev_internal *dd =
 229                    list_entry(tmp, struct dm_dev_internal, list);
 230                DMWARN("dm_table_destroy: dm_put_device call missing for %s",
 231                       dd->dm_dev.name);
 232                kfree(dd);
 233        }
 234}
 235
 236void dm_table_destroy(struct dm_table *t)
 237{
 238        unsigned int i;
 239
 240        while (atomic_read(&t->holders))
 241                msleep(1);
 242        smp_mb();
 243
 244        /* free the indexes (see dm_table_complete) */
 245        if (t->depth >= 2)
 246                vfree(t->index[t->depth - 2]);
 247
 248        /* free the targets */
 249        for (i = 0; i < t->num_targets; i++) {
 250                struct dm_target *tgt = t->targets + i;
 251
 252                if (tgt->type->dtr)
 253                        tgt->type->dtr(tgt);
 254
 255                dm_put_target_type(tgt->type);
 256        }
 257
 258        vfree(t->highs);
 259
 260        /* free the device list */
 261        if (t->devices.next != &t->devices)
 262                free_devices(&t->devices);
 263
 264        dm_free_md_mempools(t->mempools);
 265
 266        kfree(t);
 267}
 268
 269void dm_table_get(struct dm_table *t)
 270{
 271        atomic_inc(&t->holders);
 272}
 273
 274void dm_table_put(struct dm_table *t)
 275{
 276        if (!t)
 277                return;
 278
 279        smp_mb__before_atomic_dec();
 280        atomic_dec(&t->holders);
 281}
 282
 283/*
 284 * Checks to see if we need to extend highs or targets.
 285 */
 286static inline int check_space(struct dm_table *t)
 287{
 288        if (t->num_targets >= t->num_allocated)
 289                return alloc_targets(t, t->num_allocated * 2);
 290
 291        return 0;
 292}
 293
 294/*
 295 * See if we've already got a device in the list.
 296 */
 297static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
 298{
 299        struct dm_dev_internal *dd;
 300
 301        list_for_each_entry (dd, l, list)
 302                if (dd->dm_dev.bdev->bd_dev == dev)
 303                        return dd;
 304
 305        return NULL;
 306}
 307
 308/*
 309 * Open a device so we can use it as a map destination.
 310 */
 311static int open_dev(struct dm_dev_internal *d, dev_t dev,
 312                    struct mapped_device *md)
 313{
 314        static char *_claim_ptr = "I belong to device-mapper";
 315        struct block_device *bdev;
 316
 317        int r;
 318
 319        BUG_ON(d->dm_dev.bdev);
 320
 321        bdev = open_by_devnum(dev, d->dm_dev.mode);
 322        if (IS_ERR(bdev))
 323                return PTR_ERR(bdev);
 324        r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
 325        if (r)
 326                blkdev_put(bdev, d->dm_dev.mode);
 327        else
 328                d->dm_dev.bdev = bdev;
 329        return r;
 330}
 331
 332/*
 333 * Close a device that we've been using.
 334 */
 335static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
 336{
 337        if (!d->dm_dev.bdev)
 338                return;
 339
 340        bd_release_from_disk(d->dm_dev.bdev, dm_disk(md));
 341        blkdev_put(d->dm_dev.bdev, d->dm_dev.mode);
 342        d->dm_dev.bdev = NULL;
 343}
 344
 345/*
  346 * If possible, this checks whether an area of a destination device is invalid.
 347 */
 348static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
 349                                  sector_t start, sector_t len, void *data)
 350{
 351        struct queue_limits *limits = data;
 352        struct block_device *bdev = dev->bdev;
 353        sector_t dev_size =
 354                i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
 355        unsigned short logical_block_size_sectors =
 356                limits->logical_block_size >> SECTOR_SHIFT;
 357        char b[BDEVNAME_SIZE];
 358
 359        if (!dev_size)
 360                return 0;
 361
 362        if ((start >= dev_size) || (start + len > dev_size)) {
 363                DMWARN("%s: %s too small for target: "
 364                       "start=%llu, len=%llu, dev_size=%llu",
 365                       dm_device_name(ti->table->md), bdevname(bdev, b),
 366                       (unsigned long long)start,
 367                       (unsigned long long)len,
 368                       (unsigned long long)dev_size);
 369                return 1;
 370        }
 371
 372        if (logical_block_size_sectors <= 1)
 373                return 0;
 374
 375        if (start & (logical_block_size_sectors - 1)) {
 376                DMWARN("%s: start=%llu not aligned to h/w "
 377                       "logical block size %u of %s",
 378                       dm_device_name(ti->table->md),
 379                       (unsigned long long)start,
 380                       limits->logical_block_size, bdevname(bdev, b));
 381                return 1;
 382        }
 383
 384        if (len & (logical_block_size_sectors - 1)) {
 385                DMWARN("%s: len=%llu not aligned to h/w "
 386                       "logical block size %u of %s",
 387                       dm_device_name(ti->table->md),
 388                       (unsigned long long)len,
 389                       limits->logical_block_size, bdevname(bdev, b));
 390                return 1;
 391        }
 392
 393        return 0;
 394}
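
/*
 * Example of the alignment checks above: with a 4096-byte logical block
 * size, logical_block_size_sectors is 8, so a target area starting at
 * sector 12 fails the start check (12 & 7 == 4), while one starting at
 * sector 16 with a multiple-of-8 length passes.
 */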
 395
 396/*
 397 * This upgrades the mode on an already open dm_dev, being
 398 * careful to leave things as they were if we fail to reopen the
 399 * device and not to touch the existing bdev field in case
 400 * it is accessed concurrently inside dm_table_any_congested().
 401 */
 402static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
 403                        struct mapped_device *md)
 404{
 405        int r;
 406        struct dm_dev_internal dd_new, dd_old;
 407
 408        dd_new = dd_old = *dd;
 409
 410        dd_new.dm_dev.mode |= new_mode;
 411        dd_new.dm_dev.bdev = NULL;
 412
 413        r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
 414        if (r)
 415                return r;
 416
 417        dd->dm_dev.mode |= new_mode;
 418        close_dev(&dd_old, md);
 419
 420        return 0;
 421}
 422
 423/*
 424 * Add a device to the list, or just increment the usage count if
 425 * it's already present.
 426 */
 427static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 428                              const char *path, sector_t start, sector_t len,
 429                              fmode_t mode, struct dm_dev **result)
 430{
 431        int r;
 432        dev_t uninitialized_var(dev);
 433        struct dm_dev_internal *dd;
 434        unsigned int major, minor;
 435
 436        BUG_ON(!t);
 437
 438        if (sscanf(path, "%u:%u", &major, &minor) == 2) {
 439                /* Extract the major/minor numbers */
 440                dev = MKDEV(major, minor);
 441                if (MAJOR(dev) != major || MINOR(dev) != minor)
 442                        return -EOVERFLOW;
 443        } else {
 444                /* convert the path to a device */
 445                struct block_device *bdev = lookup_bdev(path);
 446
 447                if (IS_ERR(bdev))
 448                        return PTR_ERR(bdev);
 449                dev = bdev->bd_dev;
 450                bdput(bdev);
 451        }
 452
 453        dd = find_device(&t->devices, dev);
 454        if (!dd) {
 455                dd = kmalloc(sizeof(*dd), GFP_KERNEL);
 456                if (!dd)
 457                        return -ENOMEM;
 458
 459                dd->dm_dev.mode = mode;
 460                dd->dm_dev.bdev = NULL;
 461
 462                if ((r = open_dev(dd, dev, t->md))) {
 463                        kfree(dd);
 464                        return r;
 465                }
 466
 467                format_dev_t(dd->dm_dev.name, dev);
 468
 469                atomic_set(&dd->count, 0);
 470                list_add(&dd->list, &t->devices);
 471
 472        } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) {
 473                r = upgrade_mode(dd, mode, t->md);
 474                if (r)
 475                        return r;
 476        }
 477        atomic_inc(&dd->count);
 478
 479        *result = &dd->dm_dev;
 480        return 0;
 481}
 482
 483/*
 484 * Returns the minimum that is _not_ zero, unless both are zero.
 485 */
  486#define min_not_zero(l, r) ((l) == 0 ? (r) : ((r) == 0 ? (l) : min(l, r)))
 487
 488int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 489                         sector_t start, sector_t len, void *data)
 490{
 491        struct queue_limits *limits = data;
 492        struct block_device *bdev = dev->bdev;
 493        struct request_queue *q = bdev_get_queue(bdev);
 494        char b[BDEVNAME_SIZE];
 495
 496        if (unlikely(!q)) {
 497                DMWARN("%s: Cannot set limits for nonexistent device %s",
 498                       dm_device_name(ti->table->md), bdevname(bdev, b));
 499                return 0;
 500        }
 501
 502        if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
 503                DMWARN("%s: target device %s is misaligned: "
 504                       "physical_block_size=%u, logical_block_size=%u, "
 505                       "alignment_offset=%u, start=%llu",
 506                       dm_device_name(ti->table->md), bdevname(bdev, b),
 507                       q->limits.physical_block_size,
 508                       q->limits.logical_block_size,
 509                       q->limits.alignment_offset,
 510                       (unsigned long long) start << 9);
 511
 512
 513        /*
 514         * Check if merge fn is supported.
 515         * If not we'll force DM to use PAGE_SIZE or
 516         * smaller I/O, just to be safe.
 517         */
 518
 519        if (q->merge_bvec_fn && !ti->type->merge)
 520                limits->max_sectors =
 521                        min_not_zero(limits->max_sectors,
 522                                     (unsigned int) (PAGE_SIZE >> 9));
 523        return 0;
 524}
 525EXPORT_SYMBOL_GPL(dm_set_device_limits);
 526
 527int dm_get_device(struct dm_target *ti, const char *path, sector_t start,
 528                  sector_t len, fmode_t mode, struct dm_dev **result)
 529{
 530        return __table_get_device(ti->table, ti, path,
 531                                  start, len, mode, result);
 532}
 533
 534
 535/*
  536 * Decrement a device's use count and remove it if necessary.
 537 */
 538void dm_put_device(struct dm_target *ti, struct dm_dev *d)
 539{
 540        struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal,
 541                                                  dm_dev);
 542
 543        if (atomic_dec_and_test(&dd->count)) {
 544                close_dev(dd, ti->table->md);
 545                list_del(&dd->list);
 546                kfree(dd);
 547        }
 548}
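
/*
 * Sketch of how a target constructor typically uses the pair above
 * (illustrative only; "example_ctr" and its argument layout are
 * hypothetical):
 *
 *        static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
 *        {
 *                struct dm_dev *dev;
 *                int r;
 *
 *                r = dm_get_device(ti, argv[0], 0, ti->len,
 *                                  dm_table_get_mode(ti->table), &dev);
 *                if (r) {
 *                        ti->error = "Device lookup failed";
 *                        return r;
 *                }
 *                ti->private = dev;
 *                return 0;
 *        }
 *
 * The matching destructor would call dm_put_device(ti, ti->private).
 */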
 549
 550/*
 551 * Checks to see if the target joins onto the end of the table.
 552 */
 553static int adjoin(struct dm_table *table, struct dm_target *ti)
 554{
 555        struct dm_target *prev;
 556
 557        if (!table->num_targets)
 558                return !ti->begin;
 559
 560        prev = &table->targets[table->num_targets - 1];
 561        return (ti->begin == (prev->begin + prev->len));
 562}
 563
 564/*
 565 * Used to dynamically allocate the arg array.
 566 */
 567static char **realloc_argv(unsigned *array_size, char **old_argv)
 568{
 569        char **argv;
 570        unsigned new_size;
 571
 572        new_size = *array_size ? *array_size * 2 : 64;
 573        argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL);
 574        if (argv) {
 575                memcpy(argv, old_argv, *array_size * sizeof(*argv));
 576                *array_size = new_size;
 577        }
 578
 579        kfree(old_argv);
 580        return argv;
 581}
 582
 583/*
 584 * Destructively splits up the argument list to pass to ctr.
 585 */
 586int dm_split_args(int *argc, char ***argvp, char *input)
 587{
 588        char *start, *end = input, *out, **argv = NULL;
 589        unsigned array_size = 0;
 590
 591        *argc = 0;
 592
 593        if (!input) {
 594                *argvp = NULL;
 595                return 0;
 596        }
 597
 598        argv = realloc_argv(&array_size, argv);
 599        if (!argv)
 600                return -ENOMEM;
 601
 602        while (1) {
 603                start = end;
 604
 605                /* Skip whitespace */
 606                while (*start && isspace(*start))
 607                        start++;
 608
 609                if (!*start)
 610                        break;  /* success, we hit the end */
 611
  612                /* 'out' is used to strip the backslash escapes */
 613                end = out = start;
 614                while (*end) {
 615                        /* Everything apart from '\0' can be quoted */
 616                        if (*end == '\\' && *(end + 1)) {
 617                                *out++ = *(end + 1);
 618                                end += 2;
 619                                continue;
 620                        }
 621
 622                        if (isspace(*end))
 623                                break;  /* end of token */
 624
 625                        *out++ = *end++;
 626                }
 627
  628                /* have we already filled the array? */
 629                if ((*argc + 1) > array_size) {
 630                        argv = realloc_argv(&array_size, argv);
 631                        if (!argv)
 632                                return -ENOMEM;
 633                }
 634
 635                /* we know this is whitespace */
 636                if (*end)
 637                        end++;
 638
 639                /* terminate the string and put it in the array */
 640                *out = '\0';
 641                argv[*argc] = start;
 642                (*argc)++;
 643        }
 644
 645        *argvp = argv;
 646        return 0;
 647}
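
/*
 * Example of the splitting rules above (hypothetical input): the string
 * "0 1024 linear /dev/mapper/a\ b 0" yields argc == 5, with argv[3] set
 * to "/dev/mapper/a b" because the backslash escapes the space and is
 * dropped from the output.
 */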
 648
 649/*
  650 * Impose necessary and sufficient conditions on a device's table such
 651 * that any incoming bio which respects its logical_block_size can be
 652 * processed successfully.  If it falls across the boundary between
 653 * two or more targets, the size of each piece it gets split into must
 654 * be compatible with the logical_block_size of the target processing it.
 655 */
 656static int validate_hardware_logical_block_alignment(struct dm_table *table,
 657                                                 struct queue_limits *limits)
 658{
 659        /*
 660         * This function uses arithmetic modulo the logical_block_size
 661         * (in units of 512-byte sectors).
 662         */
 663        unsigned short device_logical_block_size_sects =
 664                limits->logical_block_size >> SECTOR_SHIFT;
 665
 666        /*
 667         * Offset of the start of the next table entry, mod logical_block_size.
 668         */
 669        unsigned short next_target_start = 0;
 670
 671        /*
 672         * Given an aligned bio that extends beyond the end of a
 673         * target, how many sectors must the next target handle?
 674         */
 675        unsigned short remaining = 0;
 676
 677        struct dm_target *uninitialized_var(ti);
 678        struct queue_limits ti_limits;
 679        unsigned i = 0;
 680
 681        /*
 682         * Check each entry in the table in turn.
 683         */
 684        while (i < dm_table_get_num_targets(table)) {
 685                ti = dm_table_get_target(table, i++);
 686
 687                blk_set_default_limits(&ti_limits);
 688
 689                /* combine all target devices' limits */
 690                if (ti->type->iterate_devices)
 691                        ti->type->iterate_devices(ti, dm_set_device_limits,
 692                                                  &ti_limits);
 693
 694                /*
 695                 * If the remaining sectors fall entirely within this
  696                 * table entry, are they compatible with its logical_block_size?
 697                 */
 698                if (remaining < ti->len &&
 699                    remaining & ((ti_limits.logical_block_size >>
 700                                  SECTOR_SHIFT) - 1))
 701                        break;  /* Error */
 702
 703                next_target_start =
 704                    (unsigned short) ((next_target_start + ti->len) &
 705                                      (device_logical_block_size_sects - 1));
 706                remaining = next_target_start ?
 707                    device_logical_block_size_sects - next_target_start : 0;
 708        }
 709
 710        if (remaining) {
 711                DMWARN("%s: table line %u (start sect %llu len %llu) "
 712                       "not aligned to h/w logical block size %u",
 713                       dm_device_name(table->md), i,
 714                       (unsigned long long) ti->begin,
 715                       (unsigned long long) ti->len,
 716                       limits->logical_block_size);
 717                return -EINVAL;
 718        }
 719
 720        return 0;
 721}
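
/*
 * Worked example of the check above (hypothetical table): with a 4096-byte
 * logical block size (8 sectors), a first target 100 sectors long leaves
 * next_target_start == 100 % 8 == 4 and remaining == 4.  If the next
 * target is more than 4 sectors long and also requires 4096-byte logical
 * blocks, "remaining & 7" is non-zero and the table is rejected: splitting
 * an aligned 4 KiB bio at that boundary would hand each target a fragment
 * smaller than its logical block.
 */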
 722
 723int dm_table_add_target(struct dm_table *t, const char *type,
 724                        sector_t start, sector_t len, char *params)
 725{
 726        int r = -EINVAL, argc;
 727        char **argv;
 728        struct dm_target *tgt;
 729
 730        if ((r = check_space(t)))
 731                return r;
 732
 733        tgt = t->targets + t->num_targets;
 734        memset(tgt, 0, sizeof(*tgt));
 735
 736        if (!len) {
 737                DMERR("%s: zero-length target", dm_device_name(t->md));
 738                return -EINVAL;
 739        }
 740
 741        tgt->type = dm_get_target_type(type);
 742        if (!tgt->type) {
 743                DMERR("%s: %s: unknown target type", dm_device_name(t->md),
 744                      type);
 745                return -EINVAL;
 746        }
 747
 748        tgt->table = t;
 749        tgt->begin = start;
 750        tgt->len = len;
 751        tgt->error = "Unknown error";
 752
 753        /*
  754         * Does this target adjoin the previous one?
 755         */
 756        if (!adjoin(t, tgt)) {
 757                tgt->error = "Gap in table";
 758                r = -EINVAL;
 759                goto bad;
 760        }
 761
 762        r = dm_split_args(&argc, &argv, params);
 763        if (r) {
 764                tgt->error = "couldn't split parameters (insufficient memory)";
 765                goto bad;
 766        }
 767
 768        r = tgt->type->ctr(tgt, argc, argv);
 769        kfree(argv);
 770        if (r)
 771                goto bad;
 772
 773        t->highs[t->num_targets++] = tgt->begin + tgt->len - 1;
 774
 775        return 0;
 776
 777 bad:
 778        DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error);
 779        dm_put_target_type(tgt->type);
 780        return r;
 781}
 782
 783int dm_table_set_type(struct dm_table *t)
 784{
 785        unsigned i;
 786        unsigned bio_based = 0, request_based = 0;
 787        struct dm_target *tgt;
 788        struct dm_dev_internal *dd;
 789        struct list_head *devices;
 790
 791        for (i = 0; i < t->num_targets; i++) {
 792                tgt = t->targets + i;
 793                if (dm_target_request_based(tgt))
 794                        request_based = 1;
 795                else
 796                        bio_based = 1;
 797
 798                if (bio_based && request_based) {
 799                        DMWARN("Inconsistent table: different target types"
 800                               " can't be mixed up");
 801                        return -EINVAL;
 802                }
 803        }
 804
 805        if (bio_based) {
 806                /* We must use this table as bio-based */
 807                t->type = DM_TYPE_BIO_BASED;
 808                return 0;
 809        }
 810
 811        BUG_ON(!request_based); /* No targets in this table */
 812
 813        /* Non-request-stackable devices can't be used for request-based dm */
 814        devices = dm_table_get_devices(t);
 815        list_for_each_entry(dd, devices, list) {
 816                if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) {
 817                        DMWARN("table load rejected: including"
 818                               " non-request-stackable devices");
 819                        return -EINVAL;
 820                }
 821        }
 822
 823        /*
  824         * For now, request-based dm supports only tables that have a single target.
 825         * To support multiple targets, request splitting support is needed,
 826         * and that needs lots of changes in the block-layer.
 827         * (e.g. request completion process for partial completion.)
 828         */
 829        if (t->num_targets > 1) {
 830                DMWARN("Request-based dm doesn't support multiple targets yet");
 831                return -EINVAL;
 832        }
 833
 834        t->type = DM_TYPE_REQUEST_BASED;
 835
 836        return 0;
 837}
 838
 839unsigned dm_table_get_type(struct dm_table *t)
 840{
 841        return t->type;
 842}
 843
 844bool dm_table_request_based(struct dm_table *t)
 845{
 846        return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
 847}
 848
 849int dm_table_alloc_md_mempools(struct dm_table *t)
 850{
 851        unsigned type = dm_table_get_type(t);
 852
 853        if (unlikely(type == DM_TYPE_NONE)) {
 854                DMWARN("no table type is set, can't allocate mempools");
 855                return -EINVAL;
 856        }
 857
 858        t->mempools = dm_alloc_md_mempools(type);
 859        if (!t->mempools)
 860                return -ENOMEM;
 861
 862        return 0;
 863}
 864
 865void dm_table_free_md_mempools(struct dm_table *t)
 866{
 867        dm_free_md_mempools(t->mempools);
 868        t->mempools = NULL;
 869}
 870
 871struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
 872{
 873        return t->mempools;
 874}
 875
 876static int setup_indexes(struct dm_table *t)
 877{
 878        int i;
 879        unsigned int total = 0;
 880        sector_t *indexes;
 881
 882        /* allocate the space for *all* the indexes */
 883        for (i = t->depth - 2; i >= 0; i--) {
 884                t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE);
 885                total += t->counts[i];
 886        }
 887
 888        indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE);
 889        if (!indexes)
 890                return -ENOMEM;
 891
 892        /* set up internal nodes, bottom-up */
 893        for (i = t->depth - 2; i >= 0; i--) {
 894                t->index[i] = indexes;
 895                indexes += (KEYS_PER_NODE * t->counts[i]);
 896                setup_btree_index(i, t);
 897        }
 898
 899        return 0;
 900}
 901
 902/*
 903 * Builds the btree to index the map.
 904 */
 905int dm_table_complete(struct dm_table *t)
 906{
 907        int r = 0;
 908        unsigned int leaf_nodes;
 909
  910        /* how many indexes will the btree have? */
 911        leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE);
 912        t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE);
 913
 914        /* leaf layer has already been set up */
 915        t->counts[t->depth - 1] = leaf_nodes;
 916        t->index[t->depth - 1] = t->highs;
 917
 918        if (t->depth >= 2)
 919                r = setup_indexes(t);
 920
 921        return r;
 922}
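
/*
 * Example of the index sizing (assuming KEYS_PER_NODE == 8, hence
 * CHILDREN_PER_NODE == 9): a table with 100 targets needs
 * dm_div_up(100, 8) == 13 leaf nodes and int_log(13, 9) == 2, so
 * depth == 3; setup_indexes() then builds the two internal levels above
 * the leaf level already held in t->highs.
 */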
 923
 924static DEFINE_MUTEX(_event_lock);
 925void dm_table_event_callback(struct dm_table *t,
 926                             void (*fn)(void *), void *context)
 927{
 928        mutex_lock(&_event_lock);
 929        t->event_fn = fn;
 930        t->event_context = context;
 931        mutex_unlock(&_event_lock);
 932}
 933
 934void dm_table_event(struct dm_table *t)
 935{
 936        /*
 937         * You can no longer call dm_table_event() from interrupt
  938         * context; use a bottom half instead.
 939         */
 940        BUG_ON(in_interrupt());
 941
 942        mutex_lock(&_event_lock);
 943        if (t->event_fn)
 944                t->event_fn(t->event_context);
 945        mutex_unlock(&_event_lock);
 946}
 947
 948sector_t dm_table_get_size(struct dm_table *t)
 949{
 950        return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0;
 951}
 952
 953struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index)
 954{
 955        if (index >= t->num_targets)
 956                return NULL;
 957
 958        return t->targets + index;
 959}
 960
 961/*
 962 * Search the btree for the correct target.
 963 *
 964 * Caller should check returned pointer with dm_target_is_valid()
 965 * to trap I/O beyond end of device.
 966 */
 967struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector)
 968{
 969        unsigned int l, n = 0, k = 0;
 970        sector_t *node;
 971
 972        for (l = 0; l < t->depth; l++) {
 973                n = get_child(n, k);
 974                node = get_node(t, l, n);
 975
 976                for (k = 0; k < KEYS_PER_NODE; k++)
 977                        if (node[k] >= sector)
 978                                break;
 979        }
 980
 981        return &t->targets[(KEYS_PER_NODE * n) + k];
 982}
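
/*
 * Typical lookup on the I/O path (illustrative sketch; "bio" stands for
 * an incoming request whose start sector is being mapped):
 *
 *        struct dm_target *ti = dm_table_find_target(t, bio->bi_sector);
 *
 *        if (!dm_target_is_valid(ti))
 *                return -EIO;
 *
 * The empty entry appended by alloc_targets() catches sectors beyond the
 * end of the device, which is exactly what dm_target_is_valid() is meant
 * to trap.
 */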
 983
 984/*
 985 * Establish the new table's queue_limits and validate them.
 986 */
 987int dm_calculate_queue_limits(struct dm_table *table,
 988                              struct queue_limits *limits)
 989{
 990        struct dm_target *uninitialized_var(ti);
 991        struct queue_limits ti_limits;
 992        unsigned i = 0;
 993
 994        blk_set_default_limits(limits);
 995
 996        while (i < dm_table_get_num_targets(table)) {
 997                blk_set_default_limits(&ti_limits);
 998
 999                ti = dm_table_get_target(table, i++);
1000
1001                if (!ti->type->iterate_devices)
1002                        goto combine_limits;
1003
1004                /*
1005                 * Combine queue limits of all the devices this target uses.
1006                 */
1007                ti->type->iterate_devices(ti, dm_set_device_limits,
1008                                          &ti_limits);
1009
1010                /* Set I/O hints portion of queue limits */
1011                if (ti->type->io_hints)
1012                        ti->type->io_hints(ti, &ti_limits);
1013
1014                /*
1015                 * Check each device area is consistent with the target's
1016                 * overall queue limits.
1017                 */
1018                if (ti->type->iterate_devices(ti, device_area_is_invalid,
1019                                              &ti_limits))
1020                        return -EINVAL;
1021
1022combine_limits:
1023                /*
1024                 * Merge this target's queue limits into the overall limits
1025                 * for the table.
1026                 */
1027                if (blk_stack_limits(limits, &ti_limits, 0) < 0)
1028                        DMWARN("%s: target device "
1029                               "(start sect %llu len %llu) "
1030                               "is misaligned",
1031                               dm_device_name(table->md),
1032                               (unsigned long long) ti->begin,
1033                               (unsigned long long) ti->len);
1034        }
1035
1036        return validate_hardware_logical_block_alignment(table, limits);
1037}
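
/*
 * The two halves normally go together when a table is bound to a device
 * (illustrative sketch; "q" is the mapped device's request_queue and
 * error handling is omitted):
 *
 *        struct queue_limits limits;
 *
 *        if (!dm_calculate_queue_limits(t, &limits))
 *                dm_table_set_restrictions(t, q, &limits);
 */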
1038
1039/*
1040 * Set the integrity profile for this device if all devices used have
1041 * matching profiles.
1042 */
1043static void dm_table_set_integrity(struct dm_table *t)
1044{
1045        struct list_head *devices = dm_table_get_devices(t);
1046        struct dm_dev_internal *prev = NULL, *dd = NULL;
1047
1048        if (!blk_get_integrity(dm_disk(t->md)))
1049                return;
1050
1051        list_for_each_entry(dd, devices, list) {
1052                if (prev &&
1053                    blk_integrity_compare(prev->dm_dev.bdev->bd_disk,
1054                                          dd->dm_dev.bdev->bd_disk) < 0) {
1055                        DMWARN("%s: integrity not set: %s and %s mismatch",
1056                               dm_device_name(t->md),
1057                               prev->dm_dev.bdev->bd_disk->disk_name,
1058                               dd->dm_dev.bdev->bd_disk->disk_name);
1059                        goto no_integrity;
1060                }
1061                prev = dd;
1062        }
1063
1064        if (!prev || !bdev_get_integrity(prev->dm_dev.bdev))
1065                goto no_integrity;
1066
1067        blk_integrity_register(dm_disk(t->md),
1068                               bdev_get_integrity(prev->dm_dev.bdev));
1069
1070        return;
1071
1072no_integrity:
1073        blk_integrity_register(dm_disk(t->md), NULL);
1074
1075        return;
1076}
1077
1078void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1079                               struct queue_limits *limits)
1080{
1081        /*
1082         * Each target device in the table has a data area that should normally
1083         * be aligned such that the DM device's alignment_offset is 0.
1084         * FIXME: Propagate alignment_offsets up the stack and warn of
1085         *        sub-optimal or inconsistent settings.
1086         */
1087        limits->alignment_offset = 0;
1088        limits->misaligned = 0;
1089
1090        /*
1091         * Copy table's limits to the DM device's request_queue
1092         */
1093        q->limits = *limits;
1094
1095        if (limits->no_cluster)
1096                queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
1097        else
1098                queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q);
1099
1100        dm_table_set_integrity(t);
1101
1102        /*
1103         * QUEUE_FLAG_STACKABLE must be set after all queue settings are
1104         * visible to other CPUs because, once the flag is set, incoming bios
1105         * are processed by request-based dm, which refers to the queue
1106         * settings.
 1107         * Until the flag is set, bios are passed to bio-based dm and queued to
1108         * md->deferred where queue settings are not needed yet.
1109         * Those bios are passed to request-based dm at the resume time.
1110         */
1111        smp_mb();
1112        if (dm_table_request_based(t))
1113                queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q);
1114}
1115
1116unsigned int dm_table_get_num_targets(struct dm_table *t)
1117{
1118        return t->num_targets;
1119}
1120
1121struct list_head *dm_table_get_devices(struct dm_table *t)
1122{
1123        return &t->devices;
1124}
1125
1126fmode_t dm_table_get_mode(struct dm_table *t)
1127{
1128        return t->mode;
1129}
1130
1131static void suspend_targets(struct dm_table *t, unsigned postsuspend)
1132{
1133        int i = t->num_targets;
1134        struct dm_target *ti = t->targets;
1135
1136        while (i--) {
1137                if (postsuspend) {
1138                        if (ti->type->postsuspend)
1139                                ti->type->postsuspend(ti);
1140                } else if (ti->type->presuspend)
1141                        ti->type->presuspend(ti);
1142
1143                ti++;
1144        }
1145}
1146
1147void dm_table_presuspend_targets(struct dm_table *t)
1148{
1149        if (!t)
1150                return;
1151
1152        suspend_targets(t, 0);
1153}
1154
1155void dm_table_postsuspend_targets(struct dm_table *t)
1156{
1157        if (!t)
1158                return;
1159
1160        suspend_targets(t, 1);
1161}
1162
1163int dm_table_resume_targets(struct dm_table *t)
1164{
1165        int i, r = 0;
1166
1167        for (i = 0; i < t->num_targets; i++) {
1168                struct dm_target *ti = t->targets + i;
1169
1170                if (!ti->type->preresume)
1171                        continue;
1172
1173                r = ti->type->preresume(ti);
1174                if (r)
1175                        return r;
1176        }
1177
1178        for (i = 0; i < t->num_targets; i++) {
1179                struct dm_target *ti = t->targets + i;
1180
1181                if (ti->type->resume)
1182                        ti->type->resume(ti);
1183        }
1184
1185        return 0;
1186}
1187
1188int dm_table_any_congested(struct dm_table *t, int bdi_bits)
1189{
1190        struct dm_dev_internal *dd;
1191        struct list_head *devices = dm_table_get_devices(t);
1192        int r = 0;
1193
1194        list_for_each_entry(dd, devices, list) {
1195                struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
1196                char b[BDEVNAME_SIZE];
1197
1198                if (likely(q))
1199                        r |= bdi_congested(&q->backing_dev_info, bdi_bits);
1200                else
1201                        DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
1202                                     dm_device_name(t->md),
1203                                     bdevname(dd->dm_dev.bdev, b));
1204        }
1205
1206        return r;
1207}
1208
1209int dm_table_any_busy_target(struct dm_table *t)
1210{
1211        unsigned i;
1212        struct dm_target *ti;
1213
1214        for (i = 0; i < t->num_targets; i++) {
1215                ti = t->targets + i;
1216                if (ti->type->busy && ti->type->busy(ti))
1217                        return 1;
1218        }
1219
1220        return 0;
1221}
1222
1223void dm_table_unplug_all(struct dm_table *t)
1224{
1225        struct dm_dev_internal *dd;
1226        struct list_head *devices = dm_table_get_devices(t);
1227
1228        list_for_each_entry(dd, devices, list) {
1229                struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
1230                char b[BDEVNAME_SIZE];
1231
1232                if (likely(q))
1233                        blk_unplug(q);
1234                else
1235                        DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s",
1236                                     dm_device_name(t->md),
1237                                     bdevname(dd->dm_dev.bdev, b));
1238        }
1239}
1240
1241struct mapped_device *dm_table_get_md(struct dm_table *t)
1242{
1243        dm_get(t->md);
1244
1245        return t->md;
1246}
1247
1248EXPORT_SYMBOL(dm_vcalloc);
1249EXPORT_SYMBOL(dm_get_device);
1250EXPORT_SYMBOL(dm_put_device);
1251EXPORT_SYMBOL(dm_table_event);
1252EXPORT_SYMBOL(dm_table_get_size);
1253EXPORT_SYMBOL(dm_table_get_mode);
1254EXPORT_SYMBOL(dm_table_get_md);
1255EXPORT_SYMBOL(dm_table_put);
1256EXPORT_SYMBOL(dm_table_get);
1257EXPORT_SYMBOL(dm_table_unplug_all);
1258