linux/block/partitions/core.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991-1998  Linus Torvalds
 * Re-organised Feb 1998 Russell King
 * Copyright (C) 2020 Christoph Hellwig
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/vmalloc.h>
#include <linux/blktrace_api.h>
#include <linux/raid/detect.h>
#include "check.h"

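/*
 * NULL-terminated table of partition table parsers, tried in order by
 * check_partition() until one succeeds.  Each parser returns 1 if it
 * recognised a table, 0 if it did not, and a negative errno on I/O error.
 */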
static int (*check_part[])(struct parsed_partitions *) = {
        /*
         * Probe partition formats with tables at disk address 0
         * that also have an ADFS boot block at 0xdc0.
         */
#ifdef CONFIG_ACORN_PARTITION_ICS
        adfspart_check_ICS,
#endif
#ifdef CONFIG_ACORN_PARTITION_POWERTEC
        adfspart_check_POWERTEC,
#endif
#ifdef CONFIG_ACORN_PARTITION_EESOX
        adfspart_check_EESOX,
#endif

        /*
         * Now move on to formats that only have partition info at
         * disk address 0xdc0.  Since these may also have stale
         * PC/BIOS partition tables, they need to come before
         * the msdos entry.
         */
#ifdef CONFIG_ACORN_PARTITION_CUMANA
        adfspart_check_CUMANA,
#endif
#ifdef CONFIG_ACORN_PARTITION_ADFS
        adfspart_check_ADFS,
#endif

#ifdef CONFIG_CMDLINE_PARTITION
        cmdline_partition,
#endif
#ifdef CONFIG_EFI_PARTITION
        efi_partition,          /* this must come before msdos */
#endif
#ifdef CONFIG_SGI_PARTITION
        sgi_partition,
#endif
#ifdef CONFIG_LDM_PARTITION
        ldm_partition,          /* this must come before msdos */
#endif
#ifdef CONFIG_MSDOS_PARTITION
        msdos_partition,
#endif
#ifdef CONFIG_OSF_PARTITION
        osf_partition,
#endif
#ifdef CONFIG_SUN_PARTITION
        sun_partition,
#endif
#ifdef CONFIG_AMIGA_PARTITION
        amiga_partition,
#endif
#ifdef CONFIG_ATARI_PARTITION
        atari_partition,
#endif
#ifdef CONFIG_MAC_PARTITION
        mac_partition,
#endif
#ifdef CONFIG_ULTRIX_PARTITION
        ultrix_partition,
#endif
#ifdef CONFIG_IBM_PARTITION
        ibm_partition,
#endif
#ifdef CONFIG_KARMA_PARTITION
        karma_partition,
#endif
#ifdef CONFIG_SYSV68_PARTITION
        sysv68_partition,
#endif
        NULL
};

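/*
 * Update the byte size of the block device inode from a sector count.
 * bd_size_lock serializes size updates against concurrent readers.
 */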
static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
        spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
        spin_unlock(&bdev->bd_size_lock);
}

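/*
 * Allocate the scratch state for one partition scan, sized for the
 * maximum number of partitions the disk supports.
 */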
static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
{
        struct parsed_partitions *state;
        int nr;

        state = kzalloc(sizeof(*state), GFP_KERNEL);
        if (!state)
                return NULL;

        nr = disk_max_parts(hd);
        state->parts = vzalloc(array_size(nr, sizeof(state->parts[0])));
        if (!state->parts) {
                kfree(state);
                return NULL;
        }

        state->limit = nr;

        return state;
}

static void free_partitions(struct parsed_partitions *state)
{
        vfree(state->parts);
        kfree(state);
}

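/*
 * Run each compiled-in parser against the disk until one recognises a
 * partition table.  Returns the parsed state if a table was found, NULL if
 * there is nothing to report (no table, or allocation failure), and an
 * ERR_PTR if the table could not be read.
 */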
static struct parsed_partitions *check_partition(struct gendisk *hd,
                struct block_device *bdev)
{
        struct parsed_partitions *state;
        int i, res, err;

        state = allocate_partitions(hd);
        if (!state)
                return NULL;
        state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
        if (!state->pp_buf) {
                free_partitions(state);
                return NULL;
        }
        state->pp_buf[0] = '\0';

        state->bdev = bdev;
        disk_name(hd, 0, state->name);
        snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
        if (isdigit(state->name[strlen(state->name)-1]))
                sprintf(state->name, "p");

        i = res = err = 0;
        while (!res && check_part[i]) {
                memset(state->parts, 0, state->limit * sizeof(state->parts[0]));
                res = check_part[i++](state);
                if (res < 0) {
                        /*
                         * We have hit an I/O error which we don't report now.
                         * But record it, and let the others do their job.
                         */
                        err = res;
                        res = 0;
                }
        }
        if (res > 0) {
                printk(KERN_INFO "%s", state->pp_buf);

                free_page((unsigned long)state->pp_buf);
                return state;
        }
        if (state->access_beyond_eod)
                err = -ENOSPC;
        /*
         * The partition table is unrecognized, so report any I/O errors
         * recorded above.
         */
        if (err)
                res = err;
        if (res) {
                strlcat(state->pp_buf,
                        " unable to read partition table\n", PAGE_SIZE);
                printk(KERN_INFO "%s", state->pp_buf);
        }

        free_page((unsigned long)state->pp_buf);
        free_partitions(state);
        return ERR_PTR(res);
}

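/*
 * Per-partition sysfs attributes, shown under /sys/block/<disk>/<partition>/.
 */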
static ssize_t part_partition_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno);
}

static ssize_t part_start_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect);
}

static ssize_t part_ro_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        return sprintf(buf, "%d\n", bdev_read_only(dev_to_bdev(dev)));
}

static ssize_t part_alignment_offset_show(struct device *dev,
                                          struct device_attribute *attr, char *buf)
{
        struct block_device *bdev = dev_to_bdev(dev);

        return sprintf(buf, "%u\n",
                queue_limit_alignment_offset(&bdev->bd_disk->queue->limits,
                                bdev->bd_start_sect));
}

static ssize_t part_discard_alignment_show(struct device *dev,
                                           struct device_attribute *attr, char *buf)
{
        struct block_device *bdev = dev_to_bdev(dev);

        return sprintf(buf, "%u\n",
                queue_limit_discard_alignment(&bdev->bd_disk->queue->limits,
                                bdev->bd_start_sect));
}

static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif

static struct attribute *part_attrs[] = {
        &dev_attr_partition.attr,
        &dev_attr_start.attr,
        &dev_attr_size.attr,
        &dev_attr_ro.attr,
        &dev_attr_alignment_offset.attr,
        &dev_attr_discard_alignment.attr,
        &dev_attr_stat.attr,
        &dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
#endif
        NULL
};

static struct attribute_group part_attr_group = {
        .attrs = part_attrs,
};

static const struct attribute_group *part_attr_groups[] = {
        &part_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
        &blk_trace_attr_group,
#endif
        NULL
};

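/*
 * Called when the last reference to the partition's struct device goes
 * away: free the dev_t and drop the block_device reference backing it.
 */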
static void part_release(struct device *dev)
{
        blk_free_devt(dev->devt);
        bdput(dev_to_bdev(dev));
}

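/*
 * Add PARTN (and PARTNAME, when a volume name is known) to the uevents
 * generated for partition devices.
 */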
static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
{
        struct block_device *part = dev_to_bdev(dev);

        add_uevent_var(env, "PARTN=%u", part->bd_partno);
        if (part->bd_meta_info && part->bd_meta_info->volname[0])
                add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname);
        return 0;
}

struct device_type part_type = {
        .name           = "partition",
        .groups         = part_attr_groups,
        .release        = part_release,
        .uevent         = part_uevent,
};

/*
 * Must be called either with bd_mutex held, before the disk can be opened,
 * or after all disk users are gone.
 */
static void delete_partition(struct block_device *part)
{
        fsync_bdev(part);
        __invalidate_device(part, true);

        xa_erase(&part->bd_disk->part_tbl, part->bd_partno);
        kobject_put(part->bd_holder_dir);
        device_del(&part->bd_device);

        /*
         * Remove the block device from the inode hash, so that it cannot be
         * looked up any more even when openers still hold references.
         */
        remove_inode_hash(part->bd_inode);

        put_device(&part->bd_device);
}

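/*
 * The mere existence of the 'whole_disk' attribute marks a partition that
 * spans the entire disk (ADDPART_FLAG_WHOLEDISK); it carries no data of its
 * own, so reading it yields an empty file.
 */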
static ssize_t whole_disk_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        return 0;
}
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);

/*
 * Must be called either with bd_mutex held, before the disk can be opened,
 * or after all disk users are gone.
 */
static struct block_device *add_partition(struct gendisk *disk, int partno,
                                sector_t start, sector_t len, int flags,
                                struct partition_meta_info *info)
{
        dev_t devt = MKDEV(0, 0);
        struct device *ddev = disk_to_dev(disk);
        struct device *pdev;
        struct block_device *bdev;
        const char *dname;
        int err;

        /*
         * disk_max_parts() is nonzero here: either GENHD_FL_EXT_DEVT is set
         * or 'minors' was passed to alloc_disk().
         */
        if (partno >= disk_max_parts(disk))
                return ERR_PTR(-EINVAL);

        /*
         * Partitions are not supported on zoned block devices that are used as
         * such.
         */
        switch (disk->queue->limits.zoned) {
        case BLK_ZONED_HM:
                pr_warn("%s: partitions not supported on host managed zoned block device\n",
                        disk->disk_name);
                return ERR_PTR(-ENXIO);
        case BLK_ZONED_HA:
                pr_info("%s: disabling host aware zoned block device support due to partitions\n",
                        disk->disk_name);
                blk_queue_set_zoned(disk, BLK_ZONED_NONE);
                break;
        case BLK_ZONED_NONE:
                break;
        }

        if (xa_load(&disk->part_tbl, partno))
                return ERR_PTR(-EBUSY);

        bdev = bdev_alloc(disk, partno);
        if (!bdev)
                return ERR_PTR(-ENOMEM);

        bdev->bd_start_sect = start;
        bdev_set_nr_sectors(bdev, len);

        if (info) {
                err = -ENOMEM;
                bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL);
                if (!bdev->bd_meta_info)
                        goto out_bdput;
        }

        pdev = &bdev->bd_device;
        dname = dev_name(ddev);
        if (isdigit(dname[strlen(dname) - 1]))
                dev_set_name(pdev, "%sp%d", dname, partno);
        else
                dev_set_name(pdev, "%s%d", dname, partno);

        device_initialize(pdev);
        pdev->class = &block_class;
        pdev->type = &part_type;
        pdev->parent = ddev;

        err = blk_alloc_devt(bdev, &devt);
        if (err)
                goto out_put;
        pdev->devt = devt;

        /* delay uevent until 'holders' subdir is created */
        dev_set_uevent_suppress(pdev, 1);
        err = device_add(pdev);
        if (err)
                goto out_put;

        err = -ENOMEM;
        bdev->bd_holder_dir = kobject_create_and_add("holders", &pdev->kobj);
        if (!bdev->bd_holder_dir)
                goto out_del;

        dev_set_uevent_suppress(pdev, 0);
        if (flags & ADDPART_FLAG_WHOLEDISK) {
                err = device_create_file(pdev, &dev_attr_whole_disk);
                if (err)
                        goto out_del;
        }

        /* everything is up and running, commence */
        err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL);
        if (err)
                goto out_del;
        bdev_add(bdev, devt);

        /* suppress uevent if the disk suppresses it */
        if (!dev_get_uevent_suppress(ddev))
                kobject_uevent(&pdev->kobj, KOBJ_ADD);
        return bdev;

out_bdput:
        bdput(bdev);
        return ERR_PTR(err);
out_del:
        kobject_put(bdev->bd_holder_dir);
        device_del(pdev);
out_put:
        put_device(pdev);
        return ERR_PTR(err);
}

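/*
 * Return true if the range [start, start + length) overlaps any existing
 * partition other than skip_partno (pass -1 to check against all of them).
 */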
static bool partition_overlaps(struct gendisk *disk, sector_t start,
                sector_t length, int skip_partno)
{
        struct block_device *part;
        bool overlap = false;
        unsigned long idx;

        rcu_read_lock();
        xa_for_each_start(&disk->part_tbl, idx, part, 1) {
                if (part->bd_partno != skip_partno &&
                    start < part->bd_start_sect + bdev_nr_sectors(part) &&
                    start + length > part->bd_start_sect) {
                        overlap = true;
                        break;
                }
        }
        rcu_read_unlock();

        return overlap;
}

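/*
 * Add a partition to a live disk, e.g. on behalf of the BLKPG_ADD_PARTITION
 * ioctl.  Fails with -EBUSY if the new range overlaps an existing partition.
 */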
int bdev_add_partition(struct block_device *bdev, int partno,
                sector_t start, sector_t length)
{
        struct block_device *part;

        mutex_lock(&bdev->bd_mutex);
        if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
                mutex_unlock(&bdev->bd_mutex);
                return -EBUSY;
        }

        part = add_partition(bdev->bd_disk, partno, start, length,
                        ADDPART_FLAG_NONE, NULL);
        mutex_unlock(&bdev->bd_mutex);
        return PTR_ERR_OR_ZERO(part);
}

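/*
 * Remove a partition from a live disk, e.g. on behalf of the
 * BLKPG_DEL_PARTITION ioctl.  Fails with -EBUSY while the partition is open.
 */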
int bdev_del_partition(struct block_device *bdev, int partno)
{
        struct block_device *part;
        int ret;

        part = bdget_disk(bdev->bd_disk, partno);
        if (!part)
                return -ENXIO;

        mutex_lock(&part->bd_mutex);
        mutex_lock_nested(&bdev->bd_mutex, 1);

        ret = -EBUSY;
        if (part->bd_openers)
                goto out_unlock;

        delete_partition(part);
        ret = 0;
out_unlock:
        mutex_unlock(&bdev->bd_mutex);
        mutex_unlock(&part->bd_mutex);
        bdput(part);
        return ret;
}

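/*
 * Resize a partition in place, e.g. on behalf of the BLKPG_RESIZE_PARTITION
 * ioctl.  Only the length may change; the start sector must stay the same.
 */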
int bdev_resize_partition(struct block_device *bdev, int partno,
                sector_t start, sector_t length)
{
        struct block_device *part;
        int ret = 0;

        part = bdget_disk(bdev->bd_disk, partno);
        if (!part)
                return -ENXIO;

        mutex_lock(&part->bd_mutex);
        mutex_lock_nested(&bdev->bd_mutex, 1);
        ret = -EINVAL;
        if (start != part->bd_start_sect)
                goto out_unlock;

        ret = -EBUSY;
        if (partition_overlaps(bdev->bd_disk, start, length, partno))
                goto out_unlock;

        bdev_set_nr_sectors(part, length);

        ret = 0;
out_unlock:
        mutex_unlock(&part->bd_mutex);
        mutex_unlock(&bdev->bd_mutex);
        bdput(part);
        return ret;
}

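/*
 * Ask the driver to unlock the disk's native capacity (e.g. a disk clipped
 * by an ATA host protected area).  Returns true if the capacity may have
 * changed and the caller should rescan, false if nothing could be done.
 */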
static bool disk_unlock_native_capacity(struct gendisk *disk)
{
        const struct block_device_operations *bdops = disk->fops;

        if (bdops->unlock_native_capacity &&
            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
                printk(KERN_CONT "enabling native capacity\n");
                bdops->unlock_native_capacity(disk);
                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
                return true;
        } else {
                printk(KERN_CONT "truncated\n");
                return false;
        }
}

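/*
 * Delete every partition of the disk.  The caller must hold bd_mutex of the
 * whole-disk block device, as asserted below.
 */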
void blk_drop_partitions(struct gendisk *disk)
{
        struct block_device *part;
        unsigned long idx;

        lockdep_assert_held(&disk->part0->bd_mutex);

        xa_for_each_start(&disk->part_tbl, idx, part, 1) {
                if (!bdgrab(part))
                        continue;
                delete_partition(part);
                bdput(part);
        }
}

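/*
 * Add one parsed partition to the disk.  Returns false if the scan should
 * be restarted because the native capacity was unlocked, true otherwise.
 */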
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
                struct parsed_partitions *state, int p)
{
        sector_t size = state->parts[p].size;
        sector_t from = state->parts[p].from;
        struct block_device *part;

        if (!size)
                return true;

        if (from >= get_capacity(disk)) {
                printk(KERN_WARNING
                       "%s: p%d start %llu is beyond EOD, ",
                       disk->disk_name, p, (unsigned long long) from);
                if (disk_unlock_native_capacity(disk))
                        return false;
                return true;
        }

        if (from + size > get_capacity(disk)) {
                printk(KERN_WARNING
                       "%s: p%d size %llu extends beyond EOD, ",
                       disk->disk_name, p, (unsigned long long) size);

                if (disk_unlock_native_capacity(disk))
                        return false;

                /*
                 * We cannot ignore partitions from broken tables created by,
                 * for example, camera firmware, but we limit them to the end
                 * of the disk to avoid creating invalid block devices.
                 */
                size = get_capacity(disk) - from;
        }

        part = add_partition(disk, p, from, size, state->parts[p].flags,
                             &state->parts[p].info);
        if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
                printk(KERN_ERR " %s: p%d could not be added: %ld\n",
                       disk->disk_name, p, -PTR_ERR(part));
                return true;
        }

        if (IS_BUILTIN(CONFIG_BLK_DEV_MD) &&
            (state->parts[p].flags & ADDPART_FLAG_RAID))
                md_autodetect_dev(part->bd_dev);

        return true;
}

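/*
 * Scan the partition table and add a block device for each partition found.
 * Returns 0 on success and -EAGAIN if the scan should be retried, e.g.
 * because the native capacity was unlocked.
 */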
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{
        struct parsed_partitions *state;
        int ret = -EAGAIN, p;

        if (!disk_part_scan_enabled(disk))
                return 0;

        state = check_partition(disk, bdev);
        if (!state)
                return 0;
        if (IS_ERR(state)) {
                /*
                 * I/O error reading the partition table.  If we tried to read
                 * beyond EOD, retry after unlocking the native capacity.
                 */
                if (PTR_ERR(state) == -ENOSPC) {
                        printk(KERN_WARNING "%s: partition table beyond EOD, ",
                               disk->disk_name);
                        if (disk_unlock_native_capacity(disk))
                                return -EAGAIN;
                }
                return -EIO;
        }

        /*
         * Partitions are not supported on host managed zoned block devices.
         */
        if (disk->queue->limits.zoned == BLK_ZONED_HM) {
                pr_warn("%s: ignoring partition table on host managed zoned block device\n",
                        disk->disk_name);
                ret = 0;
                goto out_free_state;
        }

        /*
         * If we read beyond EOD, try unlocking native capacity even if the
         * partition table was successfully read as we could be missing some
         * partitions.
         */
        if (state->access_beyond_eod) {
                printk(KERN_WARNING
                       "%s: partition table partially beyond EOD, ",
                       disk->disk_name);
                if (disk_unlock_native_capacity(disk))
                        goto out_free_state;
        }

        /* tell userspace that the media / partition table may have changed */
        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

        for (p = 1; p < state->limit; p++)
                if (!blk_add_partition(disk, bdev, state, p))
                        goto out_free_state;

        ret = 0;
out_free_state:
        free_partitions(state);
        return ret;
}

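/*
 * Read a 512-byte sector through the whole-disk page cache.  On success the
 * page is stored in *p for the caller to release with put_dev_sector(); on
 * failure (including reads beyond the end of the disk) NULL is returned.
 */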
void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p)
{
        struct address_space *mapping = state->bdev->bd_inode->i_mapping;
        struct page *page;

        if (n >= get_capacity(state->bdev->bd_disk)) {
                state->access_beyond_eod = true;
                return NULL;
        }

        page = read_mapping_page(mapping,
                        (pgoff_t)(n >> (PAGE_SHIFT - 9)), NULL);
        if (IS_ERR(page))
                goto out;
        if (PageError(page))
                goto out_put_page;

        p->v = page;
        return (unsigned char *)page_address(page) +
                        ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << SECTOR_SHIFT);
out_put_page:
        put_page(page);
out:
        p->v = NULL;
        return NULL;
}