linux/block/partition-generic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Code extracted from drivers/block/genhd.c
   4 *  Copyright (C) 1991-1998  Linus Torvalds
   5 *  Re-organised Feb 1998 Russell King
   6 *
   7 *  We now have independent partition support from the
   8 *  block drivers, which allows all the partition code to
   9 *  be grouped in one location, and it to be mostly self
  10 *  contained.
  11 */
  12
  13#include <linux/init.h>
  14#include <linux/module.h>
  15#include <linux/fs.h>
  16#include <linux/slab.h>
  17#include <linux/kmod.h>
  18#include <linux/ctype.h>
  19#include <linux/genhd.h>
  20#include <linux/blktrace_api.h>
  21
  22#include "partitions/check.h"
  23
  24#ifdef CONFIG_BLK_DEV_MD
  25extern void md_autodetect_dev(dev_t dev);
  26#endif
  27 
  28/*
  29 * disk_name() is used by partition check code and the genhd driver.
  30 * It formats the devicename of the indicated disk into
  31 * the supplied buffer (of size at least 32), and returns
  32 * a pointer to that same buffer (for convenience).
  33 */
  34
  35char *disk_name(struct gendisk *hd, int partno, char *buf)
  36{
  37        if (!partno)
  38                snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
  39        else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
  40                snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
  41        else
  42                snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
  43
  44        return buf;
  45}
  46
  47const char *bdevname(struct block_device *bdev, char *buf)
  48{
  49        return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
  50}
  51
  52EXPORT_SYMBOL(bdevname);
  53
  54const char *bio_devname(struct bio *bio, char *buf)
  55{
  56        return disk_name(bio->bi_disk, bio->bi_partno, buf);
  57}
  58EXPORT_SYMBOL(bio_devname);
  59
  60/*
  61 * There's very little reason to use this, you should really
  62 * have a struct block_device just about everywhere and use
  63 * bdevname() instead.
  64 */
  65const char *__bdevname(dev_t dev, char *buffer)
  66{
  67        scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
  68                                MAJOR(dev), MINOR(dev));
  69        return buffer;
  70}
  71
  72EXPORT_SYMBOL(__bdevname);
  73
  74static ssize_t part_partition_show(struct device *dev,
  75                                   struct device_attribute *attr, char *buf)
  76{
  77        struct hd_struct *p = dev_to_part(dev);
  78
  79        return sprintf(buf, "%d\n", p->partno);
  80}
  81
  82static ssize_t part_start_show(struct device *dev,
  83                               struct device_attribute *attr, char *buf)
  84{
  85        struct hd_struct *p = dev_to_part(dev);
  86
  87        return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
  88}
  89
  90ssize_t part_size_show(struct device *dev,
  91                       struct device_attribute *attr, char *buf)
  92{
  93        struct hd_struct *p = dev_to_part(dev);
  94        return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
  95}
  96
  97static ssize_t part_ro_show(struct device *dev,
  98                            struct device_attribute *attr, char *buf)
  99{
 100        struct hd_struct *p = dev_to_part(dev);
 101        return sprintf(buf, "%d\n", p->policy ? 1 : 0);
 102}
 103
 104static ssize_t part_alignment_offset_show(struct device *dev,
 105                                          struct device_attribute *attr, char *buf)
 106{
 107        struct hd_struct *p = dev_to_part(dev);
 108        return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
 109}
 110
 111static ssize_t part_discard_alignment_show(struct device *dev,
 112                                           struct device_attribute *attr, char *buf)
 113{
 114        struct hd_struct *p = dev_to_part(dev);
 115        return sprintf(buf, "%u\n", p->discard_alignment);
 116}
 117
 118ssize_t part_stat_show(struct device *dev,
 119                       struct device_attribute *attr, char *buf)
 120{
 121        struct hd_struct *p = dev_to_part(dev);
 122        struct request_queue *q = part_to_disk(p)->queue;
 123        unsigned int inflight;
 124
 125        inflight = part_in_flight(q, p);
 126        return sprintf(buf,
 127                "%8lu %8lu %8llu %8u "
 128                "%8lu %8lu %8llu %8u "
 129                "%8u %8u %8u "
 130                "%8lu %8lu %8llu %8u "
 131                "%8lu %8u"
 132                "\n",
 133                part_stat_read(p, ios[STAT_READ]),
 134                part_stat_read(p, merges[STAT_READ]),
 135                (unsigned long long)part_stat_read(p, sectors[STAT_READ]),
 136                (unsigned int)part_stat_read_msecs(p, STAT_READ),
 137                part_stat_read(p, ios[STAT_WRITE]),
 138                part_stat_read(p, merges[STAT_WRITE]),
 139                (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
 140                (unsigned int)part_stat_read_msecs(p, STAT_WRITE),
 141                inflight,
 142                jiffies_to_msecs(part_stat_read(p, io_ticks)),
 143                jiffies_to_msecs(part_stat_read(p, time_in_queue)),
 144                part_stat_read(p, ios[STAT_DISCARD]),
 145                part_stat_read(p, merges[STAT_DISCARD]),
 146                (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
 147                (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
 148                part_stat_read(p, ios[STAT_FLUSH]),
 149                (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
 150}
 151
 152ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
 153                           char *buf)
 154{
 155        struct hd_struct *p = dev_to_part(dev);
 156        struct request_queue *q = part_to_disk(p)->queue;
 157        unsigned int inflight[2];
 158
 159        part_in_flight_rw(q, p, inflight);
 160        return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
 161}
 162
 163#ifdef CONFIG_FAIL_MAKE_REQUEST
 164ssize_t part_fail_show(struct device *dev,
 165                       struct device_attribute *attr, char *buf)
 166{
 167        struct hd_struct *p = dev_to_part(dev);
 168
 169        return sprintf(buf, "%d\n", p->make_it_fail);
 170}
 171
 172ssize_t part_fail_store(struct device *dev,
 173                        struct device_attribute *attr,
 174                        const char *buf, size_t count)
 175{
 176        struct hd_struct *p = dev_to_part(dev);
 177        int i;
 178
 179        if (count > 0 && sscanf(buf, "%d", &i) > 0)
 180                p->make_it_fail = (i == 0) ? 0 : 1;
 181
 182        return count;
 183}
 184#endif
 185
/*
 * Per-partition sysfs attributes.  Each one is read-only (mode 0444, no
 * store callback) and is backed by the matching part_*_show() function.
 */
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/*
 * Writable (0644) fault-injection switch.  Declared with __ATTR rather than
 * DEVICE_ATTR, presumably because the file name "make-it-fail" contains
 * dashes and so cannot double as part of a C identifier.
 */
static struct device_attribute dev_attr_fail =
        __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif

/* NULL-terminated list of the attributes declared above. */
static struct attribute *part_attrs[] = {
        &dev_attr_partition.attr,
        &dev_attr_start.attr,
        &dev_attr_size.attr,
        &dev_attr_ro.attr,
        &dev_attr_alignment_offset.attr,
        &dev_attr_discard_alignment.attr,
        &dev_attr_stat.attr,
        &dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
        &dev_attr_fail.attr,
#endif
        NULL
};

/* Unnamed group wrapping part_attrs. */
static struct attribute_group part_attr_group = {
        .attrs = part_attrs,
};

/*
 * NULL-terminated list of attribute groups installed through part_type.
 * The blktrace group is included only with CONFIG_BLK_DEV_IO_TRACE.
 */
static const struct attribute_group *part_attr_groups[] = {
        &part_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
        &blk_trace_attr_group,
#endif
        NULL
};
 225
 226static void part_release(struct device *dev)
 227{
 228        struct hd_struct *p = dev_to_part(dev);
 229        blk_free_devt(dev->devt);
 230        hd_free_part(p);
 231        kfree(p);
 232}
 233
 234static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
 235{
 236        struct hd_struct *part = dev_to_part(dev);
 237
 238        add_uevent_var(env, "PARTN=%u", part->partno);
 239        if (part->info && part->info->volname[0])
 240                add_uevent_var(env, "PARTNAME=%s", part->info->volname);
 241        return 0;
 242}
 243
/*
 * Device type of partition devices; add_partition() assigns it to the
 * partition's struct device.  It supplies the sysfs attribute groups, the
 * release callback that frees the partition, and the uevent callback that
 * adds the PARTN / PARTNAME variables.
 */
struct device_type part_type = {
        .name           = "partition",
        .groups         = part_attr_groups,
        .release        = part_release,
        .uevent         = part_uevent,
};
 250
 251static void delete_partition_work_fn(struct work_struct *work)
 252{
 253        struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct,
 254                                        rcu_work);
 255
 256        part->start_sect = 0;
 257        part->nr_sects = 0;
 258        part_stat_set_all(part, 0);
 259        put_device(part_to_dev(part));
 260}
 261
 262void __delete_partition(struct percpu_ref *ref)
 263{
 264        struct hd_struct *part = container_of(ref, struct hd_struct, ref);
 265        INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn);
 266        queue_rcu_work(system_wq, &part->rcu_work);
 267}
 268
 269/*
 270 * Must be called either with bd_mutex held, before a disk can be opened or
 271 * after all disk users are gone.
 272 */
 273void delete_partition(struct gendisk *disk, int partno)
 274{
 275        struct disk_part_tbl *ptbl =
 276                rcu_dereference_protected(disk->part_tbl, 1);
 277        struct hd_struct *part;
 278
 279        if (partno >= ptbl->len)
 280                return;
 281
 282        part = rcu_dereference_protected(ptbl->part[partno], 1);
 283        if (!part)
 284                return;
 285
 286        rcu_assign_pointer(ptbl->part[partno], NULL);
 287        rcu_assign_pointer(ptbl->last_lookup, NULL);
 288        kobject_put(part->holder_dir);
 289        device_del(part_to_dev(part));
 290
 291        /*
 292         * Remove gendisk pointer from idr so that it cannot be looked up
 293         * while RCU period before freeing gendisk is running to prevent
 294         * use-after-free issues. Note that the device number stays
 295         * "in-use" until we really free the gendisk.
 296         */
 297        blk_invalidate_devt(part_devt(part));
 298        hd_struct_kill(part);
 299}
 300
/*
 * sysfs "whole_disk": the show callback writes nothing and returns 0, so the
 * file always reads as empty.  add_partition() creates it only for
 * partitions added with ADDPART_FLAG_WHOLEDISK.
 */
static ssize_t whole_disk_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        return 0;
}
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
 307
 308/*
 309 * Must be called either with bd_mutex held, before a disk can be opened or
 310 * after all disk users are gone.
 311 */
 312struct hd_struct *add_partition(struct gendisk *disk, int partno,
 313                                sector_t start, sector_t len, int flags,
 314                                struct partition_meta_info *info)
 315{
 316        struct hd_struct *p;
 317        dev_t devt = MKDEV(0, 0);
 318        struct device *ddev = disk_to_dev(disk);
 319        struct device *pdev;
 320        struct disk_part_tbl *ptbl;
 321        const char *dname;
 322        int err;
 323
 324        /*
 325         * Partitions are not supported on zoned block devices that are used as
 326         * such.
 327         */
 328        switch (disk->queue->limits.zoned) {
 329        case BLK_ZONED_HM:
 330                pr_warn("%s: partitions not supported on host managed zoned block device\n",
 331                        disk->disk_name);
 332                return ERR_PTR(-ENXIO);
 333        case BLK_ZONED_HA:
 334                pr_info("%s: disabling host aware zoned block device support due to partitions\n",
 335                        disk->disk_name);
 336                disk->queue->limits.zoned = BLK_ZONED_NONE;
 337                break;
 338        case BLK_ZONED_NONE:
 339                break;
 340        }
 341
 342        err = disk_expand_part_tbl(disk, partno);
 343        if (err)
 344                return ERR_PTR(err);
 345        ptbl = rcu_dereference_protected(disk->part_tbl, 1);
 346
 347        if (ptbl->part[partno])
 348                return ERR_PTR(-EBUSY);
 349
 350        p = kzalloc(sizeof(*p), GFP_KERNEL);
 351        if (!p)
 352                return ERR_PTR(-EBUSY);
 353
 354        if (!init_part_stats(p)) {
 355                err = -ENOMEM;
 356                goto out_free;
 357        }
 358
 359        seqcount_init(&p->nr_sects_seq);
 360        pdev = part_to_dev(p);
 361
 362        p->start_sect = start;
 363        p->alignment_offset =
 364                queue_limit_alignment_offset(&disk->queue->limits, start);
 365        p->discard_alignment =
 366                queue_limit_discard_alignment(&disk->queue->limits, start);
 367        p->nr_sects = len;
 368        p->partno = partno;
 369        p->policy = get_disk_ro(disk);
 370
 371        if (info) {
 372                struct partition_meta_info *pinfo = alloc_part_info(disk);
 373                if (!pinfo) {
 374                        err = -ENOMEM;
 375                        goto out_free_stats;
 376                }
 377                memcpy(pinfo, info, sizeof(*info));
 378                p->info = pinfo;
 379        }
 380
 381        dname = dev_name(ddev);
 382        if (isdigit(dname[strlen(dname) - 1]))
 383                dev_set_name(pdev, "%sp%d", dname, partno);
 384        else
 385                dev_set_name(pdev, "%s%d", dname, partno);
 386
 387        device_initialize(pdev);
 388        pdev->class = &block_class;
 389        pdev->type = &part_type;
 390        pdev->parent = ddev;
 391
 392        err = blk_alloc_devt(p, &devt);
 393        if (err)
 394                goto out_free_info;
 395        pdev->devt = devt;
 396
 397        /* delay uevent until 'holders' subdir is created */
 398        dev_set_uevent_suppress(pdev, 1);
 399        err = device_add(pdev);
 400        if (err)
 401                goto out_put;
 402
 403        err = -ENOMEM;
 404        p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
 405        if (!p->holder_dir)
 406                goto out_del;
 407
 408        dev_set_uevent_suppress(pdev, 0);
 409        if (flags & ADDPART_FLAG_WHOLEDISK) {
 410                err = device_create_file(pdev, &dev_attr_whole_disk);
 411                if (err)
 412                        goto out_del;
 413        }
 414
 415        err = hd_ref_init(p);
 416        if (err) {
 417                if (flags & ADDPART_FLAG_WHOLEDISK)
 418                        goto out_remove_file;
 419                goto out_del;
 420        }
 421
 422        /* everything is up and running, commence */
 423        rcu_assign_pointer(ptbl->part[partno], p);
 424
 425        /* suppress uevent if the disk suppresses it */
 426        if (!dev_get_uevent_suppress(ddev))
 427                kobject_uevent(&pdev->kobj, KOBJ_ADD);
 428        return p;
 429
 430out_free_info:
 431        free_part_info(p);
 432out_free_stats:
 433        free_part_stats(p);
 434out_free:
 435        kfree(p);
 436        return ERR_PTR(err);
 437out_remove_file:
 438        device_remove_file(pdev, &dev_attr_whole_disk);
 439out_del:
 440        kobject_put(p->holder_dir);
 441        device_del(pdev);
 442out_put:
 443        put_device(pdev);
 444        return ERR_PTR(err);
 445}
 446
 447static bool disk_unlock_native_capacity(struct gendisk *disk)
 448{
 449        const struct block_device_operations *bdops = disk->fops;
 450
 451        if (bdops->unlock_native_capacity &&
 452            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
 453                printk(KERN_CONT "enabling native capacity\n");
 454                bdops->unlock_native_capacity(disk);
 455                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
 456                return true;
 457        } else {
 458                printk(KERN_CONT "truncated\n");
 459                return false;
 460        }
 461}
 462
 463int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
 464{
 465        struct disk_part_iter piter;
 466        struct hd_struct *part;
 467        int res;
 468
 469        if (!disk_part_scan_enabled(disk))
 470                return 0;
 471        if (bdev->bd_part_count || bdev->bd_super)
 472                return -EBUSY;
 473        res = invalidate_partition(disk, 0);
 474        if (res)
 475                return res;
 476
 477        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 478        while ((part = disk_part_iter_next(&piter)))
 479                delete_partition(disk, part->partno);
 480        disk_part_iter_exit(&piter);
 481
 482        return 0;
 483}
 484
 485static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
 486                struct parsed_partitions *state, int p)
 487{
 488        sector_t size = state->parts[p].size;
 489        sector_t from = state->parts[p].from;
 490        struct hd_struct *part;
 491
 492        if (!size)
 493                return true;
 494
 495        if (from >= get_capacity(disk)) {
 496                printk(KERN_WARNING
 497                       "%s: p%d start %llu is beyond EOD, ",
 498                       disk->disk_name, p, (unsigned long long) from);
 499                if (disk_unlock_native_capacity(disk))
 500                        return false;
 501                return true;
 502        }
 503
 504        if (from + size > get_capacity(disk)) {
 505                printk(KERN_WARNING
 506                       "%s: p%d size %llu extends beyond EOD, ",
 507                       disk->disk_name, p, (unsigned long long) size);
 508
 509                if (disk_unlock_native_capacity(disk))
 510                        return false;
 511
 512                /*
 513                 * We can not ignore partitions of broken tables created by for
 514                 * example camera firmware, but we limit them to the end of the
 515                 * disk to avoid creating invalid block devices.
 516                 */
 517                size = get_capacity(disk) - from;
 518        }
 519
 520        part = add_partition(disk, p, from, size, state->parts[p].flags,
 521                             &state->parts[p].info);
 522        if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
 523                printk(KERN_ERR " %s: p%d could not be added: %ld\n",
 524                       disk->disk_name, p, -PTR_ERR(part));
 525                return true;
 526        }
 527
 528#ifdef CONFIG_BLK_DEV_MD
 529        if (state->parts[p].flags & ADDPART_FLAG_RAID)
 530                md_autodetect_dev(part_to_dev(part)->devt);
 531#endif
 532        return true;
 533}
 534
 535int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
 536{
 537        struct parsed_partitions *state;
 538        int ret = -EAGAIN, p, highest;
 539
 540        if (!disk_part_scan_enabled(disk))
 541                return 0;
 542
 543        state = check_partition(disk, bdev);
 544        if (!state)
 545                return 0;
 546        if (IS_ERR(state)) {
 547                /*
 548                 * I/O error reading the partition table.  If we tried to read
 549                 * beyond EOD, retry after unlocking the native capacity.
 550                 */
 551                if (PTR_ERR(state) == -ENOSPC) {
 552                        printk(KERN_WARNING "%s: partition table beyond EOD, ",
 553                               disk->disk_name);
 554                        if (disk_unlock_native_capacity(disk))
 555                                return -EAGAIN;
 556                }
 557                return -EIO;
 558        }
 559
 560        /*
 561         * Partitions are not supported on host managed zoned block devices.
 562         */
 563        if (disk->queue->limits.zoned == BLK_ZONED_HM) {
 564                pr_warn("%s: ignoring partition table on host managed zoned block device\n",
 565                        disk->disk_name);
 566                ret = 0;
 567                goto out_free_state;
 568        }
 569
 570        /*
 571         * If we read beyond EOD, try unlocking native capacity even if the
 572         * partition table was successfully read as we could be missing some
 573         * partitions.
 574         */
 575        if (state->access_beyond_eod) {
 576                printk(KERN_WARNING
 577                       "%s: partition table partially beyond EOD, ",
 578                       disk->disk_name);
 579                if (disk_unlock_native_capacity(disk))
 580                        goto out_free_state;
 581        }
 582
 583        /* tell userspace that the media / partition table may have changed */
 584        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 585
 586        /*
 587         * Detect the highest partition number and preallocate disk->part_tbl.
 588         * This is an optimization and not strictly necessary.
 589         */
 590        for (p = 1, highest = 0; p < state->limit; p++)
 591                if (state->parts[p].size)
 592                        highest = p;
 593        disk_expand_part_tbl(disk, highest);
 594
 595        for (p = 1; p < state->limit; p++)
 596                if (!blk_add_partition(disk, bdev, state, p))
 597                        goto out_free_state;
 598
 599        ret = 0;
 600out_free_state:
 601        free_partitions(state);
 602        return ret;
 603}
 604
 605unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 606{
 607        struct address_space *mapping = bdev->bd_inode->i_mapping;
 608        struct page *page;
 609
 610        page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
 611        if (!IS_ERR(page)) {
 612                if (PageError(page))
 613                        goto fail;
 614                p->v = page;
 615                return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
 616fail:
 617                put_page(page);
 618        }
 619        p->v = NULL;
 620        return NULL;
 621}
 622
 623EXPORT_SYMBOL(read_dev_sector);
 624