linux/block/partition-generic.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  Code extracted from drivers/block/genhd.c
   4 *  Copyright (C) 1991-1998  Linus Torvalds
   5 *  Re-organised Feb 1998 Russell King
   6 *
   7 *  We now have independent partition support from the
   8 *  block drivers, which allows all the partition code to
   9 *  be grouped in one location, and it to be mostly self
  10 *  contained.
  11 */
  12
  13#include <linux/init.h>
  14#include <linux/module.h>
  15#include <linux/fs.h>
  16#include <linux/slab.h>
  17#include <linux/kmod.h>
  18#include <linux/ctype.h>
  19#include <linux/genhd.h>
  20#include <linux/blktrace_api.h>
  21
  22#include "partitions/check.h"
  23
  24#ifdef CONFIG_BLK_DEV_MD
  25extern void md_autodetect_dev(dev_t dev);
  26#endif
  27 
  28/*
  29 * disk_name() is used by partition check code and the genhd driver.
  30 * It formats the devicename of the indicated disk into
  31 * the supplied buffer (of size at least 32), and returns
  32 * a pointer to that same buffer (for convenience).
  33 */
  34
  35char *disk_name(struct gendisk *hd, int partno, char *buf)
  36{
  37        if (!partno)
  38                snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
  39        else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
  40                snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
  41        else
  42                snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
  43
  44        return buf;
  45}
  46
  47const char *bdevname(struct block_device *bdev, char *buf)
  48{
  49        return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
  50}
  51
  52EXPORT_SYMBOL(bdevname);
  53
  54const char *bio_devname(struct bio *bio, char *buf)
  55{
  56        return disk_name(bio->bi_disk, bio->bi_partno, buf);
  57}
  58EXPORT_SYMBOL(bio_devname);
  59
  60/*
  61 * There's very little reason to use this, you should really
  62 * have a struct block_device just about everywhere and use
  63 * bdevname() instead.
  64 */
  65const char *__bdevname(dev_t dev, char *buffer)
  66{
  67        scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
  68                                MAJOR(dev), MINOR(dev));
  69        return buffer;
  70}
  71
  72EXPORT_SYMBOL(__bdevname);
  73
  74static ssize_t part_partition_show(struct device *dev,
  75                                   struct device_attribute *attr, char *buf)
  76{
  77        struct hd_struct *p = dev_to_part(dev);
  78
  79        return sprintf(buf, "%d\n", p->partno);
  80}
  81
  82static ssize_t part_start_show(struct device *dev,
  83                               struct device_attribute *attr, char *buf)
  84{
  85        struct hd_struct *p = dev_to_part(dev);
  86
  87        return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
  88}
  89
  90ssize_t part_size_show(struct device *dev,
  91                       struct device_attribute *attr, char *buf)
  92{
  93        struct hd_struct *p = dev_to_part(dev);
  94        return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
  95}
  96
  97static ssize_t part_ro_show(struct device *dev,
  98                            struct device_attribute *attr, char *buf)
  99{
 100        struct hd_struct *p = dev_to_part(dev);
 101        return sprintf(buf, "%d\n", p->policy ? 1 : 0);
 102}
 103
 104static ssize_t part_alignment_offset_show(struct device *dev,
 105                                          struct device_attribute *attr, char *buf)
 106{
 107        struct hd_struct *p = dev_to_part(dev);
 108        return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
 109}
 110
 111static ssize_t part_discard_alignment_show(struct device *dev,
 112                                           struct device_attribute *attr, char *buf)
 113{
 114        struct hd_struct *p = dev_to_part(dev);
 115        return sprintf(buf, "%u\n", p->discard_alignment);
 116}
 117
 118ssize_t part_stat_show(struct device *dev,
 119                       struct device_attribute *attr, char *buf)
 120{
 121        struct hd_struct *p = dev_to_part(dev);
 122        struct request_queue *q = part_to_disk(p)->queue;
 123        unsigned int inflight[2];
 124        int cpu;
 125
 126        cpu = part_stat_lock();
 127        part_round_stats(q, cpu, p);
 128        part_stat_unlock();
 129        part_in_flight(q, p, inflight);
 130        return sprintf(buf,
 131                "%8lu %8lu %8llu %8u "
 132                "%8lu %8lu %8llu %8u "
 133                "%8u %8u %8u"
 134                "\n",
 135                part_stat_read(p, ios[READ]),
 136                part_stat_read(p, merges[READ]),
 137                (unsigned long long)part_stat_read(p, sectors[READ]),
 138                jiffies_to_msecs(part_stat_read(p, ticks[READ])),
 139                part_stat_read(p, ios[WRITE]),
 140                part_stat_read(p, merges[WRITE]),
 141                (unsigned long long)part_stat_read(p, sectors[WRITE]),
 142                jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 143                inflight[0],
 144                jiffies_to_msecs(part_stat_read(p, io_ticks)),
 145                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 146}
 147
 148ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
 149                           char *buf)
 150{
 151        struct hd_struct *p = dev_to_part(dev);
 152        struct request_queue *q = part_to_disk(p)->queue;
 153        unsigned int inflight[2];
 154
 155        part_in_flight_rw(q, p, inflight);
 156        return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
 157}
 158
 159#ifdef CONFIG_FAIL_MAKE_REQUEST
 160ssize_t part_fail_show(struct device *dev,
 161                       struct device_attribute *attr, char *buf)
 162{
 163        struct hd_struct *p = dev_to_part(dev);
 164
 165        return sprintf(buf, "%d\n", p->make_it_fail);
 166}
 167
 168ssize_t part_fail_store(struct device *dev,
 169                        struct device_attribute *attr,
 170                        const char *buf, size_t count)
 171{
 172        struct hd_struct *p = dev_to_part(dev);
 173        int i;
 174
 175        if (count > 0 && sscanf(buf, "%d", &i) > 0)
 176                p->make_it_fail = (i == 0) ? 0 : 1;
 177
 178        return count;
 179}
 180#endif
 181
 182static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 183static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 184static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 185static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
 186static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 187static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
 188                   NULL);
 189static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 190static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 191#ifdef CONFIG_FAIL_MAKE_REQUEST
 192static struct device_attribute dev_attr_fail =
 193        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
 194#endif
 195
 196static struct attribute *part_attrs[] = {
 197        &dev_attr_partition.attr,
 198        &dev_attr_start.attr,
 199        &dev_attr_size.attr,
 200        &dev_attr_ro.attr,
 201        &dev_attr_alignment_offset.attr,
 202        &dev_attr_discard_alignment.attr,
 203        &dev_attr_stat.attr,
 204        &dev_attr_inflight.attr,
 205#ifdef CONFIG_FAIL_MAKE_REQUEST
 206        &dev_attr_fail.attr,
 207#endif
 208        NULL
 209};
 210
 211static struct attribute_group part_attr_group = {
 212        .attrs = part_attrs,
 213};
 214
 215static const struct attribute_group *part_attr_groups[] = {
 216        &part_attr_group,
 217#ifdef CONFIG_BLK_DEV_IO_TRACE
 218        &blk_trace_attr_group,
 219#endif
 220        NULL
 221};
 222
 223static void part_release(struct device *dev)
 224{
 225        struct hd_struct *p = dev_to_part(dev);
 226        blk_free_devt(dev->devt);
 227        hd_free_part(p);
 228        kfree(p);
 229}
 230
 231static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
 232{
 233        struct hd_struct *part = dev_to_part(dev);
 234
 235        add_uevent_var(env, "PARTN=%u", part->partno);
 236        if (part->info && part->info->volname[0])
 237                add_uevent_var(env, "PARTNAME=%s", part->info->volname);
 238        return 0;
 239}
 240
 241struct device_type part_type = {
 242        .name           = "partition",
 243        .groups         = part_attr_groups,
 244        .release        = part_release,
 245        .uevent         = part_uevent,
 246};
 247
 248static void delete_partition_rcu_cb(struct rcu_head *head)
 249{
 250        struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
 251
 252        part->start_sect = 0;
 253        part->nr_sects = 0;
 254        part_stat_set_all(part, 0);
 255        put_device(part_to_dev(part));
 256}
 257
 258void __delete_partition(struct percpu_ref *ref)
 259{
 260        struct hd_struct *part = container_of(ref, struct hd_struct, ref);
 261        call_rcu(&part->rcu_head, delete_partition_rcu_cb);
 262}
 263
 264/*
 265 * Must be called either with bd_mutex held, before a disk can be opened or
 266 * after all disk users are gone.
 267 */
 268void delete_partition(struct gendisk *disk, int partno)
 269{
 270        struct disk_part_tbl *ptbl =
 271                rcu_dereference_protected(disk->part_tbl, 1);
 272        struct hd_struct *part;
 273
 274        if (partno >= ptbl->len)
 275                return;
 276
 277        part = rcu_dereference_protected(ptbl->part[partno], 1);
 278        if (!part)
 279                return;
 280
 281        rcu_assign_pointer(ptbl->part[partno], NULL);
 282        rcu_assign_pointer(ptbl->last_lookup, NULL);
 283        kobject_put(part->holder_dir);
 284        device_del(part_to_dev(part));
 285
 286        hd_struct_kill(part);
 287}
 288
 289static ssize_t whole_disk_show(struct device *dev,
 290                               struct device_attribute *attr, char *buf)
 291{
 292        return 0;
 293}
 294static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 295                   whole_disk_show, NULL);
 296
 297/*
 298 * Must be called either with bd_mutex held, before a disk can be opened or
 299 * after all disk users are gone.
 300 */
 301struct hd_struct *add_partition(struct gendisk *disk, int partno,
 302                                sector_t start, sector_t len, int flags,
 303                                struct partition_meta_info *info)
 304{
 305        struct hd_struct *p;
 306        dev_t devt = MKDEV(0, 0);
 307        struct device *ddev = disk_to_dev(disk);
 308        struct device *pdev;
 309        struct disk_part_tbl *ptbl;
 310        const char *dname;
 311        int err;
 312
 313        err = disk_expand_part_tbl(disk, partno);
 314        if (err)
 315                return ERR_PTR(err);
 316        ptbl = rcu_dereference_protected(disk->part_tbl, 1);
 317
 318        if (ptbl->part[partno])
 319                return ERR_PTR(-EBUSY);
 320
 321        p = kzalloc(sizeof(*p), GFP_KERNEL);
 322        if (!p)
 323                return ERR_PTR(-EBUSY);
 324
 325        if (!init_part_stats(p)) {
 326                err = -ENOMEM;
 327                goto out_free;
 328        }
 329
 330        seqcount_init(&p->nr_sects_seq);
 331        pdev = part_to_dev(p);
 332
 333        p->start_sect = start;
 334        p->alignment_offset =
 335                queue_limit_alignment_offset(&disk->queue->limits, start);
 336        p->discard_alignment =
 337                queue_limit_discard_alignment(&disk->queue->limits, start);
 338        p->nr_sects = len;
 339        p->partno = partno;
 340        p->policy = get_disk_ro(disk);
 341
 342        if (info) {
 343                struct partition_meta_info *pinfo = alloc_part_info(disk);
 344                if (!pinfo) {
 345                        err = -ENOMEM;
 346                        goto out_free_stats;
 347                }
 348                memcpy(pinfo, info, sizeof(*info));
 349                p->info = pinfo;
 350        }
 351
 352        dname = dev_name(ddev);
 353        if (isdigit(dname[strlen(dname) - 1]))
 354                dev_set_name(pdev, "%sp%d", dname, partno);
 355        else
 356                dev_set_name(pdev, "%s%d", dname, partno);
 357
 358        device_initialize(pdev);
 359        pdev->class = &block_class;
 360        pdev->type = &part_type;
 361        pdev->parent = ddev;
 362
 363        err = blk_alloc_devt(p, &devt);
 364        if (err)
 365                goto out_free_info;
 366        pdev->devt = devt;
 367
 368        /* delay uevent until 'holders' subdir is created */
 369        dev_set_uevent_suppress(pdev, 1);
 370        err = device_add(pdev);
 371        if (err)
 372                goto out_put;
 373
 374        err = -ENOMEM;
 375        p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
 376        if (!p->holder_dir)
 377                goto out_del;
 378
 379        dev_set_uevent_suppress(pdev, 0);
 380        if (flags & ADDPART_FLAG_WHOLEDISK) {
 381                err = device_create_file(pdev, &dev_attr_whole_disk);
 382                if (err)
 383                        goto out_del;
 384        }
 385
 386        err = hd_ref_init(p);
 387        if (err) {
 388                if (flags & ADDPART_FLAG_WHOLEDISK)
 389                        goto out_remove_file;
 390                goto out_del;
 391        }
 392
 393        /* everything is up and running, commence */
 394        rcu_assign_pointer(ptbl->part[partno], p);
 395
 396        /* suppress uevent if the disk suppresses it */
 397        if (!dev_get_uevent_suppress(ddev))
 398                kobject_uevent(&pdev->kobj, KOBJ_ADD);
 399        return p;
 400
 401out_free_info:
 402        free_part_info(p);
 403out_free_stats:
 404        free_part_stats(p);
 405out_free:
 406        kfree(p);
 407        return ERR_PTR(err);
 408out_remove_file:
 409        device_remove_file(pdev, &dev_attr_whole_disk);
 410out_del:
 411        kobject_put(p->holder_dir);
 412        device_del(pdev);
 413out_put:
 414        put_device(pdev);
 415        return ERR_PTR(err);
 416}
 417
 418static bool disk_unlock_native_capacity(struct gendisk *disk)
 419{
 420        const struct block_device_operations *bdops = disk->fops;
 421
 422        if (bdops->unlock_native_capacity &&
 423            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
 424                printk(KERN_CONT "enabling native capacity\n");
 425                bdops->unlock_native_capacity(disk);
 426                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
 427                return true;
 428        } else {
 429                printk(KERN_CONT "truncated\n");
 430                return false;
 431        }
 432}
 433
 434static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 435{
 436        struct disk_part_iter piter;
 437        struct hd_struct *part;
 438        int res;
 439
 440        if (bdev->bd_part_count || bdev->bd_super)
 441                return -EBUSY;
 442        res = invalidate_partition(disk, 0);
 443        if (res)
 444                return res;
 445
 446        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 447        while ((part = disk_part_iter_next(&piter)))
 448                delete_partition(disk, part->partno);
 449        disk_part_iter_exit(&piter);
 450
 451        return 0;
 452}
 453
 454static bool part_zone_aligned(struct gendisk *disk,
 455                              struct block_device *bdev,
 456                              sector_t from, sector_t size)
 457{
 458        unsigned int zone_sectors = bdev_zone_sectors(bdev);
 459
 460        /*
 461         * If this function is called, then the disk is a zoned block device
 462         * (host-aware or host-managed). This can be detected even if the
 463         * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
 464         * set). In this case, however, only host-aware devices will be seen
 465         * as a block device is not created for host-managed devices. Without
 466         * zoned block device support, host-aware drives can still be used as
 467         * regular block devices (no zone operation) and their zone size will
 468         * be reported as 0. Allow this case.
 469         */
 470        if (!zone_sectors)
 471                return true;
 472
 473        /*
 474         * Check partition start and size alignement. If the drive has a
 475         * smaller last runt zone, ignore it and allow the partition to
 476         * use it. Check the zone size too: it should be a power of 2 number
 477         * of sectors.
 478         */
 479        if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
 480                u32 rem;
 481
 482                div_u64_rem(from, zone_sectors, &rem);
 483                if (rem)
 484                        return false;
 485                if ((from + size) < get_capacity(disk)) {
 486                        div_u64_rem(size, zone_sectors, &rem);
 487                        if (rem)
 488                                return false;
 489                }
 490
 491        } else {
 492
 493                if (from & (zone_sectors - 1))
 494                        return false;
 495                if ((from + size) < get_capacity(disk) &&
 496                    (size & (zone_sectors - 1)))
 497                        return false;
 498
 499        }
 500
 501        return true;
 502}
 503
 504int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 505{
 506        struct parsed_partitions *state = NULL;
 507        struct hd_struct *part;
 508        int p, highest, res;
 509rescan:
 510        if (state && !IS_ERR(state)) {
 511                free_partitions(state);
 512                state = NULL;
 513        }
 514
 515        res = drop_partitions(disk, bdev);
 516        if (res)
 517                return res;
 518
 519        if (disk->fops->revalidate_disk)
 520                disk->fops->revalidate_disk(disk);
 521        check_disk_size_change(disk, bdev);
 522        bdev->bd_invalidated = 0;
 523        if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 524                return 0;
 525        if (IS_ERR(state)) {
 526                /*
 527                 * I/O error reading the partition table.  If any
 528                 * partition code tried to read beyond EOD, retry
 529                 * after unlocking native capacity.
 530                 */
 531                if (PTR_ERR(state) == -ENOSPC) {
 532                        printk(KERN_WARNING "%s: partition table beyond EOD, ",
 533                               disk->disk_name);
 534                        if (disk_unlock_native_capacity(disk))
 535                                goto rescan;
 536                }
 537                return -EIO;
 538        }
 539        /*
 540         * If any partition code tried to read beyond EOD, try
 541         * unlocking native capacity even if partition table is
 542         * successfully read as we could be missing some partitions.
 543         */
 544        if (state->access_beyond_eod) {
 545                printk(KERN_WARNING
 546                       "%s: partition table partially beyond EOD, ",
 547                       disk->disk_name);
 548                if (disk_unlock_native_capacity(disk))
 549                        goto rescan;
 550        }
 551
 552        /* tell userspace that the media / partition table may have changed */
 553        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 554
 555        /* Detect the highest partition number and preallocate
 556         * disk->part_tbl.  This is an optimization and not strictly
 557         * necessary.
 558         */
 559        for (p = 1, highest = 0; p < state->limit; p++)
 560                if (state->parts[p].size)
 561                        highest = p;
 562
 563        disk_expand_part_tbl(disk, highest);
 564
 565        /* add partitions */
 566        for (p = 1; p < state->limit; p++) {
 567                sector_t size, from;
 568
 569                size = state->parts[p].size;
 570                if (!size)
 571                        continue;
 572
 573                from = state->parts[p].from;
 574                if (from >= get_capacity(disk)) {
 575                        printk(KERN_WARNING
 576                               "%s: p%d start %llu is beyond EOD, ",
 577                               disk->disk_name, p, (unsigned long long) from);
 578                        if (disk_unlock_native_capacity(disk))
 579                                goto rescan;
 580                        continue;
 581                }
 582
 583                if (from + size > get_capacity(disk)) {
 584                        printk(KERN_WARNING
 585                               "%s: p%d size %llu extends beyond EOD, ",
 586                               disk->disk_name, p, (unsigned long long) size);
 587
 588                        if (disk_unlock_native_capacity(disk)) {
 589                                /* free state and restart */
 590                                goto rescan;
 591                        } else {
 592                                /*
 593                                 * we can not ignore partitions of broken tables
 594                                 * created by for example camera firmware, but
 595                                 * we limit them to the end of the disk to avoid
 596                                 * creating invalid block devices
 597                                 */
 598                                size = get_capacity(disk) - from;
 599                        }
 600                }
 601
 602                /*
 603                 * On a zoned block device, partitions should be aligned on the
 604                 * device zone size (i.e. zone boundary crossing not allowed).
 605                 * Otherwise, resetting the write pointer of the last zone of
 606                 * one partition may impact the following partition.
 607                 */
 608                if (bdev_is_zoned(bdev) &&
 609                    !part_zone_aligned(disk, bdev, from, size)) {
 610                        printk(KERN_WARNING
 611                               "%s: p%d start %llu+%llu is not zone aligned\n",
 612                               disk->disk_name, p, (unsigned long long) from,
 613                               (unsigned long long) size);
 614                        continue;
 615                }
 616
 617                part = add_partition(disk, p, from, size,
 618                                     state->parts[p].flags,
 619                                     &state->parts[p].info);
 620                if (IS_ERR(part)) {
 621                        printk(KERN_ERR " %s: p%d could not be added: %ld\n",
 622                               disk->disk_name, p, -PTR_ERR(part));
 623                        continue;
 624                }
 625#ifdef CONFIG_BLK_DEV_MD
 626                if (state->parts[p].flags & ADDPART_FLAG_RAID)
 627                        md_autodetect_dev(part_to_dev(part)->devt);
 628#endif
 629        }
 630        free_partitions(state);
 631        return 0;
 632}
 633
 634int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 635{
 636        int res;
 637
 638        if (!bdev->bd_invalidated)
 639                return 0;
 640
 641        res = drop_partitions(disk, bdev);
 642        if (res)
 643                return res;
 644
 645        set_capacity(disk, 0);
 646        check_disk_size_change(disk, bdev);
 647        bdev->bd_invalidated = 0;
 648        /* tell userspace that the media / partition table may have changed */
 649        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 650
 651        return 0;
 652}
 653
 654unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 655{
 656        struct address_space *mapping = bdev->bd_inode->i_mapping;
 657        struct page *page;
 658
 659        page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
 660        if (!IS_ERR(page)) {
 661                if (PageError(page))
 662                        goto fail;
 663                p->v = page;
 664                return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
 665fail:
 666                put_page(page);
 667        }
 668        p->v = NULL;
 669        return NULL;
 670}
 671
 672EXPORT_SYMBOL(read_dev_sector);
 673