linux/block/partition-generic.c
<<
>>
Prefs
/*
 *  Code extracted from drivers/block/genhd.c
 *  Copyright (C) 1991-1998  Linus Torvalds
 *  Re-organised Feb 1998 Russell King
 *
 *  Partition support is now independent of the block drivers, which
 *  allows all of the partition code to be grouped in one location and
 *  to be mostly self-contained.
 */
  11
  12#include <linux/init.h>
  13#include <linux/module.h>
  14#include <linux/fs.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/ctype.h>
  18#include <linux/genhd.h>
  19#include <linux/dax.h>
  20#include <linux/blktrace_api.h>
  21
  22#include "partitions/check.h"
  23
  24#ifdef CONFIG_BLK_DEV_MD
  25extern void md_autodetect_dev(dev_t dev);
  26#endif
  27 
  28/*
  29 * disk_name() is used by partition check code and the genhd driver.
  30 * It formats the devicename of the indicated disk into
  31 * the supplied buffer (of size at least 32), and returns
  32 * a pointer to that same buffer (for convenience).
  33 */
  34
  35char *disk_name(struct gendisk *hd, int partno, char *buf)
  36{
  37        if (!partno)
  38                snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
  39        else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
  40                snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
  41        else
  42                snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
  43
  44        return buf;
  45}
  46
  47const char *bdevname(struct block_device *bdev, char *buf)
  48{
  49        return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
  50}
  51
  52EXPORT_SYMBOL(bdevname);
  53
  54/*
  55 * There's very little reason to use this, you should really
  56 * have a struct block_device just about everywhere and use
  57 * bdevname() instead.
  58 */
  59const char *__bdevname(dev_t dev, char *buffer)
  60{
  61        scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
  62                                MAJOR(dev), MINOR(dev));
  63        return buffer;
  64}
  65
  66EXPORT_SYMBOL(__bdevname);
  67
  68static ssize_t part_partition_show(struct device *dev,
  69                                   struct device_attribute *attr, char *buf)
  70{
  71        struct hd_struct *p = dev_to_part(dev);
  72
  73        return sprintf(buf, "%d\n", p->partno);
  74}
  75
  76static ssize_t part_start_show(struct device *dev,
  77                               struct device_attribute *attr, char *buf)
  78{
  79        struct hd_struct *p = dev_to_part(dev);
  80
  81        return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
  82}
  83
  84ssize_t part_size_show(struct device *dev,
  85                       struct device_attribute *attr, char *buf)
  86{
  87        struct hd_struct *p = dev_to_part(dev);
  88        return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
  89}
  90
  91static ssize_t part_ro_show(struct device *dev,
  92                            struct device_attribute *attr, char *buf)
  93{
  94        struct hd_struct *p = dev_to_part(dev);
  95        return sprintf(buf, "%d\n", p->policy ? 1 : 0);
  96}
  97
  98static ssize_t part_alignment_offset_show(struct device *dev,
  99                                          struct device_attribute *attr, char *buf)
 100{
 101        struct hd_struct *p = dev_to_part(dev);
 102        return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
 103}
 104
 105static ssize_t part_discard_alignment_show(struct device *dev,
 106                                           struct device_attribute *attr, char *buf)
 107{
 108        struct hd_struct *p = dev_to_part(dev);
 109        return sprintf(buf, "%u\n", p->discard_alignment);
 110}
 111
 112ssize_t part_stat_show(struct device *dev,
 113                       struct device_attribute *attr, char *buf)
 114{
 115        struct hd_struct *p = dev_to_part(dev);
 116        int cpu;
 117
 118        cpu = part_stat_lock();
 119        part_round_stats(cpu, p);
 120        part_stat_unlock();
 121        return sprintf(buf,
 122                "%8lu %8lu %8llu %8u "
 123                "%8lu %8lu %8llu %8u "
 124                "%8u %8u %8u"
 125                "\n",
 126                part_stat_read(p, ios[READ]),
 127                part_stat_read(p, merges[READ]),
 128                (unsigned long long)part_stat_read(p, sectors[READ]),
 129                jiffies_to_msecs(part_stat_read(p, ticks[READ])),
 130                part_stat_read(p, ios[WRITE]),
 131                part_stat_read(p, merges[WRITE]),
 132                (unsigned long long)part_stat_read(p, sectors[WRITE]),
 133                jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
 134                part_in_flight(p),
 135                jiffies_to_msecs(part_stat_read(p, io_ticks)),
 136                jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 137}
 138
 139ssize_t part_inflight_show(struct device *dev,
 140                        struct device_attribute *attr, char *buf)
 141{
 142        struct hd_struct *p = dev_to_part(dev);
 143
 144        return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
 145                atomic_read(&p->in_flight[1]));
 146}
 147
 148#ifdef CONFIG_FAIL_MAKE_REQUEST
 149ssize_t part_fail_show(struct device *dev,
 150                       struct device_attribute *attr, char *buf)
 151{
 152        struct hd_struct *p = dev_to_part(dev);
 153
 154        return sprintf(buf, "%d\n", p->make_it_fail);
 155}
 156
 157ssize_t part_fail_store(struct device *dev,
 158                        struct device_attribute *attr,
 159                        const char *buf, size_t count)
 160{
 161        struct hd_struct *p = dev_to_part(dev);
 162        int i;
 163
 164        if (count > 0 && sscanf(buf, "%d", &i) > 0)
 165                p->make_it_fail = (i == 0) ? 0 : 1;
 166
 167        return count;
 168}
 169#endif
 170
 171static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
 172static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
 173static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 174static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
 175static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 176static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
 177                   NULL);
 178static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 179static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 180#ifdef CONFIG_FAIL_MAKE_REQUEST
 181static struct device_attribute dev_attr_fail =
 182        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
 183#endif
 184
 185static struct attribute *part_attrs[] = {
 186        &dev_attr_partition.attr,
 187        &dev_attr_start.attr,
 188        &dev_attr_size.attr,
 189        &dev_attr_ro.attr,
 190        &dev_attr_alignment_offset.attr,
 191        &dev_attr_discard_alignment.attr,
 192        &dev_attr_stat.attr,
 193        &dev_attr_inflight.attr,
 194#ifdef CONFIG_FAIL_MAKE_REQUEST
 195        &dev_attr_fail.attr,
 196#endif
 197        NULL
 198};
 199
 200static struct attribute_group part_attr_group = {
 201        .attrs = part_attrs,
 202};
 203
 204static const struct attribute_group *part_attr_groups[] = {
 205        &part_attr_group,
 206#ifdef CONFIG_BLK_DEV_IO_TRACE
 207        &blk_trace_attr_group,
 208#endif
 209        NULL
 210};
 211
 212static void part_release(struct device *dev)
 213{
 214        struct hd_struct *p = dev_to_part(dev);
 215        blk_free_devt(dev->devt);
 216        hd_free_part(p);
 217        kfree(p);
 218}
 219
 220static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
 221{
 222        struct hd_struct *part = dev_to_part(dev);
 223
 224        add_uevent_var(env, "PARTN=%u", part->partno);
 225        if (part->info && part->info->volname[0])
 226                add_uevent_var(env, "PARTNAME=%s", part->info->volname);
 227        return 0;
 228}
 229
 230struct device_type part_type = {
 231        .name           = "partition",
 232        .groups         = part_attr_groups,
 233        .release        = part_release,
 234        .uevent         = part_uevent,
 235};
 236
 237static void delete_partition_rcu_cb(struct rcu_head *head)
 238{
 239        struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
 240
 241        part->start_sect = 0;
 242        part->nr_sects = 0;
 243        part_stat_set_all(part, 0);
 244        put_device(part_to_dev(part));
 245}
 246
 247void __delete_partition(struct percpu_ref *ref)
 248{
 249        struct hd_struct *part = container_of(ref, struct hd_struct, ref);
 250        call_rcu(&part->rcu_head, delete_partition_rcu_cb);
 251}
 252
 253void delete_partition(struct gendisk *disk, int partno)
 254{
 255        struct disk_part_tbl *ptbl = disk->part_tbl;
 256        struct hd_struct *part;
 257
 258        if (partno >= ptbl->len)
 259                return;
 260
 261        part = ptbl->part[partno];
 262        if (!part)
 263                return;
 264
 265        rcu_assign_pointer(ptbl->part[partno], NULL);
 266        rcu_assign_pointer(ptbl->last_lookup, NULL);
 267        kobject_put(part->holder_dir);
 268        device_del(part_to_dev(part));
 269
 270        hd_struct_kill(part);
 271}
 272
 273static ssize_t whole_disk_show(struct device *dev,
 274                               struct device_attribute *attr, char *buf)
 275{
 276        return 0;
 277}
 278static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
 279                   whole_disk_show, NULL);
 280
 281struct hd_struct *add_partition(struct gendisk *disk, int partno,
 282                                sector_t start, sector_t len, int flags,
 283                                struct partition_meta_info *info)
 284{
 285        struct hd_struct *p;
 286        dev_t devt = MKDEV(0, 0);
 287        struct device *ddev = disk_to_dev(disk);
 288        struct device *pdev;
 289        struct disk_part_tbl *ptbl;
 290        const char *dname;
 291        int err;
 292
 293        err = disk_expand_part_tbl(disk, partno);
 294        if (err)
 295                return ERR_PTR(err);
 296        ptbl = disk->part_tbl;
 297
 298        if (ptbl->part[partno])
 299                return ERR_PTR(-EBUSY);
 300
 301        p = kzalloc(sizeof(*p), GFP_KERNEL);
 302        if (!p)
 303                return ERR_PTR(-EBUSY);
 304
 305        if (!init_part_stats(p)) {
 306                err = -ENOMEM;
 307                goto out_free;
 308        }
 309
 310        seqcount_init(&p->nr_sects_seq);
 311        pdev = part_to_dev(p);
 312
 313        p->start_sect = start;
 314        p->alignment_offset =
 315                queue_limit_alignment_offset(&disk->queue->limits, start);
 316        p->discard_alignment =
 317                queue_limit_discard_alignment(&disk->queue->limits, start);
 318        p->nr_sects = len;
 319        p->partno = partno;
 320        p->policy = get_disk_ro(disk);
 321
 322        if (info) {
 323                struct partition_meta_info *pinfo = alloc_part_info(disk);
 324                if (!pinfo)
 325                        goto out_free_stats;
 326                memcpy(pinfo, info, sizeof(*info));
 327                p->info = pinfo;
 328        }
 329
 330        dname = dev_name(ddev);
 331        if (isdigit(dname[strlen(dname) - 1]))
 332                dev_set_name(pdev, "%sp%d", dname, partno);
 333        else
 334                dev_set_name(pdev, "%s%d", dname, partno);
 335
 336        device_initialize(pdev);
 337        pdev->class = &block_class;
 338        pdev->type = &part_type;
 339        pdev->parent = ddev;
 340
 341        err = blk_alloc_devt(p, &devt);
 342        if (err)
 343                goto out_free_info;
 344        pdev->devt = devt;
 345
 346        /* delay uevent until 'holders' subdir is created */
 347        dev_set_uevent_suppress(pdev, 1);
 348        err = device_add(pdev);
 349        if (err)
 350                goto out_put;
 351
 352        err = -ENOMEM;
 353        p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
 354        if (!p->holder_dir)
 355                goto out_del;
 356
 357        dev_set_uevent_suppress(pdev, 0);
 358        if (flags & ADDPART_FLAG_WHOLEDISK) {
 359                err = device_create_file(pdev, &dev_attr_whole_disk);
 360                if (err)
 361                        goto out_del;
 362        }
 363
 364        err = hd_ref_init(p);
 365        if (err) {
 366                if (flags & ADDPART_FLAG_WHOLEDISK)
 367                        goto out_remove_file;
 368                goto out_del;
 369        }
 370
 371        /* everything is up and running, commence */
 372        rcu_assign_pointer(ptbl->part[partno], p);
 373
 374        /* suppress uevent if the disk suppresses it */
 375        if (!dev_get_uevent_suppress(ddev))
 376                kobject_uevent(&pdev->kobj, KOBJ_ADD);
 377        return p;
 378
 379out_free_info:
 380        free_part_info(p);
 381out_free_stats:
 382        free_part_stats(p);
 383out_free:
 384        kfree(p);
 385        return ERR_PTR(err);
 386out_remove_file:
 387        device_remove_file(pdev, &dev_attr_whole_disk);
 388out_del:
 389        kobject_put(p->holder_dir);
 390        device_del(pdev);
 391out_put:
 392        put_device(pdev);
 393        blk_free_devt(devt);
 394        return ERR_PTR(err);
 395}
 396
 397static bool disk_unlock_native_capacity(struct gendisk *disk)
 398{
 399        const struct block_device_operations *bdops = disk->fops;
 400
 401        if (bdops->unlock_native_capacity &&
 402            !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
 403                printk(KERN_CONT "enabling native capacity\n");
 404                bdops->unlock_native_capacity(disk);
 405                disk->flags |= GENHD_FL_NATIVE_CAPACITY;
 406                return true;
 407        } else {
 408                printk(KERN_CONT "truncated\n");
 409                return false;
 410        }
 411}
 412
 413static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
 414{
 415        struct disk_part_iter piter;
 416        struct hd_struct *part;
 417        int res;
 418
 419        if (bdev->bd_part_count || bdev->bd_super)
 420                return -EBUSY;
 421        res = invalidate_partition(disk, 0);
 422        if (res)
 423                return res;
 424
 425        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 426        while ((part = disk_part_iter_next(&piter)))
 427                delete_partition(disk, part->partno);
 428        disk_part_iter_exit(&piter);
 429
 430        return 0;
 431}
 432
 433int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
 434{
 435        struct parsed_partitions *state = NULL;
 436        struct hd_struct *part;
 437        int p, highest, res;
 438rescan:
 439        if (state && !IS_ERR(state)) {
 440                free_partitions(state);
 441                state = NULL;
 442        }
 443
 444        res = drop_partitions(disk, bdev);
 445        if (res)
 446                return res;
 447
 448        if (disk->fops->revalidate_disk)
 449                disk->fops->revalidate_disk(disk);
 450        blk_integrity_revalidate(disk);
 451        check_disk_size_change(disk, bdev);
 452        bdev->bd_invalidated = 0;
 453        if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
 454                return 0;
 455        if (IS_ERR(state)) {
 456                /*
 457                 * I/O error reading the partition table.  If any
 458                 * partition code tried to read beyond EOD, retry
 459                 * after unlocking native capacity.
 460                 */
 461                if (PTR_ERR(state) == -ENOSPC) {
 462                        printk(KERN_WARNING "%s: partition table beyond EOD, ",
 463                               disk->disk_name);
 464                        if (disk_unlock_native_capacity(disk))
 465                                goto rescan;
 466                }
 467                return -EIO;
 468        }
 469        /*
 470         * If any partition code tried to read beyond EOD, try
 471         * unlocking native capacity even if partition table is
 472         * successfully read as we could be missing some partitions.
 473         */
 474        if (state->access_beyond_eod) {
 475                printk(KERN_WARNING
 476                       "%s: partition table partially beyond EOD, ",
 477                       disk->disk_name);
 478                if (disk_unlock_native_capacity(disk))
 479                        goto rescan;
 480        }
 481
 482        /* tell userspace that the media / partition table may have changed */
 483        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 484
 485        /* Detect the highest partition number and preallocate
 486         * disk->part_tbl.  This is an optimization and not strictly
 487         * necessary.
 488         */
 489        for (p = 1, highest = 0; p < state->limit; p++)
 490                if (state->parts[p].size)
 491                        highest = p;
 492
 493        disk_expand_part_tbl(disk, highest);
 494
 495        /* add partitions */
 496        for (p = 1; p < state->limit; p++) {
 497                sector_t size, from;
 498
 499                size = state->parts[p].size;
 500                if (!size)
 501                        continue;
 502
 503                from = state->parts[p].from;
 504                if (from >= get_capacity(disk)) {
 505                        printk(KERN_WARNING
 506                               "%s: p%d start %llu is beyond EOD, ",
 507                               disk->disk_name, p, (unsigned long long) from);
 508                        if (disk_unlock_native_capacity(disk))
 509                                goto rescan;
 510                        continue;
 511                }
 512
 513                if (from + size > get_capacity(disk)) {
 514                        printk(KERN_WARNING
 515                               "%s: p%d size %llu extends beyond EOD, ",
 516                               disk->disk_name, p, (unsigned long long) size);
 517
 518                        if (disk_unlock_native_capacity(disk)) {
 519                                /* free state and restart */
 520                                goto rescan;
 521                        } else {
 522                                /*
 523                                 * we can not ignore partitions of broken tables
 524                                 * created by for example camera firmware, but
 525                                 * we limit them to the end of the disk to avoid
 526                                 * creating invalid block devices
 527                                 */
 528                                size = get_capacity(disk) - from;
 529                        }
 530                }
 531
 532                part = add_partition(disk, p, from, size,
 533                                     state->parts[p].flags,
 534                                     &state->parts[p].info);
 535                if (IS_ERR(part)) {
 536                        printk(KERN_ERR " %s: p%d could not be added: %ld\n",
 537                               disk->disk_name, p, -PTR_ERR(part));
 538                        continue;
 539                }
 540#ifdef CONFIG_BLK_DEV_MD
 541                if (state->parts[p].flags & ADDPART_FLAG_RAID)
 542                        md_autodetect_dev(part_to_dev(part)->devt);
 543#endif
 544        }
 545        free_partitions(state);
 546        return 0;
 547}
 548
 549int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
 550{
 551        int res;
 552
 553        if (!bdev->bd_invalidated)
 554                return 0;
 555
 556        res = drop_partitions(disk, bdev);
 557        if (res)
 558                return res;
 559
 560        set_capacity(disk, 0);
 561        check_disk_size_change(disk, bdev);
 562        bdev->bd_invalidated = 0;
 563        /* tell userspace that the media / partition table may have changed */
 564        kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
 565
 566        return 0;
 567}
 568
 569static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
 570{
 571        struct address_space *mapping = bdev->bd_inode->i_mapping;
 572
 573        return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
 574                                 NULL);
 575}
 576
 577unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
 578{
 579        struct page *page;
 580
 581        /* don't populate page cache for dax capable devices */
 582        if (IS_DAX(bdev->bd_inode))
 583                page = read_dax_sector(bdev, n);
 584        else
 585                page = read_pagecache_sector(bdev, n);
 586
 587        if (!IS_ERR(page)) {
 588                if (PageError(page))
 589                        goto fail;
 590                p->v = page;
 591                return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
 592fail:
 593                put_page(page);
 594        }
 595        p->v = NULL;
 596        return NULL;
 597}
 598
 599EXPORT_SYMBOL(read_dev_sector);
 600