/* linux/block/genhd.c */
   1/*
   2 *  gendisk handling
   3 */
   4
   5#include <linux/module.h>
   6#include <linux/fs.h>
   7#include <linux/genhd.h>
   8#include <linux/kdev_t.h>
   9#include <linux/kernel.h>
  10#include <linux/blkdev.h>
  11#include <linux/init.h>
  12#include <linux/spinlock.h>
  13#include <linux/proc_fs.h>
  14#include <linux/seq_file.h>
  15#include <linux/slab.h>
  16#include <linux/kmod.h>
  17#include <linux/kobj_map.h>
  18#include <linux/buffer_head.h>
  19#include <linux/mutex.h>
  20#include <linux/idr.h>
  21
  22#include "blk.h"
  23
  24static DEFINE_MUTEX(block_class_lock);
  25#ifndef CONFIG_SYSFS_DEPRECATED
  26struct kobject *block_depr;
  27#endif
  28
  29/* for extended dynamic devt allocation, currently only one major is used */
  30#define MAX_EXT_DEVT            (1 << MINORBITS)
  31
  32/* For extended devt allocation.  ext_devt_mutex prevents look up
  33 * results from going away underneath its user.
  34 */
  35static DEFINE_MUTEX(ext_devt_mutex);
  36static DEFINE_IDR(ext_devt_idr);
  37
  38static struct device_type disk_type;
  39
  40/**
  41 * disk_get_part - get partition
  42 * @disk: disk to look partition from
  43 * @partno: partition number
  44 *
  45 * Look for partition @partno from @disk.  If found, increment
  46 * reference count and return it.
  47 *
  48 * CONTEXT:
  49 * Don't care.
  50 *
  51 * RETURNS:
  52 * Pointer to the found partition on success, NULL if not found.
  53 */
  54struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
  55{
  56        struct hd_struct *part = NULL;
  57        struct disk_part_tbl *ptbl;
  58
  59        if (unlikely(partno < 0))
  60                return NULL;
  61
  62        rcu_read_lock();
  63
  64        ptbl = rcu_dereference(disk->part_tbl);
  65        if (likely(partno < ptbl->len)) {
  66                part = rcu_dereference(ptbl->part[partno]);
  67                if (part)
  68                        get_device(part_to_dev(part));
  69        }
  70
  71        rcu_read_unlock();
  72
  73        return part;
  74}
  75EXPORT_SYMBOL_GPL(disk_get_part);
  76
  77/**
  78 * disk_part_iter_init - initialize partition iterator
  79 * @piter: iterator to initialize
  80 * @disk: disk to iterate over
  81 * @flags: DISK_PITER_* flags
  82 *
  83 * Initialize @piter so that it iterates over partitions of @disk.
  84 *
  85 * CONTEXT:
  86 * Don't care.
  87 */
  88void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
  89                          unsigned int flags)
  90{
  91        struct disk_part_tbl *ptbl;
  92
  93        rcu_read_lock();
  94        ptbl = rcu_dereference(disk->part_tbl);
  95
  96        piter->disk = disk;
  97        piter->part = NULL;
  98
  99        if (flags & DISK_PITER_REVERSE)
 100                piter->idx = ptbl->len - 1;
 101        else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0))
 102                piter->idx = 0;
 103        else
 104                piter->idx = 1;
 105
 106        piter->flags = flags;
 107
 108        rcu_read_unlock();
 109}
 110EXPORT_SYMBOL_GPL(disk_part_iter_init);
 111
 112/**
 113 * disk_part_iter_next - proceed iterator to the next partition and return it
 114 * @piter: iterator of interest
 115 *
 116 * Proceed @piter to the next partition and return it.
 117 *
 118 * CONTEXT:
 119 * Don't care.
 120 */
 121struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
 122{
 123        struct disk_part_tbl *ptbl;
 124        int inc, end;
 125
 126        /* put the last partition */
 127        disk_put_part(piter->part);
 128        piter->part = NULL;
 129
 130        /* get part_tbl */
 131        rcu_read_lock();
 132        ptbl = rcu_dereference(piter->disk->part_tbl);
 133
 134        /* determine iteration parameters */
 135        if (piter->flags & DISK_PITER_REVERSE) {
 136                inc = -1;
 137                if (piter->flags & (DISK_PITER_INCL_PART0 |
 138                                    DISK_PITER_INCL_EMPTY_PART0))
 139                        end = -1;
 140                else
 141                        end = 0;
 142        } else {
 143                inc = 1;
 144                end = ptbl->len;
 145        }
 146
 147        /* iterate to the next partition */
 148        for (; piter->idx != end; piter->idx += inc) {
 149                struct hd_struct *part;
 150
 151                part = rcu_dereference(ptbl->part[piter->idx]);
 152                if (!part)
 153                        continue;
 154                if (!part->nr_sects &&
 155                    !(piter->flags & DISK_PITER_INCL_EMPTY) &&
 156                    !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
 157                      piter->idx == 0))
 158                        continue;
 159
 160                get_device(part_to_dev(part));
 161                piter->part = part;
 162                piter->idx += inc;
 163                break;
 164        }
 165
 166        rcu_read_unlock();
 167
 168        return piter->part;
 169}
 170EXPORT_SYMBOL_GPL(disk_part_iter_next);
 171
 172/**
 173 * disk_part_iter_exit - finish up partition iteration
 174 * @piter: iter of interest
 175 *
 176 * Called when iteration is over.  Cleans up @piter.
 177 *
 178 * CONTEXT:
 179 * Don't care.
 180 */
 181void disk_part_iter_exit(struct disk_part_iter *piter)
 182{
 183        disk_put_part(piter->part);
 184        piter->part = NULL;
 185}
 186EXPORT_SYMBOL_GPL(disk_part_iter_exit);
 187
 188static inline int sector_in_part(struct hd_struct *part, sector_t sector)
 189{
 190        return part->start_sect <= sector &&
 191                sector < part->start_sect + part->nr_sects;
 192}
 193
 194/**
 195 * disk_map_sector_rcu - map sector to partition
 196 * @disk: gendisk of interest
 197 * @sector: sector to map
 198 *
 199 * Find out which partition @sector maps to on @disk.  This is
 200 * primarily used for stats accounting.
 201 *
 202 * CONTEXT:
 203 * RCU read locked.  The returned partition pointer is valid only
 204 * while preemption is disabled.
 205 *
 206 * RETURNS:
 207 * Found partition on success, part0 is returned if no partition matches
 208 */
 209struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
 210{
 211        struct disk_part_tbl *ptbl;
 212        struct hd_struct *part;
 213        int i;
 214
 215        ptbl = rcu_dereference(disk->part_tbl);
 216
 217        part = rcu_dereference(ptbl->last_lookup);
 218        if (part && sector_in_part(part, sector))
 219                return part;
 220
 221        for (i = 1; i < ptbl->len; i++) {
 222                part = rcu_dereference(ptbl->part[i]);
 223
 224                if (part && sector_in_part(part, sector)) {
 225                        rcu_assign_pointer(ptbl->last_lookup, part);
 226                        return part;
 227                }
 228        }
 229        return &disk->part0;
 230}
 231EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
 232
 233/*
 234 * Can be deleted altogether. Later.
 235 *
 236 */
 237static struct blk_major_name {
 238        struct blk_major_name *next;
 239        int major;
 240        char name[16];
 241} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 242
 243/* index in the above - for now: assume no multimajor ranges */
 244static inline int major_to_index(int major)
 245{
 246        return major % BLKDEV_MAJOR_HASH_SIZE;
 247}
 248
 249#ifdef CONFIG_PROC_FS
 250void blkdev_show(struct seq_file *seqf, off_t offset)
 251{
 252        struct blk_major_name *dp;
 253
 254        if (offset < BLKDEV_MAJOR_HASH_SIZE) {
 255                mutex_lock(&block_class_lock);
 256                for (dp = major_names[offset]; dp; dp = dp->next)
 257                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 258                mutex_unlock(&block_class_lock);
 259        }
 260}
 261#endif /* CONFIG_PROC_FS */
 262
 263/**
 264 * register_blkdev - register a new block device
 265 *
 266 * @major: the requested major device number [1..255]. If @major=0, try to
 267 *         allocate any unused major number.
 268 * @name: the name of the new block device as a zero terminated string
 269 *
 270 * The @name must be unique within the system.
 271 *
 272 * The return value depends on the @major input parameter.
 273 *  - if a major device number was requested in range [1..255] then the
 274 *    function returns zero on success, or a negative error code
 275 *  - if any unused major number was requested with @major=0 parameter
 276 *    then the return value is the allocated major number in range
 277 *    [1..255] or a negative error code otherwise
 278 */
 279int register_blkdev(unsigned int major, const char *name)
 280{
 281        struct blk_major_name **n, *p;
 282        int index, ret = 0;
 283
 284        mutex_lock(&block_class_lock);
 285
 286        /* temporary */
 287        if (major == 0) {
 288                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 289                        if (major_names[index] == NULL)
 290                                break;
 291                }
 292
 293                if (index == 0) {
 294                        printk("register_blkdev: failed to get major for %s\n",
 295                               name);
 296                        ret = -EBUSY;
 297                        goto out;
 298                }
 299                major = index;
 300                ret = major;
 301        }
 302
 303        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 304        if (p == NULL) {
 305                ret = -ENOMEM;
 306                goto out;
 307        }
 308
 309        p->major = major;
 310        strlcpy(p->name, name, sizeof(p->name));
 311        p->next = NULL;
 312        index = major_to_index(major);
 313
 314        for (n = &major_names[index]; *n; n = &(*n)->next) {
 315                if ((*n)->major == major)
 316                        break;
 317        }
 318        if (!*n)
 319                *n = p;
 320        else
 321                ret = -EBUSY;
 322
 323        if (ret < 0) {
 324                printk("register_blkdev: cannot get major %d for %s\n",
 325                       major, name);
 326                kfree(p);
 327        }
 328out:
 329        mutex_unlock(&block_class_lock);
 330        return ret;
 331}
 332
 333EXPORT_SYMBOL(register_blkdev);
 334
 335void unregister_blkdev(unsigned int major, const char *name)
 336{
 337        struct blk_major_name **n;
 338        struct blk_major_name *p = NULL;
 339        int index = major_to_index(major);
 340
 341        mutex_lock(&block_class_lock);
 342        for (n = &major_names[index]; *n; n = &(*n)->next)
 343                if ((*n)->major == major)
 344                        break;
 345        if (!*n || strcmp((*n)->name, name)) {
 346                WARN_ON(1);
 347        } else {
 348                p = *n;
 349                *n = p->next;
 350        }
 351        mutex_unlock(&block_class_lock);
 352        kfree(p);
 353}
 354
 355EXPORT_SYMBOL(unregister_blkdev);
 356
 357static struct kobj_map *bdev_map;
 358
 359/**
 360 * blk_mangle_minor - scatter minor numbers apart
 361 * @minor: minor number to mangle
 362 *
 363 * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
 364 * is enabled.  Mangling twice gives the original value.
 365 *
 366 * RETURNS:
 367 * Mangled value.
 368 *
 369 * CONTEXT:
 370 * Don't care.
 371 */
 372static int blk_mangle_minor(int minor)
 373{
 374#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
 375        int i;
 376
 377        for (i = 0; i < MINORBITS / 2; i++) {
 378                int low = minor & (1 << i);
 379                int high = minor & (1 << (MINORBITS - 1 - i));
 380                int distance = MINORBITS - 1 - 2 * i;
 381
 382                minor ^= low | high;    /* clear both bits */
 383                low <<= distance;       /* swap the positions */
 384                high >>= distance;
 385                minor |= low | high;    /* and set */
 386        }
 387#endif
 388        return minor;
 389}
 390
 391/**
 392 * blk_alloc_devt - allocate a dev_t for a partition
 393 * @part: partition to allocate dev_t for
 394 * @devt: out parameter for resulting dev_t
 395 *
 396 * Allocate a dev_t for block device.
 397 *
 398 * RETURNS:
 399 * 0 on success, allocated dev_t is returned in *@devt.  -errno on
 400 * failure.
 401 *
 402 * CONTEXT:
 403 * Might sleep.
 404 */
 405int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 406{
 407        struct gendisk *disk = part_to_disk(part);
 408        int idx, rc;
 409
 410        /* in consecutive minor range? */
 411        if (part->partno < disk->minors) {
 412                *devt = MKDEV(disk->major, disk->first_minor + part->partno);
 413                return 0;
 414        }
 415
 416        /* allocate ext devt */
 417        do {
 418                if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
 419                        return -ENOMEM;
 420                rc = idr_get_new(&ext_devt_idr, part, &idx);
 421        } while (rc == -EAGAIN);
 422
 423        if (rc)
 424                return rc;
 425
 426        if (idx > MAX_EXT_DEVT) {
 427                idr_remove(&ext_devt_idr, idx);
 428                return -EBUSY;
 429        }
 430
 431        *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 432        return 0;
 433}
 434
 435/**
 436 * blk_free_devt - free a dev_t
 437 * @devt: dev_t to free
 438 *
 439 * Free @devt which was allocated using blk_alloc_devt().
 440 *
 441 * CONTEXT:
 442 * Might sleep.
 443 */
 444void blk_free_devt(dev_t devt)
 445{
 446        might_sleep();
 447
 448        if (devt == MKDEV(0, 0))
 449                return;
 450
 451        if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 452                mutex_lock(&ext_devt_mutex);
 453                idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 454                mutex_unlock(&ext_devt_mutex);
 455        }
 456}
 457
 458static char *bdevt_str(dev_t devt, char *buf)
 459{
 460        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 461                char tbuf[BDEVT_SIZE];
 462                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 463                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 464        } else
 465                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 466
 467        return buf;
 468}
 469
 470/*
 471 * Register device numbers dev..(dev+range-1)
 472 * range must be nonzero
 473 * The hash chain is sorted on range, so that subranges can override.
 474 */
 475void blk_register_region(dev_t devt, unsigned long range, struct module *module,
 476                         struct kobject *(*probe)(dev_t, int *, void *),
 477                         int (*lock)(dev_t, void *), void *data)
 478{
 479        kobj_map(bdev_map, devt, range, module, probe, lock, data);
 480}
 481
 482EXPORT_SYMBOL(blk_register_region);
 483
 484void blk_unregister_region(dev_t devt, unsigned long range)
 485{
 486        kobj_unmap(bdev_map, devt, range);
 487}
 488
 489EXPORT_SYMBOL(blk_unregister_region);
 490
 491static struct kobject *exact_match(dev_t devt, int *partno, void *data)
 492{
 493        struct gendisk *p = data;
 494
 495        return &disk_to_dev(p)->kobj;
 496}
 497
 498static int exact_lock(dev_t devt, void *data)
 499{
 500        struct gendisk *p = data;
 501
 502        if (!get_disk(p))
 503                return -1;
 504        return 0;
 505}
 506
 507/**
 508 * add_disk - add partitioning information to kernel list
 509 * @disk: per-device partitioning information
 510 *
 511 * This function registers the partitioning information in @disk
 512 * with the kernel.
 513 *
 514 * FIXME: error handling
 515 */
 516void add_disk(struct gendisk *disk)
 517{
 518        struct backing_dev_info *bdi;
 519        dev_t devt;
 520        int retval;
 521
 522        /* minors == 0 indicates to use ext devt from part0 and should
 523         * be accompanied with EXT_DEVT flag.  Make sure all
 524         * parameters make sense.
 525         */
 526        WARN_ON(disk->minors && !(disk->major || disk->first_minor));
 527        WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
 528
 529        disk->flags |= GENHD_FL_UP;
 530
 531        retval = blk_alloc_devt(&disk->part0, &devt);
 532        if (retval) {
 533                WARN_ON(1);
 534                return;
 535        }
 536        disk_to_dev(disk)->devt = devt;
 537
 538        /* ->major and ->first_minor aren't supposed to be
 539         * dereferenced from here on, but set them just in case.
 540         */
 541        disk->major = MAJOR(devt);
 542        disk->first_minor = MINOR(devt);
 543
 544        blk_register_region(disk_devt(disk), disk->minors, NULL,
 545                            exact_match, exact_lock, disk);
 546        register_disk(disk);
 547        blk_register_queue(disk);
 548
 549        bdi = &disk->queue->backing_dev_info;
 550        bdi_register_dev(bdi, disk_devt(disk));
 551        retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
 552                                   "bdi");
 553        WARN_ON(retval);
 554}
 555
 556EXPORT_SYMBOL(add_disk);
 557EXPORT_SYMBOL(del_gendisk);     /* in partitions/check.c */
 558
 559void unlink_gendisk(struct gendisk *disk)
 560{
 561        sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 562        bdi_unregister(&disk->queue->backing_dev_info);
 563        blk_unregister_queue(disk);
 564        blk_unregister_region(disk_devt(disk), disk->minors);
 565}
 566
 567/**
 568 * get_gendisk - get partitioning information for a given device
 569 * @devt: device to get partitioning information for
 570 * @partno: returned partition index
 571 *
 572 * This function gets the structure containing partitioning
 573 * information for the given device @devt.
 574 */
 575struct gendisk *get_gendisk(dev_t devt, int *partno)
 576{
 577        struct gendisk *disk = NULL;
 578
 579        if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
 580                struct kobject *kobj;
 581
 582                kobj = kobj_lookup(bdev_map, devt, partno);
 583                if (kobj)
 584                        disk = dev_to_disk(kobj_to_dev(kobj));
 585        } else {
 586                struct hd_struct *part;
 587
 588                mutex_lock(&ext_devt_mutex);
 589                part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 590                if (part && get_disk(part_to_disk(part))) {
 591                        *partno = part->partno;
 592                        disk = part_to_disk(part);
 593                }
 594                mutex_unlock(&ext_devt_mutex);
 595        }
 596
 597        return disk;
 598}
 599
 600/**
 601 * bdget_disk - do bdget() by gendisk and partition number
 602 * @disk: gendisk of interest
 603 * @partno: partition number
 604 *
 605 * Find partition @partno from @disk, do bdget() on it.
 606 *
 607 * CONTEXT:
 608 * Don't care.
 609 *
 610 * RETURNS:
 611 * Resulting block_device on success, NULL on failure.
 612 */
 613struct block_device *bdget_disk(struct gendisk *disk, int partno)
 614{
 615        struct hd_struct *part;
 616        struct block_device *bdev = NULL;
 617
 618        part = disk_get_part(disk, partno);
 619        if (part)
 620                bdev = bdget(part_devt(part));
 621        disk_put_part(part);
 622
 623        return bdev;
 624}
 625EXPORT_SYMBOL(bdget_disk);
 626
 627/*
 628 * print a full list of all partitions - intended for places where the root
 629 * filesystem can't be mounted and thus to give the victim some idea of what
 630 * went wrong
 631 */
 632void __init printk_all_partitions(void)
 633{
 634        struct class_dev_iter iter;
 635        struct device *dev;
 636
 637        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 638        while ((dev = class_dev_iter_next(&iter))) {
 639                struct gendisk *disk = dev_to_disk(dev);
 640                struct disk_part_iter piter;
 641                struct hd_struct *part;
 642                char name_buf[BDEVNAME_SIZE];
 643                char devt_buf[BDEVT_SIZE];
 644
 645                /*
 646                 * Don't show empty devices or things that have been
 647                 * surpressed
 648                 */
 649                if (get_capacity(disk) == 0 ||
 650                    (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
 651                        continue;
 652
 653                /*
 654                 * Note, unlike /proc/partitions, I am showing the
 655                 * numbers in hex - the same format as the root=
 656                 * option takes.
 657                 */
 658                disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
 659                while ((part = disk_part_iter_next(&piter))) {
 660                        bool is_part0 = part == &disk->part0;
 661
 662                        printk("%s%s %10llu %s", is_part0 ? "" : "  ",
 663                               bdevt_str(part_devt(part), devt_buf),
 664                               (unsigned long long)part->nr_sects >> 1,
 665                               disk_name(disk, part->partno, name_buf));
 666                        if (is_part0) {
 667                                if (disk->driverfs_dev != NULL &&
 668                                    disk->driverfs_dev->driver != NULL)
 669                                        printk(" driver: %s\n",
 670                                              disk->driverfs_dev->driver->name);
 671                                else
 672                                        printk(" (driver?)\n");
 673                        } else
 674                                printk("\n");
 675                }
 676                disk_part_iter_exit(&piter);
 677        }
 678        class_dev_iter_exit(&iter);
 679}
 680
 681#ifdef CONFIG_PROC_FS
 682/* iterator */
 683static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 684{
 685        loff_t skip = *pos;
 686        struct class_dev_iter *iter;
 687        struct device *dev;
 688
 689        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 690        if (!iter)
 691                return ERR_PTR(-ENOMEM);
 692
 693        seqf->private = iter;
 694        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 695        do {
 696                dev = class_dev_iter_next(iter);
 697                if (!dev)
 698                        return NULL;
 699        } while (skip--);
 700
 701        return dev_to_disk(dev);
 702}
 703
 704static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 705{
 706        struct device *dev;
 707
 708        (*pos)++;
 709        dev = class_dev_iter_next(seqf->private);
 710        if (dev)
 711                return dev_to_disk(dev);
 712
 713        return NULL;
 714}
 715
 716static void disk_seqf_stop(struct seq_file *seqf, void *v)
 717{
 718        struct class_dev_iter *iter = seqf->private;
 719
 720        /* stop is called even after start failed :-( */
 721        if (iter) {
 722                class_dev_iter_exit(iter);
 723                kfree(iter);
 724        }
 725}
 726
 727static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 728{
 729        static void *p;
 730
 731        p = disk_seqf_start(seqf, pos);
 732        if (!IS_ERR(p) && p && !*pos)
 733                seq_puts(seqf, "major minor  #blocks  name\n\n");
 734        return p;
 735}
 736
 737static int show_partition(struct seq_file *seqf, void *v)
 738{
 739        struct gendisk *sgp = v;
 740        struct disk_part_iter piter;
 741        struct hd_struct *part;
 742        char buf[BDEVNAME_SIZE];
 743
 744        /* Don't show non-partitionable removeable devices or empty devices */
 745        if (!get_capacity(sgp) || (!disk_partitionable(sgp) &&
 746                                   (sgp->flags & GENHD_FL_REMOVABLE)))
 747                return 0;
 748        if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
 749                return 0;
 750
 751        /* show the full disk and all non-0 size partitions of it */
 752        disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0);
 753        while ((part = disk_part_iter_next(&piter)))
 754                seq_printf(seqf, "%4d  %7d %10llu %s\n",
 755                           MAJOR(part_devt(part)), MINOR(part_devt(part)),
 756                           (unsigned long long)part->nr_sects >> 1,
 757                           disk_name(sgp, part->partno, buf));
 758        disk_part_iter_exit(&piter);
 759
 760        return 0;
 761}
 762
 763static const struct seq_operations partitions_op = {
 764        .start  = show_partition_start,
 765        .next   = disk_seqf_next,
 766        .stop   = disk_seqf_stop,
 767        .show   = show_partition
 768};
 769
 770static int partitions_open(struct inode *inode, struct file *file)
 771{
 772        return seq_open(file, &partitions_op);
 773}
 774
 775static const struct file_operations proc_partitions_operations = {
 776        .open           = partitions_open,
 777        .read           = seq_read,
 778        .llseek         = seq_lseek,
 779        .release        = seq_release,
 780};
 781#endif
 782
 783
 784static struct kobject *base_probe(dev_t devt, int *partno, void *data)
 785{
 786        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 787                /* Make old-style 2.4 aliases work */
 788                request_module("block-major-%d", MAJOR(devt));
 789        return NULL;
 790}
 791
 792static int __init genhd_device_init(void)
 793{
 794        int error;
 795
 796        block_class.dev_kobj = sysfs_dev_block_kobj;
 797        error = class_register(&block_class);
 798        if (unlikely(error))
 799                return error;
 800        bdev_map = kobj_map_init(base_probe, &block_class_lock);
 801        blk_dev_init();
 802
 803        register_blkdev(BLOCK_EXT_MAJOR, "blkext");
 804
 805#ifndef CONFIG_SYSFS_DEPRECATED
 806        /* create top-level block dir */
 807        block_depr = kobject_create_and_add("block", NULL);
 808#endif
 809        return 0;
 810}
 811
 812subsys_initcall(genhd_device_init);
 813
 814static ssize_t disk_range_show(struct device *dev,
 815                               struct device_attribute *attr, char *buf)
 816{
 817        struct gendisk *disk = dev_to_disk(dev);
 818
 819        return sprintf(buf, "%d\n", disk->minors);
 820}
 821
 822static ssize_t disk_ext_range_show(struct device *dev,
 823                                   struct device_attribute *attr, char *buf)
 824{
 825        struct gendisk *disk = dev_to_disk(dev);
 826
 827        return sprintf(buf, "%d\n", disk_max_parts(disk));
 828}
 829
 830static ssize_t disk_removable_show(struct device *dev,
 831                                   struct device_attribute *attr, char *buf)
 832{
 833        struct gendisk *disk = dev_to_disk(dev);
 834
 835        return sprintf(buf, "%d\n",
 836                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 837}
 838
 839static ssize_t disk_ro_show(struct device *dev,
 840                                   struct device_attribute *attr, char *buf)
 841{
 842        struct gendisk *disk = dev_to_disk(dev);
 843
 844        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 845}
 846
 847static ssize_t disk_capability_show(struct device *dev,
 848                                    struct device_attribute *attr, char *buf)
 849{
 850        struct gendisk *disk = dev_to_disk(dev);
 851
 852        return sprintf(buf, "%x\n", disk->flags);
 853}
 854
 855static ssize_t disk_alignment_offset_show(struct device *dev,
 856                                          struct device_attribute *attr,
 857                                          char *buf)
 858{
 859        struct gendisk *disk = dev_to_disk(dev);
 860
 861        return sprintf(buf, "%d\n", queue_alignment_offset(disk->queue));
 862}
 863
 864static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 865static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
 866static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
 867static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 868static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 869static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 870static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 871static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 872static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 873#ifdef CONFIG_FAIL_MAKE_REQUEST
 874static struct device_attribute dev_attr_fail =
 875        __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
 876#endif
 877#ifdef CONFIG_FAIL_IO_TIMEOUT
 878static struct device_attribute dev_attr_fail_timeout =
 879        __ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
 880                part_timeout_store);
 881#endif
 882
 883static struct attribute *disk_attrs[] = {
 884        &dev_attr_range.attr,
 885        &dev_attr_ext_range.attr,
 886        &dev_attr_removable.attr,
 887        &dev_attr_ro.attr,
 888        &dev_attr_size.attr,
 889        &dev_attr_alignment_offset.attr,
 890        &dev_attr_capability.attr,
 891        &dev_attr_stat.attr,
 892        &dev_attr_inflight.attr,
 893#ifdef CONFIG_FAIL_MAKE_REQUEST
 894        &dev_attr_fail.attr,
 895#endif
 896#ifdef CONFIG_FAIL_IO_TIMEOUT
 897        &dev_attr_fail_timeout.attr,
 898#endif
 899        NULL
 900};
 901
 902static struct attribute_group disk_attr_group = {
 903        .attrs = disk_attrs,
 904};
 905
 906static const struct attribute_group *disk_attr_groups[] = {
 907        &disk_attr_group,
 908        NULL
 909};
 910
 911static void disk_free_ptbl_rcu_cb(struct rcu_head *head)
 912{
 913        struct disk_part_tbl *ptbl =
 914                container_of(head, struct disk_part_tbl, rcu_head);
 915
 916        kfree(ptbl);
 917}
 918
 919/**
 920 * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way
 921 * @disk: disk to replace part_tbl for
 922 * @new_ptbl: new part_tbl to install
 923 *
 924 * Replace disk->part_tbl with @new_ptbl in RCU-safe way.  The
 925 * original ptbl is freed using RCU callback.
 926 *
 927 * LOCKING:
 928 * Matching bd_mutx locked.
 929 */
 930static void disk_replace_part_tbl(struct gendisk *disk,
 931                                  struct disk_part_tbl *new_ptbl)
 932{
 933        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 934
 935        rcu_assign_pointer(disk->part_tbl, new_ptbl);
 936
 937        if (old_ptbl) {
 938                rcu_assign_pointer(old_ptbl->last_lookup, NULL);
 939                call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb);
 940        }
 941}
 942
 943/**
 944 * disk_expand_part_tbl - expand disk->part_tbl
 945 * @disk: disk to expand part_tbl for
 946 * @partno: expand such that this partno can fit in
 947 *
 948 * Expand disk->part_tbl such that @partno can fit in.  disk->part_tbl
 949 * uses RCU to allow unlocked dereferencing for stats and other stuff.
 950 *
 951 * LOCKING:
 952 * Matching bd_mutex locked, might sleep.
 953 *
 954 * RETURNS:
 955 * 0 on success, -errno on failure.
 956 */
 957int disk_expand_part_tbl(struct gendisk *disk, int partno)
 958{
 959        struct disk_part_tbl *old_ptbl = disk->part_tbl;
 960        struct disk_part_tbl *new_ptbl;
 961        int len = old_ptbl ? old_ptbl->len : 0;
 962        int target = partno + 1;
 963        size_t size;
 964        int i;
 965
 966        /* disk_max_parts() is zero during initialization, ignore if so */
 967        if (disk_max_parts(disk) && target > disk_max_parts(disk))
 968                return -EINVAL;
 969
 970        if (target <= len)
 971                return 0;
 972
 973        size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]);
 974        new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id);
 975        if (!new_ptbl)
 976                return -ENOMEM;
 977
 978        INIT_RCU_HEAD(&new_ptbl->rcu_head);
 979        new_ptbl->len = target;
 980
 981        for (i = 0; i < len; i++)
 982                rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]);
 983
 984        disk_replace_part_tbl(disk, new_ptbl);
 985        return 0;
 986}
 987
 988static void disk_release(struct device *dev)
 989{
 990        struct gendisk *disk = dev_to_disk(dev);
 991
 992        kfree(disk->random);
 993        disk_replace_part_tbl(disk, NULL);
 994        free_part_stats(&disk->part0);
 995        kfree(disk);
 996}
 997struct class block_class = {
 998        .name           = "block",
 999};
1000
1001static char *block_devnode(struct device *dev, mode_t *mode)
1002{
1003        struct gendisk *disk = dev_to_disk(dev);
1004
1005        if (disk->devnode)
1006                return disk->devnode(disk, mode);
1007        return NULL;
1008}
1009
1010static struct device_type disk_type = {
1011        .name           = "disk",
1012        .groups         = disk_attr_groups,
1013        .release        = disk_release,
1014        .devnode        = block_devnode,
1015};
1016
1017#ifdef CONFIG_PROC_FS
1018/*
1019 * aggregate disk stat collector.  Uses the same stats that the sysfs
1020 * entries do, above, but makes them available through one seq_file.
1021 *
1022 * The output looks suspiciously like /proc/partitions with a bunch of
1023 * extra fields.
1024 */
1025static int diskstats_show(struct seq_file *seqf, void *v)
1026{
1027        struct gendisk *gp = v;
1028        struct disk_part_iter piter;
1029        struct hd_struct *hd;
1030        char buf[BDEVNAME_SIZE];
1031        int cpu;
1032
1033        /*
1034        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1035                seq_puts(seqf,  "major minor name"
1036                                "     rio rmerge rsect ruse wio wmerge "
1037                                "wsect wuse running use aveq"
1038                                "\n\n");
1039        */
1040 
1041        disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0);
1042        while ((hd = disk_part_iter_next(&piter))) {
1043                cpu = part_stat_lock();
1044                part_round_stats(cpu, hd);
1045                part_stat_unlock();
1046                seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
1047                           "%u %lu %lu %llu %u %u %u %u\n",
1048                           MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
1049                           disk_name(gp, hd->partno, buf),
1050                           part_stat_read(hd, ios[0]),
1051                           part_stat_read(hd, merges[0]),
1052                           (unsigned long long)part_stat_read(hd, sectors[0]),
1053                           jiffies_to_msecs(part_stat_read(hd, ticks[0])),
1054                           part_stat_read(hd, ios[1]),
1055                           part_stat_read(hd, merges[1]),
1056                           (unsigned long long)part_stat_read(hd, sectors[1]),
1057                           jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1058                           part_in_flight(hd),
1059                           jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1060                           jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1061                        );
1062        }
1063        disk_part_iter_exit(&piter);
1064 
1065        return 0;
1066}
1067
1068static const struct seq_operations diskstats_op = {
1069        .start  = disk_seqf_start,
1070        .next   = disk_seqf_next,
1071        .stop   = disk_seqf_stop,
1072        .show   = diskstats_show
1073};
1074
1075static int diskstats_open(struct inode *inode, struct file *file)
1076{
1077        return seq_open(file, &diskstats_op);
1078}
1079
1080static const struct file_operations proc_diskstats_operations = {
1081        .open           = diskstats_open,
1082        .read           = seq_read,
1083        .llseek         = seq_lseek,
1084        .release        = seq_release,
1085};
1086
1087static int __init proc_genhd_init(void)
1088{
1089        proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
1090        proc_create("partitions", 0, NULL, &proc_partitions_operations);
1091        return 0;
1092}
1093module_init(proc_genhd_init);
1094#endif /* CONFIG_PROC_FS */
1095
1096static void media_change_notify_thread(struct work_struct *work)
1097{
1098        struct gendisk *gd = container_of(work, struct gendisk, async_notify);
1099        char event[] = "MEDIA_CHANGE=1";
1100        char *envp[] = { event, NULL };
1101
1102        /*
1103         * set enviroment vars to indicate which event this is for
1104         * so that user space will know to go check the media status.
1105         */
1106        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1107        put_device(gd->driverfs_dev);
1108}
1109
1110#if 0
1111void genhd_media_change_notify(struct gendisk *disk)
1112{
1113        get_device(disk->driverfs_dev);
1114        schedule_work(&disk->async_notify);
1115}
1116EXPORT_SYMBOL_GPL(genhd_media_change_notify);
1117#endif  /*  0  */
1118
1119dev_t blk_lookup_devt(const char *name, int partno)
1120{
1121        dev_t devt = MKDEV(0, 0);
1122        struct class_dev_iter iter;
1123        struct device *dev;
1124
1125        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1126        while ((dev = class_dev_iter_next(&iter))) {
1127                struct gendisk *disk = dev_to_disk(dev);
1128                struct hd_struct *part;
1129
1130                if (strcmp(dev_name(dev), name))
1131                        continue;
1132
1133                if (partno < disk->minors) {
1134                        /* We need to return the right devno, even
1135                         * if the partition doesn't exist yet.
1136                         */
1137                        devt = MKDEV(MAJOR(dev->devt),
1138                                     MINOR(dev->devt) + partno);
1139                        break;
1140                }
1141                part = disk_get_part(disk, partno);
1142                if (part) {
1143                        devt = part_devt(part);
1144                        disk_put_part(part);
1145                        break;
1146                }
1147                disk_put_part(part);
1148        }
1149        class_dev_iter_exit(&iter);
1150        return devt;
1151}
1152EXPORT_SYMBOL(blk_lookup_devt);
1153
/*
 * Allocate a gendisk for @minors minors; shorthand for
 * alloc_disk_node() with a node id of -1.
 */
struct gendisk *alloc_disk(int minors)
{
	struct gendisk *disk = alloc_disk_node(minors, -1);

	return disk;
}
EXPORT_SYMBOL(alloc_disk);
1159
1160struct gendisk *alloc_disk_node(int minors, int node_id)
1161{
1162        struct gendisk *disk;
1163
1164        disk = kmalloc_node(sizeof(struct gendisk),
1165                                GFP_KERNEL | __GFP_ZERO, node_id);
1166        if (disk) {
1167                if (!init_part_stats(&disk->part0)) {
1168                        kfree(disk);
1169                        return NULL;
1170                }
1171                disk->node_id = node_id;
1172                if (disk_expand_part_tbl(disk, 0)) {
1173                        free_part_stats(&disk->part0);
1174                        kfree(disk);
1175                        return NULL;
1176                }
1177                disk->part_tbl->part[0] = &disk->part0;
1178
1179                disk->minors = minors;
1180                rand_initialize_disk(disk);
1181                disk_to_dev(disk)->class = &block_class;
1182                disk_to_dev(disk)->type = &disk_type;
1183                device_initialize(disk_to_dev(disk));
1184                INIT_WORK(&disk->async_notify,
1185                        media_change_notify_thread);
1186        }
1187        return disk;
1188}
1189EXPORT_SYMBOL(alloc_disk_node);
1190
1191struct kobject *get_disk(struct gendisk *disk)
1192{
1193        struct module *owner;
1194        struct kobject *kobj;
1195
1196        if (!disk->fops)
1197                return NULL;
1198        owner = disk->fops->owner;
1199        if (owner && !try_module_get(owner))
1200                return NULL;
1201        kobj = kobject_get(&disk_to_dev(disk)->kobj);
1202        if (kobj == NULL) {
1203                module_put(owner);
1204                return NULL;
1205        }
1206        return kobj;
1207
1208}
1209
1210EXPORT_SYMBOL(get_disk);
1211
1212void put_disk(struct gendisk *disk)
1213{
1214        if (disk)
1215                kobject_put(&disk_to_dev(disk)->kobj);
1216}
1217
1218EXPORT_SYMBOL(put_disk);
1219
1220static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1221{
1222        char event[] = "DISK_RO=1";
1223        char *envp[] = { event, NULL };
1224
1225        if (!ro)
1226                event[8] = '0';
1227        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1228}
1229
1230void set_device_ro(struct block_device *bdev, int flag)
1231{
1232        bdev->bd_part->policy = flag;
1233}
1234
1235EXPORT_SYMBOL(set_device_ro);
1236
1237void set_disk_ro(struct gendisk *disk, int flag)
1238{
1239        struct disk_part_iter piter;
1240        struct hd_struct *part;
1241
1242        if (disk->part0.policy != flag) {
1243                set_disk_ro_uevent(disk, flag);
1244                disk->part0.policy = flag;
1245        }
1246
1247        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1248        while ((part = disk_part_iter_next(&piter)))
1249                part->policy = flag;
1250        disk_part_iter_exit(&piter);
1251}
1252
1253EXPORT_SYMBOL(set_disk_ro);
1254
1255int bdev_read_only(struct block_device *bdev)
1256{
1257        if (!bdev)
1258                return 0;
1259        return bdev->bd_part->policy;
1260}
1261
1262EXPORT_SYMBOL(bdev_read_only);
1263
/*
 * Sync and invalidate the block device of partition @partno on @disk.
 *
 * Returns the result of __invalidate_device(), or 0 when
 * bdget_disk() yields no block device.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);
	int ret;

	if (!bdev)
		return 0;

	fsync_bdev(bdev);
	ret = __invalidate_device(bdev);
	bdput(bdev);

	return ret;
}

EXPORT_SYMBOL(invalidate_partition);
1277