linux/block/genhd.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 *  gendisk handling
   4 *
   5 * Portions Copyright (C) 2020 Christoph Hellwig
   6 */
   7
   8#include <linux/module.h>
   9#include <linux/ctype.h>
  10#include <linux/fs.h>
  11#include <linux/kdev_t.h>
  12#include <linux/kernel.h>
  13#include <linux/blkdev.h>
  14#include <linux/backing-dev.h>
  15#include <linux/init.h>
  16#include <linux/spinlock.h>
  17#include <linux/proc_fs.h>
  18#include <linux/seq_file.h>
  19#include <linux/slab.h>
  20#include <linux/kmod.h>
  21#include <linux/major.h>
  22#include <linux/mutex.h>
  23#include <linux/idr.h>
  24#include <linux/log2.h>
  25#include <linux/pm_runtime.h>
  26#include <linux/badblocks.h>
  27#include <linux/part_stat.h>
  28#include "blk-throttle.h"
  29
  30#include "blk.h"
  31#include "blk-mq-sched.h"
  32#include "blk-rq-qos.h"
  33#include "blk-cgroup.h"
  34
  35static struct kobject *block_depr;
  36
  37/*
 * Unique, monotonically increasing sequential number associated with block
 * device instances (i.e. incremented each time a device is attached).
  40 * Associating uevents with block devices in userspace is difficult and racy:
  41 * the uevent netlink socket is lossy, and on slow and overloaded systems has
  42 * a very high latency.
  43 * Block devices do not have exclusive owners in userspace, any process can set
  44 * one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0
  45 * can be reused again and again).
  46 * A userspace process setting up a block device and watching for its events
  47 * cannot thus reliably tell whether an event relates to the device it just set
  48 * up or another earlier instance with the same name.
 * This sequential number allows userspace processes to solve this problem, and
 * uniquely associate a uevent with the lifetime of a device.
  51 */
  52static atomic64_t diskseq;
  53
  54/* for extended dynamic devt allocation, currently only one major is used */
  55#define NR_EXT_DEVT             (1 << MINORBITS)
  56static DEFINE_IDA(ext_devt_ida);
  57
  58void set_capacity(struct gendisk *disk, sector_t sectors)
  59{
  60        struct block_device *bdev = disk->part0;
  61
  62        spin_lock(&bdev->bd_size_lock);
  63        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
  64        bdev->bd_nr_sectors = sectors;
  65        spin_unlock(&bdev->bd_size_lock);
  66}
  67EXPORT_SYMBOL(set_capacity);
  68
  69/*
  70 * Set disk capacity and notify if the size is not currently zero and will not
  71 * be set to zero.  Returns true if a uevent was sent, otherwise false.
  72 */
  73bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
  74{
  75        sector_t capacity = get_capacity(disk);
  76        char *envp[] = { "RESIZE=1", NULL };
  77
  78        set_capacity(disk, size);
  79
  80        /*
  81         * Only print a message and send a uevent if the gendisk is user visible
  82         * and alive.  This avoids spamming the log and udev when setting the
  83         * initial capacity during probing.
  84         */
  85        if (size == capacity ||
  86            !disk_live(disk) ||
  87            (disk->flags & GENHD_FL_HIDDEN))
  88                return false;
  89
  90        pr_info("%s: detected capacity change from %lld to %lld\n",
  91                disk->disk_name, capacity, size);
  92
  93        /*
  94         * Historically we did not send a uevent for changes to/from an empty
  95         * device.
  96         */
  97        if (!capacity || !size)
  98                return false;
  99        kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp);
 100        return true;
 101}
 102EXPORT_SYMBOL_GPL(set_capacity_and_notify);
 103
 104/*
 105 * Format the device name of the indicated block device into the supplied buffer
 106 * and return a pointer to that same buffer for convenience.
 107 *
 * Note: do not use this in new code, use the %pg specifier to sprintf and
 * printk instead.
 110 */
 111const char *bdevname(struct block_device *bdev, char *buf)
 112{
 113        struct gendisk *hd = bdev->bd_disk;
 114        int partno = bdev->bd_partno;
 115
 116        if (!partno)
 117                snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
 118        else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
 119                snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
 120        else
 121                snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
 122
 123        return buf;
 124}
 125EXPORT_SYMBOL(bdevname);
 126
 127static void part_stat_read_all(struct block_device *part,
 128                struct disk_stats *stat)
 129{
 130        int cpu;
 131
 132        memset(stat, 0, sizeof(struct disk_stats));
 133        for_each_possible_cpu(cpu) {
 134                struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu);
 135                int group;
 136
 137                for (group = 0; group < NR_STAT_GROUPS; group++) {
 138                        stat->nsecs[group] += ptr->nsecs[group];
 139                        stat->sectors[group] += ptr->sectors[group];
 140                        stat->ios[group] += ptr->ios[group];
 141                        stat->merges[group] += ptr->merges[group];
 142                }
 143
 144                stat->io_ticks += ptr->io_ticks;
 145        }
 146}
 147
 148static unsigned int part_in_flight(struct block_device *part)
 149{
 150        unsigned int inflight = 0;
 151        int cpu;
 152
 153        for_each_possible_cpu(cpu) {
 154                inflight += part_stat_local_read_cpu(part, in_flight[0], cpu) +
 155                            part_stat_local_read_cpu(part, in_flight[1], cpu);
 156        }
 157        if ((int)inflight < 0)
 158                inflight = 0;
 159
 160        return inflight;
 161}
 162
 163static void part_in_flight_rw(struct block_device *part,
 164                unsigned int inflight[2])
 165{
 166        int cpu;
 167
 168        inflight[0] = 0;
 169        inflight[1] = 0;
 170        for_each_possible_cpu(cpu) {
 171                inflight[0] += part_stat_local_read_cpu(part, in_flight[0], cpu);
 172                inflight[1] += part_stat_local_read_cpu(part, in_flight[1], cpu);
 173        }
 174        if ((int)inflight[0] < 0)
 175                inflight[0] = 0;
 176        if ((int)inflight[1] < 0)
 177                inflight[1] = 0;
 178}
 179
 180/*
 181 * Can be deleted altogether. Later.
 182 *
 183 */
 184#define BLKDEV_MAJOR_HASH_SIZE 255
 185static struct blk_major_name {
 186        struct blk_major_name *next;
 187        int major;
 188        char name[16];
 189#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 190        void (*probe)(dev_t devt);
 191#endif
 192} *major_names[BLKDEV_MAJOR_HASH_SIZE];
 193static DEFINE_MUTEX(major_names_lock);
 194static DEFINE_SPINLOCK(major_names_spinlock);
 195
 196/* index in the above - for now: assume no multimajor ranges */
 197static inline int major_to_index(unsigned major)
 198{
 199        return major % BLKDEV_MAJOR_HASH_SIZE;
 200}
 201
 202#ifdef CONFIG_PROC_FS
 203void blkdev_show(struct seq_file *seqf, off_t offset)
 204{
 205        struct blk_major_name *dp;
 206
 207        spin_lock(&major_names_spinlock);
 208        for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
 209                if (dp->major == offset)
 210                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
 211        spin_unlock(&major_names_spinlock);
 212}
 213#endif /* CONFIG_PROC_FS */
 214
 215/**
 216 * __register_blkdev - register a new block device
 217 *
 218 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If
 219 *         @major = 0, try to allocate any unused major number.
 220 * @name: the name of the new block device as a zero terminated string
 221 * @probe: pre-devtmpfs / pre-udev callback used to create disks when their
 222 *         pre-created device node is accessed. When a probe call uses
 223 *         add_disk() and it fails the driver must cleanup resources. This
 224 *         interface may soon be removed.
 225 *
 226 * The @name must be unique within the system.
 227 *
 228 * The return value depends on the @major input parameter:
 229 *
 230 *  - if a major device number was requested in range [1..BLKDEV_MAJOR_MAX-1]
 231 *    then the function returns zero on success, or a negative error code
 232 *  - if any unused major number was requested with @major = 0 parameter
 233 *    then the return value is the allocated major number in range
 234 *    [1..BLKDEV_MAJOR_MAX-1] or a negative error code otherwise
 235 *
 236 * See Documentation/admin-guide/devices.txt for the list of allocated
 237 * major numbers.
 238 *
 239 * Use register_blkdev instead for any new code.
 240 */
 241int __register_blkdev(unsigned int major, const char *name,
 242                void (*probe)(dev_t devt))
 243{
 244        struct blk_major_name **n, *p;
 245        int index, ret = 0;
 246
 247        mutex_lock(&major_names_lock);
 248
 249        /* temporary */
 250        if (major == 0) {
 251                for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
 252                        if (major_names[index] == NULL)
 253                                break;
 254                }
 255
 256                if (index == 0) {
 257                        printk("%s: failed to get major for %s\n",
 258                               __func__, name);
 259                        ret = -EBUSY;
 260                        goto out;
 261                }
 262                major = index;
 263                ret = major;
 264        }
 265
 266        if (major >= BLKDEV_MAJOR_MAX) {
 267                pr_err("%s: major requested (%u) is greater than the maximum (%u) for %s\n",
 268                       __func__, major, BLKDEV_MAJOR_MAX-1, name);
 269
 270                ret = -EINVAL;
 271                goto out;
 272        }
 273
 274        p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
 275        if (p == NULL) {
 276                ret = -ENOMEM;
 277                goto out;
 278        }
 279
 280        p->major = major;
 281#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 282        p->probe = probe;
 283#endif
 284        strlcpy(p->name, name, sizeof(p->name));
 285        p->next = NULL;
 286        index = major_to_index(major);
 287
 288        spin_lock(&major_names_spinlock);
 289        for (n = &major_names[index]; *n; n = &(*n)->next) {
 290                if ((*n)->major == major)
 291                        break;
 292        }
 293        if (!*n)
 294                *n = p;
 295        else
 296                ret = -EBUSY;
 297        spin_unlock(&major_names_spinlock);
 298
 299        if (ret < 0) {
 300                printk("register_blkdev: cannot get major %u for %s\n",
 301                       major, name);
 302                kfree(p);
 303        }
 304out:
 305        mutex_unlock(&major_names_lock);
 306        return ret;
 307}
 308EXPORT_SYMBOL(__register_blkdev);
 309
 310void unregister_blkdev(unsigned int major, const char *name)
 311{
 312        struct blk_major_name **n;
 313        struct blk_major_name *p = NULL;
 314        int index = major_to_index(major);
 315
 316        mutex_lock(&major_names_lock);
 317        spin_lock(&major_names_spinlock);
 318        for (n = &major_names[index]; *n; n = &(*n)->next)
 319                if ((*n)->major == major)
 320                        break;
 321        if (!*n || strcmp((*n)->name, name)) {
 322                WARN_ON(1);
 323        } else {
 324                p = *n;
 325                *n = p->next;
 326        }
 327        spin_unlock(&major_names_spinlock);
 328        mutex_unlock(&major_names_lock);
 329        kfree(p);
 330}
 331
 332EXPORT_SYMBOL(unregister_blkdev);
 333
 334int blk_alloc_ext_minor(void)
 335{
 336        int idx;
 337
 338        idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT - 1, GFP_KERNEL);
 339        if (idx == -ENOSPC)
 340                return -EBUSY;
 341        return idx;
 342}
 343
 344void blk_free_ext_minor(unsigned int minor)
 345{
 346        ida_free(&ext_devt_ida, minor);
 347}
 348
 349static char *bdevt_str(dev_t devt, char *buf)
 350{
 351        if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
 352                char tbuf[BDEVT_SIZE];
 353                snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
 354                snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
 355        } else
 356                snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
 357
 358        return buf;
 359}
 360
 361void disk_uevent(struct gendisk *disk, enum kobject_action action)
 362{
 363        struct block_device *part;
 364        unsigned long idx;
 365
 366        rcu_read_lock();
 367        xa_for_each(&disk->part_tbl, idx, part) {
 368                if (bdev_is_partition(part) && !bdev_nr_sectors(part))
 369                        continue;
 370                if (!kobject_get_unless_zero(&part->bd_device.kobj))
 371                        continue;
 372
 373                rcu_read_unlock();
 374                kobject_uevent(bdev_kobj(part), action);
 375                put_device(&part->bd_device);
 376                rcu_read_lock();
 377        }
 378        rcu_read_unlock();
 379}
 380EXPORT_SYMBOL_GPL(disk_uevent);
 381
 382int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
 383{
 384        struct block_device *bdev;
 385
 386        if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
 387                return -EINVAL;
 388        if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
 389                return -EINVAL;
 390        if (disk->open_partitions)
 391                return -EBUSY;
 392
 393        set_bit(GD_NEED_PART_SCAN, &disk->state);
 394        bdev = blkdev_get_by_dev(disk_devt(disk), mode, NULL);
 395        if (IS_ERR(bdev))
 396                return PTR_ERR(bdev);
 397        blkdev_put(bdev, mode);
 398        return 0;
 399}
 400
 401/**
 402 * device_add_disk - add disk information to kernel list
 403 * @parent: parent device for the disk
 404 * @disk: per-device partitioning information
 405 * @groups: Additional per-device sysfs groups
 406 *
 407 * This function registers the partitioning information in @disk
 408 * with the kernel.
 409 */
 410int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 411                                 const struct attribute_group **groups)
 412
 413{
 414        struct device *ddev = disk_to_dev(disk);
 415        int ret;
 416
 417        /* Only makes sense for bio-based to set ->poll_bio */
 418        if (queue_is_mq(disk->queue) && disk->fops->poll_bio)
 419                return -EINVAL;
 420
 421        /*
 422         * The disk queue should now be all set with enough information about
 423         * the device for the elevator code to pick an adequate default
 424         * elevator if one is needed, that is, for devices requesting queue
 425         * registration.
 426         */
 427        elevator_init_mq(disk->queue);
 428
 429        /*
 430         * If the driver provides an explicit major number it also must provide
 431         * the number of minors numbers supported, and those will be used to
 432         * setup the gendisk.
 433         * Otherwise just allocate the device numbers for both the whole device
 434         * and all partitions from the extended dev_t space.
 435         */
 436        if (disk->major) {
 437                if (WARN_ON(!disk->minors))
 438                        return -EINVAL;
 439
 440                if (disk->minors > DISK_MAX_PARTS) {
 441                        pr_err("block: can't allocate more than %d partitions\n",
 442                                DISK_MAX_PARTS);
 443                        disk->minors = DISK_MAX_PARTS;
 444                }
 445                if (disk->first_minor + disk->minors > MINORMASK + 1)
 446                        return -EINVAL;
 447        } else {
 448                if (WARN_ON(disk->minors))
 449                        return -EINVAL;
 450
 451                ret = blk_alloc_ext_minor();
 452                if (ret < 0)
 453                        return ret;
 454                disk->major = BLOCK_EXT_MAJOR;
 455                disk->first_minor = ret;
 456        }
 457
 458        /* delay uevents, until we scanned partition table */
 459        dev_set_uevent_suppress(ddev, 1);
 460
 461        ddev->parent = parent;
 462        ddev->groups = groups;
 463        dev_set_name(ddev, "%s", disk->disk_name);
 464        if (!(disk->flags & GENHD_FL_HIDDEN))
 465                ddev->devt = MKDEV(disk->major, disk->first_minor);
 466        ret = device_add(ddev);
 467        if (ret)
 468                goto out_free_ext_minor;
 469
 470        ret = disk_alloc_events(disk);
 471        if (ret)
 472                goto out_device_del;
 473
 474        if (!sysfs_deprecated) {
 475                ret = sysfs_create_link(block_depr, &ddev->kobj,
 476                                        kobject_name(&ddev->kobj));
 477                if (ret)
 478                        goto out_device_del;
 479        }
 480
 481        /*
 482         * avoid probable deadlock caused by allocating memory with
 483         * GFP_KERNEL in runtime_resume callback of its all ancestor
 484         * devices
 485         */
 486        pm_runtime_set_memalloc_noio(ddev, true);
 487
 488        ret = blk_integrity_add(disk);
 489        if (ret)
 490                goto out_del_block_link;
 491
 492        disk->part0->bd_holder_dir =
 493                kobject_create_and_add("holders", &ddev->kobj);
 494        if (!disk->part0->bd_holder_dir) {
 495                ret = -ENOMEM;
 496                goto out_del_integrity;
 497        }
 498        disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 499        if (!disk->slave_dir) {
 500                ret = -ENOMEM;
 501                goto out_put_holder_dir;
 502        }
 503
 504        ret = bd_register_pending_holders(disk);
 505        if (ret < 0)
 506                goto out_put_slave_dir;
 507
 508        ret = blk_register_queue(disk);
 509        if (ret)
 510                goto out_put_slave_dir;
 511
 512        if (!(disk->flags & GENHD_FL_HIDDEN)) {
 513                ret = bdi_register(disk->bdi, "%u:%u",
 514                                   disk->major, disk->first_minor);
 515                if (ret)
 516                        goto out_unregister_queue;
 517                bdi_set_owner(disk->bdi, ddev);
 518                ret = sysfs_create_link(&ddev->kobj,
 519                                        &disk->bdi->dev->kobj, "bdi");
 520                if (ret)
 521                        goto out_unregister_bdi;
 522
 523                bdev_add(disk->part0, ddev->devt);
 524                if (get_capacity(disk))
 525                        disk_scan_partitions(disk, FMODE_READ);
 526
 527                /*
 528                 * Announce the disk and partitions after all partitions are
 529                 * created. (for hidden disks uevents remain suppressed forever)
 530                 */
 531                dev_set_uevent_suppress(ddev, 0);
 532                disk_uevent(disk, KOBJ_ADD);
 533        }
 534
 535        disk_update_readahead(disk);
 536        disk_add_events(disk);
 537        set_bit(GD_ADDED, &disk->state);
 538        return 0;
 539
 540out_unregister_bdi:
 541        if (!(disk->flags & GENHD_FL_HIDDEN))
 542                bdi_unregister(disk->bdi);
 543out_unregister_queue:
 544        blk_unregister_queue(disk);
 545out_put_slave_dir:
 546        kobject_put(disk->slave_dir);
 547out_put_holder_dir:
 548        kobject_put(disk->part0->bd_holder_dir);
 549out_del_integrity:
 550        blk_integrity_del(disk);
 551out_del_block_link:
 552        if (!sysfs_deprecated)
 553                sysfs_remove_link(block_depr, dev_name(ddev));
 554out_device_del:
 555        device_del(ddev);
 556out_free_ext_minor:
 557        if (disk->major == BLOCK_EXT_MAJOR)
 558                blk_free_ext_minor(disk->first_minor);
 559        return ret;
 560}
 561EXPORT_SYMBOL(device_add_disk);
 562
 563/**
 564 * blk_mark_disk_dead - mark a disk as dead
 565 * @disk: disk to mark as dead
 566 *
 * Mark a disk as dead (e.g. surprise removed) and don't accept any new I/O
 * to this disk.
 569 */
 570void blk_mark_disk_dead(struct gendisk *disk)
 571{
 572        set_bit(GD_DEAD, &disk->state);
 573        blk_queue_start_drain(disk->queue);
 574}
 575EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
 576
 577/**
 578 * del_gendisk - remove the gendisk
 579 * @disk: the struct gendisk to remove
 580 *
 581 * Removes the gendisk and all its associated resources. This deletes the
 582 * partitions associated with the gendisk, and unregisters the associated
 583 * request_queue.
 584 *
 * This is the counter to the respective device_add_disk() call.
 *
 * The final removal of the struct gendisk happens when its refcount reaches 0
 * with put_disk(), which should be called after del_gendisk(), if
 * device_add_disk() was used.
 590 *
 591 * Drivers exist which depend on the release of the gendisk to be synchronous,
 592 * it should not be deferred.
 593 *
 594 * Context: can sleep
 595 */
 596void del_gendisk(struct gendisk *disk)
 597{
 598        struct request_queue *q = disk->queue;
 599
 600        might_sleep();
 601
 602        if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
 603                return;
 604
 605        blk_integrity_del(disk);
 606        disk_del_events(disk);
 607
 608        mutex_lock(&disk->open_mutex);
 609        remove_inode_hash(disk->part0->bd_inode);
 610        blk_drop_partitions(disk);
 611        mutex_unlock(&disk->open_mutex);
 612
 613        fsync_bdev(disk->part0);
 614        __invalidate_device(disk->part0, true);
 615
 616        /*
 617         * Fail any new I/O.
 618         */
 619        set_bit(GD_DEAD, &disk->state);
 620        set_capacity(disk, 0);
 621
 622        /*
 623         * Prevent new I/O from crossing bio_queue_enter().
 624         */
 625        blk_queue_start_drain(q);
 626        blk_mq_freeze_queue_wait(q);
 627
 628        if (!(disk->flags & GENHD_FL_HIDDEN)) {
 629                sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 630
 631                /*
 632                 * Unregister bdi before releasing device numbers (as they can
 633                 * get reused and we'd get clashes in sysfs).
 634                 */
 635                bdi_unregister(disk->bdi);
 636        }
 637
 638        blk_unregister_queue(disk);
 639
 640        kobject_put(disk->part0->bd_holder_dir);
 641        kobject_put(disk->slave_dir);
 642
 643        part_stat_set_all(disk->part0, 0);
 644        disk->part0->bd_stamp = 0;
 645        if (!sysfs_deprecated)
 646                sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk)));
 647        pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
 648        device_del(disk_to_dev(disk));
 649
 650        blk_throtl_cancel_bios(disk->queue);
 651
 652        blk_sync_queue(q);
 653        blk_flush_integrity();
 654        blk_mq_cancel_work_sync(q);
 655
 656        blk_mq_quiesce_queue(q);
 657        if (q->elevator) {
 658                mutex_lock(&q->sysfs_lock);
 659                elevator_exit(q);
 660                mutex_unlock(&q->sysfs_lock);
 661        }
 662        rq_qos_exit(q);
 663        blk_mq_unquiesce_queue(q);
 664
 665        /*
 666         * Allow using passthrough request again after the queue is torn down.
 667         */
 668        blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q);
 669        __blk_mq_unfreeze_queue(q, true);
 670
 671}
 672EXPORT_SYMBOL(del_gendisk);
 673
 674/**
 675 * invalidate_disk - invalidate the disk
 676 * @disk: the struct gendisk to invalidate
 677 *
 * A helper to invalidate the disk. It will clean the disk's associated
 679 * buffer/page caches and reset its internal states so that the disk
 680 * can be reused by the drivers.
 681 *
 682 * Context: can sleep
 683 */
 684void invalidate_disk(struct gendisk *disk)
 685{
 686        struct block_device *bdev = disk->part0;
 687
 688        invalidate_bdev(bdev);
 689        bdev->bd_inode->i_mapping->wb_err = 0;
 690        set_capacity(disk, 0);
 691}
 692EXPORT_SYMBOL(invalidate_disk);
 693
 694/* sysfs access to bad-blocks list. */
 695static ssize_t disk_badblocks_show(struct device *dev,
 696                                        struct device_attribute *attr,
 697                                        char *page)
 698{
 699        struct gendisk *disk = dev_to_disk(dev);
 700
 701        if (!disk->bb)
 702                return sprintf(page, "\n");
 703
 704        return badblocks_show(disk->bb, page, 0);
 705}
 706
 707static ssize_t disk_badblocks_store(struct device *dev,
 708                                        struct device_attribute *attr,
 709                                        const char *page, size_t len)
 710{
 711        struct gendisk *disk = dev_to_disk(dev);
 712
 713        if (!disk->bb)
 714                return -ENXIO;
 715
 716        return badblocks_store(disk->bb, page, len, 0);
 717}
 718
 719#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 720void blk_request_module(dev_t devt)
 721{
 722        unsigned int major = MAJOR(devt);
 723        struct blk_major_name **n;
 724
 725        mutex_lock(&major_names_lock);
 726        for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) {
 727                if ((*n)->major == major && (*n)->probe) {
 728                        (*n)->probe(devt);
 729                        mutex_unlock(&major_names_lock);
 730                        return;
 731                }
 732        }
 733        mutex_unlock(&major_names_lock);
 734
 735        if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
 736                /* Make old-style 2.4 aliases work */
 737                request_module("block-major-%d", MAJOR(devt));
 738}
 739#endif /* CONFIG_BLOCK_LEGACY_AUTOLOAD */
 740
 741/*
 742 * print a full list of all partitions - intended for places where the root
 743 * filesystem can't be mounted and thus to give the victim some idea of what
 744 * went wrong
 745 */
 746void __init printk_all_partitions(void)
 747{
 748        struct class_dev_iter iter;
 749        struct device *dev;
 750
 751        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 752        while ((dev = class_dev_iter_next(&iter))) {
 753                struct gendisk *disk = dev_to_disk(dev);
 754                struct block_device *part;
 755                char devt_buf[BDEVT_SIZE];
 756                unsigned long idx;
 757
 758                /*
 759                 * Don't show empty devices or things that have been
 760                 * suppressed
 761                 */
 762                if (get_capacity(disk) == 0 || (disk->flags & GENHD_FL_HIDDEN))
 763                        continue;
 764
 765                /*
 766                 * Note, unlike /proc/partitions, I am showing the numbers in
 767                 * hex - the same format as the root= option takes.
 768                 */
 769                rcu_read_lock();
 770                xa_for_each(&disk->part_tbl, idx, part) {
 771                        if (!bdev_nr_sectors(part))
 772                                continue;
 773                        printk("%s%s %10llu %pg %s",
 774                               bdev_is_partition(part) ? "  " : "",
 775                               bdevt_str(part->bd_dev, devt_buf),
 776                               bdev_nr_sectors(part) >> 1, part,
 777                               part->bd_meta_info ?
 778                                        part->bd_meta_info->uuid : "");
 779                        if (bdev_is_partition(part))
 780                                printk("\n");
 781                        else if (dev->parent && dev->parent->driver)
 782                                printk(" driver: %s\n",
 783                                        dev->parent->driver->name);
 784                        else
 785                                printk(" (driver?)\n");
 786                }
 787                rcu_read_unlock();
 788        }
 789        class_dev_iter_exit(&iter);
 790}
 791
 792#ifdef CONFIG_PROC_FS
 793/* iterator */
 794static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
 795{
 796        loff_t skip = *pos;
 797        struct class_dev_iter *iter;
 798        struct device *dev;
 799
 800        iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 801        if (!iter)
 802                return ERR_PTR(-ENOMEM);
 803
 804        seqf->private = iter;
 805        class_dev_iter_init(iter, &block_class, NULL, &disk_type);
 806        do {
 807                dev = class_dev_iter_next(iter);
 808                if (!dev)
 809                        return NULL;
 810        } while (skip--);
 811
 812        return dev_to_disk(dev);
 813}
 814
 815static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
 816{
 817        struct device *dev;
 818
 819        (*pos)++;
 820        dev = class_dev_iter_next(seqf->private);
 821        if (dev)
 822                return dev_to_disk(dev);
 823
 824        return NULL;
 825}
 826
 827static void disk_seqf_stop(struct seq_file *seqf, void *v)
 828{
 829        struct class_dev_iter *iter = seqf->private;
 830
 831        /* stop is called even after start failed :-( */
 832        if (iter) {
 833                class_dev_iter_exit(iter);
 834                kfree(iter);
 835                seqf->private = NULL;
 836        }
 837}
 838
 839static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
 840{
 841        void *p;
 842
 843        p = disk_seqf_start(seqf, pos);
 844        if (!IS_ERR_OR_NULL(p) && !*pos)
 845                seq_puts(seqf, "major minor  #blocks  name\n\n");
 846        return p;
 847}
 848
 849static int show_partition(struct seq_file *seqf, void *v)
 850{
 851        struct gendisk *sgp = v;
 852        struct block_device *part;
 853        unsigned long idx;
 854
 855        if (!get_capacity(sgp) || (sgp->flags & GENHD_FL_HIDDEN))
 856                return 0;
 857
 858        rcu_read_lock();
 859        xa_for_each(&sgp->part_tbl, idx, part) {
 860                if (!bdev_nr_sectors(part))
 861                        continue;
 862                seq_printf(seqf, "%4d  %7d %10llu %pg\n",
 863                           MAJOR(part->bd_dev), MINOR(part->bd_dev),
 864                           bdev_nr_sectors(part) >> 1, part);
 865        }
 866        rcu_read_unlock();
 867        return 0;
 868}
 869
 870static const struct seq_operations partitions_op = {
 871        .start  = show_partition_start,
 872        .next   = disk_seqf_next,
 873        .stop   = disk_seqf_stop,
 874        .show   = show_partition
 875};
 876#endif
 877
 878static int __init genhd_device_init(void)
 879{
 880        int error;
 881
 882        block_class.dev_kobj = sysfs_dev_block_kobj;
 883        error = class_register(&block_class);
 884        if (unlikely(error))
 885                return error;
 886        blk_dev_init();
 887
 888        register_blkdev(BLOCK_EXT_MAJOR, "blkext");
 889
 890        /* create top-level block dir */
 891        if (!sysfs_deprecated)
 892                block_depr = kobject_create_and_add("block", NULL);
 893        return 0;
 894}
 895
 896subsys_initcall(genhd_device_init);
 897
 898static ssize_t disk_range_show(struct device *dev,
 899                               struct device_attribute *attr, char *buf)
 900{
 901        struct gendisk *disk = dev_to_disk(dev);
 902
 903        return sprintf(buf, "%d\n", disk->minors);
 904}
 905
 906static ssize_t disk_ext_range_show(struct device *dev,
 907                                   struct device_attribute *attr, char *buf)
 908{
 909        struct gendisk *disk = dev_to_disk(dev);
 910
 911        return sprintf(buf, "%d\n",
 912                (disk->flags & GENHD_FL_NO_PART) ? 1 : DISK_MAX_PARTS);
 913}
 914
 915static ssize_t disk_removable_show(struct device *dev,
 916                                   struct device_attribute *attr, char *buf)
 917{
 918        struct gendisk *disk = dev_to_disk(dev);
 919
 920        return sprintf(buf, "%d\n",
 921                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 922}
 923
 924static ssize_t disk_hidden_show(struct device *dev,
 925                                   struct device_attribute *attr, char *buf)
 926{
 927        struct gendisk *disk = dev_to_disk(dev);
 928
 929        return sprintf(buf, "%d\n",
 930                       (disk->flags & GENHD_FL_HIDDEN ? 1 : 0));
 931}
 932
 933static ssize_t disk_ro_show(struct device *dev,
 934                                   struct device_attribute *attr, char *buf)
 935{
 936        struct gendisk *disk = dev_to_disk(dev);
 937
 938        return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0);
 939}
 940
 941ssize_t part_size_show(struct device *dev,
 942                       struct device_attribute *attr, char *buf)
 943{
 944        return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev)));
 945}
 946
 947ssize_t part_stat_show(struct device *dev,
 948                       struct device_attribute *attr, char *buf)
 949{
 950        struct block_device *bdev = dev_to_bdev(dev);
 951        struct request_queue *q = bdev_get_queue(bdev);
 952        struct disk_stats stat;
 953        unsigned int inflight;
 954
 955        if (queue_is_mq(q))
 956                inflight = blk_mq_in_flight(q, bdev);
 957        else
 958                inflight = part_in_flight(bdev);
 959
 960        if (inflight) {
 961                part_stat_lock();
 962                update_io_ticks(bdev, jiffies, true);
 963                part_stat_unlock();
 964        }
 965        part_stat_read_all(bdev, &stat);
 966        return sprintf(buf,
 967                "%8lu %8lu %8llu %8u "
 968                "%8lu %8lu %8llu %8u "
 969                "%8u %8u %8u "
 970                "%8lu %8lu %8llu %8u "
 971                "%8lu %8u"
 972                "\n",
 973                stat.ios[STAT_READ],
 974                stat.merges[STAT_READ],
 975                (unsigned long long)stat.sectors[STAT_READ],
 976                (unsigned int)div_u64(stat.nsecs[STAT_READ], NSEC_PER_MSEC),
 977                stat.ios[STAT_WRITE],
 978                stat.merges[STAT_WRITE],
 979                (unsigned long long)stat.sectors[STAT_WRITE],
 980                (unsigned int)div_u64(stat.nsecs[STAT_WRITE], NSEC_PER_MSEC),
 981                inflight,
 982                jiffies_to_msecs(stat.io_ticks),
 983                (unsigned int)div_u64(stat.nsecs[STAT_READ] +
 984                                      stat.nsecs[STAT_WRITE] +
 985                                      stat.nsecs[STAT_DISCARD] +
 986                                      stat.nsecs[STAT_FLUSH],
 987                                                NSEC_PER_MSEC),
 988                stat.ios[STAT_DISCARD],
 989                stat.merges[STAT_DISCARD],
 990                (unsigned long long)stat.sectors[STAT_DISCARD],
 991                (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], NSEC_PER_MSEC),
 992                stat.ios[STAT_FLUSH],
 993                (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], NSEC_PER_MSEC));
 994}
 995
 996ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
 997                           char *buf)
 998{
 999        struct block_device *bdev = dev_to_bdev(dev);
1000        struct request_queue *q = bdev_get_queue(bdev);
1001        unsigned int inflight[2];
1002
1003        if (queue_is_mq(q))
1004                blk_mq_in_flight_rw(q, bdev, inflight);
1005        else
1006                part_in_flight_rw(bdev, inflight);
1007
1008        return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
1009}
1010
1011static ssize_t disk_capability_show(struct device *dev,
1012                                    struct device_attribute *attr, char *buf)
1013{
1014        struct gendisk *disk = dev_to_disk(dev);
1015
1016        return sprintf(buf, "%x\n", disk->flags);
1017}
1018
1019static ssize_t disk_alignment_offset_show(struct device *dev,
1020                                          struct device_attribute *attr,
1021                                          char *buf)
1022{
1023        struct gendisk *disk = dev_to_disk(dev);
1024
1025        return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
1026}
1027
1028static ssize_t disk_discard_alignment_show(struct device *dev,
1029                                           struct device_attribute *attr,
1030                                           char *buf)
1031{
1032        struct gendisk *disk = dev_to_disk(dev);
1033
1034        return sprintf(buf, "%d\n", bdev_alignment_offset(disk->part0));
1035}
1036
1037static ssize_t diskseq_show(struct device *dev,
1038                            struct device_attribute *attr, char *buf)
1039{
1040        struct gendisk *disk = dev_to_disk(dev);
1041
1042        return sprintf(buf, "%llu\n", disk->diskseq);
1043}
1044
1045static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
1046static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
1047static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
1048static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
1049static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
1050static DEVICE_ATTR(size, 0444, part_size_show, NULL);
1051static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
1052static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
1053static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
1054static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
1055static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
1056static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
1057static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
1058
1059#ifdef CONFIG_FAIL_MAKE_REQUEST
1060ssize_t part_fail_show(struct device *dev,
1061                       struct device_attribute *attr, char *buf)
1062{
1063        return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail);
1064}
1065
1066ssize_t part_fail_store(struct device *dev,
1067                        struct device_attribute *attr,
1068                        const char *buf, size_t count)
1069{
1070        int i;
1071
1072        if (count > 0 && sscanf(buf, "%d", &i) > 0)
1073                dev_to_bdev(dev)->bd_make_it_fail = i;
1074
1075        return count;
1076}
1077
1078static struct device_attribute dev_attr_fail =
1079        __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
1080#endif /* CONFIG_FAIL_MAKE_REQUEST */
1081
#ifdef CONFIG_FAIL_IO_TIMEOUT
/* Read/write "io-timeout-fail" attribute; callbacks are defined elsewhere. */
static struct device_attribute dev_attr_fail_timeout = __ATTR(io-timeout-fail,
							      0644,
							      part_timeout_show,
							      part_timeout_store);
#endif /* CONFIG_FAIL_IO_TIMEOUT */
1086
1087static struct attribute *disk_attrs[] = {
1088        &dev_attr_range.attr,
1089        &dev_attr_ext_range.attr,
1090        &dev_attr_removable.attr,
1091        &dev_attr_hidden.attr,
1092        &dev_attr_ro.attr,
1093        &dev_attr_size.attr,
1094        &dev_attr_alignment_offset.attr,
1095        &dev_attr_discard_alignment.attr,
1096        &dev_attr_capability.attr,
1097        &dev_attr_stat.attr,
1098        &dev_attr_inflight.attr,
1099        &dev_attr_badblocks.attr,
1100        &dev_attr_events.attr,
1101        &dev_attr_events_async.attr,
1102        &dev_attr_events_poll_msecs.attr,
1103        &dev_attr_diskseq.attr,
1104#ifdef CONFIG_FAIL_MAKE_REQUEST
1105        &dev_attr_fail.attr,
1106#endif
1107#ifdef CONFIG_FAIL_IO_TIMEOUT
1108        &dev_attr_fail_timeout.attr,
1109#endif
1110        NULL
1111};
1112
1113static umode_t disk_visible(struct kobject *kobj, struct attribute *a, int n)
1114{
1115        struct device *dev = container_of(kobj, typeof(*dev), kobj);
1116        struct gendisk *disk = dev_to_disk(dev);
1117
1118        if (a == &dev_attr_badblocks.attr && !disk->bb)
1119                return 0;
1120        return a->mode;
1121}
1122
1123static struct attribute_group disk_attr_group = {
1124        .attrs = disk_attrs,
1125        .is_visible = disk_visible,
1126};
1127
1128static const struct attribute_group *disk_attr_groups[] = {
1129        &disk_attr_group,
1130        NULL
1131};
1132
1133/**
1134 * disk_release - releases all allocated resources of the gendisk
1135 * @dev: the device representing this disk
1136 *
1137 * This function releases all allocated resources of the gendisk.
1138 *
1139 * Drivers which used __device_add_disk() have a gendisk with a request_queue
1140 * assigned. Since the request_queue sits on top of the gendisk for these
1141 * drivers we also call blk_put_queue() for them, and we expect the
1142 * request_queue refcount to reach 0 at this point, and so the request_queue
1143 * will also be freed prior to the disk.
1144 *
1145 * Context: can sleep
1146 */
1147static void disk_release(struct device *dev)
1148{
1149        struct gendisk *disk = dev_to_disk(dev);
1150
1151        might_sleep();
1152        WARN_ON_ONCE(disk_live(disk));
1153
1154        blkcg_exit_queue(disk->queue);
1155
1156        disk_release_events(disk);
1157        kfree(disk->random);
1158        xa_destroy(&disk->part_tbl);
1159
1160        disk->queue->disk = NULL;
1161        blk_put_queue(disk->queue);
1162
1163        if (test_bit(GD_ADDED, &disk->state) && disk->fops->free_disk)
1164                disk->fops->free_disk(disk);
1165
1166        iput(disk->part0->bd_inode);    /* frees the disk */
1167}
1168
1169static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
1170{
1171        struct gendisk *disk = dev_to_disk(dev);
1172
1173        return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
1174}
1175
1176struct class block_class = {
1177        .name           = "block",
1178        .dev_uevent     = block_uevent,
1179};
1180
1181static char *block_devnode(struct device *dev, umode_t *mode,
1182                           kuid_t *uid, kgid_t *gid)
1183{
1184        struct gendisk *disk = dev_to_disk(dev);
1185
1186        if (disk->fops->devnode)
1187                return disk->fops->devnode(disk, mode);
1188        return NULL;
1189}
1190
1191const struct device_type disk_type = {
1192        .name           = "disk",
1193        .groups         = disk_attr_groups,
1194        .release        = disk_release,
1195        .devnode        = block_devnode,
1196};
1197
1198#ifdef CONFIG_PROC_FS
1199/*
1200 * aggregate disk stat collector.  Uses the same stats that the sysfs
1201 * entries do, above, but makes them available through one seq_file.
1202 *
1203 * The output looks suspiciously like /proc/partitions with a bunch of
1204 * extra fields.
1205 */
1206static int diskstats_show(struct seq_file *seqf, void *v)
1207{
1208        struct gendisk *gp = v;
1209        struct block_device *hd;
1210        unsigned int inflight;
1211        struct disk_stats stat;
1212        unsigned long idx;
1213
1214        /*
1215        if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next)
1216                seq_puts(seqf,  "major minor name"
1217                                "     rio rmerge rsect ruse wio wmerge "
1218                                "wsect wuse running use aveq"
1219                                "\n\n");
1220        */
1221
1222        rcu_read_lock();
1223        xa_for_each(&gp->part_tbl, idx, hd) {
1224                if (bdev_is_partition(hd) && !bdev_nr_sectors(hd))
1225                        continue;
1226                if (queue_is_mq(gp->queue))
1227                        inflight = blk_mq_in_flight(gp->queue, hd);
1228                else
1229                        inflight = part_in_flight(hd);
1230
1231                if (inflight) {
1232                        part_stat_lock();
1233                        update_io_ticks(hd, jiffies, true);
1234                        part_stat_unlock();
1235                }
1236                part_stat_read_all(hd, &stat);
1237                seq_printf(seqf, "%4d %7d %pg "
1238                           "%lu %lu %lu %u "
1239                           "%lu %lu %lu %u "
1240                           "%u %u %u "
1241                           "%lu %lu %lu %u "
1242                           "%lu %u"
1243                           "\n",
1244                           MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd,
1245                           stat.ios[STAT_READ],
1246                           stat.merges[STAT_READ],
1247                           stat.sectors[STAT_READ],
1248                           (unsigned int)div_u64(stat.nsecs[STAT_READ],
1249                                                        NSEC_PER_MSEC),
1250                           stat.ios[STAT_WRITE],
1251                           stat.merges[STAT_WRITE],
1252                           stat.sectors[STAT_WRITE],
1253                           (unsigned int)div_u64(stat.nsecs[STAT_WRITE],
1254                                                        NSEC_PER_MSEC),
1255                           inflight,
1256                           jiffies_to_msecs(stat.io_ticks),
1257                           (unsigned int)div_u64(stat.nsecs[STAT_READ] +
1258                                                 stat.nsecs[STAT_WRITE] +
1259                                                 stat.nsecs[STAT_DISCARD] +
1260                                                 stat.nsecs[STAT_FLUSH],
1261                                                        NSEC_PER_MSEC),
1262                           stat.ios[STAT_DISCARD],
1263                           stat.merges[STAT_DISCARD],
1264                           stat.sectors[STAT_DISCARD],
1265                           (unsigned int)div_u64(stat.nsecs[STAT_DISCARD],
1266                                                 NSEC_PER_MSEC),
1267                           stat.ios[STAT_FLUSH],
1268                           (unsigned int)div_u64(stat.nsecs[STAT_FLUSH],
1269                                                 NSEC_PER_MSEC)
1270                        );
1271        }
1272        rcu_read_unlock();
1273
1274        return 0;
1275}
1276
1277static const struct seq_operations diskstats_op = {
1278        .start  = disk_seqf_start,
1279        .next   = disk_seqf_next,
1280        .stop   = disk_seqf_stop,
1281        .show   = diskstats_show
1282};
1283
1284static int __init proc_genhd_init(void)
1285{
1286        proc_create_seq("diskstats", 0, NULL, &diskstats_op);
1287        proc_create_seq("partitions", 0, NULL, &partitions_op);
1288        return 0;
1289}
1290module_init(proc_genhd_init);
1291#endif /* CONFIG_PROC_FS */
1292
1293dev_t part_devt(struct gendisk *disk, u8 partno)
1294{
1295        struct block_device *part;
1296        dev_t devt = 0;
1297
1298        rcu_read_lock();
1299        part = xa_load(&disk->part_tbl, partno);
1300        if (part)
1301                devt = part->bd_dev;
1302        rcu_read_unlock();
1303
1304        return devt;
1305}
1306
1307dev_t blk_lookup_devt(const char *name, int partno)
1308{
1309        dev_t devt = MKDEV(0, 0);
1310        struct class_dev_iter iter;
1311        struct device *dev;
1312
1313        class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
1314        while ((dev = class_dev_iter_next(&iter))) {
1315                struct gendisk *disk = dev_to_disk(dev);
1316
1317                if (strcmp(dev_name(dev), name))
1318                        continue;
1319
1320                if (partno < disk->minors) {
1321                        /* We need to return the right devno, even
1322                         * if the partition doesn't exist yet.
1323                         */
1324                        devt = MKDEV(MAJOR(dev->devt),
1325                                     MINOR(dev->devt) + partno);
1326                } else {
1327                        devt = part_devt(disk, partno);
1328                        if (devt)
1329                                break;
1330                }
1331        }
1332        class_dev_iter_exit(&iter);
1333        return devt;
1334}
1335
1336struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
1337                struct lock_class_key *lkclass)
1338{
1339        struct gendisk *disk;
1340
1341        if (!blk_get_queue(q))
1342                return NULL;
1343
1344        disk = kzalloc_node(sizeof(struct gendisk), GFP_KERNEL, node_id);
1345        if (!disk)
1346                goto out_put_queue;
1347
1348        disk->bdi = bdi_alloc(node_id);
1349        if (!disk->bdi)
1350                goto out_free_disk;
1351
1352        /* bdev_alloc() might need the queue, set before the first call */
1353        disk->queue = q;
1354
1355        disk->part0 = bdev_alloc(disk, 0);
1356        if (!disk->part0)
1357                goto out_free_bdi;
1358
1359        disk->node_id = node_id;
1360        mutex_init(&disk->open_mutex);
1361        xa_init(&disk->part_tbl);
1362        if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
1363                goto out_destroy_part_tbl;
1364
1365        if (blkcg_init_queue(q))
1366                goto out_erase_part0;
1367
1368        rand_initialize_disk(disk);
1369        disk_to_dev(disk)->class = &block_class;
1370        disk_to_dev(disk)->type = &disk_type;
1371        device_initialize(disk_to_dev(disk));
1372        inc_diskseq(disk);
1373        q->disk = disk;
1374        lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
1375#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
1376        INIT_LIST_HEAD(&disk->slave_bdevs);
1377#endif
1378        return disk;
1379
1380out_erase_part0:
1381        xa_erase(&disk->part_tbl, 0);
1382out_destroy_part_tbl:
1383        xa_destroy(&disk->part_tbl);
1384        disk->part0->bd_disk = NULL;
1385        iput(disk->part0->bd_inode);
1386out_free_bdi:
1387        bdi_put(disk->bdi);
1388out_free_disk:
1389        kfree(disk);
1390out_put_queue:
1391        blk_put_queue(q);
1392        return NULL;
1393}
1394EXPORT_SYMBOL(__alloc_disk_node);
1395
1396struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
1397{
1398        struct request_queue *q;
1399        struct gendisk *disk;
1400
1401        q = blk_alloc_queue(node, false);
1402        if (!q)
1403                return NULL;
1404
1405        disk = __alloc_disk_node(q, node, lkclass);
1406        if (!disk) {
1407                blk_cleanup_queue(q);
1408                return NULL;
1409        }
1410        return disk;
1411}
1412EXPORT_SYMBOL(__blk_alloc_disk);
1413
/**
 * put_disk - decrements the gendisk refcount
 * @disk: the struct gendisk to decrement the refcount for, may be NULL
 *
 * Drops one reference on the device embedded in @disk; a NULL @disk is
 * ignored. When the refcount reaches 0 we'll have disk_release() called.
 *
 * Context: Any context, but the last reference must not be dropped from
 *          atomic context.
 */
void put_disk(struct gendisk *disk)
{
	if (!disk)
		return;

	put_device(disk_to_dev(disk));
}
EXPORT_SYMBOL(put_disk);
1430
1431/**
1432 * blk_cleanup_disk - shutdown a gendisk allocated by blk_alloc_disk
1433 * @disk: gendisk to shutdown
1434 *
1435 * Mark the queue hanging off @disk DYING, drain all pending requests, then mark
1436 * the queue DEAD, destroy and put it and the gendisk structure.
1437 *
1438 * Context: can sleep
1439 */
1440void blk_cleanup_disk(struct gendisk *disk)
1441{
1442        blk_cleanup_queue(disk->queue);
1443        put_disk(disk);
1444}
1445EXPORT_SYMBOL(blk_cleanup_disk);
1446
1447static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1448{
1449        char event[] = "DISK_RO=1";
1450        char *envp[] = { event, NULL };
1451
1452        if (!ro)
1453                event[8] = '0';
1454        kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1455}
1456
1457/**
1458 * set_disk_ro - set a gendisk read-only
1459 * @disk:       gendisk to operate on
1460 * @read_only:  %true to set the disk read-only, %false set the disk read/write
1461 *
1462 * This function is used to indicate whether a given disk device should have its
1463 * read-only flag set. set_disk_ro() is typically used by device drivers to
1464 * indicate whether the underlying physical device is write-protected.
1465 */
1466void set_disk_ro(struct gendisk *disk, bool read_only)
1467{
1468        if (read_only) {
1469                if (test_and_set_bit(GD_READ_ONLY, &disk->state))
1470                        return;
1471        } else {
1472                if (!test_and_clear_bit(GD_READ_ONLY, &disk->state))
1473                        return;
1474        }
1475        set_disk_ro_uevent(disk, read_only);
1476}
1477EXPORT_SYMBOL(set_disk_ro);
1478
1479void inc_diskseq(struct gendisk *disk)
1480{
1481        disk->diskseq = atomic64_inc_return(&diskseq);
1482}
1483