linux/block/ioctl.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include <linux/capability.h>
   3#include <linux/blkdev.h>
   4#include <linux/export.h>
   5#include <linux/gfp.h>
   6#include <linux/blkpg.h>
   7#include <linux/hdreg.h>
   8#include <linux/backing-dev.h>
   9#include <linux/fs.h>
  10#include <linux/blktrace_api.h>
  11#include <linux/pr.h>
  12#include <linux/uaccess.h>
  13
  14static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
  15{
  16        struct block_device *bdevp;
  17        struct gendisk *disk;
  18        struct hd_struct *part, *lpart;
  19        struct blkpg_ioctl_arg a;
  20        struct blkpg_partition p;
  21        struct disk_part_iter piter;
  22        long long start, length;
  23        int partno;
  24
  25        if (!capable(CAP_SYS_ADMIN))
  26                return -EACCES;
  27        if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
  28                return -EFAULT;
  29        if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
  30                return -EFAULT;
  31        disk = bdev->bd_disk;
  32        if (bdev != bdev->bd_contains)
  33                return -EINVAL;
  34        partno = p.pno;
  35        if (partno <= 0)
  36                return -EINVAL;
  37        switch (a.op) {
  38                case BLKPG_ADD_PARTITION:
  39                        start = p.start >> 9;
  40                        length = p.length >> 9;
  41                        /* check for fit in a hd_struct */
  42                        if (sizeof(sector_t) == sizeof(long) &&
  43                            sizeof(long long) > sizeof(long)) {
  44                                long pstart = start, plength = length;
  45                                if (pstart != start || plength != length
  46                                    || pstart < 0 || plength < 0 || partno > 65535)
  47                                        return -EINVAL;
  48                        }
  49                        /* check if partition is aligned to blocksize */
  50                        if (p.start & (bdev_logical_block_size(bdev) - 1))
  51                                return -EINVAL;
  52
  53                        mutex_lock(&bdev->bd_mutex);
  54
  55                        /* overlap? */
  56                        disk_part_iter_init(&piter, disk,
  57                                            DISK_PITER_INCL_EMPTY);
  58                        while ((part = disk_part_iter_next(&piter))) {
  59                                if (!(start + length <= part->start_sect ||
  60                                      start >= part->start_sect + part->nr_sects)) {
  61                                        disk_part_iter_exit(&piter);
  62                                        mutex_unlock(&bdev->bd_mutex);
  63                                        return -EBUSY;
  64                                }
  65                        }
  66                        disk_part_iter_exit(&piter);
  67
  68                        /* all seems OK */
  69                        part = add_partition(disk, partno, start, length,
  70                                             ADDPART_FLAG_NONE, NULL);
  71                        mutex_unlock(&bdev->bd_mutex);
  72                        return PTR_ERR_OR_ZERO(part);
  73                case BLKPG_DEL_PARTITION:
  74                        part = disk_get_part(disk, partno);
  75                        if (!part)
  76                                return -ENXIO;
  77
  78                        bdevp = bdget(part_devt(part));
  79                        disk_put_part(part);
  80                        if (!bdevp)
  81                                return -ENOMEM;
  82
  83                        mutex_lock(&bdevp->bd_mutex);
  84                        if (bdevp->bd_openers) {
  85                                mutex_unlock(&bdevp->bd_mutex);
  86                                bdput(bdevp);
  87                                return -EBUSY;
  88                        }
  89                        /* all seems OK */
  90                        fsync_bdev(bdevp);
  91                        invalidate_bdev(bdevp);
  92
  93                        mutex_lock_nested(&bdev->bd_mutex, 1);
  94                        delete_partition(disk, partno);
  95                        mutex_unlock(&bdev->bd_mutex);
  96                        mutex_unlock(&bdevp->bd_mutex);
  97                        bdput(bdevp);
  98
  99                        return 0;
 100                case BLKPG_RESIZE_PARTITION:
 101                        start = p.start >> 9;
 102                        /* new length of partition in bytes */
 103                        length = p.length >> 9;
 104                        /* check for fit in a hd_struct */
 105                        if (sizeof(sector_t) == sizeof(long) &&
 106                            sizeof(long long) > sizeof(long)) {
 107                                long pstart = start, plength = length;
 108                                if (pstart != start || plength != length
 109                                    || pstart < 0 || plength < 0)
 110                                        return -EINVAL;
 111                        }
 112                        part = disk_get_part(disk, partno);
 113                        if (!part)
 114                                return -ENXIO;
 115                        bdevp = bdget(part_devt(part));
 116                        if (!bdevp) {
 117                                disk_put_part(part);
 118                                return -ENOMEM;
 119                        }
 120                        mutex_lock(&bdevp->bd_mutex);
 121                        mutex_lock_nested(&bdev->bd_mutex, 1);
 122                        if (start != part->start_sect) {
 123                                mutex_unlock(&bdevp->bd_mutex);
 124                                mutex_unlock(&bdev->bd_mutex);
 125                                bdput(bdevp);
 126                                disk_put_part(part);
 127                                return -EINVAL;
 128                        }
 129                        /* overlap? */
 130                        disk_part_iter_init(&piter, disk,
 131                                            DISK_PITER_INCL_EMPTY);
 132                        while ((lpart = disk_part_iter_next(&piter))) {
 133                                if (lpart->partno != partno &&
 134                                   !(start + length <= lpart->start_sect ||
 135                                   start >= lpart->start_sect + lpart->nr_sects)
 136                                   ) {
 137                                        disk_part_iter_exit(&piter);
 138                                        mutex_unlock(&bdevp->bd_mutex);
 139                                        mutex_unlock(&bdev->bd_mutex);
 140                                        bdput(bdevp);
 141                                        disk_put_part(part);
 142                                        return -EBUSY;
 143                                }
 144                        }
 145                        disk_part_iter_exit(&piter);
 146                        part_nr_sects_write(part, (sector_t)length);
 147                        i_size_write(bdevp->bd_inode, p.length);
 148                        mutex_unlock(&bdevp->bd_mutex);
 149                        mutex_unlock(&bdev->bd_mutex);
 150                        bdput(bdevp);
 151                        disk_put_part(part);
 152                        return 0;
 153                default:
 154                        return -EINVAL;
 155        }
 156}
 157
 158/*
 159 * This is an exported API for the block driver, and will not
 160 * acquire bd_mutex. This API should be used in case that
 161 * caller has held bd_mutex already.
 162 */
 163int __blkdev_reread_part(struct block_device *bdev)
 164{
 165        struct gendisk *disk = bdev->bd_disk;
 166
 167        if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
 168                return -EINVAL;
 169        if (!capable(CAP_SYS_ADMIN))
 170                return -EACCES;
 171
 172        lockdep_assert_held(&bdev->bd_mutex);
 173
 174        return rescan_partitions(disk, bdev);
 175}
 176EXPORT_SYMBOL(__blkdev_reread_part);
 177
 178/*
 179 * This is an exported API for the block driver, and will
 180 * try to acquire bd_mutex. If bd_mutex has been held already
 181 * in current context, please call __blkdev_reread_part().
 182 *
 183 * Make sure the held locks in current context aren't required
 184 * in open()/close() handler and I/O path for avoiding ABBA deadlock:
 185 * - bd_mutex is held before calling block driver's open/close
 186 *   handler
 187 * - reading partition table may submit I/O to the block device
 188 */
 189int blkdev_reread_part(struct block_device *bdev)
 190{
 191        int res;
 192
 193        mutex_lock(&bdev->bd_mutex);
 194        res = __blkdev_reread_part(bdev);
 195        mutex_unlock(&bdev->bd_mutex);
 196
 197        return res;
 198}
 199EXPORT_SYMBOL(blkdev_reread_part);
 200
 201static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 202                unsigned long arg, unsigned long flags)
 203{
 204        uint64_t range[2];
 205        uint64_t start, len;
 206        struct request_queue *q = bdev_get_queue(bdev);
 207        struct address_space *mapping = bdev->bd_inode->i_mapping;
 208
 209
 210        if (!(mode & FMODE_WRITE))
 211                return -EBADF;
 212
 213        if (!blk_queue_discard(q))
 214                return -EOPNOTSUPP;
 215
 216        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
 217                return -EFAULT;
 218
 219        start = range[0];
 220        len = range[1];
 221
 222        if (start & 511)
 223                return -EINVAL;
 224        if (len & 511)
 225                return -EINVAL;
 226
 227        if (start + len > i_size_read(bdev->bd_inode))
 228                return -EINVAL;
 229        truncate_inode_pages_range(mapping, start, start + len - 1);
 230        return blkdev_issue_discard(bdev, start >> 9, len >> 9,
 231                                    GFP_KERNEL, flags);
 232}
 233
 234static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 235                unsigned long arg)
 236{
 237        uint64_t range[2];
 238        struct address_space *mapping;
 239        uint64_t start, end, len;
 240
 241        if (!(mode & FMODE_WRITE))
 242                return -EBADF;
 243
 244        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
 245                return -EFAULT;
 246
 247        start = range[0];
 248        len = range[1];
 249        end = start + len - 1;
 250
 251        if (start & 511)
 252                return -EINVAL;
 253        if (len & 511)
 254                return -EINVAL;
 255        if (end >= (uint64_t)i_size_read(bdev->bd_inode))
 256                return -EINVAL;
 257        if (end < start)
 258                return -EINVAL;
 259
 260        /* Invalidate the page cache, including dirty pages */
 261        mapping = bdev->bd_inode->i_mapping;
 262        truncate_inode_pages_range(mapping, start, end);
 263
 264        return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
 265                        BLKDEV_ZERO_NOUNMAP);
 266}
 267
 268static int put_ushort(unsigned long arg, unsigned short val)
 269{
 270        return put_user(val, (unsigned short __user *)arg);
 271}
 272
 273static int put_int(unsigned long arg, int val)
 274{
 275        return put_user(val, (int __user *)arg);
 276}
 277
 278static int put_uint(unsigned long arg, unsigned int val)
 279{
 280        return put_user(val, (unsigned int __user *)arg);
 281}
 282
 283static int put_long(unsigned long arg, long val)
 284{
 285        return put_user(val, (long __user *)arg);
 286}
 287
 288static int put_ulong(unsigned long arg, unsigned long val)
 289{
 290        return put_user(val, (unsigned long __user *)arg);
 291}
 292
 293static int put_u64(unsigned long arg, u64 val)
 294{
 295        return put_user(val, (u64 __user *)arg);
 296}
 297
 298int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
 299                        unsigned cmd, unsigned long arg)
 300{
 301        struct gendisk *disk = bdev->bd_disk;
 302
 303        if (disk->fops->ioctl)
 304                return disk->fops->ioctl(bdev, mode, cmd, arg);
 305
 306        return -ENOTTY;
 307}
 308/*
 309 * For the record: _GPL here is only because somebody decided to slap it
 310 * on the previous export.  Sheer idiocy, since it wasn't copyrightable
 311 * at all and could be open-coded without any exports by anybody who cares.
 312 */
 313EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
 314
 315static int blkdev_pr_register(struct block_device *bdev,
 316                struct pr_registration __user *arg)
 317{
 318        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 319        struct pr_registration reg;
 320
 321        if (!capable(CAP_SYS_ADMIN))
 322                return -EPERM;
 323        if (!ops || !ops->pr_register)
 324                return -EOPNOTSUPP;
 325        if (copy_from_user(&reg, arg, sizeof(reg)))
 326                return -EFAULT;
 327
 328        if (reg.flags & ~PR_FL_IGNORE_KEY)
 329                return -EOPNOTSUPP;
 330        return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
 331}
 332
 333static int blkdev_pr_reserve(struct block_device *bdev,
 334                struct pr_reservation __user *arg)
 335{
 336        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 337        struct pr_reservation rsv;
 338
 339        if (!capable(CAP_SYS_ADMIN))
 340                return -EPERM;
 341        if (!ops || !ops->pr_reserve)
 342                return -EOPNOTSUPP;
 343        if (copy_from_user(&rsv, arg, sizeof(rsv)))
 344                return -EFAULT;
 345
 346        if (rsv.flags & ~PR_FL_IGNORE_KEY)
 347                return -EOPNOTSUPP;
 348        return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
 349}
 350
 351static int blkdev_pr_release(struct block_device *bdev,
 352                struct pr_reservation __user *arg)
 353{
 354        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 355        struct pr_reservation rsv;
 356
 357        if (!capable(CAP_SYS_ADMIN))
 358                return -EPERM;
 359        if (!ops || !ops->pr_release)
 360                return -EOPNOTSUPP;
 361        if (copy_from_user(&rsv, arg, sizeof(rsv)))
 362                return -EFAULT;
 363
 364        if (rsv.flags)
 365                return -EOPNOTSUPP;
 366        return ops->pr_release(bdev, rsv.key, rsv.type);
 367}
 368
 369static int blkdev_pr_preempt(struct block_device *bdev,
 370                struct pr_preempt __user *arg, bool abort)
 371{
 372        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 373        struct pr_preempt p;
 374
 375        if (!capable(CAP_SYS_ADMIN))
 376                return -EPERM;
 377        if (!ops || !ops->pr_preempt)
 378                return -EOPNOTSUPP;
 379        if (copy_from_user(&p, arg, sizeof(p)))
 380                return -EFAULT;
 381
 382        if (p.flags)
 383                return -EOPNOTSUPP;
 384        return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
 385}
 386
 387static int blkdev_pr_clear(struct block_device *bdev,
 388                struct pr_clear __user *arg)
 389{
 390        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 391        struct pr_clear c;
 392
 393        if (!capable(CAP_SYS_ADMIN))
 394                return -EPERM;
 395        if (!ops || !ops->pr_clear)
 396                return -EOPNOTSUPP;
 397        if (copy_from_user(&c, arg, sizeof(c)))
 398                return -EFAULT;
 399
 400        if (c.flags)
 401                return -EOPNOTSUPP;
 402        return ops->pr_clear(bdev, c.key);
 403}
 404
 405/*
 406 * Is it an unrecognized ioctl? The correct returns are either
 407 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
 408 * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
 409 * code before returning.
 410 *
 411 * Confused drivers sometimes return EINVAL, which is wrong. It
 412 * means "I understood the ioctl command, but the parameters to
 413 * it were wrong".
 414 *
 415 * We should aim to just fix the broken drivers, the EINVAL case
 416 * should go away.
 417 */
 418static inline int is_unrecognized_ioctl(int ret)
 419{
 420        return  ret == -EINVAL ||
 421                ret == -ENOTTY ||
 422                ret == -ENOIOCTLCMD;
 423}
 424
 425static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 426                unsigned cmd, unsigned long arg)
 427{
 428        int ret;
 429
 430        if (!capable(CAP_SYS_ADMIN))
 431                return -EACCES;
 432
 433        ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 434        if (!is_unrecognized_ioctl(ret))
 435                return ret;
 436
 437        fsync_bdev(bdev);
 438        invalidate_bdev(bdev);
 439        return 0;
 440}
 441
 442static int blkdev_roset(struct block_device *bdev, fmode_t mode,
 443                unsigned cmd, unsigned long arg)
 444{
 445        int ret, n;
 446
 447        if (!capable(CAP_SYS_ADMIN))
 448                return -EACCES;
 449
 450        ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 451        if (!is_unrecognized_ioctl(ret))
 452                return ret;
 453        if (get_user(n, (int __user *)arg))
 454                return -EFAULT;
 455        set_device_ro(bdev, n);
 456        return 0;
 457}
 458
 459static int blkdev_getgeo(struct block_device *bdev,
 460                struct hd_geometry __user *argp)
 461{
 462        struct gendisk *disk = bdev->bd_disk;
 463        struct hd_geometry geo;
 464        int ret;
 465
 466        if (!argp)
 467                return -EINVAL;
 468        if (!disk->fops->getgeo)
 469                return -ENOTTY;
 470
 471        /*
 472         * We need to set the startsect first, the driver may
 473         * want to override it.
 474         */
 475        memset(&geo, 0, sizeof(geo));
 476        geo.start = get_start_sect(bdev);
 477        ret = disk->fops->getgeo(bdev, &geo);
 478        if (ret)
 479                return ret;
 480        if (copy_to_user(argp, &geo, sizeof(geo)))
 481                return -EFAULT;
 482        return 0;
 483}
 484
 485/* set the logical block size */
 486static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
 487                int __user *argp)
 488{
 489        int ret, n;
 490
 491        if (!capable(CAP_SYS_ADMIN))
 492                return -EACCES;
 493        if (!argp)
 494                return -EINVAL;
 495        if (get_user(n, argp))
 496                return -EFAULT;
 497
 498        if (!(mode & FMODE_EXCL)) {
 499                bdgrab(bdev);
 500                if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
 501                        return -EBUSY;
 502        }
 503
 504        ret = set_blocksize(bdev, n);
 505        if (!(mode & FMODE_EXCL))
 506                blkdev_put(bdev, mode | FMODE_EXCL);
 507        return ret;
 508}
 509
 510/*
 511 * always keep this in sync with compat_blkdev_ioctl()
 512 */
 513int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 514                        unsigned long arg)
 515{
 516        void __user *argp = (void __user *)arg;
 517        loff_t size;
 518        unsigned int max_sectors;
 519
 520        switch (cmd) {
 521        case BLKFLSBUF:
 522                return blkdev_flushbuf(bdev, mode, cmd, arg);
 523        case BLKROSET:
 524                return blkdev_roset(bdev, mode, cmd, arg);
 525        case BLKDISCARD:
 526                return blk_ioctl_discard(bdev, mode, arg, 0);
 527        case BLKSECDISCARD:
 528                return blk_ioctl_discard(bdev, mode, arg,
 529                                BLKDEV_DISCARD_SECURE);
 530        case BLKZEROOUT:
 531                return blk_ioctl_zeroout(bdev, mode, arg);
 532        case BLKREPORTZONE:
 533                return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
 534        case BLKRESETZONE:
 535                return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
 536        case BLKGETZONESZ:
 537                return put_uint(arg, bdev_zone_sectors(bdev));
 538        case BLKGETNRZONES:
 539                return put_uint(arg, blkdev_nr_zones(bdev));
 540        case HDIO_GETGEO:
 541                return blkdev_getgeo(bdev, argp);
 542        case BLKRAGET:
 543        case BLKFRAGET:
 544                if (!arg)
 545                        return -EINVAL;
 546                return put_long(arg, (bdev->bd_bdi->ra_pages*PAGE_SIZE) / 512);
 547        case BLKROGET:
 548                return put_int(arg, bdev_read_only(bdev) != 0);
 549        case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
 550                return put_int(arg, block_size(bdev));
 551        case BLKSSZGET: /* get block device logical block size */
 552                return put_int(arg, bdev_logical_block_size(bdev));
 553        case BLKPBSZGET: /* get block device physical block size */
 554                return put_uint(arg, bdev_physical_block_size(bdev));
 555        case BLKIOMIN:
 556                return put_uint(arg, bdev_io_min(bdev));
 557        case BLKIOOPT:
 558                return put_uint(arg, bdev_io_opt(bdev));
 559        case BLKALIGNOFF:
 560                return put_int(arg, bdev_alignment_offset(bdev));
 561        case BLKDISCARDZEROES:
 562                return put_uint(arg, 0);
 563        case BLKSECTGET:
 564                max_sectors = min_t(unsigned int, USHRT_MAX,
 565                                    queue_max_sectors(bdev_get_queue(bdev)));
 566                return put_ushort(arg, max_sectors);
 567        case BLKROTATIONAL:
 568                return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
 569        case BLKRASET:
 570        case BLKFRASET:
 571                if(!capable(CAP_SYS_ADMIN))
 572                        return -EACCES;
 573                bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 574                return 0;
 575        case BLKBSZSET:
 576                return blkdev_bszset(bdev, mode, argp);
 577        case BLKPG:
 578                return blkpg_ioctl(bdev, argp);
 579        case BLKRRPART:
 580                return blkdev_reread_part(bdev);
 581        case BLKGETSIZE:
 582                size = i_size_read(bdev->bd_inode);
 583                if ((size >> 9) > ~0UL)
 584                        return -EFBIG;
 585                return put_ulong(arg, size >> 9);
 586        case BLKGETSIZE64:
 587                return put_u64(arg, i_size_read(bdev->bd_inode));
 588        case BLKTRACESTART:
 589        case BLKTRACESTOP:
 590        case BLKTRACESETUP:
 591        case BLKTRACETEARDOWN:
 592                return blk_trace_ioctl(bdev, cmd, argp);
 593        case IOC_PR_REGISTER:
 594                return blkdev_pr_register(bdev, argp);
 595        case IOC_PR_RESERVE:
 596                return blkdev_pr_reserve(bdev, argp);
 597        case IOC_PR_RELEASE:
 598                return blkdev_pr_release(bdev, argp);
 599        case IOC_PR_PREEMPT:
 600                return blkdev_pr_preempt(bdev, argp, false);
 601        case IOC_PR_PREEMPT_ABORT:
 602                return blkdev_pr_preempt(bdev, argp, true);
 603        case IOC_PR_CLEAR:
 604                return blkdev_pr_clear(bdev, argp);
 605        default:
 606                return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 607        }
 608}
 609EXPORT_SYMBOL_GPL(blkdev_ioctl);
 610