linux/block/ioctl.c
<<
>>
Prefs
   1#include <linux/capability.h>
   2#include <linux/blkdev.h>
   3#include <linux/export.h>
   4#include <linux/gfp.h>
   5#include <linux/blkpg.h>
   6#include <linux/hdreg.h>
   7#include <linux/badblocks.h>
   8#include <linux/backing-dev.h>
   9#include <linux/fs.h>
  10#include <linux/blktrace_api.h>
  11#include <linux/pr.h>
  12#include <asm/uaccess.h>
  13
  14static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg)
  15{
  16        struct block_device *bdevp;
  17        struct gendisk *disk;
  18        struct hd_struct *part, *lpart;
  19        struct blkpg_ioctl_arg a;
  20        struct blkpg_partition p;
  21        struct disk_part_iter piter;
  22        long long start, length;
  23        int partno;
  24
  25        if (!capable(CAP_SYS_ADMIN))
  26                return -EACCES;
  27        if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg)))
  28                return -EFAULT;
  29        if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
  30                return -EFAULT;
  31        disk = bdev->bd_disk;
  32        if (bdev != bdev->bd_contains)
  33                return -EINVAL;
  34        partno = p.pno;
  35        if (partno <= 0)
  36                return -EINVAL;
  37        switch (a.op) {
  38                case BLKPG_ADD_PARTITION:
  39                        start = p.start >> 9;
  40                        length = p.length >> 9;
  41                        /* check for fit in a hd_struct */
  42                        if (sizeof(sector_t) == sizeof(long) &&
  43                            sizeof(long long) > sizeof(long)) {
  44                                long pstart = start, plength = length;
  45                                if (pstart != start || plength != length
  46                                    || pstart < 0 || plength < 0 || partno > 65535)
  47                                        return -EINVAL;
  48                        }
  49
  50                        mutex_lock(&bdev->bd_mutex);
  51
  52                        /* overlap? */
  53                        disk_part_iter_init(&piter, disk,
  54                                            DISK_PITER_INCL_EMPTY);
  55                        while ((part = disk_part_iter_next(&piter))) {
  56                                if (!(start + length <= part->start_sect ||
  57                                      start >= part->start_sect + part->nr_sects)) {
  58                                        disk_part_iter_exit(&piter);
  59                                        mutex_unlock(&bdev->bd_mutex);
  60                                        return -EBUSY;
  61                                }
  62                        }
  63                        disk_part_iter_exit(&piter);
  64
  65                        /* all seems OK */
  66                        part = add_partition(disk, partno, start, length,
  67                                             ADDPART_FLAG_NONE, NULL);
  68                        mutex_unlock(&bdev->bd_mutex);
  69                        return PTR_ERR_OR_ZERO(part);
  70                case BLKPG_DEL_PARTITION:
  71                        part = disk_get_part(disk, partno);
  72                        if (!part)
  73                                return -ENXIO;
  74
  75                        bdevp = bdget(part_devt(part));
  76                        disk_put_part(part);
  77                        if (!bdevp)
  78                                return -ENOMEM;
  79
  80                        mutex_lock(&bdevp->bd_mutex);
  81                        if (bdevp->bd_openers) {
  82                                mutex_unlock(&bdevp->bd_mutex);
  83                                bdput(bdevp);
  84                                return -EBUSY;
  85                        }
  86                        /* all seems OK */
  87                        fsync_bdev(bdevp);
  88                        invalidate_bdev(bdevp);
  89
  90                        mutex_lock_nested(&bdev->bd_mutex, 1);
  91                        delete_partition(disk, partno);
  92                        mutex_unlock(&bdev->bd_mutex);
  93                        mutex_unlock(&bdevp->bd_mutex);
  94                        bdput(bdevp);
  95
  96                        return 0;
  97                case BLKPG_RESIZE_PARTITION:
  98                        start = p.start >> 9;
  99                        /* new length of partition in bytes */
 100                        length = p.length >> 9;
 101                        /* check for fit in a hd_struct */
 102                        if (sizeof(sector_t) == sizeof(long) &&
 103                            sizeof(long long) > sizeof(long)) {
 104                                long pstart = start, plength = length;
 105                                if (pstart != start || plength != length
 106                                    || pstart < 0 || plength < 0)
 107                                        return -EINVAL;
 108                        }
 109                        part = disk_get_part(disk, partno);
 110                        if (!part)
 111                                return -ENXIO;
 112                        bdevp = bdget(part_devt(part));
 113                        if (!bdevp) {
 114                                disk_put_part(part);
 115                                return -ENOMEM;
 116                        }
 117                        mutex_lock(&bdevp->bd_mutex);
 118                        mutex_lock_nested(&bdev->bd_mutex, 1);
 119                        if (start != part->start_sect) {
 120                                mutex_unlock(&bdevp->bd_mutex);
 121                                mutex_unlock(&bdev->bd_mutex);
 122                                bdput(bdevp);
 123                                disk_put_part(part);
 124                                return -EINVAL;
 125                        }
 126                        /* overlap? */
 127                        disk_part_iter_init(&piter, disk,
 128                                            DISK_PITER_INCL_EMPTY);
 129                        while ((lpart = disk_part_iter_next(&piter))) {
 130                                if (lpart->partno != partno &&
 131                                   !(start + length <= lpart->start_sect ||
 132                                   start >= lpart->start_sect + lpart->nr_sects)
 133                                   ) {
 134                                        disk_part_iter_exit(&piter);
 135                                        mutex_unlock(&bdevp->bd_mutex);
 136                                        mutex_unlock(&bdev->bd_mutex);
 137                                        bdput(bdevp);
 138                                        disk_put_part(part);
 139                                        return -EBUSY;
 140                                }
 141                        }
 142                        disk_part_iter_exit(&piter);
 143                        part_nr_sects_write(part, (sector_t)length);
 144                        i_size_write(bdevp->bd_inode, p.length);
 145                        mutex_unlock(&bdevp->bd_mutex);
 146                        mutex_unlock(&bdev->bd_mutex);
 147                        bdput(bdevp);
 148                        disk_put_part(part);
 149                        return 0;
 150                default:
 151                        return -EINVAL;
 152        }
 153}
 154
 155/*
 156 * This is an exported API for the block driver, and will not
 157 * acquire bd_mutex. This API should be used in case that
 158 * caller has held bd_mutex already.
 159 */
 160int __blkdev_reread_part(struct block_device *bdev)
 161{
 162        struct gendisk *disk = bdev->bd_disk;
 163
 164        if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
 165                return -EINVAL;
 166        if (!capable(CAP_SYS_ADMIN))
 167                return -EACCES;
 168
 169        lockdep_assert_held(&bdev->bd_mutex);
 170
 171        return rescan_partitions(disk, bdev);
 172}
 173EXPORT_SYMBOL(__blkdev_reread_part);
 174
 175/*
 176 * This is an exported API for the block driver, and will
 177 * try to acquire bd_mutex. If bd_mutex has been held already
 178 * in current context, please call __blkdev_reread_part().
 179 *
 180 * Make sure the held locks in current context aren't required
 181 * in open()/close() handler and I/O path for avoiding ABBA deadlock:
 182 * - bd_mutex is held before calling block driver's open/close
 183 *   handler
 184 * - reading partition table may submit I/O to the block device
 185 */
 186int blkdev_reread_part(struct block_device *bdev)
 187{
 188        int res;
 189
 190        mutex_lock(&bdev->bd_mutex);
 191        res = __blkdev_reread_part(bdev);
 192        mutex_unlock(&bdev->bd_mutex);
 193
 194        return res;
 195}
 196EXPORT_SYMBOL(blkdev_reread_part);
 197
 198static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
 199                unsigned long arg, unsigned long flags)
 200{
 201        uint64_t range[2];
 202        uint64_t start, len;
 203
 204        if (!(mode & FMODE_WRITE))
 205                return -EBADF;
 206
 207        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
 208                return -EFAULT;
 209
 210        start = range[0];
 211        len = range[1];
 212
 213        if (start & 511)
 214                return -EINVAL;
 215        if (len & 511)
 216                return -EINVAL;
 217        start >>= 9;
 218        len >>= 9;
 219
 220        if (start + len > (i_size_read(bdev->bd_inode) >> 9))
 221                return -EINVAL;
 222        return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
 223}
 224
 225static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 226                unsigned long arg)
 227{
 228        uint64_t range[2];
 229        uint64_t start, len;
 230
 231        if (!(mode & FMODE_WRITE))
 232                return -EBADF;
 233
 234        if (copy_from_user(range, (void __user *)arg, sizeof(range)))
 235                return -EFAULT;
 236
 237        start = range[0];
 238        len = range[1];
 239
 240        if (start & 511)
 241                return -EINVAL;
 242        if (len & 511)
 243                return -EINVAL;
 244        start >>= 9;
 245        len >>= 9;
 246
 247        if (start + len > (i_size_read(bdev->bd_inode) >> 9))
 248                return -EINVAL;
 249
 250        return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL, false);
 251}
 252
 253static int put_ushort(unsigned long arg, unsigned short val)
 254{
 255        return put_user(val, (unsigned short __user *)arg);
 256}
 257
 258static int put_int(unsigned long arg, int val)
 259{
 260        return put_user(val, (int __user *)arg);
 261}
 262
 263static int put_uint(unsigned long arg, unsigned int val)
 264{
 265        return put_user(val, (unsigned int __user *)arg);
 266}
 267
 268static int put_long(unsigned long arg, long val)
 269{
 270        return put_user(val, (long __user *)arg);
 271}
 272
 273static int put_ulong(unsigned long arg, unsigned long val)
 274{
 275        return put_user(val, (unsigned long __user *)arg);
 276}
 277
 278static int put_u64(unsigned long arg, u64 val)
 279{
 280        return put_user(val, (u64 __user *)arg);
 281}
 282
 283int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
 284                        unsigned cmd, unsigned long arg)
 285{
 286        struct gendisk *disk = bdev->bd_disk;
 287
 288        if (disk->fops->ioctl)
 289                return disk->fops->ioctl(bdev, mode, cmd, arg);
 290
 291        return -ENOTTY;
 292}
 293/*
 294 * For the record: _GPL here is only because somebody decided to slap it
 295 * on the previous export.  Sheer idiocy, since it wasn't copyrightable
 296 * at all and could be open-coded without any exports by anybody who cares.
 297 */
 298EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
 299
 300static int blkdev_pr_register(struct block_device *bdev,
 301                struct pr_registration __user *arg)
 302{
 303        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 304        struct pr_registration reg;
 305
 306        if (!capable(CAP_SYS_ADMIN))
 307                return -EPERM;
 308        if (!ops || !ops->pr_register)
 309                return -EOPNOTSUPP;
 310        if (copy_from_user(&reg, arg, sizeof(reg)))
 311                return -EFAULT;
 312
 313        if (reg.flags & ~PR_FL_IGNORE_KEY)
 314                return -EOPNOTSUPP;
 315        return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags);
 316}
 317
 318static int blkdev_pr_reserve(struct block_device *bdev,
 319                struct pr_reservation __user *arg)
 320{
 321        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 322        struct pr_reservation rsv;
 323
 324        if (!capable(CAP_SYS_ADMIN))
 325                return -EPERM;
 326        if (!ops || !ops->pr_reserve)
 327                return -EOPNOTSUPP;
 328        if (copy_from_user(&rsv, arg, sizeof(rsv)))
 329                return -EFAULT;
 330
 331        if (rsv.flags & ~PR_FL_IGNORE_KEY)
 332                return -EOPNOTSUPP;
 333        return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags);
 334}
 335
 336static int blkdev_pr_release(struct block_device *bdev,
 337                struct pr_reservation __user *arg)
 338{
 339        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 340        struct pr_reservation rsv;
 341
 342        if (!capable(CAP_SYS_ADMIN))
 343                return -EPERM;
 344        if (!ops || !ops->pr_release)
 345                return -EOPNOTSUPP;
 346        if (copy_from_user(&rsv, arg, sizeof(rsv)))
 347                return -EFAULT;
 348
 349        if (rsv.flags)
 350                return -EOPNOTSUPP;
 351        return ops->pr_release(bdev, rsv.key, rsv.type);
 352}
 353
 354static int blkdev_pr_preempt(struct block_device *bdev,
 355                struct pr_preempt __user *arg, bool abort)
 356{
 357        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 358        struct pr_preempt p;
 359
 360        if (!capable(CAP_SYS_ADMIN))
 361                return -EPERM;
 362        if (!ops || !ops->pr_preempt)
 363                return -EOPNOTSUPP;
 364        if (copy_from_user(&p, arg, sizeof(p)))
 365                return -EFAULT;
 366
 367        if (p.flags)
 368                return -EOPNOTSUPP;
 369        return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort);
 370}
 371
 372static int blkdev_pr_clear(struct block_device *bdev,
 373                struct pr_clear __user *arg)
 374{
 375        const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
 376        struct pr_clear c;
 377
 378        if (!capable(CAP_SYS_ADMIN))
 379                return -EPERM;
 380        if (!ops || !ops->pr_clear)
 381                return -EOPNOTSUPP;
 382        if (copy_from_user(&c, arg, sizeof(c)))
 383                return -EFAULT;
 384
 385        if (c.flags)
 386                return -EOPNOTSUPP;
 387        return ops->pr_clear(bdev, c.key);
 388}
 389
 390/*
 391 * Is it an unrecognized ioctl? The correct returns are either
 392 * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a
 393 * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl
 394 * code before returning.
 395 *
 396 * Confused drivers sometimes return EINVAL, which is wrong. It
 397 * means "I understood the ioctl command, but the parameters to
 398 * it were wrong".
 399 *
 400 * We should aim to just fix the broken drivers, the EINVAL case
 401 * should go away.
 402 */
 403static inline int is_unrecognized_ioctl(int ret)
 404{
 405        return  ret == -EINVAL ||
 406                ret == -ENOTTY ||
 407                ret == -ENOIOCTLCMD;
 408}
 409
 410#ifdef CONFIG_FS_DAX
 411bool blkdev_dax_capable(struct block_device *bdev)
 412{
 413        struct gendisk *disk = bdev->bd_disk;
 414
 415        if (!disk->fops->direct_access)
 416                return false;
 417
 418        /*
 419         * If the partition is not aligned on a page boundary, we can't
 420         * do dax I/O to it.
 421         */
 422        if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
 423                        || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
 424                return false;
 425
 426        /*
 427         * If the device has known bad blocks, force all I/O through the
 428         * driver / page cache.
 429         *
 430         * TODO: support finer grained dax error handling
 431         */
 432        if (disk->bb && disk->bb->count)
 433                return false;
 434
 435        return true;
 436}
 437#endif
 438
 439static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 440                unsigned cmd, unsigned long arg)
 441{
 442        int ret;
 443
 444        if (!capable(CAP_SYS_ADMIN))
 445                return -EACCES;
 446
 447        ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 448        if (!is_unrecognized_ioctl(ret))
 449                return ret;
 450
 451        fsync_bdev(bdev);
 452        invalidate_bdev(bdev);
 453        return 0;
 454}
 455
 456static int blkdev_roset(struct block_device *bdev, fmode_t mode,
 457                unsigned cmd, unsigned long arg)
 458{
 459        int ret, n;
 460
 461        ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 462        if (!is_unrecognized_ioctl(ret))
 463                return ret;
 464        if (!capable(CAP_SYS_ADMIN))
 465                return -EACCES;
 466        if (get_user(n, (int __user *)arg))
 467                return -EFAULT;
 468        set_device_ro(bdev, n);
 469        return 0;
 470}
 471
 472static int blkdev_getgeo(struct block_device *bdev,
 473                struct hd_geometry __user *argp)
 474{
 475        struct gendisk *disk = bdev->bd_disk;
 476        struct hd_geometry geo;
 477        int ret;
 478
 479        if (!argp)
 480                return -EINVAL;
 481        if (!disk->fops->getgeo)
 482                return -ENOTTY;
 483
 484        /*
 485         * We need to set the startsect first, the driver may
 486         * want to override it.
 487         */
 488        memset(&geo, 0, sizeof(geo));
 489        geo.start = get_start_sect(bdev);
 490        ret = disk->fops->getgeo(bdev, &geo);
 491        if (ret)
 492                return ret;
 493        if (copy_to_user(argp, &geo, sizeof(geo)))
 494                return -EFAULT;
 495        return 0;
 496}
 497
 498/* set the logical block size */
 499static int blkdev_bszset(struct block_device *bdev, fmode_t mode,
 500                int __user *argp)
 501{
 502        int ret, n;
 503
 504        if (!capable(CAP_SYS_ADMIN))
 505                return -EACCES;
 506        if (!argp)
 507                return -EINVAL;
 508        if (get_user(n, argp))
 509                return -EFAULT;
 510
 511        if (!(mode & FMODE_EXCL)) {
 512                bdgrab(bdev);
 513                if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0)
 514                        return -EBUSY;
 515        }
 516
 517        ret = set_blocksize(bdev, n);
 518        if (!(mode & FMODE_EXCL))
 519                blkdev_put(bdev, mode | FMODE_EXCL);
 520        return ret;
 521}
 522
 523/*
 524 * always keep this in sync with compat_blkdev_ioctl()
 525 */
 526int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 527                        unsigned long arg)
 528{
 529        struct backing_dev_info *bdi;
 530        void __user *argp = (void __user *)arg;
 531        loff_t size;
 532        unsigned int max_sectors;
 533
 534        switch (cmd) {
 535        case BLKFLSBUF:
 536                return blkdev_flushbuf(bdev, mode, cmd, arg);
 537        case BLKROSET:
 538                return blkdev_roset(bdev, mode, cmd, arg);
 539        case BLKDISCARD:
 540                return blk_ioctl_discard(bdev, mode, arg, 0);
 541        case BLKSECDISCARD:
 542                return blk_ioctl_discard(bdev, mode, arg,
 543                                BLKDEV_DISCARD_SECURE);
 544        case BLKZEROOUT:
 545                return blk_ioctl_zeroout(bdev, mode, arg);
 546        case HDIO_GETGEO:
 547                return blkdev_getgeo(bdev, argp);
 548        case BLKRAGET:
 549        case BLKFRAGET:
 550                if (!arg)
 551                        return -EINVAL;
 552                bdi = blk_get_backing_dev_info(bdev);
 553                return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512);
 554        case BLKROGET:
 555                return put_int(arg, bdev_read_only(bdev) != 0);
 556        case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
 557                return put_int(arg, block_size(bdev));
 558        case BLKSSZGET: /* get block device logical block size */
 559                return put_int(arg, bdev_logical_block_size(bdev));
 560        case BLKPBSZGET: /* get block device physical block size */
 561                return put_uint(arg, bdev_physical_block_size(bdev));
 562        case BLKIOMIN:
 563                return put_uint(arg, bdev_io_min(bdev));
 564        case BLKIOOPT:
 565                return put_uint(arg, bdev_io_opt(bdev));
 566        case BLKALIGNOFF:
 567                return put_int(arg, bdev_alignment_offset(bdev));
 568        case BLKDISCARDZEROES:
 569                return put_uint(arg, bdev_discard_zeroes_data(bdev));
 570        case BLKSECTGET:
 571                max_sectors = min_t(unsigned int, USHRT_MAX,
 572                                    queue_max_sectors(bdev_get_queue(bdev)));
 573                return put_ushort(arg, max_sectors);
 574        case BLKROTATIONAL:
 575                return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
 576        case BLKRASET:
 577        case BLKFRASET:
 578                if(!capable(CAP_SYS_ADMIN))
 579                        return -EACCES;
 580                bdi = blk_get_backing_dev_info(bdev);
 581                bdi->ra_pages = (arg * 512) / PAGE_SIZE;
 582                return 0;
 583        case BLKBSZSET:
 584                return blkdev_bszset(bdev, mode, argp);
 585        case BLKPG:
 586                return blkpg_ioctl(bdev, argp);
 587        case BLKRRPART:
 588                return blkdev_reread_part(bdev);
 589        case BLKGETSIZE:
 590                size = i_size_read(bdev->bd_inode);
 591                if ((size >> 9) > ~0UL)
 592                        return -EFBIG;
 593                return put_ulong(arg, size >> 9);
 594        case BLKGETSIZE64:
 595                return put_u64(arg, i_size_read(bdev->bd_inode));
 596        case BLKTRACESTART:
 597        case BLKTRACESTOP:
 598        case BLKTRACESETUP:
 599        case BLKTRACETEARDOWN:
 600                return blk_trace_ioctl(bdev, cmd, argp);
 601        case BLKDAXGET:
 602                return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX));
 603                break;
 604        case IOC_PR_REGISTER:
 605                return blkdev_pr_register(bdev, argp);
 606        case IOC_PR_RESERVE:
 607                return blkdev_pr_reserve(bdev, argp);
 608        case IOC_PR_RELEASE:
 609                return blkdev_pr_release(bdev, argp);
 610        case IOC_PR_PREEMPT:
 611                return blkdev_pr_preempt(bdev, argp, false);
 612        case IOC_PR_PREEMPT_ABORT:
 613                return blkdev_pr_preempt(bdev, argp, true);
 614        case IOC_PR_CLEAR:
 615                return blkdev_pr_clear(bdev, argp);
 616        default:
 617                return __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 618        }
 619}
 620EXPORT_SYMBOL_GPL(blkdev_ioctl);
 621