linux/drivers/block/loop.c
<<
>>
Prefs
   1/*
   2 *  linux/drivers/block/loop.c
   3 *
   4 *  Written by Theodore Ts'o, 3/29/93
   5 *
   6 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
   7 * permitted under the GNU General Public License.
   8 *
   9 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
  10 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
  11 *
  12 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
  13 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
  14 *
  15 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
  16 *
  17 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
  18 *
  19 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
  20 *
  21 * Loadable modules and other fixes by AK, 1998
  22 *
  23 * Make real block number available to downstream transfer functions, enables
  24 * CBC (and relatives) mode encryption requiring unique IVs per data block.
  25 * Reed H. Petty, rhp@draper.net
  26 *
  27 * Maximum number of loop devices now dynamic via max_loop module parameter.
  28 * Russell Kroll <rkroll@exploits.org> 19990701
  29 *
  30 * Maximum number of loop devices when compiled-in now selectable by passing
  31 * max_loop=<1-255> to the kernel on boot.
   32 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
  33 *
  34 * Completely rewrite request handling to be make_request_fn style and
  35 * non blocking, pushing work to a helper thread. Lots of fixes from
  36 * Al Viro too.
  37 * Jens Axboe <axboe@suse.de>, Nov 2000
  38 *
  39 * Support up to 256 loop devices
  40 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
  41 *
  42 * Support for falling back on the write file operation when the address space
  43 * operations write_begin is not available on the backing filesystem.
  44 * Anton Altaparmakov, 16 Feb 2005
  45 *
  46 * Still To Fix:
  47 * - Advisory locking is ignored here.
  48 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
  49 *
  50 */
  51
  52#include <linux/module.h>
  53#include <linux/moduleparam.h>
  54#include <linux/sched.h>
  55#include <linux/fs.h>
  56#include <linux/file.h>
  57#include <linux/stat.h>
  58#include <linux/errno.h>
  59#include <linux/major.h>
  60#include <linux/wait.h>
  61#include <linux/blkdev.h>
  62#include <linux/blkpg.h>
  63#include <linux/init.h>
  64#include <linux/swap.h>
  65#include <linux/slab.h>
  66#include <linux/loop.h>
  67#include <linux/compat.h>
  68#include <linux/suspend.h>
  69#include <linux/freezer.h>
  70#include <linux/writeback.h>
  71#include <linux/buffer_head.h>          /* for invalidate_bdev() */
  72#include <linux/completion.h>
  73#include <linux/highmem.h>
  74#include <linux/gfp.h>
  75#include <linux/kthread.h>
  76#include <linux/splice.h>
  77
  78#include <asm/uaccess.h>
  79
  80static LIST_HEAD(loop_devices);
  81static DEFINE_MUTEX(loop_devices_mutex);
  82
  83static int max_part;
  84static int part_shift;
  85
  86/*
  87 * Transfer functions
  88 */
  89static int transfer_none(struct loop_device *lo, int cmd,
  90                         struct page *raw_page, unsigned raw_off,
  91                         struct page *loop_page, unsigned loop_off,
  92                         int size, sector_t real_block)
  93{
  94        char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
  95        char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
  96
  97        if (cmd == READ)
  98                memcpy(loop_buf, raw_buf, size);
  99        else
 100                memcpy(raw_buf, loop_buf, size);
 101
 102        kunmap_atomic(raw_buf, KM_USER0);
 103        kunmap_atomic(loop_buf, KM_USER1);
 104        cond_resched();
 105        return 0;
 106}
 107
 108static int transfer_xor(struct loop_device *lo, int cmd,
 109                        struct page *raw_page, unsigned raw_off,
 110                        struct page *loop_page, unsigned loop_off,
 111                        int size, sector_t real_block)
 112{
 113        char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
 114        char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
 115        char *in, *out, *key;
 116        int i, keysize;
 117
 118        if (cmd == READ) {
 119                in = raw_buf;
 120                out = loop_buf;
 121        } else {
 122                in = loop_buf;
 123                out = raw_buf;
 124        }
 125
 126        key = lo->lo_encrypt_key;
 127        keysize = lo->lo_encrypt_key_size;
 128        for (i = 0; i < size; i++)
 129                *out++ = *in++ ^ key[(i & 511) % keysize];
 130
 131        kunmap_atomic(raw_buf, KM_USER0);
 132        kunmap_atomic(loop_buf, KM_USER1);
 133        cond_resched();
 134        return 0;
 135}
 136
 137static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
 138{
 139        if (unlikely(info->lo_encrypt_key_size <= 0))
 140                return -EINVAL;
 141        return 0;
 142}
 143
 144static struct loop_func_table none_funcs = {
 145        .number = LO_CRYPT_NONE,
 146        .transfer = transfer_none,
 147};      
 148
 149static struct loop_func_table xor_funcs = {
 150        .number = LO_CRYPT_XOR,
 151        .transfer = transfer_xor,
 152        .init = xor_init
 153};      
 154
 155/* xfer_funcs[0] is special - its release function is never called */
 156static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 157        &none_funcs,
 158        &xor_funcs
 159};
 160
 161static loff_t get_loop_size(struct loop_device *lo, struct file *file)
 162{
 163        loff_t size, offset, loopsize;
 164
 165        /* Compute loopsize in bytes */
 166        size = i_size_read(file->f_mapping->host);
 167        offset = lo->lo_offset;
 168        loopsize = size - offset;
 169        if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
 170                loopsize = lo->lo_sizelimit;
 171
 172        /*
 173         * Unfortunately, if we want to do I/O on the device,
 174         * the number of 512-byte sectors has to fit into a sector_t.
 175         */
 176        return loopsize >> 9;
 177}
 178
 179static int
 180figure_loop_size(struct loop_device *lo)
 181{
 182        loff_t size = get_loop_size(lo, lo->lo_backing_file);
 183        sector_t x = (sector_t)size;
 184
 185        if (unlikely((loff_t)x != size))
 186                return -EFBIG;
 187
 188        set_capacity(lo->lo_disk, x);
 189        return 0;                                       
 190}
 191
 192static inline int
 193lo_do_transfer(struct loop_device *lo, int cmd,
 194               struct page *rpage, unsigned roffs,
 195               struct page *lpage, unsigned loffs,
 196               int size, sector_t rblock)
 197{
 198        if (unlikely(!lo->transfer))
 199                return 0;
 200
 201        return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
 202}
 203
 204/**
 205 * do_lo_send_aops - helper for writing data to a loop device
 206 *
 207 * This is the fast version for backing filesystems which implement the address
 208 * space operations write_begin and write_end.
 209 */
 210static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 211                loff_t pos, struct page *unused)
 212{
 213        struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 214        struct address_space *mapping = file->f_mapping;
 215        pgoff_t index;
 216        unsigned offset, bv_offs;
 217        int len, ret;
 218
 219        mutex_lock(&mapping->host->i_mutex);
 220        index = pos >> PAGE_CACHE_SHIFT;
 221        offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
 222        bv_offs = bvec->bv_offset;
 223        len = bvec->bv_len;
 224        while (len > 0) {
 225                sector_t IV;
 226                unsigned size, copied;
 227                int transfer_result;
 228                struct page *page;
 229                void *fsdata;
 230
 231                IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
 232                size = PAGE_CACHE_SIZE - offset;
 233                if (size > len)
 234                        size = len;
 235
 236                ret = pagecache_write_begin(file, mapping, pos, size, 0,
 237                                                        &page, &fsdata);
 238                if (ret)
 239                        goto fail;
 240
 241                transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 242                                bvec->bv_page, bv_offs, size, IV);
 243                copied = size;
 244                if (unlikely(transfer_result))
 245                        copied = 0;
 246
 247                ret = pagecache_write_end(file, mapping, pos, size, copied,
 248                                                        page, fsdata);
 249                if (ret < 0 || ret != copied)
 250                        goto fail;
 251
 252                if (unlikely(transfer_result))
 253                        goto fail;
 254
 255                bv_offs += copied;
 256                len -= copied;
 257                offset = 0;
 258                index++;
 259                pos += copied;
 260        }
 261        ret = 0;
 262out:
 263        mutex_unlock(&mapping->host->i_mutex);
 264        return ret;
 265fail:
 266        ret = -1;
 267        goto out;
 268}
 269
 270/**
 271 * __do_lo_send_write - helper for writing data to a loop device
 272 *
 273 * This helper just factors out common code between do_lo_send_direct_write()
 274 * and do_lo_send_write().
 275 */
 276static int __do_lo_send_write(struct file *file,
 277                u8 *buf, const int len, loff_t pos)
 278{
 279        ssize_t bw;
 280        mm_segment_t old_fs = get_fs();
 281
 282        set_fs(get_ds());
 283        bw = file->f_op->write(file, buf, len, &pos);
 284        set_fs(old_fs);
 285        if (likely(bw == len))
 286                return 0;
 287        printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
 288                        (unsigned long long)pos, len);
 289        if (bw >= 0)
 290                bw = -EIO;
 291        return bw;
 292}
 293
 294/**
 295 * do_lo_send_direct_write - helper for writing data to a loop device
 296 *
 297 * This is the fast, non-transforming version for backing filesystems which do
 298 * not implement the address space operations write_begin and write_end.
 299 * It uses the write file operation which should be present on all writeable
 300 * filesystems.
 301 */
 302static int do_lo_send_direct_write(struct loop_device *lo,
 303                struct bio_vec *bvec, loff_t pos, struct page *page)
 304{
 305        ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
 306                        kmap(bvec->bv_page) + bvec->bv_offset,
 307                        bvec->bv_len, pos);
 308        kunmap(bvec->bv_page);
 309        cond_resched();
 310        return bw;
 311}
 312
 313/**
 314 * do_lo_send_write - helper for writing data to a loop device
 315 *
 316 * This is the slow, transforming version for filesystems which do not
 317 * implement the address space operations write_begin and write_end.  It
 318 * uses the write file operation which should be present on all writeable
 319 * filesystems.
 320 *
 321 * Using fops->write is slower than using aops->{prepare,commit}_write in the
 322 * transforming case because we need to double buffer the data as we cannot do
 323 * the transformations in place as we do not have direct access to the
 324 * destination pages of the backing file.
 325 */
 326static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
 327                loff_t pos, struct page *page)
 328{
 329        int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
 330                        bvec->bv_offset, bvec->bv_len, pos >> 9);
 331        if (likely(!ret))
 332                return __do_lo_send_write(lo->lo_backing_file,
 333                                page_address(page), bvec->bv_len,
 334                                pos);
 335        printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
 336                        "length %i.\n", (unsigned long long)pos, bvec->bv_len);
 337        if (ret > 0)
 338                ret = -EIO;
 339        return ret;
 340}
 341
 342static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
 343{
 344        int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
 345                        struct page *page);
 346        struct bio_vec *bvec;
 347        struct page *page = NULL;
 348        int i, ret = 0;
 349
 350        do_lo_send = do_lo_send_aops;
 351        if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
 352                do_lo_send = do_lo_send_direct_write;
 353                if (lo->transfer != transfer_none) {
 354                        page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 355                        if (unlikely(!page))
 356                                goto fail;
 357                        kmap(page);
 358                        do_lo_send = do_lo_send_write;
 359                }
 360        }
 361        bio_for_each_segment(bvec, bio, i) {
 362                ret = do_lo_send(lo, bvec, pos, page);
 363                if (ret < 0)
 364                        break;
 365                pos += bvec->bv_len;
 366        }
 367        if (page) {
 368                kunmap(page);
 369                __free_page(page);
 370        }
 371out:
 372        return ret;
 373fail:
 374        printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
 375        ret = -ENOMEM;
 376        goto out;
 377}
 378
 379struct lo_read_data {
 380        struct loop_device *lo;
 381        struct page *page;
 382        unsigned offset;
 383        int bsize;
 384};
 385
 386static int
 387lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 388                struct splice_desc *sd)
 389{
 390        struct lo_read_data *p = sd->u.data;
 391        struct loop_device *lo = p->lo;
 392        struct page *page = buf->page;
 393        sector_t IV;
 394        int size, ret;
 395
 396        ret = buf->ops->confirm(pipe, buf);
 397        if (unlikely(ret))
 398                return ret;
 399
 400        IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
 401                                                        (buf->offset >> 9);
 402        size = sd->len;
 403        if (size > p->bsize)
 404                size = p->bsize;
 405
 406        if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
 407                printk(KERN_ERR "loop: transfer error block %ld\n",
 408                       page->index);
 409                size = -EINVAL;
 410        }
 411
 412        flush_dcache_page(p->page);
 413
 414        if (size > 0)
 415                p->offset += size;
 416
 417        return size;
 418}
 419
 420static int
 421lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
 422{
 423        return __splice_from_pipe(pipe, sd, lo_splice_actor);
 424}
 425
 426static int
 427do_lo_receive(struct loop_device *lo,
 428              struct bio_vec *bvec, int bsize, loff_t pos)
 429{
 430        struct lo_read_data cookie;
 431        struct splice_desc sd;
 432        struct file *file;
 433        long retval;
 434
 435        cookie.lo = lo;
 436        cookie.page = bvec->bv_page;
 437        cookie.offset = bvec->bv_offset;
 438        cookie.bsize = bsize;
 439
 440        sd.len = 0;
 441        sd.total_len = bvec->bv_len;
 442        sd.flags = 0;
 443        sd.pos = pos;
 444        sd.u.data = &cookie;
 445
 446        file = lo->lo_backing_file;
 447        retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
 448
 449        if (retval < 0)
 450                return retval;
 451
 452        return 0;
 453}
 454
 455static int
 456lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 457{
 458        struct bio_vec *bvec;
 459        int i, ret = 0;
 460
 461        bio_for_each_segment(bvec, bio, i) {
 462                ret = do_lo_receive(lo, bvec, bsize, pos);
 463                if (ret < 0)
 464                        break;
 465                pos += bvec->bv_len;
 466        }
 467        return ret;
 468}
 469
 470static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 471{
 472        loff_t pos;
 473        int ret;
 474
 475        pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 476
 477        if (bio_rw(bio) == WRITE) {
 478                bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
 479                struct file *file = lo->lo_backing_file;
 480
 481                if (barrier) {
 482                        if (unlikely(!file->f_op->fsync)) {
 483                                ret = -EOPNOTSUPP;
 484                                goto out;
 485                        }
 486
 487                        ret = vfs_fsync(file, file->f_path.dentry, 0);
 488                        if (unlikely(ret)) {
 489                                ret = -EIO;
 490                                goto out;
 491                        }
 492                }
 493
 494                ret = lo_send(lo, bio, pos);
 495
 496                if (barrier && !ret) {
 497                        ret = vfs_fsync(file, file->f_path.dentry, 0);
 498                        if (unlikely(ret))
 499                                ret = -EIO;
 500                }
 501        } else
 502                ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
 503
 504out:
 505        return ret;
 506}
 507
 508/*
 509 * Add bio to back of pending list
 510 */
 511static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 512{
 513        bio_list_add(&lo->lo_bio_list, bio);
 514}
 515
 516/*
 517 * Grab first pending buffer
 518 */
 519static struct bio *loop_get_bio(struct loop_device *lo)
 520{
 521        return bio_list_pop(&lo->lo_bio_list);
 522}
 523
 524static int loop_make_request(struct request_queue *q, struct bio *old_bio)
 525{
 526        struct loop_device *lo = q->queuedata;
 527        int rw = bio_rw(old_bio);
 528
 529        if (rw == READA)
 530                rw = READ;
 531
 532        BUG_ON(!lo || (rw != READ && rw != WRITE));
 533
 534        spin_lock_irq(&lo->lo_lock);
 535        if (lo->lo_state != Lo_bound)
 536                goto out;
 537        if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 538                goto out;
 539        loop_add_bio(lo, old_bio);
 540        wake_up(&lo->lo_event);
 541        spin_unlock_irq(&lo->lo_lock);
 542        return 0;
 543
 544out:
 545        spin_unlock_irq(&lo->lo_lock);
 546        bio_io_error(old_bio);
 547        return 0;
 548}
 549
 550/*
 551 * kick off io on the underlying address space
 552 */
 553static void loop_unplug(struct request_queue *q)
 554{
 555        struct loop_device *lo = q->queuedata;
 556
 557        queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
 558        blk_run_address_space(lo->lo_backing_file->f_mapping);
 559}
 560
 561struct switch_request {
 562        struct file *file;
 563        struct completion wait;
 564};
 565
 566static void do_loop_switch(struct loop_device *, struct switch_request *);
 567
 568static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
 569{
 570        if (unlikely(!bio->bi_bdev)) {
 571                do_loop_switch(lo, bio->bi_private);
 572                bio_put(bio);
 573        } else {
 574                int ret = do_bio_filebacked(lo, bio);
 575                bio_endio(bio, ret);
 576        }
 577}
 578
 579/*
 580 * worker thread that handles reads/writes to file backed loop devices,
 581 * to avoid blocking in our make_request_fn. it also does loop decrypting
 582 * on reads for block backed loop, as that is too heavy to do from
 583 * b_end_io context where irqs may be disabled.
 584 *
 585 * Loop explanation:  loop_clr_fd() sets lo_state to Lo_rundown before
 586 * calling kthread_stop().  Therefore once kthread_should_stop() is
 587 * true, make_request will not place any more requests.  Therefore
 588 * once kthread_should_stop() is true and lo_bio is NULL, we are
 589 * done with the loop.
 590 */
 591static int loop_thread(void *data)
 592{
 593        struct loop_device *lo = data;
 594        struct bio *bio;
 595
 596        set_user_nice(current, -20);
 597
 598        while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
 599
 600                wait_event_interruptible(lo->lo_event,
 601                                !bio_list_empty(&lo->lo_bio_list) ||
 602                                kthread_should_stop());
 603
 604                if (bio_list_empty(&lo->lo_bio_list))
 605                        continue;
 606                spin_lock_irq(&lo->lo_lock);
 607                bio = loop_get_bio(lo);
 608                spin_unlock_irq(&lo->lo_lock);
 609
 610                BUG_ON(!bio);
 611                loop_handle_bio(lo, bio);
 612        }
 613
 614        return 0;
 615}
 616
 617/*
 618 * loop_switch performs the hard work of switching a backing store.
 619 * First it needs to flush existing IO, it does this by sending a magic
 620 * BIO down the pipe. The completion of this BIO does the actual switch.
 621 */
 622static int loop_switch(struct loop_device *lo, struct file *file)
 623{
 624        struct switch_request w;
 625        struct bio *bio = bio_alloc(GFP_KERNEL, 0);
 626        if (!bio)
 627                return -ENOMEM;
 628        init_completion(&w.wait);
 629        w.file = file;
 630        bio->bi_private = &w;
 631        bio->bi_bdev = NULL;
 632        loop_make_request(lo->lo_queue, bio);
 633        wait_for_completion(&w.wait);
 634        return 0;
 635}
 636
 637/*
 638 * Helper to flush the IOs in loop, but keeping loop thread running
 639 */
 640static int loop_flush(struct loop_device *lo)
 641{
 642        /* loop not yet configured, no running thread, nothing to flush */
 643        if (!lo->lo_thread)
 644                return 0;
 645
 646        return loop_switch(lo, NULL);
 647}
 648
 649/*
 650 * Do the actual switch; called from the BIO completion routine
 651 */
 652static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
 653{
 654        struct file *file = p->file;
 655        struct file *old_file = lo->lo_backing_file;
 656        struct address_space *mapping;
 657
 658        /* if no new file, only flush of queued bios requested */
 659        if (!file)
 660                goto out;
 661
 662        mapping = file->f_mapping;
 663        mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
 664        lo->lo_backing_file = file;
 665        lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
 666                mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
 667        lo->old_gfp_mask = mapping_gfp_mask(mapping);
 668        mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 669out:
 670        complete(&p->wait);
 671}
 672
 673
 674/*
 675 * loop_change_fd switched the backing store of a loopback device to
 676 * a new file. This is useful for operating system installers to free up
 677 * the original file and in High Availability environments to switch to
 678 * an alternative location for the content in case of server meltdown.
 679 * This can only work if the loop device is used read-only, and if the
 680 * new backing store is the same size and type as the old backing store.
 681 */
 682static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 683                          unsigned int arg)
 684{
 685        struct file     *file, *old_file;
 686        struct inode    *inode;
 687        int             error;
 688
 689        error = -ENXIO;
 690        if (lo->lo_state != Lo_bound)
 691                goto out;
 692
 693        /* the loop device has to be read-only */
 694        error = -EINVAL;
 695        if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
 696                goto out;
 697
 698        error = -EBADF;
 699        file = fget(arg);
 700        if (!file)
 701                goto out;
 702
 703        inode = file->f_mapping->host;
 704        old_file = lo->lo_backing_file;
 705
 706        error = -EINVAL;
 707
 708        if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 709                goto out_putf;
 710
 711        /* size of the new backing store needs to be the same */
 712        if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
 713                goto out_putf;
 714
 715        /* and ... switch */
 716        error = loop_switch(lo, file);
 717        if (error)
 718                goto out_putf;
 719
 720        fput(old_file);
 721        if (max_part > 0)
 722                ioctl_by_bdev(bdev, BLKRRPART, 0);
 723        return 0;
 724
 725 out_putf:
 726        fput(file);
 727 out:
 728        return error;
 729}
 730
 731static inline int is_loop_device(struct file *file)
 732{
 733        struct inode *i = file->f_mapping->host;
 734
 735        return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
 736}
 737
 738static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 739                       struct block_device *bdev, unsigned int arg)
 740{
 741        struct file     *file, *f;
 742        struct inode    *inode;
 743        struct address_space *mapping;
 744        unsigned lo_blocksize;
 745        int             lo_flags = 0;
 746        int             error;
 747        loff_t          size;
 748
 749        /* This is safe, since we have a reference from open(). */
 750        __module_get(THIS_MODULE);
 751
 752        error = -EBADF;
 753        file = fget(arg);
 754        if (!file)
 755                goto out;
 756
 757        error = -EBUSY;
 758        if (lo->lo_state != Lo_unbound)
 759                goto out_putf;
 760
 761        /* Avoid recursion */
 762        f = file;
 763        while (is_loop_device(f)) {
 764                struct loop_device *l;
 765
 766                if (f->f_mapping->host->i_bdev == bdev)
 767                        goto out_putf;
 768
 769                l = f->f_mapping->host->i_bdev->bd_disk->private_data;
 770                if (l->lo_state == Lo_unbound) {
 771                        error = -EINVAL;
 772                        goto out_putf;
 773                }
 774                f = l->lo_backing_file;
 775        }
 776
 777        mapping = file->f_mapping;
 778        inode = mapping->host;
 779
 780        if (!(file->f_mode & FMODE_WRITE))
 781                lo_flags |= LO_FLAGS_READ_ONLY;
 782
 783        error = -EINVAL;
 784        if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
 785                const struct address_space_operations *aops = mapping->a_ops;
 786
 787                if (aops->write_begin)
 788                        lo_flags |= LO_FLAGS_USE_AOPS;
 789                if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
 790                        lo_flags |= LO_FLAGS_READ_ONLY;
 791
 792                lo_blocksize = S_ISBLK(inode->i_mode) ?
 793                        inode->i_bdev->bd_block_size : PAGE_SIZE;
 794
 795                error = 0;
 796        } else {
 797                goto out_putf;
 798        }
 799
 800        size = get_loop_size(lo, file);
 801
 802        if ((loff_t)(sector_t)size != size) {
 803                error = -EFBIG;
 804                goto out_putf;
 805        }
 806
 807        if (!(mode & FMODE_WRITE))
 808                lo_flags |= LO_FLAGS_READ_ONLY;
 809
 810        set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
 811
 812        lo->lo_blocksize = lo_blocksize;
 813        lo->lo_device = bdev;
 814        lo->lo_flags = lo_flags;
 815        lo->lo_backing_file = file;
 816        lo->transfer = transfer_none;
 817        lo->ioctl = NULL;
 818        lo->lo_sizelimit = 0;
 819        lo->old_gfp_mask = mapping_gfp_mask(mapping);
 820        mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 821
 822        bio_list_init(&lo->lo_bio_list);
 823
 824        /*
 825         * set queue make_request_fn, and add limits based on lower level
 826         * device
 827         */
 828        blk_queue_make_request(lo->lo_queue, loop_make_request);
 829        lo->lo_queue->queuedata = lo;
 830        lo->lo_queue->unplug_fn = loop_unplug;
 831
 832        if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
 833                blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN, NULL);
 834
 835        set_capacity(lo->lo_disk, size);
 836        bd_set_size(bdev, size << 9);
 837
 838        set_blocksize(bdev, lo_blocksize);
 839
 840        lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
 841                                                lo->lo_number);
 842        if (IS_ERR(lo->lo_thread)) {
 843                error = PTR_ERR(lo->lo_thread);
 844                goto out_clr;
 845        }
 846        lo->lo_state = Lo_bound;
 847        wake_up_process(lo->lo_thread);
 848        if (max_part > 0)
 849                ioctl_by_bdev(bdev, BLKRRPART, 0);
 850        return 0;
 851
 852out_clr:
 853        lo->lo_thread = NULL;
 854        lo->lo_device = NULL;
 855        lo->lo_backing_file = NULL;
 856        lo->lo_flags = 0;
 857        set_capacity(lo->lo_disk, 0);
 858        invalidate_bdev(bdev);
 859        bd_set_size(bdev, 0);
 860        mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
 861        lo->lo_state = Lo_unbound;
 862 out_putf:
 863        fput(file);
 864 out:
 865        /* This is safe: open() is still holding a reference. */
 866        module_put(THIS_MODULE);
 867        return error;
 868}
 869
 870static int
 871loop_release_xfer(struct loop_device *lo)
 872{
 873        int err = 0;
 874        struct loop_func_table *xfer = lo->lo_encryption;
 875
 876        if (xfer) {
 877                if (xfer->release)
 878                        err = xfer->release(lo);
 879                lo->transfer = NULL;
 880                lo->lo_encryption = NULL;
 881                module_put(xfer->owner);
 882        }
 883        return err;
 884}
 885
 886static int
 887loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
 888               const struct loop_info64 *i)
 889{
 890        int err = 0;
 891
 892        if (xfer) {
 893                struct module *owner = xfer->owner;
 894
 895                if (!try_module_get(owner))
 896                        return -EINVAL;
 897                if (xfer->init)
 898                        err = xfer->init(lo, i);
 899                if (err)
 900                        module_put(owner);
 901                else
 902                        lo->lo_encryption = xfer;
 903        }
 904        return err;
 905}
 906
/*
 * Tear down the binding between loop device @lo and its backing file.
 *
 * The callers in this file (lo_ioctl() and lo_release()) invoke this with
 * lo->lo_ctl_mutex held.  On success (return 0) the mutex has been
 * released here; on every error return it is still held.
 *
 * @bdev may be NULL (lo_release() passes NULL for autoclear); the
 * invalidate, resize and partition-rescan steps are then skipped.
 *
 * Returns -ENXIO if the device is not bound, -EBUSY if more than one
 * reference is held, -EINVAL if there is no backing file, 0 on success.
 */
static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
{
        struct file *filp = lo->lo_backing_file;
        gfp_t gfp = lo->old_gfp_mask;

        if (lo->lo_state != Lo_bound)
                return -ENXIO;

        if (lo->lo_refcnt > 1)  /* we needed one fd for the ioctl */
                return -EBUSY;

        if (filp == NULL)
                return -EINVAL;

        /* Mark the device as going away before stopping the worker thread. */
        spin_lock_irq(&lo->lo_lock);
        lo->lo_state = Lo_rundown;
        spin_unlock_irq(&lo->lo_lock);

        kthread_stop(lo->lo_thread);

        lo->lo_queue->unplug_fn = NULL;
        lo->lo_backing_file = NULL;

        /* The release hook's return value is deliberately not checked here. */
        loop_release_xfer(lo);
        /* Reset all per-binding configuration, including key material. */
        lo->transfer = NULL;
        lo->ioctl = NULL;
        lo->lo_device = NULL;
        lo->lo_encryption = NULL;
        lo->lo_offset = 0;
        lo->lo_sizelimit = 0;
        lo->lo_encrypt_key_size = 0;
        lo->lo_flags = 0;
        lo->lo_thread = NULL;
        memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
        memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
        memset(lo->lo_file_name, 0, LO_NAME_SIZE);
        if (bdev)
                invalidate_bdev(bdev);
        set_capacity(lo->lo_disk, 0);
        if (bdev)
                bd_set_size(bdev, 0);
        /* Restore the gfp mask that was saved in lo->old_gfp_mask. */
        mapping_set_gfp_mask(filp->f_mapping, gfp);
        lo->lo_state = Lo_unbound;
        /* This is safe: open() is still holding a reference. */
        module_put(THIS_MODULE);
        /* With partition support enabled, ask for a partition table re-read. */
        if (max_part > 0 && bdev)
                ioctl_by_bdev(bdev, BLKRRPART, 0);
        mutex_unlock(&lo->lo_ctl_mutex);
        /*
         * Need not hold lo_ctl_mutex to fput backing file.
         * Calling fput holding lo_ctl_mutex triggers a circular
         * lock dependency possibility warning as fput can take
         * bd_mutex which is usually taken before lo_ctl_mutex.
         */
        fput(filp);
        return 0;
}
 964
/*
 * Apply the configuration in @info to the bound loop device @lo.
 *
 * Permission: if a key is already set, only the uid that set it or a
 * CAP_SYS_ADMIN holder may change the status (-EPERM otherwise).
 *
 * Returns -ENXIO if the device is not bound, -EINVAL for an oversized key
 * or an unknown/unregistered encryption type, -EFBIG if the new
 * offset/sizelimit cannot be applied, any error from the transfer module's
 * release/init hooks, or 0 on success.
 *
 * NOTE(review): the previous transfer module is released before the new
 * settings are validated, so an error return after that point leaves the
 * device without its previous transfer module -- confirm this is intended.
 */
static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
        int err;
        struct loop_func_table *xfer;
        uid_t uid = current_uid();

        if (lo->lo_encrypt_key_size &&
            lo->lo_key_owner != uid &&
            !capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (lo->lo_state != Lo_bound)
                return -ENXIO;
        /* The cast makes a negative key size fail the bound check too. */
        if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
                return -EINVAL;

        err = loop_release_xfer(lo);
        if (err)
                return err;

        /* Encryption type 0 selects no transfer module (xfer == NULL). */
        if (info->lo_encrypt_type) {
                unsigned int type = info->lo_encrypt_type;

                if (type >= MAX_LO_CRYPT)
                        return -EINVAL;
                xfer = xfer_funcs[type];
                if (xfer == NULL)
                        return -EINVAL;
        } else
                xfer = NULL;

        err = loop_init_xfer(lo, xfer, info);
        if (err)
                return err;

        /* Only recompute the device size when the window actually changed. */
        if (lo->lo_offset != info->lo_offset ||
            lo->lo_sizelimit != info->lo_sizelimit) {
                lo->lo_offset = info->lo_offset;
                lo->lo_sizelimit = info->lo_sizelimit;
                if (figure_loop_size(lo))
                        return -EFBIG;
        }

        /* Copy the names and force NUL termination of both buffers. */
        memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
        memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
        lo->lo_file_name[LO_NAME_SIZE-1] = 0;
        lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;

        /* Without a transfer module, fall back to the none_funcs table. */
        if (!xfer)
                xfer = &none_funcs;
        lo->transfer = xfer->transfer;
        lo->ioctl = xfer->ioctl;

        /* AUTOCLEAR is the only flag taken from @info; toggle it on mismatch. */
        if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
             (info->lo_flags & LO_FLAGS_AUTOCLEAR))
                lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;

        lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
        lo->lo_init[0] = info->lo_init[0];
        lo->lo_init[1] = info->lo_init[1];
        /* Remember who installed the key for the permission check above. */
        if (info->lo_encrypt_key_size) {
                memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
                       info->lo_encrypt_key_size);
                lo->lo_key_owner = uid;
        }

        return 0;
}
1033
1034static int
1035loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1036{
1037        struct file *file = lo->lo_backing_file;
1038        struct kstat stat;
1039        int error;
1040
1041        if (lo->lo_state != Lo_bound)
1042                return -ENXIO;
1043        error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
1044        if (error)
1045                return error;
1046        memset(info, 0, sizeof(*info));
1047        info->lo_number = lo->lo_number;
1048        info->lo_device = huge_encode_dev(stat.dev);
1049        info->lo_inode = stat.ino;
1050        info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
1051        info->lo_offset = lo->lo_offset;
1052        info->lo_sizelimit = lo->lo_sizelimit;
1053        info->lo_flags = lo->lo_flags;
1054        memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
1055        memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1056        info->lo_encrypt_type =
1057                lo->lo_encryption ? lo->lo_encryption->number : 0;
1058        if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1059                info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1060                memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1061                       lo->lo_encrypt_key_size);
1062        }
1063        return 0;
1064}
1065
1066static void
1067loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1068{
1069        memset(info64, 0, sizeof(*info64));
1070        info64->lo_number = info->lo_number;
1071        info64->lo_device = info->lo_device;
1072        info64->lo_inode = info->lo_inode;
1073        info64->lo_rdevice = info->lo_rdevice;
1074        info64->lo_offset = info->lo_offset;
1075        info64->lo_sizelimit = 0;
1076        info64->lo_encrypt_type = info->lo_encrypt_type;
1077        info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1078        info64->lo_flags = info->lo_flags;
1079        info64->lo_init[0] = info->lo_init[0];
1080        info64->lo_init[1] = info->lo_init[1];
1081        if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1082                memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1083        else
1084                memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1085        memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1086}
1087
1088static int
1089loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
1090{
1091        memset(info, 0, sizeof(*info));
1092        info->lo_number = info64->lo_number;
1093        info->lo_device = info64->lo_device;
1094        info->lo_inode = info64->lo_inode;
1095        info->lo_rdevice = info64->lo_rdevice;
1096        info->lo_offset = info64->lo_offset;
1097        info->lo_encrypt_type = info64->lo_encrypt_type;
1098        info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1099        info->lo_flags = info64->lo_flags;
1100        info->lo_init[0] = info64->lo_init[0];
1101        info->lo_init[1] = info64->lo_init[1];
1102        if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1103                memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1104        else
1105                memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1106        memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1107
1108        /* error in case values were truncated */
1109        if (info->lo_device != info64->lo_device ||
1110            info->lo_rdevice != info64->lo_rdevice ||
1111            info->lo_inode != info64->lo_inode ||
1112            info->lo_offset != info64->lo_offset)
1113                return -EOVERFLOW;
1114
1115        return 0;
1116}
1117
1118static int
1119loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
1120{
1121        struct loop_info info;
1122        struct loop_info64 info64;
1123
1124        if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1125                return -EFAULT;
1126        loop_info64_from_old(&info, &info64);
1127        return loop_set_status(lo, &info64);
1128}
1129
1130static int
1131loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
1132{
1133        struct loop_info64 info64;
1134
1135        if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1136                return -EFAULT;
1137        return loop_set_status(lo, &info64);
1138}
1139
1140static int
1141loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
1142        struct loop_info info;
1143        struct loop_info64 info64;
1144        int err = 0;
1145
1146        if (!arg)
1147                err = -EINVAL;
1148        if (!err)
1149                err = loop_get_status(lo, &info64);
1150        if (!err)
1151                err = loop_info64_to_old(&info64, &info);
1152        if (!err && copy_to_user(arg, &info, sizeof(info)))
1153                err = -EFAULT;
1154
1155        return err;
1156}
1157
1158static int
1159loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1160        struct loop_info64 info64;
1161        int err = 0;
1162
1163        if (!arg)
1164                err = -EINVAL;
1165        if (!err)
1166                err = loop_get_status(lo, &info64);
1167        if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1168                err = -EFAULT;
1169
1170        return err;
1171}
1172
1173static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
1174{
1175        int err;
1176        sector_t sec;
1177        loff_t sz;
1178
1179        err = -ENXIO;
1180        if (unlikely(lo->lo_state != Lo_bound))
1181                goto out;
1182        err = figure_loop_size(lo);
1183        if (unlikely(err))
1184                goto out;
1185        sec = get_capacity(lo->lo_disk);
1186        /* the width of sector_t may be narrow for bit-shift */
1187        sz = sec;
1188        sz <<= 9;
1189        mutex_lock(&bdev->bd_mutex);
1190        bd_set_size(bdev, sz);
1191        mutex_unlock(&bdev->bd_mutex);
1192
1193 out:
1194        return err;
1195}
1196
1197static int lo_ioctl(struct block_device *bdev, fmode_t mode,
1198        unsigned int cmd, unsigned long arg)
1199{
1200        struct loop_device *lo = bdev->bd_disk->private_data;
1201        int err;
1202
1203        mutex_lock_nested(&lo->lo_ctl_mutex, 1);
1204        switch (cmd) {
1205        case LOOP_SET_FD:
1206                err = loop_set_fd(lo, mode, bdev, arg);
1207                break;
1208        case LOOP_CHANGE_FD:
1209                err = loop_change_fd(lo, bdev, arg);
1210                break;
1211        case LOOP_CLR_FD:
1212                /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
1213                err = loop_clr_fd(lo, bdev);
1214                if (!err)
1215                        goto out_unlocked;
1216                break;
1217        case LOOP_SET_STATUS:
1218                err = loop_set_status_old(lo, (struct loop_info __user *) arg);
1219                break;
1220        case LOOP_GET_STATUS:
1221                err = loop_get_status_old(lo, (struct loop_info __user *) arg);
1222                break;
1223        case LOOP_SET_STATUS64:
1224                err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
1225                break;
1226        case LOOP_GET_STATUS64:
1227                err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
1228                break;
1229        case LOOP_SET_CAPACITY:
1230                err = -EPERM;
1231                if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
1232                        err = loop_set_capacity(lo, bdev);
1233                break;
1234        default:
1235                err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
1236        }
1237        mutex_unlock(&lo->lo_ctl_mutex);
1238
1239out_unlocked:
1240        return err;
1241}
1242
1243#ifdef CONFIG_COMPAT
/*
 * Layout of the legacy loop status structure as used by 32-bit (compat)
 * userspace.  Converted to and from struct loop_info64 by
 * loop_info64_from_compat() and loop_info64_to_compat().
 */
struct compat_loop_info {
        compat_int_t    lo_number;      /* ioctl r/o */
        compat_dev_t    lo_device;      /* ioctl r/o */
        compat_ulong_t  lo_inode;       /* ioctl r/o */
        compat_dev_t    lo_rdevice;     /* ioctl r/o */
        compat_int_t    lo_offset;
        compat_int_t    lo_encrypt_type;
        compat_int_t    lo_encrypt_key_size;    /* ioctl w/o */
        compat_int_t    lo_flags;       /* ioctl r/o */
        /* file name, or crypt name when the type is LO_CRYPT_CRYPTOAPI */
        char            lo_name[LO_NAME_SIZE];
        unsigned char   lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
        compat_ulong_t  lo_init[2];
        char            reserved[4];
};
1258
1259/*
1260 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
1261 * - noinlined to reduce stack space usage in main part of driver
1262 */
1263static noinline int
1264loop_info64_from_compat(const struct compat_loop_info __user *arg,
1265                        struct loop_info64 *info64)
1266{
1267        struct compat_loop_info info;
1268
1269        if (copy_from_user(&info, arg, sizeof(info)))
1270                return -EFAULT;
1271
1272        memset(info64, 0, sizeof(*info64));
1273        info64->lo_number = info.lo_number;
1274        info64->lo_device = info.lo_device;
1275        info64->lo_inode = info.lo_inode;
1276        info64->lo_rdevice = info.lo_rdevice;
1277        info64->lo_offset = info.lo_offset;
1278        info64->lo_sizelimit = 0;
1279        info64->lo_encrypt_type = info.lo_encrypt_type;
1280        info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
1281        info64->lo_flags = info.lo_flags;
1282        info64->lo_init[0] = info.lo_init[0];
1283        info64->lo_init[1] = info.lo_init[1];
1284        if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1285                memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
1286        else
1287                memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
1288        memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
1289        return 0;
1290}
1291
1292/*
1293 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
1294 * - noinlined to reduce stack space usage in main part of driver
1295 */
1296static noinline int
1297loop_info64_to_compat(const struct loop_info64 *info64,
1298                      struct compat_loop_info __user *arg)
1299{
1300        struct compat_loop_info info;
1301
1302        memset(&info, 0, sizeof(info));
1303        info.lo_number = info64->lo_number;
1304        info.lo_device = info64->lo_device;
1305        info.lo_inode = info64->lo_inode;
1306        info.lo_rdevice = info64->lo_rdevice;
1307        info.lo_offset = info64->lo_offset;
1308        info.lo_encrypt_type = info64->lo_encrypt_type;
1309        info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
1310        info.lo_flags = info64->lo_flags;
1311        info.lo_init[0] = info64->lo_init[0];
1312        info.lo_init[1] = info64->lo_init[1];
1313        if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1314                memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1315        else
1316                memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
1317        memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1318
1319        /* error in case values were truncated */
1320        if (info.lo_device != info64->lo_device ||
1321            info.lo_rdevice != info64->lo_rdevice ||
1322            info.lo_inode != info64->lo_inode ||
1323            info.lo_offset != info64->lo_offset ||
1324            info.lo_init[0] != info64->lo_init[0] ||
1325            info.lo_init[1] != info64->lo_init[1])
1326                return -EOVERFLOW;
1327
1328        if (copy_to_user(arg, &info, sizeof(info)))
1329                return -EFAULT;
1330        return 0;
1331}
1332
1333static int
1334loop_set_status_compat(struct loop_device *lo,
1335                       const struct compat_loop_info __user *arg)
1336{
1337        struct loop_info64 info64;
1338        int ret;
1339
1340        ret = loop_info64_from_compat(arg, &info64);
1341        if (ret < 0)
1342                return ret;
1343        return loop_set_status(lo, &info64);
1344}
1345
1346static int
1347loop_get_status_compat(struct loop_device *lo,
1348                       struct compat_loop_info __user *arg)
1349{
1350        struct loop_info64 info64;
1351        int err = 0;
1352
1353        if (!arg)
1354                err = -EINVAL;
1355        if (!err)
1356                err = loop_get_status(lo, &info64);
1357        if (!err)
1358                err = loop_info64_to_compat(&info64, arg);
1359        return err;
1360}
1361
1362static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1363                           unsigned int cmd, unsigned long arg)
1364{
1365        struct loop_device *lo = bdev->bd_disk->private_data;
1366        int err;
1367
1368        switch(cmd) {
1369        case LOOP_SET_STATUS:
1370                mutex_lock(&lo->lo_ctl_mutex);
1371                err = loop_set_status_compat(
1372                        lo, (const struct compat_loop_info __user *) arg);
1373                mutex_unlock(&lo->lo_ctl_mutex);
1374                break;
1375        case LOOP_GET_STATUS:
1376                mutex_lock(&lo->lo_ctl_mutex);
1377                err = loop_get_status_compat(
1378                        lo, (struct compat_loop_info __user *) arg);
1379                mutex_unlock(&lo->lo_ctl_mutex);
1380                break;
1381        case LOOP_SET_CAPACITY:
1382        case LOOP_CLR_FD:
1383        case LOOP_GET_STATUS64:
1384        case LOOP_SET_STATUS64:
1385                arg = (unsigned long) compat_ptr(arg);
1386        case LOOP_SET_FD:
1387        case LOOP_CHANGE_FD:
1388                err = lo_ioctl(bdev, mode, cmd, arg);
1389                break;
1390        default:
1391                err = -ENOIOCTLCMD;
1392                break;
1393        }
1394        return err;
1395}
1396#endif
1397
1398static int lo_open(struct block_device *bdev, fmode_t mode)
1399{
1400        struct loop_device *lo = bdev->bd_disk->private_data;
1401
1402        mutex_lock(&lo->lo_ctl_mutex);
1403        lo->lo_refcnt++;
1404        mutex_unlock(&lo->lo_ctl_mutex);
1405
1406        return 0;
1407}
1408
1409static int lo_release(struct gendisk *disk, fmode_t mode)
1410{
1411        struct loop_device *lo = disk->private_data;
1412        int err;
1413
1414        mutex_lock(&lo->lo_ctl_mutex);
1415
1416        if (--lo->lo_refcnt)
1417                goto out;
1418
1419        if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
1420                /*
1421                 * In autoclear mode, stop the loop thread
1422                 * and remove configuration after last close.
1423                 */
1424                err = loop_clr_fd(lo, NULL);
1425                if (!err)
1426                        goto out_unlocked;
1427        } else {
1428                /*
1429                 * Otherwise keep thread (if running) and config,
1430                 * but flush possible ongoing bios in thread.
1431                 */
1432                loop_flush(lo);
1433        }
1434
1435out:
1436        mutex_unlock(&lo->lo_ctl_mutex);
1437out_unlocked:
1438        return 0;
1439}
1440
/*
 * Block device operations for loop devices; installed as disk->fops in
 * loop_alloc().  The compat ioctl handler only exists on CONFIG_COMPAT
 * builds.
 */
static const struct block_device_operations lo_fops = {
        .owner =        THIS_MODULE,
        .open =         lo_open,
        .release =      lo_release,
        .ioctl =        lo_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl = lo_compat_ioctl,
#endif
};
1450
/*
 * And now the modules code and kernel interface.
 */

/*
 * Number of loop devices to create up front; 0 means "not specified", in
 * which case loop_init() picks its default.
 */
static int max_loop;
module_param(max_loop, int, 0);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
/* max_part is defined outside this part of the file. */
module_param(max_part, int, 0);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
1461
1462int loop_register_transfer(struct loop_func_table *funcs)
1463{
1464        unsigned int n = funcs->number;
1465
1466        if (n >= MAX_LO_CRYPT || xfer_funcs[n])
1467                return -EINVAL;
1468        xfer_funcs[n] = funcs;
1469        return 0;
1470}
1471
1472int loop_unregister_transfer(int number)
1473{
1474        unsigned int n = number;
1475        struct loop_device *lo;
1476        struct loop_func_table *xfer;
1477
1478        if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1479                return -EINVAL;
1480
1481        xfer_funcs[n] = NULL;
1482
1483        list_for_each_entry(lo, &loop_devices, lo_list) {
1484                mutex_lock(&lo->lo_ctl_mutex);
1485
1486                if (lo->lo_encryption == xfer)
1487                        loop_release_xfer(lo);
1488
1489                mutex_unlock(&lo->lo_ctl_mutex);
1490        }
1491
1492        return 0;
1493}
1494
/* Export the transfer (un)registration entry points to other modules. */
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
1497
1498static struct loop_device *loop_alloc(int i)
1499{
1500        struct loop_device *lo;
1501        struct gendisk *disk;
1502
1503        lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1504        if (!lo)
1505                goto out;
1506
1507        lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1508        if (!lo->lo_queue)
1509                goto out_free_dev;
1510
1511        disk = lo->lo_disk = alloc_disk(1 << part_shift);
1512        if (!disk)
1513                goto out_free_queue;
1514
1515        mutex_init(&lo->lo_ctl_mutex);
1516        lo->lo_number           = i;
1517        lo->lo_thread           = NULL;
1518        init_waitqueue_head(&lo->lo_event);
1519        spin_lock_init(&lo->lo_lock);
1520        disk->major             = LOOP_MAJOR;
1521        disk->first_minor       = i << part_shift;
1522        disk->fops              = &lo_fops;
1523        disk->private_data      = lo;
1524        disk->queue             = lo->lo_queue;
1525        sprintf(disk->disk_name, "loop%d", i);
1526        return lo;
1527
1528out_free_queue:
1529        blk_cleanup_queue(lo->lo_queue);
1530out_free_dev:
1531        kfree(lo);
1532out:
1533        return NULL;
1534}
1535
/*
 * Release everything held by @lo: its request queue, its reference on the
 * gendisk, its entry in the list it is linked on, and the structure
 * itself.  @lo must not be used afterwards.
 */
static void loop_free(struct loop_device *lo)
{
        blk_cleanup_queue(lo->lo_queue);
        put_disk(lo->lo_disk);
        list_del(&lo->lo_list);
        kfree(lo);
}
1543
1544static struct loop_device *loop_init_one(int i)
1545{
1546        struct loop_device *lo;
1547
1548        list_for_each_entry(lo, &loop_devices, lo_list) {
1549                if (lo->lo_number == i)
1550                        return lo;
1551        }
1552
1553        lo = loop_alloc(i);
1554        if (lo) {
1555                add_disk(lo->lo_disk);
1556                list_add_tail(&lo->lo_list, &loop_devices);
1557        }
1558        return lo;
1559}
1560
/*
 * Remove a registered loop device: unregister its gendisk, then free the
 * device via loop_free().
 */
static void loop_del_one(struct loop_device *lo)
{
        del_gendisk(lo->lo_disk);
        loop_free(lo);
}
1566
1567static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1568{
1569        struct loop_device *lo;
1570        struct kobject *kobj;
1571
1572        mutex_lock(&loop_devices_mutex);
1573        lo = loop_init_one(dev & MINORMASK);
1574        kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
1575        mutex_unlock(&loop_devices_mutex);
1576
1577        *part = 0;
1578        return kobj;
1579}
1580
1581static int __init loop_init(void)
1582{
1583        int i, nr;
1584        unsigned long range;
1585        struct loop_device *lo, *next;
1586
1587        /*
1588         * loop module now has a feature to instantiate underlying device
1589         * structure on-demand, provided that there is an access dev node.
1590         * However, this will not work well with user space tool that doesn't
1591         * know about such "feature".  In order to not break any existing
1592         * tool, we do the following:
1593         *
1594         * (1) if max_loop is specified, create that many upfront, and this
1595         *     also becomes a hard limit.
1596         * (2) if max_loop is not specified, create 8 loop device on module
1597         *     load, user can further extend loop device by create dev node
1598         *     themselves and have kernel automatically instantiate actual
1599         *     device on-demand.
1600         */
1601
1602        part_shift = 0;
1603        if (max_part > 0)
1604                part_shift = fls(max_part);
1605
1606        if (max_loop > 1UL << (MINORBITS - part_shift))
1607                return -EINVAL;
1608
1609        if (max_loop) {
1610                nr = max_loop;
1611                range = max_loop;
1612        } else {
1613                nr = 8;
1614                range = 1UL << (MINORBITS - part_shift);
1615        }
1616
1617        if (register_blkdev(LOOP_MAJOR, "loop"))
1618                return -EIO;
1619
1620        for (i = 0; i < nr; i++) {
1621                lo = loop_alloc(i);
1622                if (!lo)
1623                        goto Enomem;
1624                list_add_tail(&lo->lo_list, &loop_devices);
1625        }
1626
1627        /* point of no return */
1628
1629        list_for_each_entry(lo, &loop_devices, lo_list)
1630                add_disk(lo->lo_disk);
1631
1632        blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1633                                  THIS_MODULE, loop_probe, NULL, NULL);
1634
1635        printk(KERN_INFO "loop: module loaded\n");
1636        return 0;
1637
1638Enomem:
1639        printk(KERN_INFO "loop: out of memory\n");
1640
1641        list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1642                loop_free(lo);
1643
1644        unregister_blkdev(LOOP_MAJOR, "loop");
1645        return -ENOMEM;
1646}
1647
1648static void __exit loop_exit(void)
1649{
1650        unsigned long range;
1651        struct loop_device *lo, *next;
1652
1653        range = max_loop ? max_loop :  1UL << (MINORBITS - part_shift);
1654
1655        list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1656                loop_del_one(lo);
1657
1658        blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
1659        unregister_blkdev(LOOP_MAJOR, "loop");
1660}
1661
/* Register the driver's init and exit entry points. */
module_init(loop_init);
module_exit(loop_exit);
1664
#ifndef MODULE
/*
 * Handler for the "max_loop=" kernel command line option (built-in case
 * only): parse the value into max_loop.  The base is auto-detected by
 * simple_strtol() (base argument 0) and the input is not validated here.
 * Always returns 1.
 */
static int __init max_loop_setup(char *str)
{
        max_loop = simple_strtol(str, NULL, 0);
        return 1;
}

__setup("max_loop=", max_loop_setup);
#endif
1674