linux/arch/um/drivers/ubd_kern.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2018 Cambridge Greys Ltd
   3 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   4 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   5 * Licensed under the GPL
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/cdrom.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/ctype.h>
  34#include <linux/slab.h>
  35#include <linux/vmalloc.h>
  36#include <linux/platform_device.h>
  37#include <linux/scatterlist.h>
  38#include <asm/tlbflush.h>
  39#include <kern_util.h>
  40#include "mconsole_kern.h"
  41#include <init.h>
  42#include <irq_kern.h>
  43#include "ubd.h"
  44#include <os.h>
  45#include "cow.h"
  46
  47/* Max request size is determined by sector mask - 32K */
  48#define UBD_MAX_REQUEST (8 * sizeof(long))
  49
/*
 * Descriptor for a single I/O request.  ubd_handler() reads pointers to
 * these descriptors from thread_fd, completes ->req according to ->error,
 * ->buffer and ->length, and then kfree()s the descriptor.
 */
struct io_thread_req {
        struct request *req;            /* block request completed in ubd_handler() */
        int fds[2];                     /* NOTE(review): presumably device fd and backing fd - confirm */
        unsigned long offsets[2];
        unsigned long long offset;
        unsigned long length;           /* passed to blk_update_request() on completion */
        char *buffer;                   /* NULL: ubd_handler() ends the request in one go */
        int sectorsize;
        unsigned long sector_mask;
        unsigned long long cow_offset;
        unsigned long bitmap_words[2];
        int error;                      /* status handed to blk_mq_end_request() */
};
  63
  64
/*
 * Buffer of completed-request pointers filled by bulk_req_safe_read() on
 * behalf of ubd_handler(), plus the storage for a partially read pointer
 * (irq_remainder) and the number of valid bytes in it.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * Same triple as above.
 * NOTE(review): presumably used by the I/O thread side of the pipe - the
 * user is not visible in this part of the file, confirm.
 */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
  72
  73
  74
  75static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  76{
  77        __u64 n;
  78        int bits, off;
  79
  80        bits = sizeof(data[0]) * 8;
  81        n = bit / bits;
  82        off = bit % bits;
  83        return (data[n] & (1 << off)) != 0;
  84}
  85
  86static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  87{
  88        __u64 n;
  89        int bits, off;
  90
  91        bits = sizeof(data[0]) * 8;
  92        n = bit / bits;
  93        off = bit % bits;
  94        data[n] |= (1 << off);
  95}
  96/*End stuff from ubd_user.h*/
  97
/* Name given to the platform device registered in ubd_disk_register(). */
#define DRIVER_NAME "uml-blkdev"

/* ubd_lock guards the ubd_devs[] table and fake_major (see their comments). */
static DEFINE_MUTEX(ubd_lock);
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */

/* Block device callbacks, defined further down in the file. */
static int ubd_open(struct block_device *bdev, fmode_t mode);
static void ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
                     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of slots in ubd_devs[]; valid unit numbers are 0 .. MAX_DEV - 1. */
#define MAX_DEV (16)

/* Operations table installed on every gendisk by ubd_disk_register(). */
static const struct block_device_operations ubd_blops = {
        .owner          = THIS_MODULE,
        .open           = ubd_open,
        .release        = ubd_release,
        .ioctl          = ubd_ioctl,
        .getgeo         = ubd_getgeo,
};
 118
 119/* Protected by ubd_lock */
 120static int fake_major = UBD_MAJOR;
 121static struct gendisk *ubd_gendisk[MAX_DEV];
 122static struct gendisk *fake_gendisk[MAX_DEV];
 123
/*
 * Default host open flags for a ubd file: read/write, with the .s flag
 * (the one the 's' device flag and "ubd=sync" turn on) preset only when
 * CONFIG_BLK_DEV_UBD_SYNC is enabled.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
                                         .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
                                         .cl = 1 })
#endif
/* Starting point for the per-device flags parsed in ubd_setup_common(). */
static struct openflags global_openflags = OPEN_FLAGS;
 132
/*
 * Copy-on-write state of a device.  ubd_open_dev() and ubd_close_dev() only
 * touch ->fd and ->bitmap when ->file is non-NULL.
 */
struct cow {
        /* backing file name */
        char *file;
        /* backing file fd */
        int fd;
        /* vmalloc()ed in ubd_open_dev(), bitmap_len bytes, read from the COW file */
        unsigned long *bitmap;
        unsigned long bitmap_len;
        /* offset in the COW file from which the bitmap is read */
        int bitmap_offset;
        /* filled in by cow_sizes()/init_cow_file() */
        int data_offset;
};
 143
 144#define MAX_SG 64
 145
/* Per-unit device state; one entry of ubd_devs[] per unit number. */
struct ubd {
        /* name (and fd, below) of the file opened for writing, either the
         * backing or the cow file. */
        char *file;
        int count;              /* NOTE(review): presumably the open count - confirm */
        int fd;
        __u64 size;             /* set in ubd_add() and rounded with ROUND_BLOCK() */
        /* flags parsed from the command line by ubd_setup_common() */
        struct openflags boot_openflags;
        /* working copy made in ubd_open_dev(); open_ubd_file() may clear .w */
        struct openflags openflags;
        unsigned shared:1;      /* 'c' flag: skip host file locking */
        unsigned no_cow:1;      /* 'd' flag: do not look for a COW header */
        unsigned no_trim:1;     /* 't' flag: do not enable discard */
        struct cow cow;
        struct platform_device pdev;
        struct request_queue *queue;
        struct blk_mq_tag_set tag_set;
        spinlock_t lock;
};
 164
/* Initializer for an unused struct cow: no file, no fd, no bitmap. */
#define DEFAULT_COW { \
        .file =                 NULL, \
        .fd =                   -1,     \
        .bitmap =               NULL, \
        .bitmap_offset =        0, \
        .data_offset =          0, \
}

/*
 * Initializer for an unconfigured struct ubd.  ubd_setup_common() treats a
 * slot whose ->file is NULL as free; ubd_device_release() restores a slot
 * to this state.
 */
#define DEFAULT_UBD { \
        .file =                 NULL, \
        .count =                0, \
        .fd =                   -1, \
        .size =                 -1, \
        .boot_openflags =       OPEN_FLAGS, \
        .openflags =            OPEN_FLAGS, \
        .no_cow =               0, \
        .no_trim =              0, \
        .shared =               0, \
        .cow =                  DEFAULT_COW, \
        .lock =                 __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}

/* Protected by ubd_lock */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 189
 190/* Only changed by fake_ide_setup which is a setup */
 191static int fake_ide = 0;
 192static struct proc_dir_entry *proc_ide_root = NULL;
 193static struct proc_dir_entry *proc_ide = NULL;
 194
 195static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 196                                 const struct blk_mq_queue_data *bd);
 197
 198static void make_proc_ide(void)
 199{
 200        proc_ide_root = proc_mkdir("ide", NULL);
 201        proc_ide = proc_mkdir("ide0", proc_ide_root);
 202}
 203
 204static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 205{
 206        seq_puts(m, "disk\n");
 207        return 0;
 208}
 209
 210static void make_ide_entries(const char *dev_name)
 211{
 212        struct proc_dir_entry *dir, *ent;
 213        char name[64];
 214
 215        if(proc_ide_root == NULL) make_proc_ide();
 216
 217        dir = proc_mkdir(dev_name, proc_ide);
 218        if(!dir) return;
 219
 220        ent = proc_create_single("media", S_IRUGO, dir,
 221                        fake_ide_media_proc_show);
 222        if(!ent) return;
 223        snprintf(name, sizeof(name), "ide0/%s", dev_name);
 224        proc_symlink(dev_name, proc_ide_root, name);
 225}
 226
 227static int fake_ide_setup(char *str)
 228{
 229        fake_ide = 1;
 230        return 1;
 231}
 232
 233__setup("fake_ide", fake_ide_setup);
 234
 235__uml_help(fake_ide_setup,
 236"fake_ide\n"
 237"    Create ide0 entries that map onto ubd devices.\n\n"
 238);
 239
 240static int parse_unit(char **ptr)
 241{
 242        char *str = *ptr, *end;
 243        int n = -1;
 244
 245        if(isdigit(*str)) {
 246                n = simple_strtoul(str, &end, 0);
 247                if(end == str)
 248                        return -1;
 249                *ptr = end;
 250        }
 251        else if (('a' <= *str) && (*str <= 'z')) {
 252                n = *str - 'a';
 253                str++;
 254                *ptr = str;
 255        }
 256        return n;
 257}
 258
 259/* If *index_out == -1 at exit, the passed option was a general one;
 260 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 261 * should not be freed on exit.
 262 */
 263static int ubd_setup_common(char *str, int *index_out, char **error_out)
 264{
 265        struct ubd *ubd_dev;
 266        struct openflags flags = global_openflags;
 267        char *backing_file;
 268        int n, err = 0, i;
 269
 270        if(index_out) *index_out = -1;
 271        n = *str;
 272        if(n == '='){
 273                char *end;
 274                int major;
 275
 276                str++;
 277                if(!strcmp(str, "sync")){
 278                        global_openflags = of_sync(global_openflags);
 279                        return err;
 280                }
 281
 282                err = -EINVAL;
 283                major = simple_strtoul(str, &end, 0);
 284                if((*end != '\0') || (end == str)){
 285                        *error_out = "Didn't parse major number";
 286                        return err;
 287                }
 288
 289                mutex_lock(&ubd_lock);
 290                if (fake_major != UBD_MAJOR) {
 291                        *error_out = "Can't assign a fake major twice";
 292                        goto out1;
 293                }
 294
 295                fake_major = major;
 296
 297                printk(KERN_INFO "Setting extra ubd major number to %d\n",
 298                       major);
 299                err = 0;
 300        out1:
 301                mutex_unlock(&ubd_lock);
 302                return err;
 303        }
 304
 305        n = parse_unit(&str);
 306        if(n < 0){
 307                *error_out = "Couldn't parse device number";
 308                return -EINVAL;
 309        }
 310        if(n >= MAX_DEV){
 311                *error_out = "Device number out of range";
 312                return 1;
 313        }
 314
 315        err = -EBUSY;
 316        mutex_lock(&ubd_lock);
 317
 318        ubd_dev = &ubd_devs[n];
 319        if(ubd_dev->file != NULL){
 320                *error_out = "Device is already configured";
 321                goto out;
 322        }
 323
 324        if (index_out)
 325                *index_out = n;
 326
 327        err = -EINVAL;
 328        for (i = 0; i < sizeof("rscdt="); i++) {
 329                switch (*str) {
 330                case 'r':
 331                        flags.w = 0;
 332                        break;
 333                case 's':
 334                        flags.s = 1;
 335                        break;
 336                case 'd':
 337                        ubd_dev->no_cow = 1;
 338                        break;
 339                case 'c':
 340                        ubd_dev->shared = 1;
 341                        break;
 342                case 't':
 343                        ubd_dev->no_trim = 1;
 344                        break;
 345                case '=':
 346                        str++;
 347                        goto break_loop;
 348                default:
 349                        *error_out = "Expected '=' or flag letter "
 350                                "(r, s, c, t or d)";
 351                        goto out;
 352                }
 353                str++;
 354        }
 355
 356        if (*str == '=')
 357                *error_out = "Too many flags specified";
 358        else
 359                *error_out = "Missing '='";
 360        goto out;
 361
 362break_loop:
 363        backing_file = strchr(str, ',');
 364
 365        if (backing_file == NULL)
 366                backing_file = strchr(str, ':');
 367
 368        if(backing_file != NULL){
 369                if(ubd_dev->no_cow){
 370                        *error_out = "Can't specify both 'd' and a cow file";
 371                        goto out;
 372                }
 373                else {
 374                        *backing_file = '\0';
 375                        backing_file++;
 376                }
 377        }
 378        err = 0;
 379        ubd_dev->file = str;
 380        ubd_dev->cow.file = backing_file;
 381        ubd_dev->boot_openflags = flags;
 382out:
 383        mutex_unlock(&ubd_lock);
 384        return err;
 385}
 386
 387static int ubd_setup(char *str)
 388{
 389        char *error;
 390        int err;
 391
 392        err = ubd_setup_common(str, NULL, &error);
 393        if(err)
 394                printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 395                       "%s\n", str, error);
 396        return 1;
 397}
 398
 399__setup("ubd", ubd_setup);
 400__uml_help(ubd_setup,
 401"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 402"    This is used to associate a device with a file in the underlying\n"
 403"    filesystem. When specifying two filenames, the first one is the\n"
 404"    COW name and the second is the backing file name. As separator you can\n"
 405"    use either a ':' or a ',': the first one allows writing things like;\n"
 406"       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 407"    while with a ',' the shell would not expand the 2nd '~'.\n"
 408"    When using only one filename, UML will detect whether to treat it like\n"
 409"    a COW file or a backing file. To override this detection, add the 'd'\n"
 410"    flag:\n"
 411"       ubd0d=BackingFile\n"
 412"    Usually, there is a filesystem in the file, but \n"
 413"    that's not required. Swap devices containing swap files can be\n"
 414"    specified like this. Also, a file which doesn't contain a\n"
 415"    filesystem can have its contents read in the virtual \n"
 416"    machine by running 'dd' on the device. <n> must be in the range\n"
 417"    0 to 7. Appending an 'r' to the number will cause that device\n"
 418"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 419"    an 's' will cause data to be written to disk on the host immediately.\n"
 420"    'c' will cause the device to be treated as being shared between multiple\n"
 421"    UMLs and file locking will be turned off - this is appropriate for a\n"
 422"    cluster filesystem and inappropriate at almost all other times.\n\n"
 423"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 424);
 425
 426static int udb_setup(char *str)
 427{
 428        printk("udb%s specified on command line is almost certainly a ubd -> "
 429               "udb TYPO\n", str);
 430        return 1;
 431}
 432
 433__setup("udb", udb_setup);
 434__uml_help(udb_setup,
 435"udb\n"
 436"    This option is here solely to catch ubd -> udb typos, which can be\n"
 437"    to impossible to catch visually unless you specifically look for\n"
 438"    them.  The only result of any option starting with 'udb' is an error\n"
 439"    in the boot output.\n\n"
 440);
 441
 442/* Only changed by ubd_init, which is an initcall. */
 443static int thread_fd = -1;
 444
 445/* Function to read several request pointers at a time
 446* handling fractional reads if (and as) needed
 447*/
 448
 449static int bulk_req_safe_read(
 450        int fd,
 451        struct io_thread_req * (*request_buffer)[],
 452        struct io_thread_req **remainder,
 453        int *remainder_size,
 454        int max_recs
 455        )
 456{
 457        int n = 0;
 458        int res = 0;
 459
 460        if (*remainder_size > 0) {
 461                memmove(
 462                        (char *) request_buffer,
 463                        (char *) remainder, *remainder_size
 464                );
 465                n = *remainder_size;
 466        }
 467
 468        res = os_read_file(
 469                        fd,
 470                        ((char *) request_buffer) + *remainder_size,
 471                        sizeof(struct io_thread_req *)*max_recs
 472                                - *remainder_size
 473                );
 474        if (res > 0) {
 475                n += res;
 476                if ((n % sizeof(struct io_thread_req *)) > 0) {
 477                        /*
 478                        * Read somehow returned not a multiple of dword
 479                        * theoretically possible, but never observed in the
 480                        * wild, so read routine must be able to handle it
 481                        */
 482                        *remainder_size = n % sizeof(struct io_thread_req *);
 483                        WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 484                        memmove(
 485                                remainder,
 486                                ((char *) request_buffer) +
 487                                        (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 488                                *remainder_size
 489                        );
 490                        n = n - *remainder_size;
 491                }
 492        } else {
 493                n = res;
 494        }
 495        return n;
 496}
 497
 498/* Called without dev->lock held, and only in interrupt context. */
 499static void ubd_handler(void)
 500{
 501        int n;
 502        int count;
 503
 504        while(1){
 505                n = bulk_req_safe_read(
 506                        thread_fd,
 507                        irq_req_buffer,
 508                        &irq_remainder,
 509                        &irq_remainder_size,
 510                        UBD_REQ_BUFFER_SIZE
 511                );
 512                if (n < 0) {
 513                        if(n == -EAGAIN)
 514                                break;
 515                        printk(KERN_ERR "spurious interrupt in ubd_handler, "
 516                               "err = %d\n", -n);
 517                        return;
 518                }
 519                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 520                        struct io_thread_req *io_req = (*irq_req_buffer)[count];
 521
 522                        if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 523                                blk_queue_max_discard_sectors(io_req->req->q, 0);
 524                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 525                                blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 526                        }
 527                        if ((io_req->error) || (io_req->buffer == NULL))
 528                                blk_mq_end_request(io_req->req, io_req->error);
 529                        else {
 530                                if (!blk_update_request(io_req->req, io_req->error, io_req->length))
 531                                        __blk_mq_end_request(io_req->req, io_req->error);
 532                        }
 533                        kfree(io_req);
 534                }
 535        }
 536}
 537
 538static irqreturn_t ubd_intr(int irq, void *dev)
 539{
 540        ubd_handler();
 541        return IRQ_HANDLED;
 542}
 543
 544/* Only changed by ubd_init, which is an initcall. */
 545static int io_pid = -1;
 546
 547static void kill_io_thread(void)
 548{
 549        if(io_pid != -1)
 550                os_kill_process(io_pid, 1);
 551}
 552
 553__uml_exitcall(kill_io_thread);
 554
 555static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 556{
 557        char *file;
 558        int fd;
 559        int err;
 560
 561        __u32 version;
 562        __u32 align;
 563        char *backing_file;
 564        time_t mtime;
 565        unsigned long long size;
 566        int sector_size;
 567        int bitmap_offset;
 568
 569        if (ubd_dev->file && ubd_dev->cow.file) {
 570                file = ubd_dev->cow.file;
 571
 572                goto out;
 573        }
 574
 575        fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 576        if (fd < 0)
 577                return fd;
 578
 579        err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 580                &mtime, &size, &sector_size, &align, &bitmap_offset);
 581        os_close_file(fd);
 582
 583        if(err == -EINVAL)
 584                file = ubd_dev->file;
 585        else
 586                file = backing_file;
 587
 588out:
 589        return os_file_size(file, size_out);
 590}
 591
 592static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 593{
 594        int err;
 595
 596        err = os_pread_file(fd, buf, len, offset);
 597        if (err < 0)
 598                return err;
 599
 600        return 0;
 601}
 602
 603static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 604{
 605        unsigned long modtime;
 606        unsigned long long actual;
 607        int err;
 608
 609        err = os_file_modtime(file, &modtime);
 610        if (err < 0) {
 611                printk(KERN_ERR "Failed to get modification time of backing "
 612                       "file \"%s\", err = %d\n", file, -err);
 613                return err;
 614        }
 615
 616        err = os_file_size(file, &actual);
 617        if (err < 0) {
 618                printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 619                       "err = %d\n", file, -err);
 620                return err;
 621        }
 622
 623        if (actual != size) {
 624                /*__u64 can be a long on AMD64 and with %lu GCC complains; so
 625                 * the typecast.*/
 626                printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 627                       "vs backing file\n", (unsigned long long) size, actual);
 628                return -EINVAL;
 629        }
 630        if (modtime != mtime) {
 631                printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 632                       "backing file\n", mtime, modtime);
 633                return -EINVAL;
 634        }
 635        return 0;
 636}
 637
 638static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 639{
 640        struct uml_stat buf1, buf2;
 641        int err;
 642
 643        if (from_cmdline == NULL)
 644                return 0;
 645        if (!strcmp(from_cmdline, from_cow))
 646                return 0;
 647
 648        err = os_stat_file(from_cmdline, &buf1);
 649        if (err < 0) {
 650                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 651                       -err);
 652                return 0;
 653        }
 654        err = os_stat_file(from_cow, &buf2);
 655        if (err < 0) {
 656                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 657                       -err);
 658                return 1;
 659        }
 660        if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 661                return 0;
 662
 663        printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 664               "\"%s\" specified in COW header of \"%s\"\n",
 665               from_cmdline, from_cow, cow);
 666        return 1;
 667}
 668
 669static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 670                  char **backing_file_out, int *bitmap_offset_out,
 671                  unsigned long *bitmap_len_out, int *data_offset_out,
 672                  int *create_cow_out)
 673{
 674        time_t mtime;
 675        unsigned long long size;
 676        __u32 version, align;
 677        char *backing_file;
 678        int fd, err, sectorsize, asked_switch, mode = 0644;
 679
 680        fd = os_open_file(file, *openflags, mode);
 681        if (fd < 0) {
 682                if ((fd == -ENOENT) && (create_cow_out != NULL))
 683                        *create_cow_out = 1;
 684                if (!openflags->w ||
 685                    ((fd != -EROFS) && (fd != -EACCES)))
 686                        return fd;
 687                openflags->w = 0;
 688                fd = os_open_file(file, *openflags, mode);
 689                if (fd < 0)
 690                        return fd;
 691        }
 692
 693        if (shared)
 694                printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 695        else {
 696                err = os_lock_file(fd, openflags->w);
 697                if (err < 0) {
 698                        printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 699                               file, -err);
 700                        goto out_close;
 701                }
 702        }
 703
 704        /* Successful return case! */
 705        if (backing_file_out == NULL)
 706                return fd;
 707
 708        err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 709                              &size, &sectorsize, &align, bitmap_offset_out);
 710        if (err && (*backing_file_out != NULL)) {
 711                printk(KERN_ERR "Failed to read COW header from COW file "
 712                       "\"%s\", errno = %d\n", file, -err);
 713                goto out_close;
 714        }
 715        if (err)
 716                return fd;
 717
 718        asked_switch = path_requires_switch(*backing_file_out, backing_file,
 719                                            file);
 720
 721        /* Allow switching only if no mismatch. */
 722        if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 723                                                   mtime)) {
 724                printk(KERN_ERR "Switching backing file to '%s'\n",
 725                       *backing_file_out);
 726                err = write_cow_header(file, fd, *backing_file_out,
 727                                       sectorsize, align, &size);
 728                if (err) {
 729                        printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 730                        goto out_close;
 731                }
 732        } else {
 733                *backing_file_out = backing_file;
 734                err = backing_file_mismatch(*backing_file_out, size, mtime);
 735                if (err)
 736                        goto out_close;
 737        }
 738
 739        cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 740                  bitmap_len_out, data_offset_out);
 741
 742        return fd;
 743 out_close:
 744        os_close_file(fd);
 745        return err;
 746}
 747
 748static int create_cow_file(char *cow_file, char *backing_file,
 749                    struct openflags flags,
 750                    int sectorsize, int alignment, int *bitmap_offset_out,
 751                    unsigned long *bitmap_len_out, int *data_offset_out)
 752{
 753        int err, fd;
 754
 755        flags.c = 1;
 756        fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 757        if (fd < 0) {
 758                err = fd;
 759                printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 760                       cow_file, -err);
 761                goto out;
 762        }
 763
 764        err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 765                            bitmap_offset_out, bitmap_len_out,
 766                            data_offset_out);
 767        if (!err)
 768                return fd;
 769        os_close_file(fd);
 770 out:
 771        return err;
 772}
 773
 774static void ubd_close_dev(struct ubd *ubd_dev)
 775{
 776        os_close_file(ubd_dev->fd);
 777        if(ubd_dev->cow.file == NULL)
 778                return;
 779
 780        os_close_file(ubd_dev->cow.fd);
 781        vfree(ubd_dev->cow.bitmap);
 782        ubd_dev->cow.bitmap = NULL;
 783}
 784
 785static int ubd_open_dev(struct ubd *ubd_dev)
 786{
 787        struct openflags flags;
 788        char **back_ptr;
 789        int err, create_cow, *create_ptr;
 790        int fd;
 791
 792        ubd_dev->openflags = ubd_dev->boot_openflags;
 793        create_cow = 0;
 794        create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 795        back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 796
 797        fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 798                                back_ptr, &ubd_dev->cow.bitmap_offset,
 799                                &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 800                                create_ptr);
 801
 802        if((fd == -ENOENT) && create_cow){
 803                fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 804                                          ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 805                                          &ubd_dev->cow.bitmap_offset,
 806                                          &ubd_dev->cow.bitmap_len,
 807                                          &ubd_dev->cow.data_offset);
 808                if(fd >= 0){
 809                        printk(KERN_INFO "Creating \"%s\" as COW file for "
 810                               "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 811                }
 812        }
 813
 814        if(fd < 0){
 815                printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 816                       -fd);
 817                return fd;
 818        }
 819        ubd_dev->fd = fd;
 820
 821        if(ubd_dev->cow.file != NULL){
 822                blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 823
 824                err = -ENOMEM;
 825                ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 826                if(ubd_dev->cow.bitmap == NULL){
 827                        printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 828                        goto error;
 829                }
 830                flush_tlb_kernel_vm();
 831
 832                err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 833                                      ubd_dev->cow.bitmap_offset,
 834                                      ubd_dev->cow.bitmap_len);
 835                if(err < 0)
 836                        goto error;
 837
 838                flags = ubd_dev->openflags;
 839                flags.w = 0;
 840                err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 841                                    NULL, NULL, NULL, NULL);
 842                if(err < 0) goto error;
 843                ubd_dev->cow.fd = err;
 844        }
 845        if (ubd_dev->no_trim == 0) {
 846                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 847                ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 848                blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 849                blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 850                blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 851        }
 852        blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 853        return 0;
 854 error:
 855        os_close_file(ubd_dev->fd);
 856        return err;
 857}
 858
 859static void ubd_device_release(struct device *dev)
 860{
 861        struct ubd *ubd_dev = dev_get_drvdata(dev);
 862
 863        blk_cleanup_queue(ubd_dev->queue);
 864        blk_mq_free_tag_set(&ubd_dev->tag_set);
 865        *ubd_dev = ((struct ubd) DEFAULT_UBD);
 866}
 867
 868static int ubd_disk_register(int major, u64 size, int unit,
 869                             struct gendisk **disk_out)
 870{
 871        struct device *parent = NULL;
 872        struct gendisk *disk;
 873
 874        disk = alloc_disk(1 << UBD_SHIFT);
 875        if(disk == NULL)
 876                return -ENOMEM;
 877
 878        disk->major = major;
 879        disk->first_minor = unit << UBD_SHIFT;
 880        disk->fops = &ubd_blops;
 881        set_capacity(disk, size / 512);
 882        if (major == UBD_MAJOR)
 883                sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 884        else
 885                sprintf(disk->disk_name, "ubd_fake%d", unit);
 886
 887        /* sysfs register (not for ide fake devices) */
 888        if (major == UBD_MAJOR) {
 889                ubd_devs[unit].pdev.id   = unit;
 890                ubd_devs[unit].pdev.name = DRIVER_NAME;
 891                ubd_devs[unit].pdev.dev.release = ubd_device_release;
 892                dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 893                platform_device_register(&ubd_devs[unit].pdev);
 894                parent = &ubd_devs[unit].pdev.dev;
 895        }
 896
 897        disk->private_data = &ubd_devs[unit];
 898        disk->queue = ubd_devs[unit].queue;
 899        device_add_disk(parent, disk, NULL);
 900
 901        *disk_out = disk;
 902        return 0;
 903}
 904
 905#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 906
/*
 * blk-mq operations for ubd devices; installed into each device's tag set
 * by ubd_add().  Only request submission is implemented.
 */
static const struct blk_mq_ops ubd_mq_ops = {
        .queue_rq = ubd_queue_rq,
};
 910
 911static int ubd_add(int n, char **error_out)
 912{
 913        struct ubd *ubd_dev = &ubd_devs[n];
 914        int err = 0;
 915
 916        if(ubd_dev->file == NULL)
 917                goto out;
 918
 919        err = ubd_file_size(ubd_dev, &ubd_dev->size);
 920        if(err < 0){
 921                *error_out = "Couldn't determine size of device's file";
 922                goto out;
 923        }
 924
 925        ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 926
 927        ubd_dev->tag_set.ops = &ubd_mq_ops;
 928        ubd_dev->tag_set.queue_depth = 64;
 929        ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 930        ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 931        ubd_dev->tag_set.driver_data = ubd_dev;
 932        ubd_dev->tag_set.nr_hw_queues = 1;
 933
 934        err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 935        if (err)
 936                goto out;
 937
 938        ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
 939        if (IS_ERR(ubd_dev->queue)) {
 940                err = PTR_ERR(ubd_dev->queue);
 941                goto out_cleanup_tags;
 942        }
 943
 944        ubd_dev->queue->queuedata = ubd_dev;
 945        blk_queue_write_cache(ubd_dev->queue, true, false);
 946
 947        blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 948        err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 949        if(err){
 950                *error_out = "Failed to register device";
 951                goto out_cleanup_tags;
 952        }
 953
 954        if (fake_major != UBD_MAJOR)
 955                ubd_disk_register(fake_major, ubd_dev->size, n,
 956                                  &fake_gendisk[n]);
 957
 958        /*
 959         * Perhaps this should also be under the "if (fake_major)" above
 960         * using the fake_disk->disk_name
 961         */
 962        if (fake_ide)
 963                make_ide_entries(ubd_gendisk[n]->disk_name);
 964
 965        err = 0;
 966out:
 967        return err;
 968
 969out_cleanup_tags:
 970        blk_mq_free_tag_set(&ubd_dev->tag_set);
 971        if (!(IS_ERR(ubd_dev->queue)))
 972                blk_cleanup_queue(ubd_dev->queue);
 973        goto out;
 974}
 975
 976static int ubd_config(char *str, char **error_out)
 977{
 978        int n, ret;
 979
 980        /* This string is possibly broken up and stored, so it's only
 981         * freed if ubd_setup_common fails, or if only general options
 982         * were set.
 983         */
 984        str = kstrdup(str, GFP_KERNEL);
 985        if (str == NULL) {
 986                *error_out = "Failed to allocate memory";
 987                return -ENOMEM;
 988        }
 989
 990        ret = ubd_setup_common(str, &n, error_out);
 991        if (ret)
 992                goto err_free;
 993
 994        if (n == -1) {
 995                ret = 0;
 996                goto err_free;
 997        }
 998
 999        mutex_lock(&ubd_lock);
1000        ret = ubd_add(n, error_out);
1001        if (ret)
1002                ubd_devs[n].file = NULL;
1003        mutex_unlock(&ubd_lock);
1004
1005out:
1006        return ret;
1007
1008err_free:
1009        kfree(str);
1010        goto out;
1011}
1012
1013static int ubd_get_config(char *name, char *str, int size, char **error_out)
1014{
1015        struct ubd *ubd_dev;
1016        int n, len = 0;
1017
1018        n = parse_unit(&name);
1019        if((n >= MAX_DEV) || (n < 0)){
1020                *error_out = "ubd_get_config : device number out of range";
1021                return -1;
1022        }
1023
1024        ubd_dev = &ubd_devs[n];
1025        mutex_lock(&ubd_lock);
1026
1027        if(ubd_dev->file == NULL){
1028                CONFIG_CHUNK(str, size, len, "", 1);
1029                goto out;
1030        }
1031
1032        CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1033
1034        if(ubd_dev->cow.file != NULL){
1035                CONFIG_CHUNK(str, size, len, ",", 0);
1036                CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1037        }
1038        else CONFIG_CHUNK(str, size, len, "", 1);
1039
1040 out:
1041        mutex_unlock(&ubd_lock);
1042        return len;
1043}
1044
1045static int ubd_id(char **str, int *start_out, int *end_out)
1046{
1047        int n;
1048
1049        n = parse_unit(str);
1050        *start_out = 0;
1051        *end_out = MAX_DEV - 1;
1052        return n;
1053}
1054
1055static int ubd_remove(int n, char **error_out)
1056{
1057        struct gendisk *disk = ubd_gendisk[n];
1058        struct ubd *ubd_dev;
1059        int err = -ENODEV;
1060
1061        mutex_lock(&ubd_lock);
1062
1063        ubd_dev = &ubd_devs[n];
1064
1065        if(ubd_dev->file == NULL)
1066                goto out;
1067
1068        /* you cannot remove a open disk */
1069        err = -EBUSY;
1070        if(ubd_dev->count > 0)
1071                goto out;
1072
1073        ubd_gendisk[n] = NULL;
1074        if(disk != NULL){
1075                del_gendisk(disk);
1076                put_disk(disk);
1077        }
1078
1079        if(fake_gendisk[n] != NULL){
1080                del_gendisk(fake_gendisk[n]);
1081                put_disk(fake_gendisk[n]);
1082                fake_gendisk[n] = NULL;
1083        }
1084
1085        err = 0;
1086        platform_device_unregister(&ubd_dev->pdev);
1087out:
1088        mutex_unlock(&ubd_lock);
1089        return err;
1090}
1091
/* All these are called by mconsole in process context and without
 * ubd-specific locks.  The structure itself is const except for .list.
 *
 * This is the mconsole device descriptor for "ubd"; it is handed to
 * mconsole_register_dev() by ubd_mc_init().
 */
static struct mc_device ubd_mc = {
        .list           = LIST_HEAD_INIT(ubd_mc.list),
        .name           = "ubd",
        .config         = ubd_config,
        .get_config     = ubd_get_config,
        .id             = ubd_id,
        .remove         = ubd_remove,
};
1103
/* Register the "ubd" device descriptor with mconsole.  Always returns 0. */
static int __init ubd_mc_init(void)
{
        mconsole_register_dev(&ubd_mc);
        return 0;
}

__initcall(ubd_mc_init);
1111
1112static int __init ubd0_init(void)
1113{
1114        struct ubd *ubd_dev = &ubd_devs[0];
1115
1116        mutex_lock(&ubd_lock);
1117        if(ubd_dev->file == NULL)
1118                ubd_dev->file = "root_fs";
1119        mutex_unlock(&ubd_lock);
1120
1121        return 0;
1122}
1123
1124__initcall(ubd0_init);
1125
/*
 * Used in ubd_init, which is an initcall.
 * Platform driver whose name (DRIVER_NAME) is the same one given to the
 * per-unit platform devices in ubd_disk_register().
 */
static struct platform_driver ubd_driver = {
        .driver = {
                .name  = DRIVER_NAME,
        },
};
1132
1133static int __init ubd_init(void)
1134{
1135        char *error;
1136        int i, err;
1137
1138        if (register_blkdev(UBD_MAJOR, "ubd"))
1139                return -1;
1140
1141        if (fake_major != UBD_MAJOR) {
1142                char name[sizeof("ubd_nnn\0")];
1143
1144                snprintf(name, sizeof(name), "ubd_%d", fake_major);
1145                if (register_blkdev(fake_major, "ubd"))
1146                        return -1;
1147        }
1148
1149        irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1150                                       sizeof(struct io_thread_req *),
1151                                       GFP_KERNEL
1152                );
1153        irq_remainder = 0;
1154
1155        if (irq_req_buffer == NULL) {
1156                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1157                return -1;
1158        }
1159        io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1160                                      sizeof(struct io_thread_req *),
1161                                      GFP_KERNEL
1162                );
1163
1164        io_remainder = 0;
1165
1166        if (io_req_buffer == NULL) {
1167                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1168                return -1;
1169        }
1170        platform_driver_register(&ubd_driver);
1171        mutex_lock(&ubd_lock);
1172        for (i = 0; i < MAX_DEV; i++){
1173                err = ubd_add(i, &error);
1174                if(err)
1175                        printk(KERN_ERR "Failed to initialize ubd device %d :"
1176                               "%s\n", i, error);
1177        }
1178        mutex_unlock(&ubd_lock);
1179        return 0;
1180}
1181
1182late_initcall(ubd_init);
1183
1184static int __init ubd_driver_init(void){
1185        unsigned long stack;
1186        int err;
1187
1188        /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1189        if(global_openflags.s){
1190                printk(KERN_INFO "ubd: Synchronous mode\n");
1191                /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1192                 * enough. So use anyway the io thread. */
1193        }
1194        stack = alloc_stack(0, 0);
1195        io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1196                                 &thread_fd);
1197        if(io_pid < 0){
1198                printk(KERN_ERR
1199                       "ubd : Failed to start I/O thread (errno = %d) - "
1200                       "falling back to synchronous I/O\n", -io_pid);
1201                io_pid = -1;
1202                return 0;
1203        }
1204        err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1205                             0, "ubd", ubd_devs);
1206        if(err != 0)
1207                printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1208        return 0;
1209}
1210
1211device_initcall(ubd_driver_init);
1212
1213static int ubd_open(struct block_device *bdev, fmode_t mode)
1214{
1215        struct gendisk *disk = bdev->bd_disk;
1216        struct ubd *ubd_dev = disk->private_data;
1217        int err = 0;
1218
1219        mutex_lock(&ubd_mutex);
1220        if(ubd_dev->count == 0){
1221                err = ubd_open_dev(ubd_dev);
1222                if(err){
1223                        printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1224                               disk->disk_name, ubd_dev->file, -err);
1225                        goto out;
1226                }
1227        }
1228        ubd_dev->count++;
1229        set_disk_ro(disk, !ubd_dev->openflags.w);
1230
1231        /* This should no more be needed. And it didn't work anyway to exclude
1232         * read-write remounting of filesystems.*/
1233        /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1234                if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1235                err = -EROFS;
1236        }*/
1237out:
1238        mutex_unlock(&ubd_mutex);
1239        return err;
1240}
1241
1242static void ubd_release(struct gendisk *disk, fmode_t mode)
1243{
1244        struct ubd *ubd_dev = disk->private_data;
1245
1246        mutex_lock(&ubd_mutex);
1247        if(--ubd_dev->count == 0)
1248                ubd_close_dev(ubd_dev);
1249        mutex_unlock(&ubd_mutex);
1250}
1251
1252static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1253                          __u64 *cow_offset, unsigned long *bitmap,
1254                          __u64 bitmap_offset, unsigned long *bitmap_words,
1255                          __u64 bitmap_len)
1256{
1257        __u64 sector = io_offset >> SECTOR_SHIFT;
1258        int i, update_bitmap = 0;
1259
1260        for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1261                if(cow_mask != NULL)
1262                        ubd_set_bit(i, (unsigned char *) cow_mask);
1263                if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1264                        continue;
1265
1266                update_bitmap = 1;
1267                ubd_set_bit(sector + i, (unsigned char *) bitmap);
1268        }
1269
1270        if(!update_bitmap)
1271                return;
1272
1273        *cow_offset = sector / (sizeof(unsigned long) * 8);
1274
1275        /* This takes care of the case where we're exactly at the end of the
1276         * device, and *cow_offset + 1 is off the end.  So, just back it up
1277         * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1278         * for the original diagnosis.
1279         */
1280        if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1281                                         sizeof(unsigned long)) - 1))
1282                (*cow_offset)--;
1283
1284        bitmap_words[0] = bitmap[*cow_offset];
1285        bitmap_words[1] = bitmap[*cow_offset + 1];
1286
1287        *cow_offset *= sizeof(unsigned long);
1288        *cow_offset += bitmap_offset;
1289}
1290
1291static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1292                       __u64 bitmap_offset, __u64 bitmap_len)
1293{
1294        __u64 sector = req->offset >> SECTOR_SHIFT;
1295        int i;
1296
1297        if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
1298                panic("Operation too long");
1299
1300        if (req_op(req->req) == REQ_OP_READ) {
1301                for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
1302                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1303                                ubd_set_bit(i, (unsigned char *)
1304                                            &req->sector_mask);
1305                }
1306        }
1307        else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1308                           &req->cow_offset, bitmap, bitmap_offset,
1309                           req->bitmap_words, bitmap_len);
1310}
1311
1312static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
1313                u64 off, struct bio_vec *bvec)
1314{
1315        struct ubd *dev = hctx->queue->queuedata;
1316        struct io_thread_req *io_req;
1317        int ret;
1318
1319        io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC);
1320        if (!io_req)
1321                return -ENOMEM;
1322
1323        io_req->req = req;
1324        if (dev->cow.file)
1325                io_req->fds[0] = dev->cow.fd;
1326        else
1327                io_req->fds[0] = dev->fd;
1328        io_req->error = 0;
1329
1330        if (bvec != NULL) {
1331                io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
1332                io_req->length = bvec->bv_len;
1333        } else {
1334                io_req->buffer = NULL;
1335                io_req->length = blk_rq_bytes(req);
1336        }
1337
1338        io_req->sectorsize = SECTOR_SIZE;
1339        io_req->fds[1] = dev->fd;
1340        io_req->cow_offset = -1;
1341        io_req->offset = off;
1342        io_req->sector_mask = 0;
1343        io_req->offsets[0] = 0;
1344        io_req->offsets[1] = dev->cow.data_offset;
1345
1346        if (dev->cow.file)
1347                cowify_req(io_req, dev->cow.bitmap,
1348                           dev->cow.bitmap_offset, dev->cow.bitmap_len);
1349
1350        ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1351        if (ret != sizeof(io_req)) {
1352                if (ret != -EAGAIN)
1353                        pr_err("write to io thread failed: %d\n", -ret);
1354                kfree(io_req);
1355        }
1356        return ret;
1357}
1358
1359static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
1360{
1361        struct req_iterator iter;
1362        struct bio_vec bvec;
1363        int ret;
1364        u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
1365
1366        rq_for_each_segment(bvec, req, iter) {
1367                ret = ubd_queue_one_vec(hctx, req, off, &bvec);
1368                if (ret < 0)
1369                        return ret;
1370                off += bvec.bv_len;
1371        }
1372        return 0;
1373}
1374
1375static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1376                                 const struct blk_mq_queue_data *bd)
1377{
1378        struct ubd *ubd_dev = hctx->queue->queuedata;
1379        struct request *req = bd->rq;
1380        int ret = 0, res = BLK_STS_OK;
1381
1382        blk_mq_start_request(req);
1383
1384        spin_lock_irq(&ubd_dev->lock);
1385
1386        switch (req_op(req)) {
1387        /* operations with no lentgth/offset arguments */
1388        case REQ_OP_FLUSH:
1389                ret = ubd_queue_one_vec(hctx, req, 0, NULL);
1390                break;
1391        case REQ_OP_READ:
1392        case REQ_OP_WRITE:
1393                ret = queue_rw_req(hctx, req);
1394                break;
1395        case REQ_OP_DISCARD:
1396        case REQ_OP_WRITE_ZEROES:
1397                ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
1398                break;
1399        default:
1400                WARN_ON_ONCE(1);
1401                res = BLK_STS_NOTSUPP;
1402        }
1403
1404        spin_unlock_irq(&ubd_dev->lock);
1405
1406        if (ret < 0)
1407                blk_mq_requeue_request(req, true);
1408
1409        return res;
1410}
1411
1412static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1413{
1414        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1415
1416        geo->heads = 128;
1417        geo->sectors = 32;
1418        geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1419        return 0;
1420}
1421
1422static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1423                     unsigned int cmd, unsigned long arg)
1424{
1425        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1426        u16 ubd_id[ATA_ID_WORDS];
1427
1428        switch (cmd) {
1429                struct cdrom_volctrl volume;
1430        case HDIO_GET_IDENTITY:
1431                memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1432                ubd_id[ATA_ID_CYLS]     = ubd_dev->size / (128 * 32 * 512);
1433                ubd_id[ATA_ID_HEADS]    = 128;
1434                ubd_id[ATA_ID_SECTORS]  = 32;
1435                if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1436                                 sizeof(ubd_id)))
1437                        return -EFAULT;
1438                return 0;
1439
1440        case CDROMVOLREAD:
1441                if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1442                        return -EFAULT;
1443                volume.channel0 = 255;
1444                volume.channel1 = 255;
1445                volume.channel2 = 255;
1446                volume.channel3 = 255;
1447                if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1448                        return -EFAULT;
1449                return 0;
1450        }
1451        return -EINVAL;
1452}
1453
1454static int map_error(int error_code)
1455{
1456        switch (error_code) {
1457        case 0:
1458                return BLK_STS_OK;
1459        case ENOSYS:
1460        case EOPNOTSUPP:
1461                return BLK_STS_NOTSUPP;
1462        case ENOSPC:
1463                return BLK_STS_NOSPC;
1464        }
1465        return BLK_STS_IOERR;
1466}
1467
1468/*
1469 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1470 *
1471 * The following functions are part of UML hypervisor code.
1472 * All functions from here onwards are executed as a helper
1473 * thread and are not allowed to execute any kernel functions.
1474 *
1475 * Any communication must occur strictly via shared memory and IPC.
1476 *
1477 * Do not add printks, locks, kernel memory operations, etc - it
1478 * will result in unpredictable behaviour and/or crashes.
1479 */
1480
1481static int update_bitmap(struct io_thread_req *req)
1482{
1483        int n;
1484
1485        if(req->cow_offset == -1)
1486                return map_error(0);
1487
1488        n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1489                          sizeof(req->bitmap_words), req->cow_offset);
1490        if (n != sizeof(req->bitmap_words))
1491                return map_error(-n);
1492
1493        return map_error(0);
1494}
1495
1496static void do_io(struct io_thread_req *req)
1497{
1498        char *buf = NULL;
1499        unsigned long len;
1500        int n, nsectors, start, end, bit;
1501        __u64 off;
1502
1503        /* FLUSH is really a special case, we cannot "case" it with others */
1504
1505        if (req_op(req->req) == REQ_OP_FLUSH) {
1506                /* fds[0] is always either the rw image or our cow file */
1507                req->error = map_error(-os_sync_file(req->fds[0]));
1508                return;
1509        }
1510
1511        nsectors = req->length / req->sectorsize;
1512        start = 0;
1513        do {
1514                bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1515                end = start;
1516                while((end < nsectors) &&
1517                      (ubd_test_bit(end, (unsigned char *)
1518                                    &req->sector_mask) == bit))
1519                        end++;
1520
1521                off = req->offset + req->offsets[bit] +
1522                        start * req->sectorsize;
1523                len = (end - start) * req->sectorsize;
1524                if (req->buffer != NULL)
1525                        buf = &req->buffer[start * req->sectorsize];
1526
1527                switch (req_op(req->req)) {
1528                case REQ_OP_READ:
1529                        n = 0;
1530                        do {
1531                                buf = &buf[n];
1532                                len -= n;
1533                                n = os_pread_file(req->fds[bit], buf, len, off);
1534                                if (n < 0) {
1535                                        req->error = map_error(-n);
1536                                        return;
1537                                }
1538                        } while((n < len) && (n != 0));
1539                        if (n < len) memset(&buf[n], 0, len - n);
1540                        break;
1541                case REQ_OP_WRITE:
1542                        n = os_pwrite_file(req->fds[bit], buf, len, off);
1543                        if(n != len){
1544                                req->error = map_error(-n);
1545                                return;
1546                        }
1547                        break;
1548                case REQ_OP_DISCARD:
1549                case REQ_OP_WRITE_ZEROES:
1550                        n = os_falloc_punch(req->fds[bit], off, len);
1551                        if (n) {
1552                                req->error = map_error(-n);
1553                                return;
1554                        }
1555                        break;
1556                default:
1557                        WARN_ON_ONCE(1);
1558                        req->error = BLK_STS_NOTSUPP;
1559                        return;
1560                }
1561
1562                start = end;
1563        } while(start < nsectors);
1564
1565        req->error = update_bitmap(req);
1566}
1567
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 * The I/O thread reads request pointers from this descriptor and writes
 * the completed ones back to it (see io_thread).
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request it
 * processes. XXX: currently unused otherwise.
 */
static int io_count = 0;
1575
1576int io_thread(void *arg)
1577{
1578        int n, count, written, res;
1579
1580        os_fix_helper_signals();
1581
1582        while(1){
1583                n = bulk_req_safe_read(
1584                        kernel_fd,
1585                        io_req_buffer,
1586                        &io_remainder,
1587                        &io_remainder_size,
1588                        UBD_REQ_BUFFER_SIZE
1589                );
1590                if (n < 0) {
1591                        if (n == -EAGAIN) {
1592                                ubd_read_poll(-1);
1593                                continue;
1594                        }
1595                }
1596
1597                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1598                        io_count++;
1599                        do_io((*io_req_buffer)[count]);
1600                }
1601
1602                written = 0;
1603
1604                do {
1605                        res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1606                        if (res >= 0) {
1607                                written += res;
1608                        }
1609                        if (written < n) {
1610                                ubd_write_poll(-1);
1611                        }
1612                } while (written < n);
1613        }
1614
1615        return 0;
1616}
1617