linux/arch/um/drivers/ubd_kern.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/major.h>
  31#include <linux/cdrom.h>
  32#include <linux/proc_fs.h>
  33#include <linux/seq_file.h>
  34#include <linux/ctype.h>
  35#include <linux/slab.h>
  36#include <linux/vmalloc.h>
  37#include <linux/platform_device.h>
  38#include <linux/scatterlist.h>
  39#include <asm/tlbflush.h>
  40#include <kern_util.h>
  41#include "mconsole_kern.h"
  42#include <init.h>
  43#include <irq_kern.h>
  44#include "ubd.h"
  45#include <os.h>
  46#include "cow.h"
  47
  48/* Max request size is determined by sector mask - 32K */
  49#define UBD_MAX_REQUEST (8 * sizeof(long))
  50
  51struct io_desc {
  52        char *buffer;
  53        unsigned long length;
  54        unsigned long sector_mask;
  55        unsigned long long cow_offset;
  56        unsigned long bitmap_words[2];
  57};
  58
  59struct io_thread_req {
  60        struct request *req;
  61        int fds[2];
  62        unsigned long offsets[2];
  63        unsigned long long offset;
  64        int sectorsize;
  65        int error;
  66
  67        int desc_cnt;
  68        /* io_desc has to be the last element of the struct */
  69        struct io_desc io_desc[];
  70};
  71
  72
  73static struct io_thread_req * (*irq_req_buffer)[];
  74static struct io_thread_req *irq_remainder;
  75static int irq_remainder_size;
  76
  77static struct io_thread_req * (*io_req_buffer)[];
  78static struct io_thread_req *io_remainder;
  79static int io_remainder_size;
  80
  81
  82
  83static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  84{
  85        __u64 n;
  86        int bits, off;
  87
  88        bits = sizeof(data[0]) * 8;
  89        n = bit / bits;
  90        off = bit % bits;
  91        return (data[n] & (1 << off)) != 0;
  92}
  93
  94static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  95{
  96        __u64 n;
  97        int bits, off;
  98
  99        bits = sizeof(data[0]) * 8;
 100        n = bit / bits;
 101        off = bit % bits;
 102        data[n] |= (1 << off);
 103}
 104/*End stuff from ubd_user.h*/
 105
 106#define DRIVER_NAME "uml-blkdev"
 107
 108static DEFINE_MUTEX(ubd_lock);
 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 110
 111static int ubd_open(struct block_device *bdev, fmode_t mode);
 112static void ubd_release(struct gendisk *disk, fmode_t mode);
 113static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 114                     unsigned int cmd, unsigned long arg);
 115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 116
 117#define MAX_DEV (16)
 118
 119static const struct block_device_operations ubd_blops = {
 120        .owner          = THIS_MODULE,
 121        .open           = ubd_open,
 122        .release        = ubd_release,
 123        .ioctl          = ubd_ioctl,
 124        .compat_ioctl   = blkdev_compat_ptr_ioctl,
 125        .getgeo         = ubd_getgeo,
 126};
 127
 128/* Protected by ubd_lock */
 129static struct gendisk *ubd_gendisk[MAX_DEV];
 130
 131#ifdef CONFIG_BLK_DEV_UBD_SYNC
 132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
 133                                         .cl = 1 })
 134#else
 135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
 136                                         .cl = 1 })
 137#endif
 138static struct openflags global_openflags = OPEN_FLAGS;
 139
 140struct cow {
 141        /* backing file name */
 142        char *file;
 143        /* backing file fd */
 144        int fd;
 145        unsigned long *bitmap;
 146        unsigned long bitmap_len;
 147        int bitmap_offset;
 148        int data_offset;
 149};
 150
 151#define MAX_SG 64
 152
 153struct ubd {
 154        /* name (and fd, below) of the file opened for writing, either the
 155         * backing or the cow file. */
 156        char *file;
 157        char *serial;
 158        int count;
 159        int fd;
 160        __u64 size;
 161        struct openflags boot_openflags;
 162        struct openflags openflags;
 163        unsigned shared:1;
 164        unsigned no_cow:1;
 165        unsigned no_trim:1;
 166        struct cow cow;
 167        struct platform_device pdev;
 168        struct request_queue *queue;
 169        struct blk_mq_tag_set tag_set;
 170        spinlock_t lock;
 171};
 172
 173#define DEFAULT_COW { \
 174        .file =                 NULL, \
 175        .fd =                   -1,     \
 176        .bitmap =               NULL, \
 177        .bitmap_offset =        0, \
 178        .data_offset =          0, \
 179}
 180
 181#define DEFAULT_UBD { \
 182        .file =                 NULL, \
 183        .serial =               NULL, \
 184        .count =                0, \
 185        .fd =                   -1, \
 186        .size =                 -1, \
 187        .boot_openflags =       OPEN_FLAGS, \
 188        .openflags =            OPEN_FLAGS, \
 189        .no_cow =               0, \
 190        .no_trim =              0, \
 191        .shared =               0, \
 192        .cow =                  DEFAULT_COW, \
 193        .lock =                 __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
 194}
 195
 196/* Protected by ubd_lock */
 197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 198
 199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 200                                 const struct blk_mq_queue_data *bd);
 201
 202static int fake_ide_setup(char *str)
 203{
 204        pr_warn("The fake_ide option has been removed\n");
 205        return 1;
 206}
 207__setup("fake_ide", fake_ide_setup);
 208
 209__uml_help(fake_ide_setup,
 210"fake_ide\n"
 211"    Obsolete stub.\n\n"
 212);
 213
 214static int parse_unit(char **ptr)
 215{
 216        char *str = *ptr, *end;
 217        int n = -1;
 218
 219        if(isdigit(*str)) {
 220                n = simple_strtoul(str, &end, 0);
 221                if(end == str)
 222                        return -1;
 223                *ptr = end;
 224        }
 225        else if (('a' <= *str) && (*str <= 'z')) {
 226                n = *str - 'a';
 227                str++;
 228                *ptr = str;
 229        }
 230        return n;
 231}
 232
 233/* If *index_out == -1 at exit, the passed option was a general one;
 234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 235 * should not be freed on exit.
 236 */
 237static int ubd_setup_common(char *str, int *index_out, char **error_out)
 238{
 239        struct ubd *ubd_dev;
 240        struct openflags flags = global_openflags;
 241        char *file, *backing_file, *serial;
 242        int n, err = 0, i;
 243
 244        if(index_out) *index_out = -1;
 245        n = *str;
 246        if(n == '='){
 247                str++;
 248                if(!strcmp(str, "sync")){
 249                        global_openflags = of_sync(global_openflags);
 250                        return err;
 251                }
 252
 253                pr_warn("fake major not supported any more\n");
 254                return 0;
 255        }
 256
 257        n = parse_unit(&str);
 258        if(n < 0){
 259                *error_out = "Couldn't parse device number";
 260                return -EINVAL;
 261        }
 262        if(n >= MAX_DEV){
 263                *error_out = "Device number out of range";
 264                return 1;
 265        }
 266
 267        err = -EBUSY;
 268        mutex_lock(&ubd_lock);
 269
 270        ubd_dev = &ubd_devs[n];
 271        if(ubd_dev->file != NULL){
 272                *error_out = "Device is already configured";
 273                goto out;
 274        }
 275
 276        if (index_out)
 277                *index_out = n;
 278
 279        err = -EINVAL;
 280        for (i = 0; i < sizeof("rscdt="); i++) {
 281                switch (*str) {
 282                case 'r':
 283                        flags.w = 0;
 284                        break;
 285                case 's':
 286                        flags.s = 1;
 287                        break;
 288                case 'd':
 289                        ubd_dev->no_cow = 1;
 290                        break;
 291                case 'c':
 292                        ubd_dev->shared = 1;
 293                        break;
 294                case 't':
 295                        ubd_dev->no_trim = 1;
 296                        break;
 297                case '=':
 298                        str++;
 299                        goto break_loop;
 300                default:
 301                        *error_out = "Expected '=' or flag letter "
 302                                "(r, s, c, t or d)";
 303                        goto out;
 304                }
 305                str++;
 306        }
 307
 308        if (*str == '=')
 309                *error_out = "Too many flags specified";
 310        else
 311                *error_out = "Missing '='";
 312        goto out;
 313
 314break_loop:
 315        file = strsep(&str, ",:");
 316        if (*file == '\0')
 317                file = NULL;
 318
 319        backing_file = strsep(&str, ",:");
 320        if (backing_file && *backing_file == '\0')
 321                backing_file = NULL;
 322
 323        serial = strsep(&str, ",:");
 324        if (serial && *serial == '\0')
 325                serial = NULL;
 326
 327        if (backing_file && ubd_dev->no_cow) {
 328                *error_out = "Can't specify both 'd' and a cow file";
 329                goto out;
 330        }
 331
 332        err = 0;
 333        ubd_dev->file = file;
 334        ubd_dev->cow.file = backing_file;
 335        ubd_dev->serial = serial;
 336        ubd_dev->boot_openflags = flags;
 337out:
 338        mutex_unlock(&ubd_lock);
 339        return err;
 340}
 341
 342static int ubd_setup(char *str)
 343{
 344        char *error;
 345        int err;
 346
 347        err = ubd_setup_common(str, NULL, &error);
 348        if(err)
 349                printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 350                       "%s\n", str, error);
 351        return 1;
 352}
 353
 354__setup("ubd", ubd_setup);
 355__uml_help(ubd_setup,
 356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 357"    This is used to associate a device with a file in the underlying\n"
 358"    filesystem. When specifying two filenames, the first one is the\n"
 359"    COW name and the second is the backing file name. As separator you can\n"
 360"    use either a ':' or a ',': the first one allows writing things like;\n"
 361"       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 362"    while with a ',' the shell would not expand the 2nd '~'.\n"
 363"    When using only one filename, UML will detect whether to treat it like\n"
 364"    a COW file or a backing file. To override this detection, add the 'd'\n"
 365"    flag:\n"
 366"       ubd0d=BackingFile\n"
 367"    Usually, there is a filesystem in the file, but \n"
 368"    that's not required. Swap devices containing swap files can be\n"
 369"    specified like this. Also, a file which doesn't contain a\n"
 370"    filesystem can have its contents read in the virtual \n"
 371"    machine by running 'dd' on the device. <n> must be in the range\n"
 372"    0 to 7. Appending an 'r' to the number will cause that device\n"
 373"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 374"    an 's' will cause data to be written to disk on the host immediately.\n"
 375"    'c' will cause the device to be treated as being shared between multiple\n"
 376"    UMLs and file locking will be turned off - this is appropriate for a\n"
 377"    cluster filesystem and inappropriate at almost all other times.\n\n"
 378"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 379"    An optional device serial number can be exposed using the serial parameter\n"
 380"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
 381"    useful when a unique number should be given to the device. Note when\n"
 382"    specifying a label, the filename2 must be also presented. It can be\n"
 383"    an empty string, in which case the backing file is not used:\n"
 384"       ubd0=File,,Serial\n"
 385);
 386
 387static int udb_setup(char *str)
 388{
 389        printk("udb%s specified on command line is almost certainly a ubd -> "
 390               "udb TYPO\n", str);
 391        return 1;
 392}
 393
 394__setup("udb", udb_setup);
 395__uml_help(udb_setup,
 396"udb\n"
 397"    This option is here solely to catch ubd -> udb typos, which can be\n"
 398"    to impossible to catch visually unless you specifically look for\n"
 399"    them.  The only result of any option starting with 'udb' is an error\n"
 400"    in the boot output.\n\n"
 401);
 402
 403/* Only changed by ubd_init, which is an initcall. */
 404static int thread_fd = -1;
 405
 406/* Function to read several request pointers at a time
 407* handling fractional reads if (and as) needed
 408*/
 409
 410static int bulk_req_safe_read(
 411        int fd,
 412        struct io_thread_req * (*request_buffer)[],
 413        struct io_thread_req **remainder,
 414        int *remainder_size,
 415        int max_recs
 416        )
 417{
 418        int n = 0;
 419        int res = 0;
 420
 421        if (*remainder_size > 0) {
 422                memmove(
 423                        (char *) request_buffer,
 424                        (char *) remainder, *remainder_size
 425                );
 426                n = *remainder_size;
 427        }
 428
 429        res = os_read_file(
 430                        fd,
 431                        ((char *) request_buffer) + *remainder_size,
 432                        sizeof(struct io_thread_req *)*max_recs
 433                                - *remainder_size
 434                );
 435        if (res > 0) {
 436                n += res;
 437                if ((n % sizeof(struct io_thread_req *)) > 0) {
 438                        /*
 439                        * Read somehow returned not a multiple of dword
 440                        * theoretically possible, but never observed in the
 441                        * wild, so read routine must be able to handle it
 442                        */
 443                        *remainder_size = n % sizeof(struct io_thread_req *);
 444                        WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 445                        memmove(
 446                                remainder,
 447                                ((char *) request_buffer) +
 448                                        (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 449                                *remainder_size
 450                        );
 451                        n = n - *remainder_size;
 452                }
 453        } else {
 454                n = res;
 455        }
 456        return n;
 457}
 458
 459/* Called without dev->lock held, and only in interrupt context. */
 460static void ubd_handler(void)
 461{
 462        int n;
 463        int count;
 464
 465        while(1){
 466                n = bulk_req_safe_read(
 467                        thread_fd,
 468                        irq_req_buffer,
 469                        &irq_remainder,
 470                        &irq_remainder_size,
 471                        UBD_REQ_BUFFER_SIZE
 472                );
 473                if (n < 0) {
 474                        if(n == -EAGAIN)
 475                                break;
 476                        printk(KERN_ERR "spurious interrupt in ubd_handler, "
 477                               "err = %d\n", -n);
 478                        return;
 479                }
 480                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 481                        struct io_thread_req *io_req = (*irq_req_buffer)[count];
 482
 483                        if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 484                                blk_queue_max_discard_sectors(io_req->req->q, 0);
 485                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 486                                blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 487                        }
 488                        blk_mq_end_request(io_req->req, io_req->error);
 489                        kfree(io_req);
 490                }
 491        }
 492}
 493
 494static irqreturn_t ubd_intr(int irq, void *dev)
 495{
 496        ubd_handler();
 497        return IRQ_HANDLED;
 498}
 499
 500/* Only changed by ubd_init, which is an initcall. */
 501static int io_pid = -1;
 502
 503static void kill_io_thread(void)
 504{
 505        if(io_pid != -1)
 506                os_kill_process(io_pid, 1);
 507}
 508
 509__uml_exitcall(kill_io_thread);
 510
 511static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 512{
 513        char *file;
 514        int fd;
 515        int err;
 516
 517        __u32 version;
 518        __u32 align;
 519        char *backing_file;
 520        time64_t mtime;
 521        unsigned long long size;
 522        int sector_size;
 523        int bitmap_offset;
 524
 525        if (ubd_dev->file && ubd_dev->cow.file) {
 526                file = ubd_dev->cow.file;
 527
 528                goto out;
 529        }
 530
 531        fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 532        if (fd < 0)
 533                return fd;
 534
 535        err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 536                &mtime, &size, &sector_size, &align, &bitmap_offset);
 537        os_close_file(fd);
 538
 539        if(err == -EINVAL)
 540                file = ubd_dev->file;
 541        else
 542                file = backing_file;
 543
 544out:
 545        return os_file_size(file, size_out);
 546}
 547
 548static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 549{
 550        int err;
 551
 552        err = os_pread_file(fd, buf, len, offset);
 553        if (err < 0)
 554                return err;
 555
 556        return 0;
 557}
 558
 559static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 560{
 561        time64_t modtime;
 562        unsigned long long actual;
 563        int err;
 564
 565        err = os_file_modtime(file, &modtime);
 566        if (err < 0) {
 567                printk(KERN_ERR "Failed to get modification time of backing "
 568                       "file \"%s\", err = %d\n", file, -err);
 569                return err;
 570        }
 571
 572        err = os_file_size(file, &actual);
 573        if (err < 0) {
 574                printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 575                       "err = %d\n", file, -err);
 576                return err;
 577        }
 578
 579        if (actual != size) {
 580                /*__u64 can be a long on AMD64 and with %lu GCC complains; so
 581                 * the typecast.*/
 582                printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 583                       "vs backing file\n", (unsigned long long) size, actual);
 584                return -EINVAL;
 585        }
 586        if (modtime != mtime) {
 587                printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 588                       "backing file\n", mtime, modtime);
 589                return -EINVAL;
 590        }
 591        return 0;
 592}
 593
 594static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 595{
 596        struct uml_stat buf1, buf2;
 597        int err;
 598
 599        if (from_cmdline == NULL)
 600                return 0;
 601        if (!strcmp(from_cmdline, from_cow))
 602                return 0;
 603
 604        err = os_stat_file(from_cmdline, &buf1);
 605        if (err < 0) {
 606                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 607                       -err);
 608                return 0;
 609        }
 610        err = os_stat_file(from_cow, &buf2);
 611        if (err < 0) {
 612                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 613                       -err);
 614                return 1;
 615        }
 616        if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 617                return 0;
 618
 619        printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 620               "\"%s\" specified in COW header of \"%s\"\n",
 621               from_cmdline, from_cow, cow);
 622        return 1;
 623}
 624
 625static int open_ubd_file(char *file, struct openflags *openflags, int shared,
 626                  char **backing_file_out, int *bitmap_offset_out,
 627                  unsigned long *bitmap_len_out, int *data_offset_out,
 628                  int *create_cow_out)
 629{
 630        time64_t mtime;
 631        unsigned long long size;
 632        __u32 version, align;
 633        char *backing_file;
 634        int fd, err, sectorsize, asked_switch, mode = 0644;
 635
 636        fd = os_open_file(file, *openflags, mode);
 637        if (fd < 0) {
 638                if ((fd == -ENOENT) && (create_cow_out != NULL))
 639                        *create_cow_out = 1;
 640                if (!openflags->w ||
 641                    ((fd != -EROFS) && (fd != -EACCES)))
 642                        return fd;
 643                openflags->w = 0;
 644                fd = os_open_file(file, *openflags, mode);
 645                if (fd < 0)
 646                        return fd;
 647        }
 648
 649        if (shared)
 650                printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
 651        else {
 652                err = os_lock_file(fd, openflags->w);
 653                if (err < 0) {
 654                        printk(KERN_ERR "Failed to lock '%s', err = %d\n",
 655                               file, -err);
 656                        goto out_close;
 657                }
 658        }
 659
 660        /* Successful return case! */
 661        if (backing_file_out == NULL)
 662                return fd;
 663
 664        err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
 665                              &size, &sectorsize, &align, bitmap_offset_out);
 666        if (err && (*backing_file_out != NULL)) {
 667                printk(KERN_ERR "Failed to read COW header from COW file "
 668                       "\"%s\", errno = %d\n", file, -err);
 669                goto out_close;
 670        }
 671        if (err)
 672                return fd;
 673
 674        asked_switch = path_requires_switch(*backing_file_out, backing_file,
 675                                            file);
 676
 677        /* Allow switching only if no mismatch. */
 678        if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
 679                                                   mtime)) {
 680                printk(KERN_ERR "Switching backing file to '%s'\n",
 681                       *backing_file_out);
 682                err = write_cow_header(file, fd, *backing_file_out,
 683                                       sectorsize, align, &size);
 684                if (err) {
 685                        printk(KERN_ERR "Switch failed, errno = %d\n", -err);
 686                        goto out_close;
 687                }
 688        } else {
 689                *backing_file_out = backing_file;
 690                err = backing_file_mismatch(*backing_file_out, size, mtime);
 691                if (err)
 692                        goto out_close;
 693        }
 694
 695        cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
 696                  bitmap_len_out, data_offset_out);
 697
 698        return fd;
 699 out_close:
 700        os_close_file(fd);
 701        return err;
 702}
 703
 704static int create_cow_file(char *cow_file, char *backing_file,
 705                    struct openflags flags,
 706                    int sectorsize, int alignment, int *bitmap_offset_out,
 707                    unsigned long *bitmap_len_out, int *data_offset_out)
 708{
 709        int err, fd;
 710
 711        flags.c = 1;
 712        fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 713        if (fd < 0) {
 714                err = fd;
 715                printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 716                       cow_file, -err);
 717                goto out;
 718        }
 719
 720        err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 721                            bitmap_offset_out, bitmap_len_out,
 722                            data_offset_out);
 723        if (!err)
 724                return fd;
 725        os_close_file(fd);
 726 out:
 727        return err;
 728}
 729
 730static void ubd_close_dev(struct ubd *ubd_dev)
 731{
 732        os_close_file(ubd_dev->fd);
 733        if(ubd_dev->cow.file == NULL)
 734                return;
 735
 736        os_close_file(ubd_dev->cow.fd);
 737        vfree(ubd_dev->cow.bitmap);
 738        ubd_dev->cow.bitmap = NULL;
 739}
 740
 741static int ubd_open_dev(struct ubd *ubd_dev)
 742{
 743        struct openflags flags;
 744        char **back_ptr;
 745        int err, create_cow, *create_ptr;
 746        int fd;
 747
 748        ubd_dev->openflags = ubd_dev->boot_openflags;
 749        create_cow = 0;
 750        create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 751        back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 752
 753        fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 754                                back_ptr, &ubd_dev->cow.bitmap_offset,
 755                                &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 756                                create_ptr);
 757
 758        if((fd == -ENOENT) && create_cow){
 759                fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 760                                          ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 761                                          &ubd_dev->cow.bitmap_offset,
 762                                          &ubd_dev->cow.bitmap_len,
 763                                          &ubd_dev->cow.data_offset);
 764                if(fd >= 0){
 765                        printk(KERN_INFO "Creating \"%s\" as COW file for "
 766                               "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 767                }
 768        }
 769
 770        if(fd < 0){
 771                printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 772                       -fd);
 773                return fd;
 774        }
 775        ubd_dev->fd = fd;
 776
 777        if(ubd_dev->cow.file != NULL){
 778                blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 779
 780                err = -ENOMEM;
 781                ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 782                if(ubd_dev->cow.bitmap == NULL){
 783                        printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 784                        goto error;
 785                }
 786                flush_tlb_kernel_vm();
 787
 788                err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 789                                      ubd_dev->cow.bitmap_offset,
 790                                      ubd_dev->cow.bitmap_len);
 791                if(err < 0)
 792                        goto error;
 793
 794                flags = ubd_dev->openflags;
 795                flags.w = 0;
 796                err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 797                                    NULL, NULL, NULL, NULL);
 798                if(err < 0) goto error;
 799                ubd_dev->cow.fd = err;
 800        }
 801        if (ubd_dev->no_trim == 0) {
 802                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 803                ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 804                blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 805                blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 806                blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 807        }
 808        blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 809        return 0;
 810 error:
 811        os_close_file(ubd_dev->fd);
 812        return err;
 813}
 814
 815static void ubd_device_release(struct device *dev)
 816{
 817        struct ubd *ubd_dev = dev_get_drvdata(dev);
 818
 819        blk_mq_free_tag_set(&ubd_dev->tag_set);
 820        *ubd_dev = ((struct ubd) DEFAULT_UBD);
 821}
 822
 823static ssize_t serial_show(struct device *dev,
 824                           struct device_attribute *attr, char *buf)
 825{
 826        struct gendisk *disk = dev_to_disk(dev);
 827        struct ubd *ubd_dev = disk->private_data;
 828
 829        if (!ubd_dev)
 830                return 0;
 831
 832        return sprintf(buf, "%s", ubd_dev->serial);
 833}
 834
 835static DEVICE_ATTR_RO(serial);
 836
 837static struct attribute *ubd_attrs[] = {
 838        &dev_attr_serial.attr,
 839        NULL,
 840};
 841
 842static umode_t ubd_attrs_are_visible(struct kobject *kobj,
 843                                     struct attribute *a, int n)
 844{
 845        return a->mode;
 846}
 847
 848static const struct attribute_group ubd_attr_group = {
 849        .attrs = ubd_attrs,
 850        .is_visible = ubd_attrs_are_visible,
 851};
 852
 853static const struct attribute_group *ubd_attr_groups[] = {
 854        &ubd_attr_group,
 855        NULL,
 856};
 857
 858static int ubd_disk_register(int major, u64 size, int unit,
 859                             struct gendisk *disk)
 860{
 861        disk->major = major;
 862        disk->first_minor = unit << UBD_SHIFT;
 863        disk->minors = 1 << UBD_SHIFT;
 864        disk->fops = &ubd_blops;
 865        set_capacity(disk, size / 512);
 866        sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 867
 868        ubd_devs[unit].pdev.id   = unit;
 869        ubd_devs[unit].pdev.name = DRIVER_NAME;
 870        ubd_devs[unit].pdev.dev.release = ubd_device_release;
 871        dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 872        platform_device_register(&ubd_devs[unit].pdev);
 873
 874        disk->private_data = &ubd_devs[unit];
 875        disk->queue = ubd_devs[unit].queue;
 876        return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
 877}
 878
 879#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 880
 881static const struct blk_mq_ops ubd_mq_ops = {
 882        .queue_rq = ubd_queue_rq,
 883};
 884
 885static int ubd_add(int n, char **error_out)
 886{
 887        struct ubd *ubd_dev = &ubd_devs[n];
 888        struct gendisk *disk;
 889        int err = 0;
 890
 891        if(ubd_dev->file == NULL)
 892                goto out;
 893
 894        err = ubd_file_size(ubd_dev, &ubd_dev->size);
 895        if(err < 0){
 896                *error_out = "Couldn't determine size of device's file";
 897                goto out;
 898        }
 899
 900        ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 901
 902        ubd_dev->tag_set.ops = &ubd_mq_ops;
 903        ubd_dev->tag_set.queue_depth = 64;
 904        ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 905        ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 906        ubd_dev->tag_set.driver_data = ubd_dev;
 907        ubd_dev->tag_set.nr_hw_queues = 1;
 908
 909        err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 910        if (err)
 911                goto out;
 912
 913        disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
 914        if (IS_ERR(disk)) {
 915                err = PTR_ERR(disk);
 916                goto out_cleanup_tags;
 917        }
 918        ubd_dev->queue = disk->queue;
 919
 920        blk_queue_write_cache(ubd_dev->queue, true, false);
 921        blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 922        blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 923        err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
 924        if (err)
 925                goto out_cleanup_disk;
 926
 927        ubd_gendisk[n] = disk;
 928        return 0;
 929
 930out_cleanup_disk:
 931        blk_cleanup_disk(disk);
 932out_cleanup_tags:
 933        blk_mq_free_tag_set(&ubd_dev->tag_set);
 934out:
 935        return err;
 936}
 937
 938static int ubd_config(char *str, char **error_out)
 939{
 940        int n, ret;
 941
 942        /* This string is possibly broken up and stored, so it's only
 943         * freed if ubd_setup_common fails, or if only general options
 944         * were set.
 945         */
 946        str = kstrdup(str, GFP_KERNEL);
 947        if (str == NULL) {
 948                *error_out = "Failed to allocate memory";
 949                return -ENOMEM;
 950        }
 951
 952        ret = ubd_setup_common(str, &n, error_out);
 953        if (ret)
 954                goto err_free;
 955
 956        if (n == -1) {
 957                ret = 0;
 958                goto err_free;
 959        }
 960
 961        mutex_lock(&ubd_lock);
 962        ret = ubd_add(n, error_out);
 963        if (ret)
 964                ubd_devs[n].file = NULL;
 965        mutex_unlock(&ubd_lock);
 966
 967out:
 968        return ret;
 969
 970err_free:
 971        kfree(str);
 972        goto out;
 973}
 974
 975static int ubd_get_config(char *name, char *str, int size, char **error_out)
 976{
 977        struct ubd *ubd_dev;
 978        int n, len = 0;
 979
 980        n = parse_unit(&name);
 981        if((n >= MAX_DEV) || (n < 0)){
 982                *error_out = "ubd_get_config : device number out of range";
 983                return -1;
 984        }
 985
 986        ubd_dev = &ubd_devs[n];
 987        mutex_lock(&ubd_lock);
 988
 989        if(ubd_dev->file == NULL){
 990                CONFIG_CHUNK(str, size, len, "", 1);
 991                goto out;
 992        }
 993
 994        CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 995
 996        if(ubd_dev->cow.file != NULL){
 997                CONFIG_CHUNK(str, size, len, ",", 0);
 998                CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 999        }
1000        else CONFIG_CHUNK(str, size, len, "", 1);
1001
1002 out:
1003        mutex_unlock(&ubd_lock);
1004        return len;
1005}
1006
1007static int ubd_id(char **str, int *start_out, int *end_out)
1008{
1009        int n;
1010
1011        n = parse_unit(str);
1012        *start_out = 0;
1013        *end_out = MAX_DEV - 1;
1014        return n;
1015}
1016
1017static int ubd_remove(int n, char **error_out)
1018{
1019        struct gendisk *disk = ubd_gendisk[n];
1020        struct ubd *ubd_dev;
1021        int err = -ENODEV;
1022
1023        mutex_lock(&ubd_lock);
1024
1025        ubd_dev = &ubd_devs[n];
1026
1027        if(ubd_dev->file == NULL)
1028                goto out;
1029
1030        /* you cannot remove a open disk */
1031        err = -EBUSY;
1032        if(ubd_dev->count > 0)
1033                goto out;
1034
1035        ubd_gendisk[n] = NULL;
1036        if(disk != NULL){
1037                del_gendisk(disk);
1038                blk_cleanup_disk(disk);
1039        }
1040
1041        err = 0;
1042        platform_device_unregister(&ubd_dev->pdev);
1043out:
1044        mutex_unlock(&ubd_lock);
1045        return err;
1046}
1047
1048/* All these are called by mconsole in process context and without
1049 * ubd-specific locks.  The structure itself is const except for .list.
1050 */
1051static struct mc_device ubd_mc = {
1052        .list           = LIST_HEAD_INIT(ubd_mc.list),
1053        .name           = "ubd",
1054        .config         = ubd_config,
1055        .get_config     = ubd_get_config,
1056        .id             = ubd_id,
1057        .remove         = ubd_remove,
1058};
1059
1060static int __init ubd_mc_init(void)
1061{
1062        mconsole_register_dev(&ubd_mc);
1063        return 0;
1064}
1065
1066__initcall(ubd_mc_init);
1067
1068static int __init ubd0_init(void)
1069{
1070        struct ubd *ubd_dev = &ubd_devs[0];
1071
1072        mutex_lock(&ubd_lock);
1073        if(ubd_dev->file == NULL)
1074                ubd_dev->file = "root_fs";
1075        mutex_unlock(&ubd_lock);
1076
1077        return 0;
1078}
1079
1080__initcall(ubd0_init);
1081
1082/* Used in ubd_init, which is an initcall */
1083static struct platform_driver ubd_driver = {
1084        .driver = {
1085                .name  = DRIVER_NAME,
1086        },
1087};
1088
1089static int __init ubd_init(void)
1090{
1091        char *error;
1092        int i, err;
1093
1094        if (register_blkdev(UBD_MAJOR, "ubd"))
1095                return -1;
1096
1097        irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1098                                       sizeof(struct io_thread_req *),
1099                                       GFP_KERNEL
1100                );
1101        irq_remainder = 0;
1102
1103        if (irq_req_buffer == NULL) {
1104                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1105                return -1;
1106        }
1107        io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1108                                      sizeof(struct io_thread_req *),
1109                                      GFP_KERNEL
1110                );
1111
1112        io_remainder = 0;
1113
1114        if (io_req_buffer == NULL) {
1115                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1116                return -1;
1117        }
1118        platform_driver_register(&ubd_driver);
1119        mutex_lock(&ubd_lock);
1120        for (i = 0; i < MAX_DEV; i++){
1121                err = ubd_add(i, &error);
1122                if(err)
1123                        printk(KERN_ERR "Failed to initialize ubd device %d :"
1124                               "%s\n", i, error);
1125        }
1126        mutex_unlock(&ubd_lock);
1127        return 0;
1128}
1129
1130late_initcall(ubd_init);
1131
1132static int __init ubd_driver_init(void){
1133        unsigned long stack;
1134        int err;
1135
1136        /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1137        if(global_openflags.s){
1138                printk(KERN_INFO "ubd: Synchronous mode\n");
1139                /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1140                 * enough. So use anyway the io thread. */
1141        }
1142        stack = alloc_stack(0, 0);
1143        io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1144        if(io_pid < 0){
1145                printk(KERN_ERR
1146                       "ubd : Failed to start I/O thread (errno = %d) - "
1147                       "falling back to synchronous I/O\n", -io_pid);
1148                io_pid = -1;
1149                return 0;
1150        }
1151        err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1152                             0, "ubd", ubd_devs);
1153        if(err < 0)
1154                printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1155        return 0;
1156}
1157
1158device_initcall(ubd_driver_init);
1159
1160static int ubd_open(struct block_device *bdev, fmode_t mode)
1161{
1162        struct gendisk *disk = bdev->bd_disk;
1163        struct ubd *ubd_dev = disk->private_data;
1164        int err = 0;
1165
1166        mutex_lock(&ubd_mutex);
1167        if(ubd_dev->count == 0){
1168                err = ubd_open_dev(ubd_dev);
1169                if(err){
1170                        printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1171                               disk->disk_name, ubd_dev->file, -err);
1172                        goto out;
1173                }
1174        }
1175        ubd_dev->count++;
1176        set_disk_ro(disk, !ubd_dev->openflags.w);
1177
1178        /* This should no more be needed. And it didn't work anyway to exclude
1179         * read-write remounting of filesystems.*/
1180        /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1181                if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1182                err = -EROFS;
1183        }*/
1184out:
1185        mutex_unlock(&ubd_mutex);
1186        return err;
1187}
1188
1189static void ubd_release(struct gendisk *disk, fmode_t mode)
1190{
1191        struct ubd *ubd_dev = disk->private_data;
1192
1193        mutex_lock(&ubd_mutex);
1194        if(--ubd_dev->count == 0)
1195                ubd_close_dev(ubd_dev);
1196        mutex_unlock(&ubd_mutex);
1197}
1198
1199static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1200                          __u64 *cow_offset, unsigned long *bitmap,
1201                          __u64 bitmap_offset, unsigned long *bitmap_words,
1202                          __u64 bitmap_len)
1203{
1204        __u64 sector = io_offset >> SECTOR_SHIFT;
1205        int i, update_bitmap = 0;
1206
1207        for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1208                if(cow_mask != NULL)
1209                        ubd_set_bit(i, (unsigned char *) cow_mask);
1210                if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1211                        continue;
1212
1213                update_bitmap = 1;
1214                ubd_set_bit(sector + i, (unsigned char *) bitmap);
1215        }
1216
1217        if(!update_bitmap)
1218                return;
1219
1220        *cow_offset = sector / (sizeof(unsigned long) * 8);
1221
1222        /* This takes care of the case where we're exactly at the end of the
1223         * device, and *cow_offset + 1 is off the end.  So, just back it up
1224         * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1225         * for the original diagnosis.
1226         */
1227        if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1228                                         sizeof(unsigned long)) - 1))
1229                (*cow_offset)--;
1230
1231        bitmap_words[0] = bitmap[*cow_offset];
1232        bitmap_words[1] = bitmap[*cow_offset + 1];
1233
1234        *cow_offset *= sizeof(unsigned long);
1235        *cow_offset += bitmap_offset;
1236}
1237
1238static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1239                       unsigned long offset, unsigned long *bitmap,
1240                       __u64 bitmap_offset, __u64 bitmap_len)
1241{
1242        __u64 sector = offset >> SECTOR_SHIFT;
1243        int i;
1244
1245        if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1246                panic("Operation too long");
1247
1248        if (req_op(req->req) == REQ_OP_READ) {
1249                for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1250                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1251                                ubd_set_bit(i, (unsigned char *)
1252                                            &segment->sector_mask);
1253                }
1254        } else {
1255                cowify_bitmap(offset, segment->length, &segment->sector_mask,
1256                              &segment->cow_offset, bitmap, bitmap_offset,
1257                              segment->bitmap_words, bitmap_len);
1258        }
1259}
1260
1261static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1262                        struct request *req)
1263{
1264        struct bio_vec bvec;
1265        struct req_iterator iter;
1266        int i = 0;
1267        unsigned long byte_offset = io_req->offset;
1268        int op = req_op(req);
1269
1270        if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1271                io_req->io_desc[0].buffer = NULL;
1272                io_req->io_desc[0].length = blk_rq_bytes(req);
1273        } else {
1274                rq_for_each_segment(bvec, req, iter) {
1275                        BUG_ON(i >= io_req->desc_cnt);
1276
1277                        io_req->io_desc[i].buffer = bvec_virt(&bvec);
1278                        io_req->io_desc[i].length = bvec.bv_len;
1279                        i++;
1280                }
1281        }
1282
1283        if (dev->cow.file) {
1284                for (i = 0; i < io_req->desc_cnt; i++) {
1285                        cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1286                                   dev->cow.bitmap, dev->cow.bitmap_offset,
1287                                   dev->cow.bitmap_len);
1288                        byte_offset += io_req->io_desc[i].length;
1289                }
1290
1291        }
1292}
1293
1294static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1295                                           int desc_cnt)
1296{
1297        struct io_thread_req *io_req;
1298        int i;
1299
1300        io_req = kmalloc(sizeof(*io_req) +
1301                         (desc_cnt * sizeof(struct io_desc)),
1302                         GFP_ATOMIC);
1303        if (!io_req)
1304                return NULL;
1305
1306        io_req->req = req;
1307        if (dev->cow.file)
1308                io_req->fds[0] = dev->cow.fd;
1309        else
1310                io_req->fds[0] = dev->fd;
1311        io_req->error = 0;
1312        io_req->sectorsize = SECTOR_SIZE;
1313        io_req->fds[1] = dev->fd;
1314        io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1315        io_req->offsets[0] = 0;
1316        io_req->offsets[1] = dev->cow.data_offset;
1317
1318        for (i = 0 ; i < desc_cnt; i++) {
1319                io_req->io_desc[i].sector_mask = 0;
1320                io_req->io_desc[i].cow_offset = -1;
1321        }
1322
1323        return io_req;
1324}
1325
1326static int ubd_submit_request(struct ubd *dev, struct request *req)
1327{
1328        int segs = 0;
1329        struct io_thread_req *io_req;
1330        int ret;
1331        int op = req_op(req);
1332
1333        if (op == REQ_OP_FLUSH)
1334                segs = 0;
1335        else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1336                segs = 1;
1337        else
1338                segs = blk_rq_nr_phys_segments(req);
1339
1340        io_req = ubd_alloc_req(dev, req, segs);
1341        if (!io_req)
1342                return -ENOMEM;
1343
1344        io_req->desc_cnt = segs;
1345        if (segs)
1346                ubd_map_req(dev, io_req, req);
1347
1348        ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1349        if (ret != sizeof(io_req)) {
1350                if (ret != -EAGAIN)
1351                        pr_err("write to io thread failed: %d\n", -ret);
1352                kfree(io_req);
1353        }
1354        return ret;
1355}
1356
1357static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1358                                 const struct blk_mq_queue_data *bd)
1359{
1360        struct ubd *ubd_dev = hctx->queue->queuedata;
1361        struct request *req = bd->rq;
1362        int ret = 0, res = BLK_STS_OK;
1363
1364        blk_mq_start_request(req);
1365
1366        spin_lock_irq(&ubd_dev->lock);
1367
1368        switch (req_op(req)) {
1369        case REQ_OP_FLUSH:
1370        case REQ_OP_READ:
1371        case REQ_OP_WRITE:
1372        case REQ_OP_DISCARD:
1373        case REQ_OP_WRITE_ZEROES:
1374                ret = ubd_submit_request(ubd_dev, req);
1375                break;
1376        default:
1377                WARN_ON_ONCE(1);
1378                res = BLK_STS_NOTSUPP;
1379        }
1380
1381        spin_unlock_irq(&ubd_dev->lock);
1382
1383        if (ret < 0) {
1384                if (ret == -ENOMEM)
1385                        res = BLK_STS_RESOURCE;
1386                else
1387                        res = BLK_STS_DEV_RESOURCE;
1388        }
1389
1390        return res;
1391}
1392
1393static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1394{
1395        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1396
1397        geo->heads = 128;
1398        geo->sectors = 32;
1399        geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1400        return 0;
1401}
1402
1403static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1404                     unsigned int cmd, unsigned long arg)
1405{
1406        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1407        u16 ubd_id[ATA_ID_WORDS];
1408
1409        switch (cmd) {
1410                struct cdrom_volctrl volume;
1411        case HDIO_GET_IDENTITY:
1412                memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1413                ubd_id[ATA_ID_CYLS]     = ubd_dev->size / (128 * 32 * 512);
1414                ubd_id[ATA_ID_HEADS]    = 128;
1415                ubd_id[ATA_ID_SECTORS]  = 32;
1416                if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1417                                 sizeof(ubd_id)))
1418                        return -EFAULT;
1419                return 0;
1420
1421        case CDROMVOLREAD:
1422                if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1423                        return -EFAULT;
1424                volume.channel0 = 255;
1425                volume.channel1 = 255;
1426                volume.channel2 = 255;
1427                volume.channel3 = 255;
1428                if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1429                        return -EFAULT;
1430                return 0;
1431        }
1432        return -EINVAL;
1433}
1434
1435static int map_error(int error_code)
1436{
1437        switch (error_code) {
1438        case 0:
1439                return BLK_STS_OK;
1440        case ENOSYS:
1441        case EOPNOTSUPP:
1442                return BLK_STS_NOTSUPP;
1443        case ENOSPC:
1444                return BLK_STS_NOSPC;
1445        }
1446        return BLK_STS_IOERR;
1447}
1448
1449/*
1450 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1451 *
1452 * The following functions are part of UML hypervisor code.
1453 * All functions from here onwards are executed as a helper
1454 * thread and are not allowed to execute any kernel functions.
1455 *
1456 * Any communication must occur strictly via shared memory and IPC.
1457 *
1458 * Do not add printks, locks, kernel memory operations, etc - it
1459 * will result in unpredictable behaviour and/or crashes.
1460 */
1461
1462static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1463{
1464        int n;
1465
1466        if (segment->cow_offset == -1)
1467                return map_error(0);
1468
1469        n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1470                          sizeof(segment->bitmap_words), segment->cow_offset);
1471        if (n != sizeof(segment->bitmap_words))
1472                return map_error(-n);
1473
1474        return map_error(0);
1475}
1476
1477static void do_io(struct io_thread_req *req, struct io_desc *desc)
1478{
1479        char *buf = NULL;
1480        unsigned long len;
1481        int n, nsectors, start, end, bit;
1482        __u64 off;
1483
1484        /* FLUSH is really a special case, we cannot "case" it with others */
1485
1486        if (req_op(req->req) == REQ_OP_FLUSH) {
1487                /* fds[0] is always either the rw image or our cow file */
1488                req->error = map_error(-os_sync_file(req->fds[0]));
1489                return;
1490        }
1491
1492        nsectors = desc->length / req->sectorsize;
1493        start = 0;
1494        do {
1495                bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1496                end = start;
1497                while((end < nsectors) &&
1498                      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1499                        end++;
1500
1501                off = req->offset + req->offsets[bit] +
1502                        start * req->sectorsize;
1503                len = (end - start) * req->sectorsize;
1504                if (desc->buffer != NULL)
1505                        buf = &desc->buffer[start * req->sectorsize];
1506
1507                switch (req_op(req->req)) {
1508                case REQ_OP_READ:
1509                        n = 0;
1510                        do {
1511                                buf = &buf[n];
1512                                len -= n;
1513                                n = os_pread_file(req->fds[bit], buf, len, off);
1514                                if (n < 0) {
1515                                        req->error = map_error(-n);
1516                                        return;
1517                                }
1518                        } while((n < len) && (n != 0));
1519                        if (n < len) memset(&buf[n], 0, len - n);
1520                        break;
1521                case REQ_OP_WRITE:
1522                        n = os_pwrite_file(req->fds[bit], buf, len, off);
1523                        if(n != len){
1524                                req->error = map_error(-n);
1525                                return;
1526                        }
1527                        break;
1528                case REQ_OP_DISCARD:
1529                        n = os_falloc_punch(req->fds[bit], off, len);
1530                        if (n) {
1531                                req->error = map_error(-n);
1532                                return;
1533                        }
1534                        break;
1535                case REQ_OP_WRITE_ZEROES:
1536                        n = os_falloc_zeroes(req->fds[bit], off, len);
1537                        if (n) {
1538                                req->error = map_error(-n);
1539                                return;
1540                        }
1541                        break;
1542                default:
1543                        WARN_ON_ONCE(1);
1544                        req->error = BLK_STS_NOTSUPP;
1545                        return;
1546                }
1547
1548                start = end;
1549        } while(start < nsectors);
1550
1551        req->offset += len;
1552        req->error = update_bitmap(req, desc);
1553}
1554
1555/* Changed in start_io_thread, which is serialized by being called only
1556 * from ubd_init, which is an initcall.
1557 */
1558int kernel_fd = -1;
1559
1560/* Only changed by the io thread. XXX: currently unused. */
1561static int io_count = 0;
1562
1563int io_thread(void *arg)
1564{
1565        int n, count, written, res;
1566
1567        os_fix_helper_signals();
1568
1569        while(1){
1570                n = bulk_req_safe_read(
1571                        kernel_fd,
1572                        io_req_buffer,
1573                        &io_remainder,
1574                        &io_remainder_size,
1575                        UBD_REQ_BUFFER_SIZE
1576                );
1577                if (n <= 0) {
1578                        if (n == -EAGAIN)
1579                                ubd_read_poll(-1);
1580
1581                        continue;
1582                }
1583
1584                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1585                        struct io_thread_req *req = (*io_req_buffer)[count];
1586                        int i;
1587
1588                        io_count++;
1589                        for (i = 0; !req->error && i < req->desc_cnt; i++)
1590                                do_io(req, &(req->io_desc[i]));
1591
1592                }
1593
1594                written = 0;
1595
1596                do {
1597                        res = os_write_file(kernel_fd,
1598                                            ((char *) io_req_buffer) + written,
1599                                            n - written);
1600                        if (res >= 0) {
1601                                written += res;
1602                        }
1603                        if (written < n) {
1604                                ubd_write_poll(-1);
1605                        }
1606                } while (written < n);
1607        }
1608
1609        return 0;
1610}
1611