linux/arch/um/drivers/ubd_kern.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018 Cambridge Greys Ltd
   4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   6 */
   7
   8/* 2001-09-28...2002-04-17
   9 * Partition stuff by James_McMechan@hotmail.com
  10 * old style ubd by setting UBD_SHIFT to 0
  11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  12 * partitions have changed in 2.5
  13 * 2003-01-29 more tinkering for 2.5.59-1
  14 * This should now address the sysfs problems and has
  15 * the symlink for devfs to allow for booting with
  16 * the common /dev/ubd/discX/... names rather than
  17 * only /dev/ubdN/discN this version also has lots of
  18 * clean ups preparing for ubd-many.
  19 * James McMechan
  20 */
  21
  22#define UBD_SHIFT 4
  23
  24#include <linux/module.h>
  25#include <linux/init.h>
  26#include <linux/blkdev.h>
  27#include <linux/blk-mq.h>
  28#include <linux/ata.h>
  29#include <linux/hdreg.h>
  30#include <linux/cdrom.h>
  31#include <linux/proc_fs.h>
  32#include <linux/seq_file.h>
  33#include <linux/ctype.h>
  34#include <linux/slab.h>
  35#include <linux/vmalloc.h>
  36#include <linux/platform_device.h>
  37#include <linux/scatterlist.h>
  38#include <asm/tlbflush.h>
  39#include <kern_util.h>
  40#include "mconsole_kern.h"
  41#include <init.h>
  42#include <irq_kern.h>
  43#include "ubd.h"
  44#include <os.h>
  45#include "cow.h"
  46
/* Max request size in sectors: one bit of the unsigned long sector mask per
 * sector, i.e. 64 sectors (32K) on 64-bit hosts and half that on 32-bit. */
  48#define UBD_MAX_REQUEST (8 * sizeof(long))
  49
/*
 * One I/O segment of a request; struct io_thread_req carries a trailing
 * array of these.
 * NOTE(review): the code that fills these fields in is outside this part of
 * the file.  Judging by the names, sector_mask/cow_offset/bitmap_words hold
 * COW bitmap update state for the segment - confirm against the request
 * preparation code.
 */
struct io_desc {
	char *buffer;
	unsigned long length;
	unsigned long sector_mask;
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
};
  57
/*
 * Per-request message whose pointer is read back in ubd_handler().
 * On completion ubd_handler() passes @req and @error to blk_mq_end_request()
 * and then kfree()s the structure.
 * NOTE(review): the remaining fields are filled in outside this part of the
 * file; fds[]/offsets[] presumably address the COW and backing files -
 * confirm.
 */
struct io_thread_req {
	struct request *req;
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;
	int sectorsize;
	/* Compared against BLK_STS_NOTSUPP and handed to blk_mq_end_request() */
	int error;

	/* NOTE(review): presumably the number of valid io_desc[] entries */
	int desc_cnt;
	/* io_desc has to be the last element of the struct */
	struct io_desc io_desc[];
};
  70
  71
/*
 * Completion side: buffer of finished request pointers that ubd_handler()
 * fills through bulk_req_safe_read(), plus the scratch storage and byte
 * count for a pointer that was only partially read.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * NOTE(review): presumably the same trio for the I/O thread's own reads; no
 * user is visible in this part of the file - confirm.
 */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
  79
  80
  81
  82static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  83{
  84        __u64 n;
  85        int bits, off;
  86
  87        bits = sizeof(data[0]) * 8;
  88        n = bit / bits;
  89        off = bit % bits;
  90        return (data[n] & (1 << off)) != 0;
  91}
  92
  93static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  94{
  95        __u64 n;
  96        int bits, off;
  97
  98        bits = sizeof(data[0]) * 8;
  99        n = bit / bits;
 100        off = bit % bits;
 101        data[n] |= (1 << off);
 102}
 103/*End stuff from ubd_user.h*/
 104
 105#define DRIVER_NAME "uml-blkdev"
 106
 107static DEFINE_MUTEX(ubd_lock);
 108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
 109
 110static int ubd_open(struct block_device *bdev, fmode_t mode);
 111static void ubd_release(struct gendisk *disk, fmode_t mode);
 112static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
 113                     unsigned int cmd, unsigned long arg);
 114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 115
 116#define MAX_DEV (16)
 117
/*
 * Block device callbacks for ubd disks; installed as disk->fops by
 * ubd_disk_register().
 */
static const struct block_device_operations ubd_blops = {
	.owner          = THIS_MODULE,
	.open           = ubd_open,
	.release        = ubd_release,
	.ioctl          = ubd_ioctl,
	.compat_ioctl   = blkdev_compat_ptr_ioctl,
	.getgeo         = ubd_getgeo,
};
 126
 127/* Protected by ubd_lock */
 128static struct gendisk *ubd_gendisk[MAX_DEV];
 129
/*
 * Default host open flags for every device.  The two variants differ only
 * in .s, which is preset when CONFIG_BLK_DEV_UBD_SYNC is enabled (the same
 * bit the per-device 's' option sets).
 * NOTE(review): the exact meaning of each openflags bit is defined by the
 * openflags helpers outside this file - confirm there.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Starting point for each device's flags; "ubd=sync" turns on sync here. */
static struct openflags global_openflags = OPEN_FLAGS;
 138
/* Copy-on-write state of a device; only meaningful when @file is non-NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'ed by ubd_open_dev() */
	unsigned long *bitmap;
	/* Size of @bitmap in bytes (it is the length passed to vmalloc) */
	unsigned long bitmap_len;
	/* File offset the bitmap is read from in ubd_open_dev() */
	int bitmap_offset;
	/* Filled in by cow_sizes()/init_cow_file(); presumably the file
	 * offset where the data area starts - confirm against cow.h */
	int data_offset;
};
 149
 150#define MAX_SG 64
 151
/* State of one ubd device; instances live in the ubd_devs[] array. */
struct ubd {
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
	char *file;
	/* Optional serial string from the command line, shown via sysfs */
	char *serial;
	/* NOTE(review): presumably the open count; its users are outside
	 * this part of the file */
	int count;
	int fd;
	/* Device size in bytes, rounded up to a whole sector by ubd_add() */
	__u64 size;
	/* Flags parsed from the option string */
	struct openflags boot_openflags;
	/* Flags actually used for the current open; ubd_open_dev() copies
	 * boot_openflags here and open_ubd_file() may clear the write bit */
	struct openflags openflags;
	/* 'c' option: do not lock the file on the host */
	unsigned shared:1;
	/* 'd' option: never treat the file as a COW file */
	unsigned no_cow:1;
	/* 't' option: do not advertise discard support */
	unsigned no_trim:1;
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	struct blk_mq_tag_set tag_set;
	spinlock_t lock;
};
 171
/* Initializer for an unconfigured struct cow: no file, invalid fd. */
#define DEFAULT_COW { \
	.file =                 NULL, \
	.fd =                   -1,     \
	.bitmap =               NULL, \
	.bitmap_offset =        0, \
	.data_offset =          0, \
}

/*
 * Initializer for an unconfigured struct ubd.  A NULL .file is what
 * ubd_setup_common() and ubd_add() test to tell whether a slot is in use;
 * ubd_device_release() resets a slot back to this value.
 */
#define DEFAULT_UBD { \
	.file =                 NULL, \
	.serial =               NULL, \
	.count =                0, \
	.fd =                   -1, \
	.size =                 -1, \
	.boot_openflags =       OPEN_FLAGS, \
	.openflags =            OPEN_FLAGS, \
	.no_cow =               0, \
	.no_trim =              0, \
	.shared =               0, \
	.cow =                  DEFAULT_COW, \
	.lock =                 __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}
 194
 195/* Protected by ubd_lock */
 196static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 197
 198static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
 199                                 const struct blk_mq_queue_data *bd);
 200
/*
 * Handler for the obsolete "fake_ide" boot option: it only warns that the
 * option no longer exists.  Always returns 1.
 */
static int fake_ide_setup(char *str)
{
	pr_warn("The fake_ide option has been removed\n");
	return 1;
}
__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
"    Obsolete stub.\n\n"
);
 212
 213static int parse_unit(char **ptr)
 214{
 215        char *str = *ptr, *end;
 216        int n = -1;
 217
 218        if(isdigit(*str)) {
 219                n = simple_strtoul(str, &end, 0);
 220                if(end == str)
 221                        return -1;
 222                *ptr = end;
 223        }
 224        else if (('a' <= *str) && (*str <= 'z')) {
 225                n = *str - 'a';
 226                str++;
 227                *ptr = str;
 228        }
 229        return n;
 230}
 231
 232/* If *index_out == -1 at exit, the passed option was a general one;
 233 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 234 * should not be freed on exit.
 235 */
 236static int ubd_setup_common(char *str, int *index_out, char **error_out)
 237{
 238        struct ubd *ubd_dev;
 239        struct openflags flags = global_openflags;
 240        char *file, *backing_file, *serial;
 241        int n, err = 0, i;
 242
 243        if(index_out) *index_out = -1;
 244        n = *str;
 245        if(n == '='){
 246                str++;
 247                if(!strcmp(str, "sync")){
 248                        global_openflags = of_sync(global_openflags);
 249                        return err;
 250                }
 251
 252                pr_warn("fake major not supported any more\n");
 253                return 0;
 254        }
 255
 256        n = parse_unit(&str);
 257        if(n < 0){
 258                *error_out = "Couldn't parse device number";
 259                return -EINVAL;
 260        }
 261        if(n >= MAX_DEV){
 262                *error_out = "Device number out of range";
 263                return 1;
 264        }
 265
 266        err = -EBUSY;
 267        mutex_lock(&ubd_lock);
 268
 269        ubd_dev = &ubd_devs[n];
 270        if(ubd_dev->file != NULL){
 271                *error_out = "Device is already configured";
 272                goto out;
 273        }
 274
 275        if (index_out)
 276                *index_out = n;
 277
 278        err = -EINVAL;
 279        for (i = 0; i < sizeof("rscdt="); i++) {
 280                switch (*str) {
 281                case 'r':
 282                        flags.w = 0;
 283                        break;
 284                case 's':
 285                        flags.s = 1;
 286                        break;
 287                case 'd':
 288                        ubd_dev->no_cow = 1;
 289                        break;
 290                case 'c':
 291                        ubd_dev->shared = 1;
 292                        break;
 293                case 't':
 294                        ubd_dev->no_trim = 1;
 295                        break;
 296                case '=':
 297                        str++;
 298                        goto break_loop;
 299                default:
 300                        *error_out = "Expected '=' or flag letter "
 301                                "(r, s, c, t or d)";
 302                        goto out;
 303                }
 304                str++;
 305        }
 306
 307        if (*str == '=')
 308                *error_out = "Too many flags specified";
 309        else
 310                *error_out = "Missing '='";
 311        goto out;
 312
 313break_loop:
 314        file = strsep(&str, ",:");
 315        if (*file == '\0')
 316                file = NULL;
 317
 318        backing_file = strsep(&str, ",:");
 319        if (backing_file && *backing_file == '\0')
 320                backing_file = NULL;
 321
 322        serial = strsep(&str, ",:");
 323        if (serial && *serial == '\0')
 324                serial = NULL;
 325
 326        if (backing_file && ubd_dev->no_cow) {
 327                *error_out = "Can't specify both 'd' and a cow file";
 328                goto out;
 329        }
 330
 331        err = 0;
 332        ubd_dev->file = file;
 333        ubd_dev->cow.file = backing_file;
 334        ubd_dev->serial = serial;
 335        ubd_dev->boot_openflags = flags;
 336out:
 337        mutex_unlock(&ubd_lock);
 338        return err;
 339}
 340
 341static int ubd_setup(char *str)
 342{
 343        char *error;
 344        int err;
 345
 346        err = ubd_setup_common(str, NULL, &error);
 347        if(err)
 348                printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 349                       "%s\n", str, error);
 350        return 1;
 351}
 352
 353__setup("ubd", ubd_setup);
 354__uml_help(ubd_setup,
 355"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
 356"    This is used to associate a device with a file in the underlying\n"
 357"    filesystem. When specifying two filenames, the first one is the\n"
 358"    COW name and the second is the backing file name. As separator you can\n"
 359"    use either a ':' or a ',': the first one allows writing things like;\n"
 360"       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 361"    while with a ',' the shell would not expand the 2nd '~'.\n"
 362"    When using only one filename, UML will detect whether to treat it like\n"
 363"    a COW file or a backing file. To override this detection, add the 'd'\n"
 364"    flag:\n"
 365"       ubd0d=BackingFile\n"
 366"    Usually, there is a filesystem in the file, but \n"
 367"    that's not required. Swap devices containing swap files can be\n"
 368"    specified like this. Also, a file which doesn't contain a\n"
 369"    filesystem can have its contents read in the virtual \n"
 370"    machine by running 'dd' on the device. <n> must be in the range\n"
 371"    0 to 7. Appending an 'r' to the number will cause that device\n"
 372"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 373"    an 's' will cause data to be written to disk on the host immediately.\n"
 374"    'c' will cause the device to be treated as being shared between multiple\n"
 375"    UMLs and file locking will be turned off - this is appropriate for a\n"
 376"    cluster filesystem and inappropriate at almost all other times.\n\n"
 377"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 378"    An optional device serial number can be exposed using the serial parameter\n"
 379"    on the cmdline which is exposed as a sysfs entry. This is particularly\n"
 380"    useful when a unique number should be given to the device. Note when\n"
 381"    specifying a label, the filename2 must be also presented. It can be\n"
 382"    an empty string, in which case the backing file is not used:\n"
 383"       ubd0=File,,Serial\n"
 384);
 385
 386static int udb_setup(char *str)
 387{
 388        printk("udb%s specified on command line is almost certainly a ubd -> "
 389               "udb TYPO\n", str);
 390        return 1;
 391}
 392
 393__setup("udb", udb_setup);
 394__uml_help(udb_setup,
 395"udb\n"
 396"    This option is here solely to catch ubd -> udb typos, which can be\n"
 397"    to impossible to catch visually unless you specifically look for\n"
 398"    them.  The only result of any option starting with 'udb' is an error\n"
 399"    in the boot output.\n\n"
 400);
 401
 402/* Only changed by ubd_init, which is an initcall. */
 403static int thread_fd = -1;
 404
 405/* Function to read several request pointers at a time
 406* handling fractional reads if (and as) needed
 407*/
 408
 409static int bulk_req_safe_read(
 410        int fd,
 411        struct io_thread_req * (*request_buffer)[],
 412        struct io_thread_req **remainder,
 413        int *remainder_size,
 414        int max_recs
 415        )
 416{
 417        int n = 0;
 418        int res = 0;
 419
 420        if (*remainder_size > 0) {
 421                memmove(
 422                        (char *) request_buffer,
 423                        (char *) remainder, *remainder_size
 424                );
 425                n = *remainder_size;
 426        }
 427
 428        res = os_read_file(
 429                        fd,
 430                        ((char *) request_buffer) + *remainder_size,
 431                        sizeof(struct io_thread_req *)*max_recs
 432                                - *remainder_size
 433                );
 434        if (res > 0) {
 435                n += res;
 436                if ((n % sizeof(struct io_thread_req *)) > 0) {
 437                        /*
 438                        * Read somehow returned not a multiple of dword
 439                        * theoretically possible, but never observed in the
 440                        * wild, so read routine must be able to handle it
 441                        */
 442                        *remainder_size = n % sizeof(struct io_thread_req *);
 443                        WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 444                        memmove(
 445                                remainder,
 446                                ((char *) request_buffer) +
 447                                        (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 448                                *remainder_size
 449                        );
 450                        n = n - *remainder_size;
 451                }
 452        } else {
 453                n = res;
 454        }
 455        return n;
 456}
 457
 458/* Called without dev->lock held, and only in interrupt context. */
 459static void ubd_handler(void)
 460{
 461        int n;
 462        int count;
 463
 464        while(1){
 465                n = bulk_req_safe_read(
 466                        thread_fd,
 467                        irq_req_buffer,
 468                        &irq_remainder,
 469                        &irq_remainder_size,
 470                        UBD_REQ_BUFFER_SIZE
 471                );
 472                if (n < 0) {
 473                        if(n == -EAGAIN)
 474                                break;
 475                        printk(KERN_ERR "spurious interrupt in ubd_handler, "
 476                               "err = %d\n", -n);
 477                        return;
 478                }
 479                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 480                        struct io_thread_req *io_req = (*irq_req_buffer)[count];
 481
 482                        if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
 483                                blk_queue_max_discard_sectors(io_req->req->q, 0);
 484                                blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
 485                                blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
 486                        }
 487                        blk_mq_end_request(io_req->req, io_req->error);
 488                        kfree(io_req);
 489                }
 490        }
 491}
 492
/*
 * Interrupt handler: runs ubd_handler() to complete finished requests.
 * Both arguments are unused; always reports IRQ_HANDLED.
 */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
 498
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/*
 * UML exit hook: kill the I/O helper process if one was started (io_pid is
 * still -1 otherwise).
 * NOTE(review): the meaning of the second os_kill_process() argument is
 * defined in the os layer - confirm there.
 */
static void kill_io_thread(void)
{
	if(io_pid != -1)
		os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
 509
 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 511{
 512        char *file;
 513        int fd;
 514        int err;
 515
 516        __u32 version;
 517        __u32 align;
 518        char *backing_file;
 519        time64_t mtime;
 520        unsigned long long size;
 521        int sector_size;
 522        int bitmap_offset;
 523
 524        if (ubd_dev->file && ubd_dev->cow.file) {
 525                file = ubd_dev->cow.file;
 526
 527                goto out;
 528        }
 529
 530        fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 531        if (fd < 0)
 532                return fd;
 533
 534        err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 535                &mtime, &size, &sector_size, &align, &bitmap_offset);
 536        os_close_file(fd);
 537
 538        if(err == -EINVAL)
 539                file = ubd_dev->file;
 540        else
 541                file = backing_file;
 542
 543out:
 544        return os_file_size(file, size_out);
 545}
 546
 547static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 548{
 549        int err;
 550
 551        err = os_pread_file(fd, buf, len, offset);
 552        if (err < 0)
 553                return err;
 554
 555        return 0;
 556}
 557
 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
 559{
 560        time64_t modtime;
 561        unsigned long long actual;
 562        int err;
 563
 564        err = os_file_modtime(file, &modtime);
 565        if (err < 0) {
 566                printk(KERN_ERR "Failed to get modification time of backing "
 567                       "file \"%s\", err = %d\n", file, -err);
 568                return err;
 569        }
 570
 571        err = os_file_size(file, &actual);
 572        if (err < 0) {
 573                printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 574                       "err = %d\n", file, -err);
 575                return err;
 576        }
 577
 578        if (actual != size) {
 579                /*__u64 can be a long on AMD64 and with %lu GCC complains; so
 580                 * the typecast.*/
 581                printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 582                       "vs backing file\n", (unsigned long long) size, actual);
 583                return -EINVAL;
 584        }
 585        if (modtime != mtime) {
 586                printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
 587                       "backing file\n", mtime, modtime);
 588                return -EINVAL;
 589        }
 590        return 0;
 591}
 592
 593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 594{
 595        struct uml_stat buf1, buf2;
 596        int err;
 597
 598        if (from_cmdline == NULL)
 599                return 0;
 600        if (!strcmp(from_cmdline, from_cow))
 601                return 0;
 602
 603        err = os_stat_file(from_cmdline, &buf1);
 604        if (err < 0) {
 605                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 606                       -err);
 607                return 0;
 608        }
 609        err = os_stat_file(from_cow, &buf2);
 610        if (err < 0) {
 611                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 612                       -err);
 613                return 1;
 614        }
 615        if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 616                return 0;
 617
 618        printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 619               "\"%s\" specified in COW header of \"%s\"\n",
 620               from_cmdline, from_cow, cow);
 621        return 1;
 622}
 623
/*
 * Open @file on the host and, if requested, interpret it as a COW file.
 *
 * @file:              file to open
 * @openflags:         in/out; the write bit is cleared here when the file
 *                     could only be opened read-only
 * @shared:            non-zero skips locking the file
 * @backing_file_out:  NULL to open @file as a plain file.  Otherwise in/out:
 *                     on entry the backing file the caller asked for (may
 *                     point to NULL), on return the backing file in use
 * @bitmap_offset_out, @bitmap_len_out, @data_offset_out:
 *                     COW layout, filled in when a COW header was read
 * @create_cow_out:    optional; set to 1 when @file does not exist
 *
 * Returns the open file descriptor, or a negative error.
 */
static int open_ubd_file(char *file, struct openflags *openflags, int shared,
		  char **backing_file_out, int *bitmap_offset_out,
		  unsigned long *bitmap_len_out, int *data_offset_out,
		  int *create_cow_out)
{
	time64_t mtime;
	unsigned long long size;
	__u32 version, align;
	char *backing_file;
	int fd, err, sectorsize, asked_switch, mode = 0644;

	fd = os_open_file(file, *openflags, mode);
	if (fd < 0) {
		if ((fd == -ENOENT) && (create_cow_out != NULL))
			*create_cow_out = 1;
		/* Retry read-only, but only if a writable open was refused
		 * with -EROFS or -EACCES */
		if (!openflags->w ||
		    ((fd != -EROFS) && (fd != -EACCES)))
			return fd;
		openflags->w = 0;
		fd = os_open_file(file, *openflags, mode);
		if (fd < 0)
			return fd;
	}

	if (shared)
		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
	else {
		err = os_lock_file(fd, openflags->w);
		if (err < 0) {
			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
			       file, -err);
			goto out_close;
		}
	}

	/* Successful return case! */
	if (backing_file_out == NULL)
		return fd;

	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
			      &size, &sectorsize, &align, bitmap_offset_out);
	/* A backing file was named explicitly, so a COW header is required */
	if (err && (*backing_file_out != NULL)) {
		printk(KERN_ERR "Failed to read COW header from COW file "
		       "\"%s\", errno = %d\n", file, -err);
		goto out_close;
	}
	/* No header and no backing file requested: use it as a plain file */
	if (err)
		return fd;

	asked_switch = path_requires_switch(*backing_file_out, backing_file,
					    file);

	/* Allow switching only if no mismatch. */
	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
						   mtime)) {
		printk(KERN_ERR "Switching backing file to '%s'\n",
		       *backing_file_out);
		err = write_cow_header(file, fd, *backing_file_out,
				       sectorsize, align, &size);
		if (err) {
			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
			goto out_close;
		}
	} else {
		/* Use the backing file recorded in the header, provided it
		 * still matches the recorded size and mtime */
		*backing_file_out = backing_file;
		err = backing_file_mismatch(*backing_file_out, size, mtime);
		if (err)
			goto out_close;
	}

	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
		  bitmap_len_out, data_offset_out);

	return fd;
 out_close:
	os_close_file(fd);
	return err;
}
 702
 703static int create_cow_file(char *cow_file, char *backing_file,
 704                    struct openflags flags,
 705                    int sectorsize, int alignment, int *bitmap_offset_out,
 706                    unsigned long *bitmap_len_out, int *data_offset_out)
 707{
 708        int err, fd;
 709
 710        flags.c = 1;
 711        fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 712        if (fd < 0) {
 713                err = fd;
 714                printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 715                       cow_file, -err);
 716                goto out;
 717        }
 718
 719        err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 720                            bitmap_offset_out, bitmap_len_out,
 721                            data_offset_out);
 722        if (!err)
 723                return fd;
 724        os_close_file(fd);
 725 out:
 726        return err;
 727}
 728
 729static void ubd_close_dev(struct ubd *ubd_dev)
 730{
 731        os_close_file(ubd_dev->fd);
 732        if(ubd_dev->cow.file == NULL)
 733                return;
 734
 735        os_close_file(ubd_dev->cow.fd);
 736        vfree(ubd_dev->cow.bitmap);
 737        ubd_dev->cow.bitmap = NULL;
 738}
 739
 740static int ubd_open_dev(struct ubd *ubd_dev)
 741{
 742        struct openflags flags;
 743        char **back_ptr;
 744        int err, create_cow, *create_ptr;
 745        int fd;
 746
 747        ubd_dev->openflags = ubd_dev->boot_openflags;
 748        create_cow = 0;
 749        create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 750        back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 751
 752        fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 753                                back_ptr, &ubd_dev->cow.bitmap_offset,
 754                                &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 755                                create_ptr);
 756
 757        if((fd == -ENOENT) && create_cow){
 758                fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 759                                          ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
 760                                          &ubd_dev->cow.bitmap_offset,
 761                                          &ubd_dev->cow.bitmap_len,
 762                                          &ubd_dev->cow.data_offset);
 763                if(fd >= 0){
 764                        printk(KERN_INFO "Creating \"%s\" as COW file for "
 765                               "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 766                }
 767        }
 768
 769        if(fd < 0){
 770                printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 771                       -fd);
 772                return fd;
 773        }
 774        ubd_dev->fd = fd;
 775
 776        if(ubd_dev->cow.file != NULL){
 777                blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 778
 779                err = -ENOMEM;
 780                ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 781                if(ubd_dev->cow.bitmap == NULL){
 782                        printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 783                        goto error;
 784                }
 785                flush_tlb_kernel_vm();
 786
 787                err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 788                                      ubd_dev->cow.bitmap_offset,
 789                                      ubd_dev->cow.bitmap_len);
 790                if(err < 0)
 791                        goto error;
 792
 793                flags = ubd_dev->openflags;
 794                flags.w = 0;
 795                err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 796                                    NULL, NULL, NULL, NULL);
 797                if(err < 0) goto error;
 798                ubd_dev->cow.fd = err;
 799        }
 800        if (ubd_dev->no_trim == 0) {
 801                ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
 802                ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
 803                blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 804                blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
 805                blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
 806        }
 807        blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
 808        return 0;
 809 error:
 810        os_close_file(ubd_dev->fd);
 811        return err;
 812}
 813
/*
 * Release callback of the per-device platform device (installed by
 * ubd_disk_register()): frees the blk-mq tag set and resets the slot to its
 * unconfigured default.
 */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev_get_drvdata(dev);

	blk_mq_free_tag_set(&ubd_dev->tag_set);
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
 821
 822static ssize_t serial_show(struct device *dev,
 823                           struct device_attribute *attr, char *buf)
 824{
 825        struct gendisk *disk = dev_to_disk(dev);
 826        struct ubd *ubd_dev = disk->private_data;
 827
 828        if (!ubd_dev)
 829                return 0;
 830
 831        return sprintf(buf, "%s", ubd_dev->serial);
 832}
 833
 834static DEVICE_ATTR_RO(serial);
 835
/* Attributes attached to every ubd disk; currently only "serial". */
static struct attribute *ubd_attrs[] = {
	&dev_attr_serial.attr,
	NULL,
};

/* Every attribute is visible with the mode it was declared with. */
static umode_t ubd_attrs_are_visible(struct kobject *kobj,
				     struct attribute *a, int n)
{
	return a->mode;
}

static const struct attribute_group ubd_attr_group = {
	.attrs = ubd_attrs,
	.is_visible = ubd_attrs_are_visible,
};

/* NULL-terminated group list passed to device_add_disk() */
static const struct attribute_group *ubd_attr_groups[] = {
	&ubd_attr_group,
	NULL,
};
 856
 857static void ubd_disk_register(int major, u64 size, int unit,
 858                              struct gendisk *disk)
 859{
 860        disk->major = major;
 861        disk->first_minor = unit << UBD_SHIFT;
 862        disk->minors = 1 << UBD_SHIFT;
 863        disk->fops = &ubd_blops;
 864        set_capacity(disk, size / 512);
 865        sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 866
 867        ubd_devs[unit].pdev.id   = unit;
 868        ubd_devs[unit].pdev.name = DRIVER_NAME;
 869        ubd_devs[unit].pdev.dev.release = ubd_device_release;
 870        dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 871        platform_device_register(&ubd_devs[unit].pdev);
 872
 873        disk->private_data = &ubd_devs[unit];
 874        disk->queue = ubd_devs[unit].queue;
 875        device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups);
 876}
 877
 878#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 879
/* blk-mq operations for ubd; ubd_add() installs them in each tag set. */
static const struct blk_mq_ops ubd_mq_ops = {
	.queue_rq = ubd_queue_rq,
};
 883
 884static int ubd_add(int n, char **error_out)
 885{
 886        struct ubd *ubd_dev = &ubd_devs[n];
 887        struct gendisk *disk;
 888        int err = 0;
 889
 890        if(ubd_dev->file == NULL)
 891                goto out;
 892
 893        err = ubd_file_size(ubd_dev, &ubd_dev->size);
 894        if(err < 0){
 895                *error_out = "Couldn't determine size of device's file";
 896                goto out;
 897        }
 898
 899        ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 900
 901        ubd_dev->tag_set.ops = &ubd_mq_ops;
 902        ubd_dev->tag_set.queue_depth = 64;
 903        ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
 904        ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 905        ubd_dev->tag_set.driver_data = ubd_dev;
 906        ubd_dev->tag_set.nr_hw_queues = 1;
 907
 908        err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
 909        if (err)
 910                goto out;
 911
 912        disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev);
 913        if (IS_ERR(disk)) {
 914                err = PTR_ERR(disk);
 915                goto out_cleanup_tags;
 916        }
 917        ubd_dev->queue = disk->queue;
 918
 919        blk_queue_write_cache(ubd_dev->queue, true, false);
 920        blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 921        blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
 922        ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk);
 923        ubd_gendisk[n] = disk;
 924        return 0;
 925
 926out_cleanup_tags:
 927        blk_mq_free_tag_set(&ubd_dev->tag_set);
 928out:
 929        return err;
 930}
 931
 932static int ubd_config(char *str, char **error_out)
 933{
 934        int n, ret;
 935
 936        /* This string is possibly broken up and stored, so it's only
 937         * freed if ubd_setup_common fails, or if only general options
 938         * were set.
 939         */
 940        str = kstrdup(str, GFP_KERNEL);
 941        if (str == NULL) {
 942                *error_out = "Failed to allocate memory";
 943                return -ENOMEM;
 944        }
 945
 946        ret = ubd_setup_common(str, &n, error_out);
 947        if (ret)
 948                goto err_free;
 949
 950        if (n == -1) {
 951                ret = 0;
 952                goto err_free;
 953        }
 954
 955        mutex_lock(&ubd_lock);
 956        ret = ubd_add(n, error_out);
 957        if (ret)
 958                ubd_devs[n].file = NULL;
 959        mutex_unlock(&ubd_lock);
 960
 961out:
 962        return ret;
 963
 964err_free:
 965        kfree(str);
 966        goto out;
 967}
 968
 969static int ubd_get_config(char *name, char *str, int size, char **error_out)
 970{
 971        struct ubd *ubd_dev;
 972        int n, len = 0;
 973
 974        n = parse_unit(&name);
 975        if((n >= MAX_DEV) || (n < 0)){
 976                *error_out = "ubd_get_config : device number out of range";
 977                return -1;
 978        }
 979
 980        ubd_dev = &ubd_devs[n];
 981        mutex_lock(&ubd_lock);
 982
 983        if(ubd_dev->file == NULL){
 984                CONFIG_CHUNK(str, size, len, "", 1);
 985                goto out;
 986        }
 987
 988        CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
 989
 990        if(ubd_dev->cow.file != NULL){
 991                CONFIG_CHUNK(str, size, len, ",", 0);
 992                CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
 993        }
 994        else CONFIG_CHUNK(str, size, len, "", 1);
 995
 996 out:
 997        mutex_unlock(&ubd_lock);
 998        return len;
 999}
1000
1001static int ubd_id(char **str, int *start_out, int *end_out)
1002{
1003        int n;
1004
1005        n = parse_unit(str);
1006        *start_out = 0;
1007        *end_out = MAX_DEV - 1;
1008        return n;
1009}
1010
1011static int ubd_remove(int n, char **error_out)
1012{
1013        struct gendisk *disk = ubd_gendisk[n];
1014        struct ubd *ubd_dev;
1015        int err = -ENODEV;
1016
1017        mutex_lock(&ubd_lock);
1018
1019        ubd_dev = &ubd_devs[n];
1020
1021        if(ubd_dev->file == NULL)
1022                goto out;
1023
1024        /* you cannot remove a open disk */
1025        err = -EBUSY;
1026        if(ubd_dev->count > 0)
1027                goto out;
1028
1029        ubd_gendisk[n] = NULL;
1030        if(disk != NULL){
1031                del_gendisk(disk);
1032                blk_cleanup_disk(disk);
1033        }
1034
1035        err = 0;
1036        platform_device_unregister(&ubd_dev->pdev);
1037out:
1038        mutex_unlock(&ubd_lock);
1039        return err;
1040}
1041
1042/* All these are called by mconsole in process context and without
1043 * ubd-specific locks.  The structure itself is const except for .list.
1044 */
1045static struct mc_device ubd_mc = {
1046        .list           = LIST_HEAD_INIT(ubd_mc.list),
1047        .name           = "ubd",
1048        .config         = ubd_config,
1049        .get_config     = ubd_get_config,
1050        .id             = ubd_id,
1051        .remove         = ubd_remove,
1052};
1053
1054static int __init ubd_mc_init(void)
1055{
1056        mconsole_register_dev(&ubd_mc);
1057        return 0;
1058}
1059
1060__initcall(ubd_mc_init);
1061
1062static int __init ubd0_init(void)
1063{
1064        struct ubd *ubd_dev = &ubd_devs[0];
1065
1066        mutex_lock(&ubd_lock);
1067        if(ubd_dev->file == NULL)
1068                ubd_dev->file = "root_fs";
1069        mutex_unlock(&ubd_lock);
1070
1071        return 0;
1072}
1073
1074__initcall(ubd0_init);
1075
1076/* Used in ubd_init, which is an initcall */
1077static struct platform_driver ubd_driver = {
1078        .driver = {
1079                .name  = DRIVER_NAME,
1080        },
1081};
1082
1083static int __init ubd_init(void)
1084{
1085        char *error;
1086        int i, err;
1087
1088        if (register_blkdev(UBD_MAJOR, "ubd"))
1089                return -1;
1090
1091        irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1092                                       sizeof(struct io_thread_req *),
1093                                       GFP_KERNEL
1094                );
1095        irq_remainder = 0;
1096
1097        if (irq_req_buffer == NULL) {
1098                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1099                return -1;
1100        }
1101        io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1102                                      sizeof(struct io_thread_req *),
1103                                      GFP_KERNEL
1104                );
1105
1106        io_remainder = 0;
1107
1108        if (io_req_buffer == NULL) {
1109                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1110                return -1;
1111        }
1112        platform_driver_register(&ubd_driver);
1113        mutex_lock(&ubd_lock);
1114        for (i = 0; i < MAX_DEV; i++){
1115                err = ubd_add(i, &error);
1116                if(err)
1117                        printk(KERN_ERR "Failed to initialize ubd device %d :"
1118                               "%s\n", i, error);
1119        }
1120        mutex_unlock(&ubd_lock);
1121        return 0;
1122}
1123
1124late_initcall(ubd_init);
1125
1126static int __init ubd_driver_init(void){
1127        unsigned long stack;
1128        int err;
1129
1130        /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1131        if(global_openflags.s){
1132                printk(KERN_INFO "ubd: Synchronous mode\n");
1133                /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1134                 * enough. So use anyway the io thread. */
1135        }
1136        stack = alloc_stack(0, 0);
1137        io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd);
1138        if(io_pid < 0){
1139                printk(KERN_ERR
1140                       "ubd : Failed to start I/O thread (errno = %d) - "
1141                       "falling back to synchronous I/O\n", -io_pid);
1142                io_pid = -1;
1143                return 0;
1144        }
1145        err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1146                             0, "ubd", ubd_devs);
1147        if(err < 0)
1148                printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1149        return 0;
1150}
1151
1152device_initcall(ubd_driver_init);
1153
1154static int ubd_open(struct block_device *bdev, fmode_t mode)
1155{
1156        struct gendisk *disk = bdev->bd_disk;
1157        struct ubd *ubd_dev = disk->private_data;
1158        int err = 0;
1159
1160        mutex_lock(&ubd_mutex);
1161        if(ubd_dev->count == 0){
1162                err = ubd_open_dev(ubd_dev);
1163                if(err){
1164                        printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1165                               disk->disk_name, ubd_dev->file, -err);
1166                        goto out;
1167                }
1168        }
1169        ubd_dev->count++;
1170        set_disk_ro(disk, !ubd_dev->openflags.w);
1171
1172        /* This should no more be needed. And it didn't work anyway to exclude
1173         * read-write remounting of filesystems.*/
1174        /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1175                if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1176                err = -EROFS;
1177        }*/
1178out:
1179        mutex_unlock(&ubd_mutex);
1180        return err;
1181}
1182
1183static void ubd_release(struct gendisk *disk, fmode_t mode)
1184{
1185        struct ubd *ubd_dev = disk->private_data;
1186
1187        mutex_lock(&ubd_mutex);
1188        if(--ubd_dev->count == 0)
1189                ubd_close_dev(ubd_dev);
1190        mutex_unlock(&ubd_mutex);
1191}
1192
1193static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1194                          __u64 *cow_offset, unsigned long *bitmap,
1195                          __u64 bitmap_offset, unsigned long *bitmap_words,
1196                          __u64 bitmap_len)
1197{
1198        __u64 sector = io_offset >> SECTOR_SHIFT;
1199        int i, update_bitmap = 0;
1200
1201        for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1202                if(cow_mask != NULL)
1203                        ubd_set_bit(i, (unsigned char *) cow_mask);
1204                if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1205                        continue;
1206
1207                update_bitmap = 1;
1208                ubd_set_bit(sector + i, (unsigned char *) bitmap);
1209        }
1210
1211        if(!update_bitmap)
1212                return;
1213
1214        *cow_offset = sector / (sizeof(unsigned long) * 8);
1215
1216        /* This takes care of the case where we're exactly at the end of the
1217         * device, and *cow_offset + 1 is off the end.  So, just back it up
1218         * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1219         * for the original diagnosis.
1220         */
1221        if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1222                                         sizeof(unsigned long)) - 1))
1223                (*cow_offset)--;
1224
1225        bitmap_words[0] = bitmap[*cow_offset];
1226        bitmap_words[1] = bitmap[*cow_offset + 1];
1227
1228        *cow_offset *= sizeof(unsigned long);
1229        *cow_offset += bitmap_offset;
1230}
1231
1232static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1233                       unsigned long offset, unsigned long *bitmap,
1234                       __u64 bitmap_offset, __u64 bitmap_len)
1235{
1236        __u64 sector = offset >> SECTOR_SHIFT;
1237        int i;
1238
1239        if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1240                panic("Operation too long");
1241
1242        if (req_op(req->req) == REQ_OP_READ) {
1243                for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1244                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1245                                ubd_set_bit(i, (unsigned char *)
1246                                            &segment->sector_mask);
1247                }
1248        } else {
1249                cowify_bitmap(offset, segment->length, &segment->sector_mask,
1250                              &segment->cow_offset, bitmap, bitmap_offset,
1251                              segment->bitmap_words, bitmap_len);
1252        }
1253}
1254
1255static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1256                        struct request *req)
1257{
1258        struct bio_vec bvec;
1259        struct req_iterator iter;
1260        int i = 0;
1261        unsigned long byte_offset = io_req->offset;
1262        int op = req_op(req);
1263
1264        if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1265                io_req->io_desc[0].buffer = NULL;
1266                io_req->io_desc[0].length = blk_rq_bytes(req);
1267        } else {
1268                rq_for_each_segment(bvec, req, iter) {
1269                        BUG_ON(i >= io_req->desc_cnt);
1270
1271                        io_req->io_desc[i].buffer =
1272                                page_address(bvec.bv_page) + bvec.bv_offset;
1273                        io_req->io_desc[i].length = bvec.bv_len;
1274                        i++;
1275                }
1276        }
1277
1278        if (dev->cow.file) {
1279                for (i = 0; i < io_req->desc_cnt; i++) {
1280                        cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1281                                   dev->cow.bitmap, dev->cow.bitmap_offset,
1282                                   dev->cow.bitmap_len);
1283                        byte_offset += io_req->io_desc[i].length;
1284                }
1285
1286        }
1287}
1288
1289static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1290                                           int desc_cnt)
1291{
1292        struct io_thread_req *io_req;
1293        int i;
1294
1295        io_req = kmalloc(sizeof(*io_req) +
1296                         (desc_cnt * sizeof(struct io_desc)),
1297                         GFP_ATOMIC);
1298        if (!io_req)
1299                return NULL;
1300
1301        io_req->req = req;
1302        if (dev->cow.file)
1303                io_req->fds[0] = dev->cow.fd;
1304        else
1305                io_req->fds[0] = dev->fd;
1306        io_req->error = 0;
1307        io_req->sectorsize = SECTOR_SIZE;
1308        io_req->fds[1] = dev->fd;
1309        io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1310        io_req->offsets[0] = 0;
1311        io_req->offsets[1] = dev->cow.data_offset;
1312
1313        for (i = 0 ; i < desc_cnt; i++) {
1314                io_req->io_desc[i].sector_mask = 0;
1315                io_req->io_desc[i].cow_offset = -1;
1316        }
1317
1318        return io_req;
1319}
1320
1321static int ubd_submit_request(struct ubd *dev, struct request *req)
1322{
1323        int segs = 0;
1324        struct io_thread_req *io_req;
1325        int ret;
1326        int op = req_op(req);
1327
1328        if (op == REQ_OP_FLUSH)
1329                segs = 0;
1330        else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1331                segs = 1;
1332        else
1333                segs = blk_rq_nr_phys_segments(req);
1334
1335        io_req = ubd_alloc_req(dev, req, segs);
1336        if (!io_req)
1337                return -ENOMEM;
1338
1339        io_req->desc_cnt = segs;
1340        if (segs)
1341                ubd_map_req(dev, io_req, req);
1342
1343        ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1344        if (ret != sizeof(io_req)) {
1345                if (ret != -EAGAIN)
1346                        pr_err("write to io thread failed: %d\n", -ret);
1347                kfree(io_req);
1348        }
1349        return ret;
1350}
1351
1352static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1353                                 const struct blk_mq_queue_data *bd)
1354{
1355        struct ubd *ubd_dev = hctx->queue->queuedata;
1356        struct request *req = bd->rq;
1357        int ret = 0, res = BLK_STS_OK;
1358
1359        blk_mq_start_request(req);
1360
1361        spin_lock_irq(&ubd_dev->lock);
1362
1363        switch (req_op(req)) {
1364        case REQ_OP_FLUSH:
1365        case REQ_OP_READ:
1366        case REQ_OP_WRITE:
1367        case REQ_OP_DISCARD:
1368        case REQ_OP_WRITE_ZEROES:
1369                ret = ubd_submit_request(ubd_dev, req);
1370                break;
1371        default:
1372                WARN_ON_ONCE(1);
1373                res = BLK_STS_NOTSUPP;
1374        }
1375
1376        spin_unlock_irq(&ubd_dev->lock);
1377
1378        if (ret < 0) {
1379                if (ret == -ENOMEM)
1380                        res = BLK_STS_RESOURCE;
1381                else
1382                        res = BLK_STS_DEV_RESOURCE;
1383        }
1384
1385        return res;
1386}
1387
1388static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1389{
1390        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1391
1392        geo->heads = 128;
1393        geo->sectors = 32;
1394        geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1395        return 0;
1396}
1397
1398static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1399                     unsigned int cmd, unsigned long arg)
1400{
1401        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1402        u16 ubd_id[ATA_ID_WORDS];
1403
1404        switch (cmd) {
1405                struct cdrom_volctrl volume;
1406        case HDIO_GET_IDENTITY:
1407                memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1408                ubd_id[ATA_ID_CYLS]     = ubd_dev->size / (128 * 32 * 512);
1409                ubd_id[ATA_ID_HEADS]    = 128;
1410                ubd_id[ATA_ID_SECTORS]  = 32;
1411                if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1412                                 sizeof(ubd_id)))
1413                        return -EFAULT;
1414                return 0;
1415
1416        case CDROMVOLREAD:
1417                if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1418                        return -EFAULT;
1419                volume.channel0 = 255;
1420                volume.channel1 = 255;
1421                volume.channel2 = 255;
1422                volume.channel3 = 255;
1423                if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1424                        return -EFAULT;
1425                return 0;
1426        }
1427        return -EINVAL;
1428}
1429
1430static int map_error(int error_code)
1431{
1432        switch (error_code) {
1433        case 0:
1434                return BLK_STS_OK;
1435        case ENOSYS:
1436        case EOPNOTSUPP:
1437                return BLK_STS_NOTSUPP;
1438        case ENOSPC:
1439                return BLK_STS_NOSPC;
1440        }
1441        return BLK_STS_IOERR;
1442}
1443
1444/*
1445 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1446 *
1447 * The following functions are part of UML hypervisor code.
1448 * All functions from here onwards are executed as a helper
1449 * thread and are not allowed to execute any kernel functions.
1450 *
1451 * Any communication must occur strictly via shared memory and IPC.
1452 *
1453 * Do not add printks, locks, kernel memory operations, etc - it
1454 * will result in unpredictable behaviour and/or crashes.
1455 */
1456
1457static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1458{
1459        int n;
1460
1461        if (segment->cow_offset == -1)
1462                return map_error(0);
1463
1464        n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1465                          sizeof(segment->bitmap_words), segment->cow_offset);
1466        if (n != sizeof(segment->bitmap_words))
1467                return map_error(-n);
1468
1469        return map_error(0);
1470}
1471
1472static void do_io(struct io_thread_req *req, struct io_desc *desc)
1473{
1474        char *buf = NULL;
1475        unsigned long len;
1476        int n, nsectors, start, end, bit;
1477        __u64 off;
1478
1479        /* FLUSH is really a special case, we cannot "case" it with others */
1480
1481        if (req_op(req->req) == REQ_OP_FLUSH) {
1482                /* fds[0] is always either the rw image or our cow file */
1483                req->error = map_error(-os_sync_file(req->fds[0]));
1484                return;
1485        }
1486
1487        nsectors = desc->length / req->sectorsize;
1488        start = 0;
1489        do {
1490                bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1491                end = start;
1492                while((end < nsectors) &&
1493                      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1494                        end++;
1495
1496                off = req->offset + req->offsets[bit] +
1497                        start * req->sectorsize;
1498                len = (end - start) * req->sectorsize;
1499                if (desc->buffer != NULL)
1500                        buf = &desc->buffer[start * req->sectorsize];
1501
1502                switch (req_op(req->req)) {
1503                case REQ_OP_READ:
1504                        n = 0;
1505                        do {
1506                                buf = &buf[n];
1507                                len -= n;
1508                                n = os_pread_file(req->fds[bit], buf, len, off);
1509                                if (n < 0) {
1510                                        req->error = map_error(-n);
1511                                        return;
1512                                }
1513                        } while((n < len) && (n != 0));
1514                        if (n < len) memset(&buf[n], 0, len - n);
1515                        break;
1516                case REQ_OP_WRITE:
1517                        n = os_pwrite_file(req->fds[bit], buf, len, off);
1518                        if(n != len){
1519                                req->error = map_error(-n);
1520                                return;
1521                        }
1522                        break;
1523                case REQ_OP_DISCARD:
1524                case REQ_OP_WRITE_ZEROES:
1525                        n = os_falloc_punch(req->fds[bit], off, len);
1526                        if (n) {
1527                                req->error = map_error(-n);
1528                                return;
1529                        }
1530                        break;
1531                default:
1532                        WARN_ON_ONCE(1);
1533                        req->error = BLK_STS_NOTSUPP;
1534                        return;
1535                }
1536
1537                start = end;
1538        } while(start < nsectors);
1539
1540        req->offset += len;
1541        req->error = update_bitmap(req, desc);
1542}
1543
/*
 * The I/O thread's end of the request channel; it is what io_thread()
 * reads requests from and writes completions to.  Per the original note
 * it is changed in start_io_thread, which in this file is called only
 * from the ubd_driver_init initcall and is thereby serialized.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
1551
1552int io_thread(void *arg)
1553{
1554        int n, count, written, res;
1555
1556        os_fix_helper_signals();
1557
1558        while(1){
1559                n = bulk_req_safe_read(
1560                        kernel_fd,
1561                        io_req_buffer,
1562                        &io_remainder,
1563                        &io_remainder_size,
1564                        UBD_REQ_BUFFER_SIZE
1565                );
1566                if (n <= 0) {
1567                        if (n == -EAGAIN)
1568                                ubd_read_poll(-1);
1569
1570                        continue;
1571                }
1572
1573                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1574                        struct io_thread_req *req = (*io_req_buffer)[count];
1575                        int i;
1576
1577                        io_count++;
1578                        for (i = 0; !req->error && i < req->desc_cnt; i++)
1579                                do_io(req, &(req->io_desc[i]));
1580
1581                }
1582
1583                written = 0;
1584
1585                do {
1586                        res = os_write_file(kernel_fd,
1587                                            ((char *) io_req_buffer) + written,
1588                                            n - written);
1589                        if (res >= 0) {
1590                                written += res;
1591                        }
1592                        if (written < n) {
1593                                ubd_write_poll(-1);
1594                        }
1595                } while (written < n);
1596        }
1597
1598        return 0;
1599}
1600