linux/arch/um/drivers/ubd_kern.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
   3 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
   4 * Licensed under the GPL
   5 */
   6
   7/* 2001-09-28...2002-04-17
   8 * Partition stuff by James_McMechan@hotmail.com
   9 * old style ubd by setting UBD_SHIFT to 0
  10 * 2002-09-27...2002-10-18 massive tinkering for 2.5
  11 * partitions have changed in 2.5
  12 * 2003-01-29 more tinkering for 2.5.59-1
  13 * This should now address the sysfs problems and has
  14 * the symlink for devfs to allow for booting with
  15 * the common /dev/ubd/discX/... names rather than
  16 * only /dev/ubdN/discN this version also has lots of
  17 * clean ups preparing for ubd-many.
  18 * James McMechan
  19 */
  20
  21#define UBD_SHIFT 4
  22
  23#include <linux/module.h>
  24#include <linux/init.h>
  25#include <linux/blkdev.h>
  26#include <linux/ata.h>
  27#include <linux/hdreg.h>
  28#include <linux/cdrom.h>
  29#include <linux/proc_fs.h>
  30#include <linux/seq_file.h>
  31#include <linux/ctype.h>
  32#include <linux/slab.h>
  33#include <linux/vmalloc.h>
  34#include <linux/platform_device.h>
  35#include <linux/scatterlist.h>
  36#include <asm/tlbflush.h>
  37#include <kern_util.h>
  38#include "mconsole_kern.h"
  39#include <init.h>
  40#include <irq_kern.h>
  41#include "ubd.h"
  42#include <os.h>
  43#include "cow.h"
  44
   /* Kinds of operation an io_thread_req can carry (stored in its ->op field). */
   45enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
  46
   /*
    * Descriptor for one unit of I/O. ubd_handler() reads pointers to these
    * from thread_fd, completes ->req for ->length bytes and then kfree()s
    * the descriptor.
    */
   47struct io_thread_req {
             /* block-layer request that ubd_handler() completes */
   48        struct request *req;
             /* which operation this descriptor asks for */
   49        enum ubd_req op;
   50        int fds[2];
   51        unsigned long offsets[2];
   52        unsigned long long offset;
             /* byte count handed to blk_end_request() on completion */
   53        unsigned long length;
   54        char *buffer;
   55        int sectorsize;
   56        unsigned long sector_mask;
   57        unsigned long long cow_offset;
   58        unsigned long bitmap_words[2];
             /* NOTE(review): ubd_handler() completes every request with
              * BLK_STS_OK and never reads this field - confirm intended. */
   59        int error;
   60};
  61
  62
   /*
    * Receive state that ubd_handler() hands to bulk_req_safe_read(): the
    * buffer of request pointers, scratch storage for the bytes of a pointer
    * that a read cut short, and the number of bytes held in that scratch.
    */
   63static struct io_thread_req * (*irq_req_buffer)[];
   64static struct io_thread_req *irq_remainder;
   65static int irq_remainder_size;
   66
   /* Same trio with an io_ prefix; presumably the I/O thread's receive
    * state - TODO confirm against its user. */
   67static struct io_thread_req * (*io_req_buffer)[];
   68static struct io_thread_req *io_remainder;
   69static int io_remainder_size;
  70
  71
  72
  73static inline int ubd_test_bit(__u64 bit, unsigned char *data)
  74{
  75        __u64 n;
  76        int bits, off;
  77
  78        bits = sizeof(data[0]) * 8;
  79        n = bit / bits;
  80        off = bit % bits;
  81        return (data[n] & (1 << off)) != 0;
  82}
  83
  84static inline void ubd_set_bit(__u64 bit, unsigned char *data)
  85{
  86        __u64 n;
  87        int bits, off;
  88
  89        bits = sizeof(data[0]) * 8;
  90        n = bit / bits;
  91        off = bit % bits;
  92        data[n] |= (1 << off);
  93}
  94/*End stuff from ubd_user.h*/
  95
   /* Name given to each unit's platform device in ubd_disk_register(). */
   96#define DRIVER_NAME "uml-blkdev"
   97
   /* ubd_lock guards the driver's configuration state: fake_major, the
    * gendisk tables and ubd_devs[]. */
   98static DEFINE_MUTEX(ubd_lock);
   99static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
  100
  /* Forward declarations of the block-device callbacks referenced by
   * ubd_blops. */
  101static int ubd_open(struct block_device *bdev, fmode_t mode);
  102static void ubd_release(struct gendisk *disk, fmode_t mode);
  103static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
  104                     unsigned int cmd, unsigned long arg);
  105static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
  106
  /* Number of ubd units; sizes ubd_devs[], ubd_gendisk[] and fake_gendisk[]
   * and bounds the unit number accepted by ubd_setup_common(). */
  107#define MAX_DEV (16)
 108
  /* Block-device operations table; ubd_disk_register() installs it as
   * disk->fops for every disk it creates. */
  109static const struct block_device_operations ubd_blops = {
  110        .owner          = THIS_MODULE,
  111        .open           = ubd_open,
  112        .release        = ubd_release,
  113        .ioctl          = ubd_ioctl,
  114        .getgeo         = ubd_getgeo,
  115};
 116
  117/* Protected by ubd_lock */
  /*
   * fake_major stays equal to UBD_MAJOR until a "ubd=<major>" option sets it
   * (see ubd_setup_common()).  ubd_add() registers each unit under UBD_MAJOR
   * into ubd_gendisk[] and, when fake_major differs, a second disk under
   * fake_major into fake_gendisk[].
   */
  118static int fake_major = UBD_MAJOR;
  119static struct gendisk *ubd_gendisk[MAX_DEV];
  120static struct gendisk *fake_gendisk[MAX_DEV];
 121
  /*
   * Default open flags for ubd files: .r and .w set, .c clear, .cl set; .s is
   * set only when CONFIG_BLK_DEV_UBD_SYNC is configured.
   * NOTE(review): the meaning of the individual bits comes from struct
   * openflags, which is declared outside this file.
   */
  122#ifdef CONFIG_BLK_DEV_UBD_SYNC
  123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
  124                                         .cl = 1 })
  125#else
  126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
  127                                         .cl = 1 })
  128#endif
  /* Starting flags for every unit parsed by ubd_setup_common(); a "ubd=sync"
   * option replaces them with of_sync(global_openflags). */
  129static struct openflags global_openflags = OPEN_FLAGS;
 130
  /* Per-device COW state, embedded in struct ubd as ->cow. */
  131struct cow {
  132        /* backing file name */
  133        char *file;
  134        /* backing file fd */
  135        int fd;
             /* vmalloc()ed by ubd_open_dev(), filled by read_cow_bitmap(),
              * vfree()d by ubd_close_dev() */
  136        unsigned long *bitmap;
             /* size passed to vmalloc() and read_cow_bitmap() */
  137        unsigned long bitmap_len;
             /* offset in the ubd file that the bitmap is read from */
  138        int bitmap_offset;
             /* filled in through open_ubd_file()/create_cow_file() */
  139        int data_offset;
  140};
 141
  /* Length of each device's scatterlist and the segment limit that ubd_add()
   * sets on its queue. */
  142#define MAX_SG 64
  143
  /* State of one ubd unit; the units live in ubd_devs[]. */
  144struct ubd {
             /* linkage on the global "restart" list walked by ubd_handler() */
  145        struct list_head restart;
  146        /* name (and fd, below) of the file opened for writing, either the
  147         * backing or the cow file. */
  148        char *file;
  149        int count;
  150        int fd;
             /* size in bytes, rounded up with ROUND_BLOCK() in ubd_add() */
  151        __u64 size;
             /* flags parsed from the option string by ubd_setup_common() */
  152        struct openflags boot_openflags;
             /* copy of boot_openflags made by ubd_open_dev(); open_ubd_file()
              * may clear .w in it when it falls back to read-only */
  153        struct openflags openflags;
             /* 'c' flag: open_ubd_file() skips os_lock_file() */
  154        unsigned shared:1;
             /* 'd' flag: ubd_open_dev() does not look for a COW header */
  155        unsigned no_cow:1;
  156        struct cow cow;
             /* registered by ubd_disk_register() for the UBD_MAJOR disk */
  157        struct platform_device pdev;
  158        struct request_queue *queue;
             /* passed to blk_init_queue() as the queue lock */
  159        spinlock_t lock;
  160        struct scatterlist sg[MAX_SG];
  161        struct request *request;
  162        int start_sg, end_sg;
  163        sector_t rq_pos;
  164};
 165
  /* Initialiser for an unused struct cow: no file name, fd -1, no bitmap.
   * bitmap_len is not named here and is therefore zero-initialised. */
  166#define DEFAULT_COW { \
  167        .file =                 NULL, \
  168        .fd =                   -1,     \
  169        .bitmap =               NULL, \
  170        .bitmap_offset =        0, \
  171        .data_offset =          0, \
  172}
 173
  /*
   * Initialiser for an unconfigured unit.  .file == NULL is the "not
   * configured" marker tested by ubd_setup_common() and ubd_add().  .size is
   * a __u64, so -1 stores the all-ones value.  ubd_device_release() assigns
   * this initialiser back to a unit when its platform device is released.
   */
  174#define DEFAULT_UBD { \
  175        .file =                 NULL, \
  176        .count =                0, \
  177        .fd =                   -1, \
  178        .size =                 -1, \
  179        .boot_openflags =       OPEN_FLAGS, \
  180        .openflags =            OPEN_FLAGS, \
  181        .no_cow =               0, \
  182        .shared =               0, \
  183        .cow =                  DEFAULT_COW, \
  184        .lock =                 __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
  185        .request =              NULL, \
  186        .start_sg =             0, \
  187        .end_sg =               0, \
  188        .rq_pos =               0, \
  189}
  190
  191/* Protected by ubd_lock */
  /* One slot per unit; every slot starts out as DEFAULT_UBD. */
  192static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 193
  194/* Only changed by fake_ide_setup which is a setup */
  195static int fake_ide = 0;
  /* Directory entries for "ide" and "ide0" under it; both are created by
   * make_proc_ide(), which make_ide_entries() calls on first use. */
  196static struct proc_dir_entry *proc_ide_root = NULL;
  197static struct proc_dir_entry *proc_ide = NULL;
 198
 199static void make_proc_ide(void)
 200{
 201        proc_ide_root = proc_mkdir("ide", NULL);
 202        proc_ide = proc_mkdir("ide0", proc_ide_root);
 203}
 204
 205static int fake_ide_media_proc_show(struct seq_file *m, void *v)
 206{
 207        seq_puts(m, "disk\n");
 208        return 0;
 209}
 210
 211static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
 212{
 213        return single_open(file, fake_ide_media_proc_show, NULL);
 214}
 215
  /* File operations that make_ide_entries() attaches to each "media" proc
   * entry: a single_open() seq_file backed by fake_ide_media_proc_show(). */
  216static const struct file_operations fake_ide_media_proc_fops = {
  217        .owner          = THIS_MODULE,
  218        .open           = fake_ide_media_proc_open,
  219        .read           = seq_read,
  220        .llseek         = seq_lseek,
  221        .release        = single_release,
  222};
 223
 224static void make_ide_entries(const char *dev_name)
 225{
 226        struct proc_dir_entry *dir, *ent;
 227        char name[64];
 228
 229        if(proc_ide_root == NULL) make_proc_ide();
 230
 231        dir = proc_mkdir(dev_name, proc_ide);
 232        if(!dir) return;
 233
 234        ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
 235        if(!ent) return;
 236        snprintf(name, sizeof(name), "ide0/%s", dev_name);
 237        proc_symlink(dev_name, proc_ide_root, name);
 238}
 239
 240static int fake_ide_setup(char *str)
 241{
 242        fake_ide = 1;
 243        return 1;
 244}
 245
  /* Hook fake_ide_setup() up to the "fake_ide" option and supply its help
   * text. */
  246__setup("fake_ide", fake_ide_setup);
  247
  248__uml_help(fake_ide_setup,
  249"fake_ide\n"
  250"    Create ide0 entries that map onto ubd devices.\n\n"
  251);
 252
 253static int parse_unit(char **ptr)
 254{
 255        char *str = *ptr, *end;
 256        int n = -1;
 257
 258        if(isdigit(*str)) {
 259                n = simple_strtoul(str, &end, 0);
 260                if(end == str)
 261                        return -1;
 262                *ptr = end;
 263        }
 264        else if (('a' <= *str) && (*str <= 'z')) {
 265                n = *str - 'a';
 266                str++;
 267                *ptr = str;
 268        }
 269        return n;
 270}
 271
 272/* If *index_out == -1 at exit, the passed option was a general one;
 273 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 274 * should not be freed on exit.
 275 */
 276static int ubd_setup_common(char *str, int *index_out, char **error_out)
 277{
 278        struct ubd *ubd_dev;
 279        struct openflags flags = global_openflags;
 280        char *backing_file;
 281        int n, err = 0, i;
 282
 283        if(index_out) *index_out = -1;
 284        n = *str;
 285        if(n == '='){
 286                char *end;
 287                int major;
 288
 289                str++;
 290                if(!strcmp(str, "sync")){
 291                        global_openflags = of_sync(global_openflags);
 292                        goto out1;
 293                }
 294
 295                err = -EINVAL;
 296                major = simple_strtoul(str, &end, 0);
 297                if((*end != '\0') || (end == str)){
 298                        *error_out = "Didn't parse major number";
 299                        goto out1;
 300                }
 301
 302                mutex_lock(&ubd_lock);
 303                if (fake_major != UBD_MAJOR) {
 304                        *error_out = "Can't assign a fake major twice";
 305                        goto out1;
 306                }
 307
 308                fake_major = major;
 309
 310                printk(KERN_INFO "Setting extra ubd major number to %d\n",
 311                       major);
 312                err = 0;
 313        out1:
 314                mutex_unlock(&ubd_lock);
 315                return err;
 316        }
 317
 318        n = parse_unit(&str);
 319        if(n < 0){
 320                *error_out = "Couldn't parse device number";
 321                return -EINVAL;
 322        }
 323        if(n >= MAX_DEV){
 324                *error_out = "Device number out of range";
 325                return 1;
 326        }
 327
 328        err = -EBUSY;
 329        mutex_lock(&ubd_lock);
 330
 331        ubd_dev = &ubd_devs[n];
 332        if(ubd_dev->file != NULL){
 333                *error_out = "Device is already configured";
 334                goto out;
 335        }
 336
 337        if (index_out)
 338                *index_out = n;
 339
 340        err = -EINVAL;
 341        for (i = 0; i < sizeof("rscd="); i++) {
 342                switch (*str) {
 343                case 'r':
 344                        flags.w = 0;
 345                        break;
 346                case 's':
 347                        flags.s = 1;
 348                        break;
 349                case 'd':
 350                        ubd_dev->no_cow = 1;
 351                        break;
 352                case 'c':
 353                        ubd_dev->shared = 1;
 354                        break;
 355                case '=':
 356                        str++;
 357                        goto break_loop;
 358                default:
 359                        *error_out = "Expected '=' or flag letter "
 360                                "(r, s, c, or d)";
 361                        goto out;
 362                }
 363                str++;
 364        }
 365
 366        if (*str == '=')
 367                *error_out = "Too many flags specified";
 368        else
 369                *error_out = "Missing '='";
 370        goto out;
 371
 372break_loop:
 373        backing_file = strchr(str, ',');
 374
 375        if (backing_file == NULL)
 376                backing_file = strchr(str, ':');
 377
 378        if(backing_file != NULL){
 379                if(ubd_dev->no_cow){
 380                        *error_out = "Can't specify both 'd' and a cow file";
 381                        goto out;
 382                }
 383                else {
 384                        *backing_file = '\0';
 385                        backing_file++;
 386                }
 387        }
 388        err = 0;
 389        ubd_dev->file = str;
 390        ubd_dev->cow.file = backing_file;
 391        ubd_dev->boot_openflags = flags;
 392out:
 393        mutex_unlock(&ubd_lock);
 394        return err;
 395}
 396
 397static int ubd_setup(char *str)
 398{
 399        char *error;
 400        int err;
 401
 402        err = ubd_setup_common(str, NULL, &error);
 403        if(err)
 404                printk(KERN_ERR "Failed to initialize device with \"%s\" : "
 405                       "%s\n", str, error);
 406        return 1;
 407}
 408
 409__setup("ubd", ubd_setup);
 410__uml_help(ubd_setup,
 411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
 412"    This is used to associate a device with a file in the underlying\n"
 413"    filesystem. When specifying two filenames, the first one is the\n"
 414"    COW name and the second is the backing file name. As separator you can\n"
 415"    use either a ':' or a ',': the first one allows writing things like;\n"
 416"       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
 417"    while with a ',' the shell would not expand the 2nd '~'.\n"
 418"    When using only one filename, UML will detect whether to treat it like\n"
 419"    a COW file or a backing file. To override this detection, add the 'd'\n"
 420"    flag:\n"
 421"       ubd0d=BackingFile\n"
 422"    Usually, there is a filesystem in the file, but \n"
 423"    that's not required. Swap devices containing swap files can be\n"
 424"    specified like this. Also, a file which doesn't contain a\n"
 425"    filesystem can have its contents read in the virtual \n"
 426"    machine by running 'dd' on the device. <n> must be in the range\n"
 427"    0 to 7. Appending an 'r' to the number will cause that device\n"
 428"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
 429"    an 's' will cause data to be written to disk on the host immediately.\n"
 430"    'c' will cause the device to be treated as being shared between multiple\n"
 431"    UMLs and file locking will be turned off - this is appropriate for a\n"
 432"    cluster filesystem and inappropriate at almost all other times.\n\n"
 433);
 434
 435static int udb_setup(char *str)
 436{
 437        printk("udb%s specified on command line is almost certainly a ubd -> "
 438               "udb TYPO\n", str);
 439        return 1;
 440}
 441
 442__setup("udb", udb_setup);
 443__uml_help(udb_setup,
 444"udb\n"
 445"    This option is here solely to catch ubd -> udb typos, which can be\n"
 446"    to impossible to catch visually unless you specifically look for\n"
 447"    them.  The only result of any option starting with 'udb' is an error\n"
 448"    in the boot output.\n\n"
 449);
 450
  /* Request function of every ubd queue: ubd_add() passes it to
   * blk_init_queue() and ubd_handler() re-runs it for devices on "restart". */
  451static void do_ubd_request(struct request_queue * q);
  452
  453/* Only changed by ubd_init, which is an initcall. */
  /* fd that ubd_handler() reads completed request pointers from and then
   * re-arms with reactivate_fd(). */
  454static int thread_fd = -1;
  /* Devices (linked through ubd->restart) whose queue ubd_handler() runs
   * again once it has drained the completions. */
  455static LIST_HEAD(restart);
 456
 457/* Function to read several request pointers at a time
 458* handling fractional reads if (and as) needed
 459*/
 460
 461static int bulk_req_safe_read(
 462        int fd,
 463        struct io_thread_req * (*request_buffer)[],
 464        struct io_thread_req **remainder,
 465        int *remainder_size,
 466        int max_recs
 467        )
 468{
 469        int n = 0;
 470        int res = 0;
 471
 472        if (*remainder_size > 0) {
 473                memmove(
 474                        (char *) request_buffer,
 475                        (char *) remainder, *remainder_size
 476                );
 477                n = *remainder_size;
 478        }
 479
 480        res = os_read_file(
 481                        fd,
 482                        ((char *) request_buffer) + *remainder_size,
 483                        sizeof(struct io_thread_req *)*max_recs
 484                                - *remainder_size
 485                );
 486        if (res > 0) {
 487                n += res;
 488                if ((n % sizeof(struct io_thread_req *)) > 0) {
 489                        /*
 490                        * Read somehow returned not a multiple of dword
 491                        * theoretically possible, but never observed in the
 492                        * wild, so read routine must be able to handle it
 493                        */
 494                        *remainder_size = n % sizeof(struct io_thread_req *);
 495                        WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
 496                        memmove(
 497                                remainder,
 498                                ((char *) request_buffer) +
 499                                        (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
 500                                *remainder_size
 501                        );
 502                        n = n - *remainder_size;
 503                }
 504        } else {
 505                n = res;
 506        }
 507        return n;
 508}
 509
 510/* Called without dev->lock held, and only in interrupt context. */
 511static void ubd_handler(void)
 512{
 513        struct ubd *ubd;
 514        struct list_head *list, *next_ele;
 515        unsigned long flags;
 516        int n;
 517        int count;
 518
 519        while(1){
 520                n = bulk_req_safe_read(
 521                        thread_fd,
 522                        irq_req_buffer,
 523                        &irq_remainder,
 524                        &irq_remainder_size,
 525                        UBD_REQ_BUFFER_SIZE
 526                );
 527                if (n < 0) {
 528                        if(n == -EAGAIN)
 529                                break;
 530                        printk(KERN_ERR "spurious interrupt in ubd_handler, "
 531                               "err = %d\n", -n);
 532                        return;
 533                }
 534                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
 535                        blk_end_request(
 536                                (*irq_req_buffer)[count]->req,
 537                                BLK_STS_OK,
 538                                (*irq_req_buffer)[count]->length
 539                        );
 540                        kfree((*irq_req_buffer)[count]);
 541                }
 542        }
 543        reactivate_fd(thread_fd, UBD_IRQ);
 544
 545        list_for_each_safe(list, next_ele, &restart){
 546                ubd = container_of(list, struct ubd, restart);
 547                list_del_init(&ubd->restart);
 548                spin_lock_irqsave(&ubd->lock, flags);
 549                do_ubd_request(ubd->queue);
 550                spin_unlock_irqrestore(&ubd->lock, flags);
 551        }
 552}
 553
 554static irqreturn_t ubd_intr(int irq, void *dev)
 555{
 556        ubd_handler();
 557        return IRQ_HANDLED;
 558}
 559
  560/* Only changed by ubd_init, which is an initcall. */
  /* Process id that kill_io_thread() passes to os_kill_process() at exit;
   * -1 means there is nothing to kill. */
  561static int io_pid = -1;
 562
 563static void kill_io_thread(void)
 564{
 565        if(io_pid != -1)
 566                os_kill_process(io_pid, 1);
 567}
 568
 569__uml_exitcall(kill_io_thread);
 570
 571static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 572{
 573        char *file;
 574        int fd;
 575        int err;
 576
 577        __u32 version;
 578        __u32 align;
 579        char *backing_file;
 580        time_t mtime;
 581        unsigned long long size;
 582        int sector_size;
 583        int bitmap_offset;
 584
 585        if (ubd_dev->file && ubd_dev->cow.file) {
 586                file = ubd_dev->cow.file;
 587
 588                goto out;
 589        }
 590
 591        fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
 592        if (fd < 0)
 593                return fd;
 594
 595        err = read_cow_header(file_reader, &fd, &version, &backing_file, \
 596                &mtime, &size, &sector_size, &align, &bitmap_offset);
 597        os_close_file(fd);
 598
 599        if(err == -EINVAL)
 600                file = ubd_dev->file;
 601        else
 602                file = backing_file;
 603
 604out:
 605        return os_file_size(file, size_out);
 606}
 607
 608static int read_cow_bitmap(int fd, void *buf, int offset, int len)
 609{
 610        int err;
 611
 612        err = os_pread_file(fd, buf, len, offset);
 613        if (err < 0)
 614                return err;
 615
 616        return 0;
 617}
 618
 619static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
 620{
 621        unsigned long modtime;
 622        unsigned long long actual;
 623        int err;
 624
 625        err = os_file_modtime(file, &modtime);
 626        if (err < 0) {
 627                printk(KERN_ERR "Failed to get modification time of backing "
 628                       "file \"%s\", err = %d\n", file, -err);
 629                return err;
 630        }
 631
 632        err = os_file_size(file, &actual);
 633        if (err < 0) {
 634                printk(KERN_ERR "Failed to get size of backing file \"%s\", "
 635                       "err = %d\n", file, -err);
 636                return err;
 637        }
 638
 639        if (actual != size) {
 640                /*__u64 can be a long on AMD64 and with %lu GCC complains; so
 641                 * the typecast.*/
 642                printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
 643                       "vs backing file\n", (unsigned long long) size, actual);
 644                return -EINVAL;
 645        }
 646        if (modtime != mtime) {
 647                printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
 648                       "backing file\n", mtime, modtime);
 649                return -EINVAL;
 650        }
 651        return 0;
 652}
 653
 654static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
 655{
 656        struct uml_stat buf1, buf2;
 657        int err;
 658
 659        if (from_cmdline == NULL)
 660                return 0;
 661        if (!strcmp(from_cmdline, from_cow))
 662                return 0;
 663
 664        err = os_stat_file(from_cmdline, &buf1);
 665        if (err < 0) {
 666                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
 667                       -err);
 668                return 0;
 669        }
 670        err = os_stat_file(from_cow, &buf2);
 671        if (err < 0) {
 672                printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
 673                       -err);
 674                return 1;
 675        }
 676        if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
 677                return 0;
 678
 679        printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
 680               "\"%s\" specified in COW header of \"%s\"\n",
 681               from_cmdline, from_cow, cow);
 682        return 1;
 683}
 684
  /*
   * Open @file on the host and, if asked to, interpret it as a COW file.
   *
   * @openflags:         flags to open with; ->w is cleared in the caller's
   *                     copy if the open had to fall back to read-only.
   * @shared:            non-zero skips the host file lock.
   * @backing_file_out:  NULL means "open only, do not look for a COW
   *                     header".  Otherwise *backing_file_out is the backing
   *                     file requested by the caller (or NULL) on entry and
   *                     may be replaced by the name from the COW header.
   * @bitmap_offset_out, @bitmap_len_out, @data_offset_out:
   *                     filled in when a COW header is found.
   * @create_cow_out:    optional; set to 1 when @file does not exist.
   *
   * Returns the open fd, or a negative error (with the fd closed).
   */
  685static int open_ubd_file(char *file, struct openflags *openflags, int shared,
  686                  char **backing_file_out, int *bitmap_offset_out,
  687                  unsigned long *bitmap_len_out, int *data_offset_out,
  688                  int *create_cow_out)
  689{
  690        time_t mtime;
  691        unsigned long long size;
  692        __u32 version, align;
  693        char *backing_file;
  694        int fd, err, sectorsize, asked_switch, mode = 0644;
  695
  696        fd = os_open_file(file, *openflags, mode);
  697        if (fd < 0) {
  698                if ((fd == -ENOENT) && (create_cow_out != NULL))
  699                        *create_cow_out = 1;
                     /* Retry read-only only if a writable open was refused
                      * with -EROFS or -EACCES. */
  700                if (!openflags->w ||
  701                    ((fd != -EROFS) && (fd != -EACCES)))
  702                        return fd;
  703                openflags->w = 0;
  704                fd = os_open_file(file, *openflags, mode);
  705                if (fd < 0)
  706                        return fd;
  707        }
  708
  709        if (shared)
  710                printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
  711        else {
  712                err = os_lock_file(fd, openflags->w);
  713                if (err < 0) {
  714                        printk(KERN_ERR "Failed to lock '%s', err = %d\n",
  715                               file, -err);
  716                        goto out_close;
  717                }
  718        }
  719
  720        /* Successful return case! */
  721        if (backing_file_out == NULL)
  722                return fd;
  723
  724        err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
  725                              &size, &sectorsize, &align, bitmap_offset_out);
             /* A header-read failure is an error only if the caller named a
              * backing file; otherwise the file is returned as it is. */
  726        if (err && (*backing_file_out != NULL)) {
  727                printk(KERN_ERR "Failed to read COW header from COW file "
  728                       "\"%s\", errno = %d\n", file, -err);
  729                goto out_close;
  730        }
  731        if (err)
  732                return fd;
  733
  734        asked_switch = path_requires_switch(*backing_file_out, backing_file,
  735                                            file);
  736
  737        /* Allow switching only if no mismatch. */
  738        if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
  739                                                   mtime)) {
  740                printk(KERN_ERR "Switching backing file to '%s'\n",
  741                       *backing_file_out);
  742                err = write_cow_header(file, fd, *backing_file_out,
  743                                       sectorsize, align, &size);
  744                if (err) {
  745                        printk(KERN_ERR "Switch failed, errno = %d\n", -err);
  746                        goto out_close;
  747                }
  748        } else {
                     /* Use the header's backing file and check that it still
                      * matches the recorded size and mtime. */
  749                *backing_file_out = backing_file;
  750                err = backing_file_mismatch(*backing_file_out, size, mtime);
  751                if (err)
  752                        goto out_close;
  753        }
  754
  755        cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
  756                  bitmap_len_out, data_offset_out);
  757
  758        return fd;
  759 out_close:
  760        os_close_file(fd);
  761        return err;
  762}
 763
 764static int create_cow_file(char *cow_file, char *backing_file,
 765                    struct openflags flags,
 766                    int sectorsize, int alignment, int *bitmap_offset_out,
 767                    unsigned long *bitmap_len_out, int *data_offset_out)
 768{
 769        int err, fd;
 770
 771        flags.c = 1;
 772        fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
 773        if (fd < 0) {
 774                err = fd;
 775                printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
 776                       cow_file, -err);
 777                goto out;
 778        }
 779
 780        err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
 781                            bitmap_offset_out, bitmap_len_out,
 782                            data_offset_out);
 783        if (!err)
 784                return fd;
 785        os_close_file(fd);
 786 out:
 787        return err;
 788}
 789
 790static void ubd_close_dev(struct ubd *ubd_dev)
 791{
 792        os_close_file(ubd_dev->fd);
 793        if(ubd_dev->cow.file == NULL)
 794                return;
 795
 796        os_close_file(ubd_dev->cow.fd);
 797        vfree(ubd_dev->cow.bitmap);
 798        ubd_dev->cow.bitmap = NULL;
 799}
 800
 801static int ubd_open_dev(struct ubd *ubd_dev)
 802{
 803        struct openflags flags;
 804        char **back_ptr;
 805        int err, create_cow, *create_ptr;
 806        int fd;
 807
 808        ubd_dev->openflags = ubd_dev->boot_openflags;
 809        create_cow = 0;
 810        create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
 811        back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
 812
 813        fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
 814                                back_ptr, &ubd_dev->cow.bitmap_offset,
 815                                &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
 816                                create_ptr);
 817
 818        if((fd == -ENOENT) && create_cow){
 819                fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
 820                                          ubd_dev->openflags, 1 << 9, PAGE_SIZE,
 821                                          &ubd_dev->cow.bitmap_offset,
 822                                          &ubd_dev->cow.bitmap_len,
 823                                          &ubd_dev->cow.data_offset);
 824                if(fd >= 0){
 825                        printk(KERN_INFO "Creating \"%s\" as COW file for "
 826                               "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
 827                }
 828        }
 829
 830        if(fd < 0){
 831                printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
 832                       -fd);
 833                return fd;
 834        }
 835        ubd_dev->fd = fd;
 836
 837        if(ubd_dev->cow.file != NULL){
 838                blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 839
 840                err = -ENOMEM;
 841                ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
 842                if(ubd_dev->cow.bitmap == NULL){
 843                        printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
 844                        goto error;
 845                }
 846                flush_tlb_kernel_vm();
 847
 848                err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
 849                                      ubd_dev->cow.bitmap_offset,
 850                                      ubd_dev->cow.bitmap_len);
 851                if(err < 0)
 852                        goto error;
 853
 854                flags = ubd_dev->openflags;
 855                flags.w = 0;
 856                err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
 857                                    NULL, NULL, NULL, NULL);
 858                if(err < 0) goto error;
 859                ubd_dev->cow.fd = err;
 860        }
 861        return 0;
 862 error:
 863        os_close_file(ubd_dev->fd);
 864        return err;
 865}
 866
 867static void ubd_device_release(struct device *dev)
 868{
 869        struct ubd *ubd_dev = dev_get_drvdata(dev);
 870
 871        blk_cleanup_queue(ubd_dev->queue);
 872        *ubd_dev = ((struct ubd) DEFAULT_UBD);
 873}
 874
 875static int ubd_disk_register(int major, u64 size, int unit,
 876                             struct gendisk **disk_out)
 877{
 878        struct device *parent = NULL;
 879        struct gendisk *disk;
 880
 881        disk = alloc_disk(1 << UBD_SHIFT);
 882        if(disk == NULL)
 883                return -ENOMEM;
 884
 885        disk->major = major;
 886        disk->first_minor = unit << UBD_SHIFT;
 887        disk->fops = &ubd_blops;
 888        set_capacity(disk, size / 512);
 889        if (major == UBD_MAJOR)
 890                sprintf(disk->disk_name, "ubd%c", 'a' + unit);
 891        else
 892                sprintf(disk->disk_name, "ubd_fake%d", unit);
 893
 894        /* sysfs register (not for ide fake devices) */
 895        if (major == UBD_MAJOR) {
 896                ubd_devs[unit].pdev.id   = unit;
 897                ubd_devs[unit].pdev.name = DRIVER_NAME;
 898                ubd_devs[unit].pdev.dev.release = ubd_device_release;
 899                dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
 900                platform_device_register(&ubd_devs[unit].pdev);
 901                parent = &ubd_devs[unit].pdev.dev;
 902        }
 903
 904        disk->private_data = &ubd_devs[unit];
 905        disk->queue = ubd_devs[unit].queue;
 906        device_add_disk(parent, disk);
 907
 908        *disk_out = disk;
 909        return 0;
 910}
 911
 912#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
 913
 914static int ubd_add(int n, char **error_out)
 915{
 916        struct ubd *ubd_dev = &ubd_devs[n];
 917        int err = 0;
 918
 919        if(ubd_dev->file == NULL)
 920                goto out;
 921
 922        err = ubd_file_size(ubd_dev, &ubd_dev->size);
 923        if(err < 0){
 924                *error_out = "Couldn't determine size of device's file";
 925                goto out;
 926        }
 927
 928        ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
 929
 930        INIT_LIST_HEAD(&ubd_dev->restart);
 931        sg_init_table(ubd_dev->sg, MAX_SG);
 932
 933        err = -ENOMEM;
 934        ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
 935        if (ubd_dev->queue == NULL) {
 936                *error_out = "Failed to initialize device queue";
 937                goto out;
 938        }
 939        ubd_dev->queue->queuedata = ubd_dev;
 940        blk_queue_write_cache(ubd_dev->queue, true, false);
 941
 942        blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 943        err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 944        if(err){
 945                *error_out = "Failed to register device";
 946                goto out_cleanup;
 947        }
 948
 949        if (fake_major != UBD_MAJOR)
 950                ubd_disk_register(fake_major, ubd_dev->size, n,
 951                                  &fake_gendisk[n]);
 952
 953        /*
 954         * Perhaps this should also be under the "if (fake_major)" above
 955         * using the fake_disk->disk_name
 956         */
 957        if (fake_ide)
 958                make_ide_entries(ubd_gendisk[n]->disk_name);
 959
 960        err = 0;
 961out:
 962        return err;
 963
 964out_cleanup:
 965        blk_cleanup_queue(ubd_dev->queue);
 966        goto out;
 967}
 968
 969static int ubd_config(char *str, char **error_out)
 970{
 971        int n, ret;
 972
 973        /* This string is possibly broken up and stored, so it's only
 974         * freed if ubd_setup_common fails, or if only general options
 975         * were set.
 976         */
 977        str = kstrdup(str, GFP_KERNEL);
 978        if (str == NULL) {
 979                *error_out = "Failed to allocate memory";
 980                return -ENOMEM;
 981        }
 982
 983        ret = ubd_setup_common(str, &n, error_out);
 984        if (ret)
 985                goto err_free;
 986
 987        if (n == -1) {
 988                ret = 0;
 989                goto err_free;
 990        }
 991
 992        mutex_lock(&ubd_lock);
 993        ret = ubd_add(n, error_out);
 994        if (ret)
 995                ubd_devs[n].file = NULL;
 996        mutex_unlock(&ubd_lock);
 997
 998out:
 999        return ret;
1000
1001err_free:
1002        kfree(str);
1003        goto out;
1004}
1005
1006static int ubd_get_config(char *name, char *str, int size, char **error_out)
1007{
1008        struct ubd *ubd_dev;
1009        int n, len = 0;
1010
1011        n = parse_unit(&name);
1012        if((n >= MAX_DEV) || (n < 0)){
1013                *error_out = "ubd_get_config : device number out of range";
1014                return -1;
1015        }
1016
1017        ubd_dev = &ubd_devs[n];
1018        mutex_lock(&ubd_lock);
1019
1020        if(ubd_dev->file == NULL){
1021                CONFIG_CHUNK(str, size, len, "", 1);
1022                goto out;
1023        }
1024
1025        CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1026
1027        if(ubd_dev->cow.file != NULL){
1028                CONFIG_CHUNK(str, size, len, ",", 0);
1029                CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1030        }
1031        else CONFIG_CHUNK(str, size, len, "", 1);
1032
1033 out:
1034        mutex_unlock(&ubd_lock);
1035        return len;
1036}
1037
1038static int ubd_id(char **str, int *start_out, int *end_out)
1039{
1040        int n;
1041
1042        n = parse_unit(str);
1043        *start_out = 0;
1044        *end_out = MAX_DEV - 1;
1045        return n;
1046}
1047
1048static int ubd_remove(int n, char **error_out)
1049{
1050        struct gendisk *disk = ubd_gendisk[n];
1051        struct ubd *ubd_dev;
1052        int err = -ENODEV;
1053
1054        mutex_lock(&ubd_lock);
1055
1056        ubd_dev = &ubd_devs[n];
1057
1058        if(ubd_dev->file == NULL)
1059                goto out;
1060
1061        /* you cannot remove a open disk */
1062        err = -EBUSY;
1063        if(ubd_dev->count > 0)
1064                goto out;
1065
1066        ubd_gendisk[n] = NULL;
1067        if(disk != NULL){
1068                del_gendisk(disk);
1069                put_disk(disk);
1070        }
1071
1072        if(fake_gendisk[n] != NULL){
1073                del_gendisk(fake_gendisk[n]);
1074                put_disk(fake_gendisk[n]);
1075                fake_gendisk[n] = NULL;
1076        }
1077
1078        err = 0;
1079        platform_device_unregister(&ubd_dev->pdev);
1080out:
1081        mutex_unlock(&ubd_lock);
1082        return err;
1083}
1084
1085/* All these are called by mconsole in process context and without
1086 * ubd-specific locks.  The structure itself is const except for .list.
1087 */
1088static struct mc_device ubd_mc = {
1089        .list           = LIST_HEAD_INIT(ubd_mc.list),
1090        .name           = "ubd",
1091        .config         = ubd_config,
1092        .get_config     = ubd_get_config,
1093        .id             = ubd_id,
1094        .remove         = ubd_remove,
1095};
1096
1097static int __init ubd_mc_init(void)
1098{
1099        mconsole_register_dev(&ubd_mc);
1100        return 0;
1101}
1102
1103__initcall(ubd_mc_init);
1104
1105static int __init ubd0_init(void)
1106{
1107        struct ubd *ubd_dev = &ubd_devs[0];
1108
1109        mutex_lock(&ubd_lock);
1110        if(ubd_dev->file == NULL)
1111                ubd_dev->file = "root_fs";
1112        mutex_unlock(&ubd_lock);
1113
1114        return 0;
1115}
1116
1117__initcall(ubd0_init);
1118
1119/* Used in ubd_init, which is an initcall */
1120static struct platform_driver ubd_driver = {
1121        .driver = {
1122                .name  = DRIVER_NAME,
1123        },
1124};
1125
1126static int __init ubd_init(void)
1127{
1128        char *error;
1129        int i, err;
1130
1131        if (register_blkdev(UBD_MAJOR, "ubd"))
1132                return -1;
1133
1134        if (fake_major != UBD_MAJOR) {
1135                char name[sizeof("ubd_nnn\0")];
1136
1137                snprintf(name, sizeof(name), "ubd_%d", fake_major);
1138                if (register_blkdev(fake_major, "ubd"))
1139                        return -1;
1140        }
1141
1142        irq_req_buffer = kmalloc(
1143                        sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1144                        GFP_KERNEL
1145                );
1146        irq_remainder = 0;
1147
1148        if (irq_req_buffer == NULL) {
1149                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1150                return -1;
1151        }
1152        io_req_buffer = kmalloc(
1153                        sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1154                        GFP_KERNEL
1155                );
1156
1157        io_remainder = 0;
1158
1159        if (io_req_buffer == NULL) {
1160                printk(KERN_ERR "Failed to initialize ubd buffering\n");
1161                return -1;
1162        }
1163        platform_driver_register(&ubd_driver);
1164        mutex_lock(&ubd_lock);
1165        for (i = 0; i < MAX_DEV; i++){
1166                err = ubd_add(i, &error);
1167                if(err)
1168                        printk(KERN_ERR "Failed to initialize ubd device %d :"
1169                               "%s\n", i, error);
1170        }
1171        mutex_unlock(&ubd_lock);
1172        return 0;
1173}
1174
1175late_initcall(ubd_init);
1176
1177static int __init ubd_driver_init(void){
1178        unsigned long stack;
1179        int err;
1180
1181        /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1182        if(global_openflags.s){
1183                printk(KERN_INFO "ubd: Synchronous mode\n");
1184                /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1185                 * enough. So use anyway the io thread. */
1186        }
1187        stack = alloc_stack(0, 0);
1188        io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1189                                 &thread_fd);
1190        if(io_pid < 0){
1191                printk(KERN_ERR
1192                       "ubd : Failed to start I/O thread (errno = %d) - "
1193                       "falling back to synchronous I/O\n", -io_pid);
1194                io_pid = -1;
1195                return 0;
1196        }
1197        err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1198                             0, "ubd", ubd_devs);
1199        if(err != 0)
1200                printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1201        return 0;
1202}
1203
1204device_initcall(ubd_driver_init);
1205
1206static int ubd_open(struct block_device *bdev, fmode_t mode)
1207{
1208        struct gendisk *disk = bdev->bd_disk;
1209        struct ubd *ubd_dev = disk->private_data;
1210        int err = 0;
1211
1212        mutex_lock(&ubd_mutex);
1213        if(ubd_dev->count == 0){
1214                err = ubd_open_dev(ubd_dev);
1215                if(err){
1216                        printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1217                               disk->disk_name, ubd_dev->file, -err);
1218                        goto out;
1219                }
1220        }
1221        ubd_dev->count++;
1222        set_disk_ro(disk, !ubd_dev->openflags.w);
1223
1224        /* This should no more be needed. And it didn't work anyway to exclude
1225         * read-write remounting of filesystems.*/
1226        /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1227                if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1228                err = -EROFS;
1229        }*/
1230out:
1231        mutex_unlock(&ubd_mutex);
1232        return err;
1233}
1234
1235static void ubd_release(struct gendisk *disk, fmode_t mode)
1236{
1237        struct ubd *ubd_dev = disk->private_data;
1238
1239        mutex_lock(&ubd_mutex);
1240        if(--ubd_dev->count == 0)
1241                ubd_close_dev(ubd_dev);
1242        mutex_unlock(&ubd_mutex);
1243}
1244
1245static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1246                          __u64 *cow_offset, unsigned long *bitmap,
1247                          __u64 bitmap_offset, unsigned long *bitmap_words,
1248                          __u64 bitmap_len)
1249{
1250        __u64 sector = io_offset >> 9;
1251        int i, update_bitmap = 0;
1252
1253        for(i = 0; i < length >> 9; i++){
1254                if(cow_mask != NULL)
1255                        ubd_set_bit(i, (unsigned char *) cow_mask);
1256                if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1257                        continue;
1258
1259                update_bitmap = 1;
1260                ubd_set_bit(sector + i, (unsigned char *) bitmap);
1261        }
1262
1263        if(!update_bitmap)
1264                return;
1265
1266        *cow_offset = sector / (sizeof(unsigned long) * 8);
1267
1268        /* This takes care of the case where we're exactly at the end of the
1269         * device, and *cow_offset + 1 is off the end.  So, just back it up
1270         * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1271         * for the original diagnosis.
1272         */
1273        if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1274                                         sizeof(unsigned long)) - 1))
1275                (*cow_offset)--;
1276
1277        bitmap_words[0] = bitmap[*cow_offset];
1278        bitmap_words[1] = bitmap[*cow_offset + 1];
1279
1280        *cow_offset *= sizeof(unsigned long);
1281        *cow_offset += bitmap_offset;
1282}
1283
1284static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1285                       __u64 bitmap_offset, __u64 bitmap_len)
1286{
1287        __u64 sector = req->offset >> 9;
1288        int i;
1289
1290        if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1291                panic("Operation too long");
1292
1293        if(req->op == UBD_READ) {
1294                for(i = 0; i < req->length >> 9; i++){
1295                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1296                                ubd_set_bit(i, (unsigned char *)
1297                                            &req->sector_mask);
1298                }
1299        }
1300        else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1301                           &req->cow_offset, bitmap, bitmap_offset,
1302                           req->bitmap_words, bitmap_len);
1303}
1304
1305/* Called with dev->lock held */
1306static void prepare_request(struct request *req, struct io_thread_req *io_req,
1307                            unsigned long long offset, int page_offset,
1308                            int len, struct page *page)
1309{
1310        struct gendisk *disk = req->rq_disk;
1311        struct ubd *ubd_dev = disk->private_data;
1312
1313        io_req->req = req;
1314        io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1315                ubd_dev->fd;
1316        io_req->fds[1] = ubd_dev->fd;
1317        io_req->cow_offset = -1;
1318        io_req->offset = offset;
1319        io_req->length = len;
1320        io_req->error = 0;
1321        io_req->sector_mask = 0;
1322
1323        io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1324        io_req->offsets[0] = 0;
1325        io_req->offsets[1] = ubd_dev->cow.data_offset;
1326        io_req->buffer = page_address(page) + page_offset;
1327        io_req->sectorsize = 1 << 9;
1328
1329        if(ubd_dev->cow.file != NULL)
1330                cowify_req(io_req, ubd_dev->cow.bitmap,
1331                           ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1332
1333}
1334
1335/* Called with dev->lock held */
1336static void prepare_flush_request(struct request *req,
1337                                  struct io_thread_req *io_req)
1338{
1339        struct gendisk *disk = req->rq_disk;
1340        struct ubd *ubd_dev = disk->private_data;
1341
1342        io_req->req = req;
1343        io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1344                ubd_dev->fd;
1345        io_req->op = UBD_FLUSH;
1346}
1347
1348static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1349{
1350        int n = os_write_file(thread_fd, &io_req,
1351                             sizeof(io_req));
1352        if (n != sizeof(io_req)) {
1353                if (n != -EAGAIN)
1354                        printk("write to io thread failed, "
1355                               "errno = %d\n", -n);
1356                else if (list_empty(&dev->restart))
1357                        list_add(&dev->restart, &restart);
1358
1359                kfree(io_req);
1360                return false;
1361        }
1362        return true;
1363}
1364
1365/* Called with dev->lock held */
1366static void do_ubd_request(struct request_queue *q)
1367{
1368        struct io_thread_req *io_req;
1369        struct request *req;
1370
1371        while(1){
1372                struct ubd *dev = q->queuedata;
1373                if(dev->request == NULL){
1374                        struct request *req = blk_fetch_request(q);
1375                        if(req == NULL)
1376                                return;
1377
1378                        dev->request = req;
1379                        dev->rq_pos = blk_rq_pos(req);
1380                        dev->start_sg = 0;
1381                        dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1382                }
1383
1384                req = dev->request;
1385
1386                if (req_op(req) == REQ_OP_FLUSH) {
1387                        io_req = kmalloc(sizeof(struct io_thread_req),
1388                                         GFP_ATOMIC);
1389                        if (io_req == NULL) {
1390                                if (list_empty(&dev->restart))
1391                                        list_add(&dev->restart, &restart);
1392                                return;
1393                        }
1394                        prepare_flush_request(req, io_req);
1395                        if (submit_request(io_req, dev) == false)
1396                                return;
1397                }
1398
1399                while(dev->start_sg < dev->end_sg){
1400                        struct scatterlist *sg = &dev->sg[dev->start_sg];
1401
1402                        io_req = kmalloc(sizeof(struct io_thread_req),
1403                                         GFP_ATOMIC);
1404                        if(io_req == NULL){
1405                                if(list_empty(&dev->restart))
1406                                        list_add(&dev->restart, &restart);
1407                                return;
1408                        }
1409                        prepare_request(req, io_req,
1410                                        (unsigned long long)dev->rq_pos << 9,
1411                                        sg->offset, sg->length, sg_page(sg));
1412
1413                        if (submit_request(io_req, dev) == false)
1414                                return;
1415
1416                        dev->rq_pos += sg->length >> 9;
1417                        dev->start_sg++;
1418                }
1419                dev->end_sg = 0;
1420                dev->request = NULL;
1421        }
1422}
1423
1424static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1425{
1426        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1427
1428        geo->heads = 128;
1429        geo->sectors = 32;
1430        geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1431        return 0;
1432}
1433
1434static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1435                     unsigned int cmd, unsigned long arg)
1436{
1437        struct ubd *ubd_dev = bdev->bd_disk->private_data;
1438        u16 ubd_id[ATA_ID_WORDS];
1439
1440        switch (cmd) {
1441                struct cdrom_volctrl volume;
1442        case HDIO_GET_IDENTITY:
1443                memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1444                ubd_id[ATA_ID_CYLS]     = ubd_dev->size / (128 * 32 * 512);
1445                ubd_id[ATA_ID_HEADS]    = 128;
1446                ubd_id[ATA_ID_SECTORS]  = 32;
1447                if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1448                                 sizeof(ubd_id)))
1449                        return -EFAULT;
1450                return 0;
1451
1452        case CDROMVOLREAD:
1453                if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1454                        return -EFAULT;
1455                volume.channel0 = 255;
1456                volume.channel1 = 255;
1457                volume.channel2 = 255;
1458                volume.channel3 = 255;
1459                if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1460                        return -EFAULT;
1461                return 0;
1462        }
1463        return -EINVAL;
1464}
1465
1466static int update_bitmap(struct io_thread_req *req)
1467{
1468        int n;
1469
1470        if(req->cow_offset == -1)
1471                return 0;
1472
1473        n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1474                          sizeof(req->bitmap_words), req->cow_offset);
1475        if(n != sizeof(req->bitmap_words)){
1476                printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1477                       req->fds[1]);
1478                return 1;
1479        }
1480
1481        return 0;
1482}
1483
1484static void do_io(struct io_thread_req *req)
1485{
1486        char *buf;
1487        unsigned long len;
1488        int n, nsectors, start, end, bit;
1489        __u64 off;
1490
1491        if (req->op == UBD_FLUSH) {
1492                /* fds[0] is always either the rw image or our cow file */
1493                n = os_sync_file(req->fds[0]);
1494                if (n != 0) {
1495                        printk("do_io - sync failed err = %d "
1496                               "fd = %d\n", -n, req->fds[0]);
1497                        req->error = 1;
1498                }
1499                return;
1500        }
1501
1502        nsectors = req->length / req->sectorsize;
1503        start = 0;
1504        do {
1505                bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1506                end = start;
1507                while((end < nsectors) &&
1508                      (ubd_test_bit(end, (unsigned char *)
1509                                    &req->sector_mask) == bit))
1510                        end++;
1511
1512                off = req->offset + req->offsets[bit] +
1513                        start * req->sectorsize;
1514                len = (end - start) * req->sectorsize;
1515                buf = &req->buffer[start * req->sectorsize];
1516
1517                if(req->op == UBD_READ){
1518                        n = 0;
1519                        do {
1520                                buf = &buf[n];
1521                                len -= n;
1522                                n = os_pread_file(req->fds[bit], buf, len, off);
1523                                if (n < 0) {
1524                                        printk("do_io - read failed, err = %d "
1525                                               "fd = %d\n", -n, req->fds[bit]);
1526                                        req->error = 1;
1527                                        return;
1528                                }
1529                        } while((n < len) && (n != 0));
1530                        if (n < len) memset(&buf[n], 0, len - n);
1531                } else {
1532                        n = os_pwrite_file(req->fds[bit], buf, len, off);
1533                        if(n != len){
1534                                printk("do_io - write failed err = %d "
1535                                       "fd = %d\n", -n, req->fds[bit]);
1536                                req->error = 1;
1537                                return;
1538                        }
1539                }
1540
1541                start = end;
1542        } while(start < nsectors);
1543
1544        req->error = update_bitmap(req);
1545}
1546
/*
 * File descriptor the I/O thread reads requests from and writes completions
 * to (see io_thread).  Per the original note it is changed in
 * start_io_thread; within this file that function is called only from
 * ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/* Incremented only by the I/O thread, never read. XXX: currently unused. */
static int io_count;
1554
1555int io_thread(void *arg)
1556{
1557        int n, count, written, res;
1558
1559        os_fix_helper_signals();
1560
1561        while(1){
1562                n = bulk_req_safe_read(
1563                        kernel_fd,
1564                        io_req_buffer,
1565                        &io_remainder,
1566                        &io_remainder_size,
1567                        UBD_REQ_BUFFER_SIZE
1568                );
1569                if (n < 0) {
1570                        if (n == -EAGAIN) {
1571                                ubd_read_poll(-1);
1572                                continue;
1573                        } else {
1574                                printk("io_thread - read failed, fd = %d, "
1575                                       "err = %d,"
1576                                       "reminder = %d\n",
1577                                       kernel_fd, -n, io_remainder_size);
1578                        }
1579                }
1580
1581                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1582                        io_count++;
1583                        do_io((*io_req_buffer)[count]);
1584                }
1585
1586                written = 0;
1587
1588                do {
1589                        res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1590                        if (res >= 0) {
1591                                written += res;
1592                        } else {
1593                                if (res != -EAGAIN) {
1594                                        printk("io_thread - write failed, fd = %d, "
1595                                               "err = %d\n", kernel_fd, -n);
1596                                }
1597                        }
1598                        if (written < n) {
1599                                ubd_write_poll(-1);
1600                        }
1601                } while (written < n);
1602        }
1603
1604        return 0;
1605}
1606