1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/*
 * Each ubd unit gets 1 << UBD_SHIFT minors: the gendisk is allocated with
 * that many minors and its first minor is unit << UBD_SHIFT.
 */
#define UBD_SHIFT 4
22
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/blkdev.h>
26#include <linux/ata.h>
27#include <linux/hdreg.h>
28#include <linux/cdrom.h>
29#include <linux/proc_fs.h>
30#include <linux/seq_file.h>
31#include <linux/ctype.h>
32#include <linux/slab.h>
33#include <linux/vmalloc.h>
34#include <linux/platform_device.h>
35#include <linux/scatterlist.h>
36#include <asm/tlbflush.h>
37#include <kern_util.h>
38#include "mconsole_kern.h"
39#include <init.h>
40#include <irq_kern.h>
41#include "ubd.h"
42#include <os.h>
43#include "cow.h"
44
/* Operation requested from the I/O thread for one io_thread_req. */
enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };

/*
 * One piece of work handed to the I/O thread.  A pointer to this structure
 * is written to thread_fd by submit_request() and read back by
 * ubd_handler(), which completes the block request and frees the structure.
 */
struct io_thread_req {
	struct request *req;		/* block-layer request this piece belongs to */
	enum ubd_req op;		/* read, write or flush */
	int fds[2];			/* [0]: cow.fd when a backing file is set, else the device fd; [1]: the device fd */
	unsigned long offsets[2];	/* [0]: 0; [1]: cow.data_offset */
	unsigned long long offset;	/* byte offset of the I/O on the device */
	unsigned long length;		/* length of the I/O in bytes */
	char *buffer;			/* kernel address of the data */
	int sectorsize;			/* set to 1 << 9 by prepare_request() */
	unsigned long sector_mask;	/* one bit per sector of this request, filled in by cowify_req() */
	unsigned long long cow_offset;	/* file offset of bitmap_words, or -1 when no bitmap update is needed */
	unsigned long bitmap_words[2];	/* the two bitmap words covering the written sectors */
	int error;			/* cleared by prepare_request() */
};
61
62
/*
 * Buffer of request pointers read back from the I/O thread in
 * ubd_handler(), plus storage for a trailing partial pointer: the
 * *_remainder variable itself holds the leftover bytes of a short read and
 * *_remainder_size says how many of them are valid.
 */
static struct io_thread_req * (*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * Same layout as above; allocated in ubd_init().
 * NOTE(review): presumably used by the I/O thread side, which is not
 * visible in this part of the file - confirm.
 */
static struct io_thread_req * (*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
70
71
72
/*
 * Test bit number @bit in the byte array @data.  Bits are numbered from
 * the least significant bit of data[0] upwards.
 *
 * Returns 1 when the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = 8 * sizeof(data[0]);
	__u64 index = bit / width;
	int shift = bit % width;

	return !!(data[index] & (1 << shift));
}
83
/*
 * Set bit number @bit in the byte array @data.  Bits are numbered from the
 * least significant bit of data[0] upwards.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = 8 * sizeof(data[0]);
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
94
95
/* Name shared by the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* ubd_lock is taken around configuration changes to ubd_devs and fake_major. */
static DEFINE_MUTEX(ubd_lock);
/* ubd_mutex serializes ubd_open() and ubd_release(). */
static DEFINE_MUTEX(ubd_mutex);

static int ubd_open(struct block_device *bdev, fmode_t mode);
static void ubd_release(struct gendisk *disk, fmode_t mode);
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units; size of ubd_devs and of the gendisk tables. */
#define MAX_DEV (16)

/* Block device operations shared by the real and the fake-major disks. */
static const struct block_device_operations ubd_blops = {
	.owner = THIS_MODULE,
	.open = ubd_open,
	.release = ubd_release,
	.ioctl = ubd_ioctl,
	.getgeo = ubd_getgeo,
};
116
117
/*
 * Extra major number under which every unit is registered a second time.
 * It stays equal to UBD_MAJOR until "ubd=<major>" is given, which means
 * "no fake major".
 */
static int fake_major = UBD_MAJOR;
/* Per-unit disks registered under UBD_MAJOR and under fake_major. */
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];

/*
 * Default open flags for device files.  The two variants differ only in
 * the .s flag, which is set when CONFIG_BLK_DEV_UBD_SYNC is enabled.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Flags used as the starting point when parsing each "ubd<n>..." option. */
static struct openflags global_openflags = OPEN_FLAGS;
130
/*
 * Copy-on-write state of one ubd unit.  When cow.file is set, the unit's
 * own file is the COW file and cow.file names the backing file.
 */
struct cow {

	char *file;			/* backing file name, NULL when there is none */

	int fd;				/* backing file descriptor, opened read-only in ubd_open_dev() */
	unsigned long *bitmap;		/* vmalloc'ed copy of the COW bitmap */
	unsigned long bitmap_len;	/* size passed to vmalloc() and to the bitmap read */
	int bitmap_offset;		/* offset of the bitmap within the COW file */
	int data_offset;		/* copied into io_thread_req.offsets[1] */
};
141
/* Size of the per-device scatterlist and the queue's segment limit. */
#define MAX_SG 64

/* State of one ubd unit; instances live in the ubd_devs[] table. */
struct ubd {
	struct list_head restart;	/* entry on the global restart list while the queue is stalled */


	char *file;			/* device file name, NULL when the unit is not configured */
	int count;			/* open count, maintained by ubd_open()/ubd_release() */
	int fd;				/* descriptor of the device file */
	__u64 size;			/* device size in bytes, rounded by ROUND_BLOCK() */
	struct openflags boot_openflags;	/* flags parsed from the configuration string */
	struct openflags openflags;	/* flags in effect, copied from boot_openflags on open */
	unsigned shared:1;		/* 'c' flag: do not lock the file on the host */
	unsigned no_cow:1;		/* 'd' flag: do not look for a COW header */
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	spinlock_t lock;		/* queue lock handed to blk_init_queue() */
	struct scatterlist sg[MAX_SG];	/* mapping of the request currently being submitted */
	struct request *request;	/* request currently being submitted, or NULL */
	int start_sg, end_sg;		/* next and one-past-last sg entry still to submit */
	sector_t rq_pos;		/* sector position of the next sg entry */
};
165
/* Initializer for an unused struct cow: no file, invalid fd, no bitmap. */
#define DEFAULT_COW { \
	.file = NULL, \
	.fd = -1, \
	.bitmap = NULL, \
	.bitmap_offset = 0, \
	.data_offset = 0, \
}

/*
 * Initializer for an unconfigured struct ubd.  It is also assigned to a
 * unit by ubd_device_release() to reset it.
 */
#define DEFAULT_UBD { \
	.file = NULL, \
	.count = 0, \
	.fd = -1, \
	.size = -1, \
	.boot_openflags = OPEN_FLAGS, \
	.openflags = OPEN_FLAGS, \
	.no_cow = 0, \
	.shared = 0, \
	.cow = DEFAULT_COW, \
	.lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request = NULL, \
	.start_sg = 0, \
	.end_sg = 0, \
	.rq_pos = 0, \
}


/* Table of all ubd units, indexed by unit number. */
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
193
194
/* Set by the "fake_ide" boot option; makes ubd_add() create /proc/ide entries. */
static int fake_ide = 0;
/* The "ide" and "ide/ide0" proc directories, created on first use. */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
198
199static void make_proc_ide(void)
200{
201 proc_ide_root = proc_mkdir("ide", NULL);
202 proc_ide = proc_mkdir("ide0", proc_ide_root);
203}
204
/* seq_file show callback for the fake "media" entry: always prints "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");
	return 0;
}
210
/* Open callback for the fake "media" entry; binds the show routine above. */
static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, fake_ide_media_proc_show, NULL);
}
215
/* File operations of the fake "media" proc entry (single-record seq_file). */
static const struct file_operations fake_ide_media_proc_fops = {
	.owner = THIS_MODULE,
	.open = fake_ide_media_proc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
223
224static void make_ide_entries(const char *dev_name)
225{
226 struct proc_dir_entry *dir, *ent;
227 char name[64];
228
229 if(proc_ide_root == NULL) make_proc_ide();
230
231 dir = proc_mkdir(dev_name, proc_ide);
232 if(!dir) return;
233
234 ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
235 if(!ent) return;
236 snprintf(name, sizeof(name), "ide0/%s", dev_name);
237 proc_symlink(dev_name, proc_ide_root, name);
238}
239
/* Handler for the "fake_ide" boot option: just records that it was given. */
static int fake_ide_setup(char *str)
{
	fake_ide = 1;
	return 1;
}

__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
" Create ide0 entries that map onto ubd devices.\n\n"
);
252
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, where 'a' is
 * unit 0.  On success *ptr is advanced past the designator.
 *
 * Returns the unit number, or -1 if nothing could be parsed.  The value is
 * not range-checked against MAX_DEV; callers do that.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
271
272
273
274
275
276static int ubd_setup_common(char *str, int *index_out, char **error_out)
277{
278 struct ubd *ubd_dev;
279 struct openflags flags = global_openflags;
280 char *backing_file;
281 int n, err = 0, i;
282
283 if(index_out) *index_out = -1;
284 n = *str;
285 if(n == '='){
286 char *end;
287 int major;
288
289 str++;
290 if(!strcmp(str, "sync")){
291 global_openflags = of_sync(global_openflags);
292 goto out1;
293 }
294
295 err = -EINVAL;
296 major = simple_strtoul(str, &end, 0);
297 if((*end != '\0') || (end == str)){
298 *error_out = "Didn't parse major number";
299 goto out1;
300 }
301
302 mutex_lock(&ubd_lock);
303 if (fake_major != UBD_MAJOR) {
304 *error_out = "Can't assign a fake major twice";
305 goto out1;
306 }
307
308 fake_major = major;
309
310 printk(KERN_INFO "Setting extra ubd major number to %d\n",
311 major);
312 err = 0;
313 out1:
314 mutex_unlock(&ubd_lock);
315 return err;
316 }
317
318 n = parse_unit(&str);
319 if(n < 0){
320 *error_out = "Couldn't parse device number";
321 return -EINVAL;
322 }
323 if(n >= MAX_DEV){
324 *error_out = "Device number out of range";
325 return 1;
326 }
327
328 err = -EBUSY;
329 mutex_lock(&ubd_lock);
330
331 ubd_dev = &ubd_devs[n];
332 if(ubd_dev->file != NULL){
333 *error_out = "Device is already configured";
334 goto out;
335 }
336
337 if (index_out)
338 *index_out = n;
339
340 err = -EINVAL;
341 for (i = 0; i < sizeof("rscd="); i++) {
342 switch (*str) {
343 case 'r':
344 flags.w = 0;
345 break;
346 case 's':
347 flags.s = 1;
348 break;
349 case 'd':
350 ubd_dev->no_cow = 1;
351 break;
352 case 'c':
353 ubd_dev->shared = 1;
354 break;
355 case '=':
356 str++;
357 goto break_loop;
358 default:
359 *error_out = "Expected '=' or flag letter "
360 "(r, s, c, or d)";
361 goto out;
362 }
363 str++;
364 }
365
366 if (*str == '=')
367 *error_out = "Too many flags specified";
368 else
369 *error_out = "Missing '='";
370 goto out;
371
372break_loop:
373 backing_file = strchr(str, ',');
374
375 if (backing_file == NULL)
376 backing_file = strchr(str, ':');
377
378 if(backing_file != NULL){
379 if(ubd_dev->no_cow){
380 *error_out = "Can't specify both 'd' and a cow file";
381 goto out;
382 }
383 else {
384 *backing_file = '\0';
385 backing_file++;
386 }
387 }
388 err = 0;
389 ubd_dev->file = str;
390 ubd_dev->cow.file = backing_file;
391 ubd_dev->boot_openflags = flags;
392out:
393 mutex_unlock(&ubd_lock);
394 return err;
395}
396
/*
 * Boot-time handler for "ubd..." options.  Parse failures are reported with
 * printk; the function returns 1 in every case.
 */
static int ubd_setup(char *str)
{
	char *error;
	int err;

	err = ubd_setup_common(str, NULL, &error);
	if(err)
		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
		       "%s\n", str, error);
	return 1;
}

__setup("ubd", ubd_setup);
/*
 * NOTE(review): the help text below says <n> must be in the range 0 to 7,
 * while ubd_setup_common() accepts any unit below MAX_DEV (16) - confirm
 * which one is intended.
 */
__uml_help(ubd_setup,
"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
" This is used to associate a device with a file in the underlying\n"
" filesystem. When specifying two filenames, the first one is the\n"
" COW name and the second is the backing file name. As separator you can\n"
" use either a ':' or a ',': the first one allows writing things like;\n"
" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
" while with a ',' the shell would not expand the 2nd '~'.\n"
" When using only one filename, UML will detect whether to treat it like\n"
" a COW file or a backing file. To override this detection, add the 'd'\n"
" flag:\n"
" ubd0d=BackingFile\n"
" Usually, there is a filesystem in the file, but \n"
" that's not required. Swap devices containing swap files can be\n"
" specified like this. Also, a file which doesn't contain a\n"
" filesystem can have its contents read in the virtual \n"
" machine by running 'dd' on the device. <n> must be in the range\n"
" 0 to 7. Appending an 'r' to the number will cause that device\n"
" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
" an 's' will cause data to be written to disk on the host immediately.\n"
" 'c' will cause the device to be treated as being shared between multiple\n"
" UMLs and file locking will be turned off - this is appropriate for a\n"
" cluster filesystem and inappropriate at almost all other times.\n\n"
);
434
/* Catches the "udb" misspelling of "ubd" on the command line and warns. */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
	return 1;
}

__setup("udb", udb_setup);
__uml_help(udb_setup,
"udb\n"
" This option is here solely to catch ubd -> udb typos, which can be\n"
" to impossible to catch visually unless you specifically look for\n"
" them. The only result of any option starting with 'udb' is an error\n"
" in the boot output.\n\n"
);
450
static void do_ubd_request(struct request_queue * q);

/* Descriptor used to exchange io_thread_req pointers with the I/O thread. */
static int thread_fd = -1;
/* Devices whose queue must be re-run after the next completion interrupt. */
static LIST_HEAD(restart);
456
457
458
459
460
/*
 * Read as many whole request pointers as are available from @fd.
 *
 * @request_buffer: destination array with room for @max_recs pointers.
 * @remainder:      storage for a trailing partial pointer; the bytes are
 *                  kept in the pointer variable itself.
 * @remainder_size: number of valid bytes in @remainder.  Bytes carried over
 *                  from the previous call are placed at the start of the
 *                  buffer, ahead of the newly read data.
 * @max_recs:       capacity of @request_buffer in pointers.
 *
 * Returns the number of bytes of complete pointers now in @request_buffer
 * (always a multiple of the pointer size), or the non-positive result of
 * os_read_file() when nothing was read.  In the latter case the remainder
 * is left untouched so that it is retried on the next call.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	const size_t rec_size = sizeof(struct io_thread_req *);
	int n = 0;
	int res = 0;

	if (*remainder_size > 0) {
		memmove(
			(char *) request_buffer,
			(char *) remainder, *remainder_size
		);
		n = *remainder_size;
	}

	res = os_read_file(
			fd,
			((char *) request_buffer) + *remainder_size,
			rec_size * max_recs - *remainder_size
		);
	if (res <= 0)
		return res;

	n += res;
	/*
	 * Always recompute the remainder: once the carried-over bytes have
	 * been completed by this read they must not be replayed again.
	 */
	*remainder_size = n % rec_size;
	if (*remainder_size > 0) {
		/*
		 * The read returned something that is not a multiple of the
		 * pointer size; stash the partial pointer for the next call.
		 */
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(
			remainder,
			((char *) request_buffer) + (n / rec_size) * rec_size,
			*remainder_size
		);
		n = n - *remainder_size;
	}
	return n;
}
509
510
/*
 * Completion handler: read finished request pointers back from the I/O
 * thread, complete the corresponding block requests, then re-run the
 * queues of all devices that are on the restart list.
 */
static void ubd_handler(void)
{
	struct ubd *ubd;
	struct list_head *list, *next_ele;
	unsigned long flags;
	int n;
	int count;

	/* Drain thread_fd until the read reports -EAGAIN */
	while(1){
		n = bulk_req_safe_read(
			thread_fd,
			irq_req_buffer,
			&irq_remainder,
			&irq_remainder_size,
			UBD_REQ_BUFFER_SIZE
		);
		if (n < 0) {
			if(n == -EAGAIN)
				break;
			printk(KERN_ERR "spurious interrupt in ubd_handler, "
			       "err = %d\n", -n);
			return;
		}
		/*
		 * n is a byte count; each completed request is ended with
		 * status 0 and freed.
		 * NOTE(review): the request's ->error field is not consulted
		 * here - confirm that this is intended.
		 */
		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
			blk_end_request(
				(*irq_req_buffer)[count]->req,
				0,
				(*irq_req_buffer)[count]->length
			);
			kfree((*irq_req_buffer)[count]);
		}
	}
	reactivate_fd(thread_fd, UBD_IRQ);

	/* Retry submission for every device that stalled earlier */
	list_for_each_safe(list, next_ele, &restart){
		ubd = container_of(list, struct ubd, restart);
		list_del_init(&ubd->restart);
		spin_lock_irqsave(&ubd->lock, flags);
		do_ubd_request(ubd->queue);
		spin_unlock_irqrestore(&ubd->lock, flags);
	}
}
553
/* Interrupt entry point for UBD_IRQ; all the work is done in ubd_handler(). */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
559
560
/* Process id of the I/O thread, or -1 when it is not running. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
570
571static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
572{
573 char *file;
574 int fd;
575 int err;
576
577 __u32 version;
578 __u32 align;
579 char *backing_file;
580 time_t mtime;
581 unsigned long long size;
582 int sector_size;
583 int bitmap_offset;
584
585 if (ubd_dev->file && ubd_dev->cow.file) {
586 file = ubd_dev->cow.file;
587
588 goto out;
589 }
590
591 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
592 if (fd < 0)
593 return fd;
594
595 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
596 &mtime, &size, §or_size, &align, &bitmap_offset);
597 os_close_file(fd);
598
599 if(err == -EINVAL)
600 file = ubd_dev->file;
601 else
602 file = backing_file;
603
604out:
605 return os_file_size(file, size_out);
606}
607
/*
 * Read @len bytes of COW bitmap from @fd at @offset into @buf.
 *
 * Returns 0 on success or the negative error from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
618
619static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
620{
621 unsigned long modtime;
622 unsigned long long actual;
623 int err;
624
625 err = os_file_modtime(file, &modtime);
626 if (err < 0) {
627 printk(KERN_ERR "Failed to get modification time of backing "
628 "file \"%s\", err = %d\n", file, -err);
629 return err;
630 }
631
632 err = os_file_size(file, &actual);
633 if (err < 0) {
634 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
635 "err = %d\n", file, -err);
636 return err;
637 }
638
639 if (actual != size) {
640
641
642 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
643 "vs backing file\n", (unsigned long long) size, actual);
644 return -EINVAL;
645 }
646 if (modtime != mtime) {
647 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
648 "backing file\n", mtime, modtime);
649 return -EINVAL;
650 }
651 return 0;
652}
653
654static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
655{
656 struct uml_stat buf1, buf2;
657 int err;
658
659 if (from_cmdline == NULL)
660 return 0;
661 if (!strcmp(from_cmdline, from_cow))
662 return 0;
663
664 err = os_stat_file(from_cmdline, &buf1);
665 if (err < 0) {
666 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
667 -err);
668 return 0;
669 }
670 err = os_stat_file(from_cow, &buf2);
671 if (err < 0) {
672 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
673 -err);
674 return 1;
675 }
676 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
677 return 0;
678
679 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
680 "\"%s\" specified in COW header of \"%s\"\n",
681 from_cmdline, from_cow, cow);
682 return 1;
683}
684
685static int open_ubd_file(char *file, struct openflags *openflags, int shared,
686 char **backing_file_out, int *bitmap_offset_out,
687 unsigned long *bitmap_len_out, int *data_offset_out,
688 int *create_cow_out)
689{
690 time_t mtime;
691 unsigned long long size;
692 __u32 version, align;
693 char *backing_file;
694 int fd, err, sectorsize, asked_switch, mode = 0644;
695
696 fd = os_open_file(file, *openflags, mode);
697 if (fd < 0) {
698 if ((fd == -ENOENT) && (create_cow_out != NULL))
699 *create_cow_out = 1;
700 if (!openflags->w ||
701 ((fd != -EROFS) && (fd != -EACCES)))
702 return fd;
703 openflags->w = 0;
704 fd = os_open_file(file, *openflags, mode);
705 if (fd < 0)
706 return fd;
707 }
708
709 if (shared)
710 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
711 else {
712 err = os_lock_file(fd, openflags->w);
713 if (err < 0) {
714 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
715 file, -err);
716 goto out_close;
717 }
718 }
719
720
721 if (backing_file_out == NULL)
722 return fd;
723
724 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
725 &size, §orsize, &align, bitmap_offset_out);
726 if (err && (*backing_file_out != NULL)) {
727 printk(KERN_ERR "Failed to read COW header from COW file "
728 "\"%s\", errno = %d\n", file, -err);
729 goto out_close;
730 }
731 if (err)
732 return fd;
733
734 asked_switch = path_requires_switch(*backing_file_out, backing_file,
735 file);
736
737
738 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
739 mtime)) {
740 printk(KERN_ERR "Switching backing file to '%s'\n",
741 *backing_file_out);
742 err = write_cow_header(file, fd, *backing_file_out,
743 sectorsize, align, &size);
744 if (err) {
745 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
746 goto out_close;
747 }
748 } else {
749 *backing_file_out = backing_file;
750 err = backing_file_mismatch(*backing_file_out, size, mtime);
751 if (err)
752 goto out_close;
753 }
754
755 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
756 bitmap_len_out, data_offset_out);
757
758 return fd;
759 out_close:
760 os_close_file(fd);
761 return err;
762}
763
764static int create_cow_file(char *cow_file, char *backing_file,
765 struct openflags flags,
766 int sectorsize, int alignment, int *bitmap_offset_out,
767 unsigned long *bitmap_len_out, int *data_offset_out)
768{
769 int err, fd;
770
771 flags.c = 1;
772 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
773 if (fd < 0) {
774 err = fd;
775 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
776 cow_file, -err);
777 goto out;
778 }
779
780 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
781 bitmap_offset_out, bitmap_len_out,
782 data_offset_out);
783 if (!err)
784 return fd;
785 os_close_file(fd);
786 out:
787 return err;
788}
789
790static void ubd_close_dev(struct ubd *ubd_dev)
791{
792 os_close_file(ubd_dev->fd);
793 if(ubd_dev->cow.file == NULL)
794 return;
795
796 os_close_file(ubd_dev->cow.fd);
797 vfree(ubd_dev->cow.bitmap);
798 ubd_dev->cow.bitmap = NULL;
799}
800
801static int ubd_open_dev(struct ubd *ubd_dev)
802{
803 struct openflags flags;
804 char **back_ptr;
805 int err, create_cow, *create_ptr;
806 int fd;
807
808 ubd_dev->openflags = ubd_dev->boot_openflags;
809 create_cow = 0;
810 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
811 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
812
813 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
814 back_ptr, &ubd_dev->cow.bitmap_offset,
815 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
816 create_ptr);
817
818 if((fd == -ENOENT) && create_cow){
819 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
820 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
821 &ubd_dev->cow.bitmap_offset,
822 &ubd_dev->cow.bitmap_len,
823 &ubd_dev->cow.data_offset);
824 if(fd >= 0){
825 printk(KERN_INFO "Creating \"%s\" as COW file for "
826 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
827 }
828 }
829
830 if(fd < 0){
831 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
832 -fd);
833 return fd;
834 }
835 ubd_dev->fd = fd;
836
837 if(ubd_dev->cow.file != NULL){
838 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
839
840 err = -ENOMEM;
841 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
842 if(ubd_dev->cow.bitmap == NULL){
843 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
844 goto error;
845 }
846 flush_tlb_kernel_vm();
847
848 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
849 ubd_dev->cow.bitmap_offset,
850 ubd_dev->cow.bitmap_len);
851 if(err < 0)
852 goto error;
853
854 flags = ubd_dev->openflags;
855 flags.w = 0;
856 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
857 NULL, NULL, NULL, NULL);
858 if(err < 0) goto error;
859 ubd_dev->cow.fd = err;
860 }
861 return 0;
862 error:
863 os_close_file(ubd_dev->fd);
864 return err;
865}
866
/*
 * Release callback of the unit's platform device: tear down the request
 * queue and reset the whole unit to its unconfigured defaults.
 */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev_get_drvdata(dev);

	blk_cleanup_queue(ubd_dev->queue);
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
874
/*
 * Allocate and register a gendisk for unit @unit under @major.
 *
 * @size is the device size in bytes.  For UBD_MAJOR the unit's platform
 * device is registered as well and becomes the parent of the disk; disks
 * on any other major are named "ubd_fake<unit>" and have no parent.
 *
 * Returns 0 and stores the disk in @disk_out, or -ENOMEM.
 */
static int ubd_disk_register(int major, u64 size, int unit,
			     struct gendisk **disk_out)
{
	struct device *parent = NULL;
	struct gendisk *disk;

	disk = alloc_disk(1 << UBD_SHIFT);
	if(disk == NULL)
		return -ENOMEM;

	disk->major = major;
	disk->first_minor = unit << UBD_SHIFT;
	disk->fops = &ubd_blops;
	/* Capacity is expressed in 512-byte sectors */
	set_capacity(disk, size / 512);
	if (major == UBD_MAJOR)
		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
	else
		sprintf(disk->disk_name, "ubd_fake%d", unit);


	if (major == UBD_MAJOR) {
		ubd_devs[unit].pdev.id = unit;
		ubd_devs[unit].pdev.name = DRIVER_NAME;
		ubd_devs[unit].pdev.dev.release = ubd_device_release;
		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
		/* NOTE(review): the return value of the registration is ignored */
		platform_device_register(&ubd_devs[unit].pdev);
		parent = &ubd_devs[unit].pdev.dev;
	}

	disk->private_data = &ubd_devs[unit];
	disk->queue = ubd_devs[unit].queue;
	device_add_disk(parent, disk);

	*disk_out = disk;
	return 0;
}
911
/*
 * Round n up to the next multiple of 512.  The argument is parenthesized so
 * that any expression can be passed, and the mask is built with ~ rather
 * than by left-shifting -1, which is undefined behaviour.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
913
/*
 * Bring up unit @n if it has been configured: determine its size, set up
 * its request queue and register its disk(s).
 *
 * Returns 0 on success or when the unit has no file configured, otherwise
 * a negative error with a message stored in @error_out.
 */
static int ubd_add(int n, char **error_out)
{
	struct ubd *ubd_dev = &ubd_devs[n];
	int err = 0;

	if(ubd_dev->file == NULL)
		goto out;

	err = ubd_file_size(ubd_dev, &ubd_dev->size);
	if(err < 0){
		*error_out = "Couldn't determine size of device's file";
		goto out;
	}

	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);

	INIT_LIST_HEAD(&ubd_dev->restart);
	sg_init_table(ubd_dev->sg, MAX_SG);

	err = -ENOMEM;
	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
	if (ubd_dev->queue == NULL) {
		*error_out = "Failed to initialize device queue";
		goto out;
	}
	ubd_dev->queue->queuedata = ubd_dev;
	blk_queue_write_cache(ubd_dev->queue, true, false);

	/* Requests may not span more sg entries than ubd_dev->sg can hold */
	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
	if(err){
		*error_out = "Failed to register device";
		goto out_cleanup;
	}

	/* Second registration under the fake major; its result is ignored */
	if (fake_major != UBD_MAJOR)
		ubd_disk_register(fake_major, ubd_dev->size, n,
				  &fake_gendisk[n]);





	if (fake_ide)
		make_ide_entries(ubd_gendisk[n]->disk_name);

	err = 0;
out:
	return err;

out_cleanup:
	blk_cleanup_queue(ubd_dev->queue);
	goto out;
}
968
969static int ubd_config(char *str, char **error_out)
970{
971 int n, ret;
972
973
974
975
976
977 str = kstrdup(str, GFP_KERNEL);
978 if (str == NULL) {
979 *error_out = "Failed to allocate memory";
980 return -ENOMEM;
981 }
982
983 ret = ubd_setup_common(str, &n, error_out);
984 if (ret)
985 goto err_free;
986
987 if (n == -1) {
988 ret = 0;
989 goto err_free;
990 }
991
992 mutex_lock(&ubd_lock);
993 ret = ubd_add(n, error_out);
994 if (ret)
995 ubd_devs[n].file = NULL;
996 mutex_unlock(&ubd_lock);
997
998out:
999 return ret;
1000
1001err_free:
1002 kfree(str);
1003 goto out;
1004}
1005
/*
 * mconsole "get_config" handler: write the configuration of the unit named
 * by @name into @str (at most @size bytes) as "<file>[,<backing file>]",
 * or as an empty string when the unit is not configured.
 *
 * Returns the length accumulated by CONFIG_CHUNK, or -1 with a message in
 * @error_out when the unit number is invalid.
 */
static int ubd_get_config(char *name, char *str, int size, char **error_out)
{
	struct ubd *ubd_dev;
	int n, len = 0;

	n = parse_unit(&name);
	if((n >= MAX_DEV) || (n < 0)){
		*error_out = "ubd_get_config : device number out of range";
		return -1;
	}

	ubd_dev = &ubd_devs[n];
	mutex_lock(&ubd_lock);

	if(ubd_dev->file == NULL){
		CONFIG_CHUNK(str, size, len, "", 1);
		goto out;
	}

	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);

	if(ubd_dev->cow.file != NULL){
		CONFIG_CHUNK(str, size, len, ",", 0);
		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
	}
	else CONFIG_CHUNK(str, size, len, "", 1);

 out:
	mutex_unlock(&ubd_lock);
	return len;
}
1037
1038static int ubd_id(char **str, int *start_out, int *end_out)
1039{
1040 int n;
1041
1042 n = parse_unit(str);
1043 *start_out = 0;
1044 *end_out = MAX_DEV - 1;
1045 return n;
1046}
1047
/*
 * mconsole "remove" handler: tear down unit @n.
 *
 * Returns 0 on success, -ENODEV when the unit is not configured and -EBUSY
 * while it is still open.
 */
static int ubd_remove(int n, char **error_out)
{
	/* NOTE(review): ubd_gendisk[n] is read before ubd_lock is taken */
	struct gendisk *disk = ubd_gendisk[n];
	struct ubd *ubd_dev;
	int err = -ENODEV;

	mutex_lock(&ubd_lock);

	ubd_dev = &ubd_devs[n];

	if(ubd_dev->file == NULL)
		goto out;


	err = -EBUSY;
	if(ubd_dev->count > 0)
		goto out;

	ubd_gendisk[n] = NULL;
	if(disk != NULL){
		del_gendisk(disk);
		put_disk(disk);
	}

	if(fake_gendisk[n] != NULL){
		del_gendisk(fake_gendisk[n]);
		put_disk(fake_gendisk[n]);
		fake_gendisk[n] = NULL;
	}

	err = 0;
	/* The platform device's release callback resets the ubd structure */
	platform_device_unregister(&ubd_dev->pdev);
out:
	mutex_unlock(&ubd_lock);
	return err;
}
1084
1085
1086
1087
/* mconsole device descriptor routing "ubd" commands to the handlers above. */
static struct mc_device ubd_mc = {
	.list = LIST_HEAD_INIT(ubd_mc.list),
	.name = "ubd",
	.config = ubd_config,
	.get_config = ubd_get_config,
	.id = ubd_id,
	.remove = ubd_remove,
};

/* Register the ubd driver with the mconsole. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
1104
/* Give unit 0 the default file name "root_fs" if nothing was configured. */
static int __init ubd0_init(void)
{
	struct ubd *ubd_dev = &ubd_devs[0];

	mutex_lock(&ubd_lock);
	if(ubd_dev->file == NULL)
		ubd_dev->file = "root_fs";
	mutex_unlock(&ubd_lock);

	return 0;
}

__initcall(ubd0_init);
1118
1119
/* Platform driver matching the per-unit platform devices by DRIVER_NAME. */
static struct platform_driver ubd_driver = {
	.driver = {
		.name = DRIVER_NAME,
	},
};
1125
1126static int __init ubd_init(void)
1127{
1128 char *error;
1129 int i, err;
1130
1131 if (register_blkdev(UBD_MAJOR, "ubd"))
1132 return -1;
1133
1134 if (fake_major != UBD_MAJOR) {
1135 char name[sizeof("ubd_nnn\0")];
1136
1137 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1138 if (register_blkdev(fake_major, "ubd"))
1139 return -1;
1140 }
1141
1142 irq_req_buffer = kmalloc(
1143 sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1144 GFP_KERNEL
1145 );
1146 irq_remainder = 0;
1147
1148 if (irq_req_buffer == NULL) {
1149 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1150 return -1;
1151 }
1152 io_req_buffer = kmalloc(
1153 sizeof(struct io_thread_req *) * UBD_REQ_BUFFER_SIZE,
1154 GFP_KERNEL
1155 );
1156
1157 io_remainder = 0;
1158
1159 if (io_req_buffer == NULL) {
1160 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1161 return -1;
1162 }
1163 platform_driver_register(&ubd_driver);
1164 mutex_lock(&ubd_lock);
1165 for (i = 0; i < MAX_DEV; i++){
1166 err = ubd_add(i, &error);
1167 if(err)
1168 printk(KERN_ERR "Failed to initialize ubd device %d :"
1169 "%s\n", i, error);
1170 }
1171 mutex_unlock(&ubd_lock);
1172 return 0;
1173}
1174
1175late_initcall(ubd_init);
1176
/*
 * Device init: start the I/O thread and hook its descriptor up to UBD_IRQ.
 * Always returns 0; when the thread cannot be started io_pid is left at -1.
 */
static int __init ubd_driver_init(void){
	unsigned long stack;
	int err;


	if(global_openflags.s){
		printk(KERN_INFO "ubd: Synchronous mode\n");


	}
	/* NOTE(review): the result of alloc_stack() is not checked */
	stack = alloc_stack(0, 0);
	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
				 &thread_fd);
	if(io_pid < 0){
		printk(KERN_ERR
		       "ubd : Failed to start I/O thread (errno = %d) - "
		       "falling back to synchronous I/O\n", -io_pid);
		io_pid = -1;
		return 0;
	}
	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
			     0, "ubd", ubd_devs);
	if(err != 0)
		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
	return 0;
}

device_initcall(ubd_driver_init);
1205
1206static int ubd_open(struct block_device *bdev, fmode_t mode)
1207{
1208 struct gendisk *disk = bdev->bd_disk;
1209 struct ubd *ubd_dev = disk->private_data;
1210 int err = 0;
1211
1212 mutex_lock(&ubd_mutex);
1213 if(ubd_dev->count == 0){
1214 err = ubd_open_dev(ubd_dev);
1215 if(err){
1216 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1217 disk->disk_name, ubd_dev->file, -err);
1218 goto out;
1219 }
1220 }
1221 ubd_dev->count++;
1222 set_disk_ro(disk, !ubd_dev->openflags.w);
1223
1224
1225
1226
1227
1228
1229
1230out:
1231 mutex_unlock(&ubd_mutex);
1232 return err;
1233}
1234
1235static void ubd_release(struct gendisk *disk, fmode_t mode)
1236{
1237 struct ubd *ubd_dev = disk->private_data;
1238
1239 mutex_lock(&ubd_mutex);
1240 if(--ubd_dev->count == 0)
1241 ubd_close_dev(ubd_dev);
1242 mutex_unlock(&ubd_mutex);
1243}
1244
/*
 * Mark the sectors of a write in the in-memory COW bitmap and work out
 * which part of the bitmap has to be written back to the COW file.
 *
 * @io_offset, @length: byte range of the write.
 * @cow_mask:      if non-NULL, gets one bit set per sector of the request.
 * @cow_offset:    receives the file offset of the bitmap words to write;
 *                 left untouched when no bit changed.
 * @bitmap:        in-memory bitmap, updated in place.
 * @bitmap_offset: offset of the bitmap within the COW file.
 * @bitmap_words:  receives the two bitmap words starting at the updated
 *                 position.
 * @bitmap_len:    length of the bitmap in bytes.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	__u64 sector = io_offset >> 9;
	int i, update_bitmap = 0;

	for(i = 0; i < length >> 9; i++){
		if(cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);
		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
			continue;

		update_bitmap = 1;
		ubd_set_bit(sector + i, (unsigned char *) bitmap);
	}

	if(!update_bitmap)
		return;

	/* Index of the bitmap word holding the first sector of the request */
	*cow_offset = sector / (sizeof(unsigned long) * 8);

	/*
	 * Two consecutive words are copied below; when the first sector falls
	 * into the last word of the bitmap, step back one word so that the
	 * pair stays inside the bitmap.
	 */
	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
					 sizeof(unsigned long)) - 1))
		(*cow_offset)--;

	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte offset within the COW file */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
1283
/*
 * Fill in the COW-related fields of @req.
 *
 * For reads, sector_mask gets a bit set for every sector that is already
 * marked in the COW bitmap.  For every other operation the bitmap is
 * updated through cowify_bitmap().  Panics when the request covers more
 * sectors than sector_mask has bits.
 */
static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
		       __u64 bitmap_offset, __u64 bitmap_len)
{
	__u64 sector = req->offset >> 9;
	int i;

	if(req->length > (sizeof(req->sector_mask) * 8) << 9)
		panic("Operation too long");

	if(req->op == UBD_READ) {
		for(i = 0; i < req->length >> 9; i++){
			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
				ubd_set_bit(i, (unsigned char *)
					    &req->sector_mask);
		}
	}
	else cowify_bitmap(req->offset, req->length, &req->sector_mask,
			   &req->cow_offset, bitmap, bitmap_offset,
			   req->bitmap_words, bitmap_len);
}
1304
1305
/*
 * Fill @io_req for one scatterlist segment of @req.
 *
 * @offset:      byte offset of the segment on the device.
 * @page_offset: offset of the data within @page.
 * @len:         length of the segment in bytes.
 */
static void prepare_request(struct request *req, struct io_thread_req *io_req,
			    unsigned long long offset, int page_offset,
			    int len, struct page *page)
{
	struct gendisk *disk = req->rq_disk;
	struct ubd *ubd_dev = disk->private_data;

	io_req->req = req;
	/* fds[0] is the backing file when there is one, fds[1] the device file */
	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
		ubd_dev->fd;
	io_req->fds[1] = ubd_dev->fd;
	/* -1 means no bitmap update; cowify_req() may overwrite it */
	io_req->cow_offset = -1;
	io_req->offset = offset;
	io_req->length = len;
	io_req->error = 0;
	io_req->sector_mask = 0;

	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
	io_req->offsets[0] = 0;
	io_req->offsets[1] = ubd_dev->cow.data_offset;
	io_req->buffer = page_address(page) + page_offset;
	io_req->sectorsize = 1 << 9;

	if(ubd_dev->cow.file != NULL)
		cowify_req(io_req, ubd_dev->cow.bitmap,
			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);

}
1334
1335
1336static void prepare_flush_request(struct request *req,
1337 struct io_thread_req *io_req)
1338{
1339 struct gendisk *disk = req->rq_disk;
1340 struct ubd *ubd_dev = disk->private_data;
1341
1342 io_req->req = req;
1343 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1344 ubd_dev->fd;
1345 io_req->op = UBD_FLUSH;
1346}
1347
1348static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1349{
1350 int n = os_write_file(thread_fd, &io_req,
1351 sizeof(io_req));
1352 if (n != sizeof(io_req)) {
1353 if (n != -EAGAIN)
1354 printk("write to io thread failed, "
1355 "errno = %d\n", -n);
1356 else if (list_empty(&dev->restart))
1357 list_add(&dev->restart, &restart);
1358
1359 kfree(io_req);
1360 return false;
1361 }
1362 return true;
1363}
1364
1365
1366static void do_ubd_request(struct request_queue *q)
1367{
1368 struct io_thread_req *io_req;
1369 struct request *req;
1370
1371 while(1){
1372 struct ubd *dev = q->queuedata;
1373 if(dev->request == NULL){
1374 struct request *req = blk_fetch_request(q);
1375 if(req == NULL)
1376 return;
1377
1378 dev->request = req;
1379 dev->rq_pos = blk_rq_pos(req);
1380 dev->start_sg = 0;
1381 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1382 }
1383
1384 req = dev->request;
1385
1386 if (req_op(req) == REQ_OP_FLUSH) {
1387 io_req = kmalloc(sizeof(struct io_thread_req),
1388 GFP_ATOMIC);
1389 if (io_req == NULL) {
1390 if (list_empty(&dev->restart))
1391 list_add(&dev->restart, &restart);
1392 return;
1393 }
1394 prepare_flush_request(req, io_req);
1395 if (submit_request(io_req, dev) == false)
1396 return;
1397 }
1398
1399 while(dev->start_sg < dev->end_sg){
1400 struct scatterlist *sg = &dev->sg[dev->start_sg];
1401
1402 io_req = kmalloc(sizeof(struct io_thread_req),
1403 GFP_ATOMIC);
1404 if(io_req == NULL){
1405 if(list_empty(&dev->restart))
1406 list_add(&dev->restart, &restart);
1407 return;
1408 }
1409 prepare_request(req, io_req,
1410 (unsigned long long)dev->rq_pos << 9,
1411 sg->offset, sg->length, sg_page(sg));
1412
1413 if (submit_request(io_req, dev) == false)
1414 return;
1415
1416 dev->rq_pos += sg->length >> 9;
1417 dev->start_sg++;
1418 }
1419 dev->end_sg = 0;
1420 dev->request = NULL;
1421 }
1422}
1423
1424static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1425{
1426 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1427
1428 geo->heads = 128;
1429 geo->sectors = 32;
1430 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1431 return 0;
1432}
1433
1434static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1435 unsigned int cmd, unsigned long arg)
1436{
1437 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1438 u16 ubd_id[ATA_ID_WORDS];
1439
1440 switch (cmd) {
1441 struct cdrom_volctrl volume;
1442 case HDIO_GET_IDENTITY:
1443 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1444 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1445 ubd_id[ATA_ID_HEADS] = 128;
1446 ubd_id[ATA_ID_SECTORS] = 32;
1447 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1448 sizeof(ubd_id)))
1449 return -EFAULT;
1450 return 0;
1451
1452 case CDROMVOLREAD:
1453 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1454 return -EFAULT;
1455 volume.channel0 = 255;
1456 volume.channel1 = 255;
1457 volume.channel2 = 255;
1458 volume.channel3 = 255;
1459 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1460 return -EFAULT;
1461 return 0;
1462 }
1463 return -EINVAL;
1464}
1465
1466static int update_bitmap(struct io_thread_req *req)
1467{
1468 int n;
1469
1470 if(req->cow_offset == -1)
1471 return 0;
1472
1473 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1474 sizeof(req->bitmap_words), req->cow_offset);
1475 if(n != sizeof(req->bitmap_words)){
1476 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1477 req->fds[1]);
1478 return 1;
1479 }
1480
1481 return 0;
1482}
1483
1484static void do_io(struct io_thread_req *req)
1485{
1486 char *buf;
1487 unsigned long len;
1488 int n, nsectors, start, end, bit;
1489 __u64 off;
1490
1491 if (req->op == UBD_FLUSH) {
1492
1493 n = os_sync_file(req->fds[0]);
1494 if (n != 0) {
1495 printk("do_io - sync failed err = %d "
1496 "fd = %d\n", -n, req->fds[0]);
1497 req->error = 1;
1498 }
1499 return;
1500 }
1501
1502 nsectors = req->length / req->sectorsize;
1503 start = 0;
1504 do {
1505 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1506 end = start;
1507 while((end < nsectors) &&
1508 (ubd_test_bit(end, (unsigned char *)
1509 &req->sector_mask) == bit))
1510 end++;
1511
1512 off = req->offset + req->offsets[bit] +
1513 start * req->sectorsize;
1514 len = (end - start) * req->sectorsize;
1515 buf = &req->buffer[start * req->sectorsize];
1516
1517 if(req->op == UBD_READ){
1518 n = 0;
1519 do {
1520 buf = &buf[n];
1521 len -= n;
1522 n = os_pread_file(req->fds[bit], buf, len, off);
1523 if (n < 0) {
1524 printk("do_io - read failed, err = %d "
1525 "fd = %d\n", -n, req->fds[bit]);
1526 req->error = 1;
1527 return;
1528 }
1529 } while((n < len) && (n != 0));
1530 if (n < len) memset(&buf[n], 0, len - n);
1531 } else {
1532 n = os_pwrite_file(req->fds[bit], buf, len, off);
1533 if(n != len){
1534 printk("do_io - write failed err = %d "
1535 "fd = %d\n", -n, req->fds[bit]);
1536 req->error = 1;
1537 return;
1538 }
1539 }
1540
1541 start = end;
1542 } while(start < nsectors);
1543
1544 req->error = update_bitmap(req);
1545}
1546
1547
1548
1549
/*
 * Descriptor io_thread() reads request pointers from and writes the
 * processed ones back to; -1 until it has been set up.
 */
int kernel_fd = -1;

/* Number of requests io_thread() has handed to do_io(). */
static int io_count;
1554
1555int io_thread(void *arg)
1556{
1557 int n, count, written, res;
1558
1559 os_fix_helper_signals();
1560
1561 while(1){
1562 n = bulk_req_safe_read(
1563 kernel_fd,
1564 io_req_buffer,
1565 &io_remainder,
1566 &io_remainder_size,
1567 UBD_REQ_BUFFER_SIZE
1568 );
1569 if (n < 0) {
1570 if (n == -EAGAIN) {
1571 ubd_read_poll(-1);
1572 continue;
1573 } else {
1574 printk("io_thread - read failed, fd = %d, "
1575 "err = %d,"
1576 "reminder = %d\n",
1577 kernel_fd, -n, io_remainder_size);
1578 }
1579 }
1580
1581 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1582 io_count++;
1583 do_io((*io_req_buffer)[count]);
1584 }
1585
1586 written = 0;
1587
1588 do {
1589 res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
1590 if (res > 0) {
1591 written += res;
1592 } else {
1593 if (res != -EAGAIN) {
1594 printk("io_thread - read failed, fd = %d, "
1595 "err = %d\n", kernel_fd, -n);
1596 }
1597 }
1598 if (written < n) {
1599 ubd_write_poll(-1);
1600 }
1601 } while (written < n);
1602 }
1603
1604 return 0;
1605}
1606