linux/drivers/lguest/core.c
<<
>>
Prefs
   1/*P:400
   2 * This contains run_guest() which actually calls into the Host<->Guest
   3 * Switcher and analyzes the return, such as determining if the Guest wants the
   4 * Host to do something.  This file also contains useful helper routines.
   5:*/
   6#include <linux/module.h>
   7#include <linux/stringify.h>
   8#include <linux/stddef.h>
   9#include <linux/io.h>
  10#include <linux/mm.h>
  11#include <linux/vmalloc.h>
  12#include <linux/cpu.h>
  13#include <linux/freezer.h>
  14#include <linux/highmem.h>
  15#include <linux/slab.h>
  16#include <asm/paravirt.h>
  17#include <asm/pgtable.h>
  18#include <asm/uaccess.h>
  19#include <asm/poll.h>
  20#include <asm/asm-offsets.h>
  21#include "lg.h"
  22
  23unsigned long switcher_addr;                    /* virtual address of the Switcher; set by map_switcher() */
  24struct page **lg_switcher_pages;                /* TOTAL_SWITCHER_PAGES page pointers backing the Switcher */
  25static struct vm_struct *switcher_text_vma;     /* reserved area for the single Switcher text page */
  26static struct vm_struct *switcher_stacks_vma;   /* reserved area for the pages following the text page */
  27
  28/* This One Big lock protects all inter-guest data structures. */
  29DEFINE_MUTEX(lguest_lock);
  30
  31/*H:010
  32 * We need to set up the Switcher at a high virtual address.  Remember the
  33 * Switcher is a few hundred bytes of assembler code which actually changes the
  34 * CPU to run the Guest, and then changes back to the Host when a trap or
  35 * interrupt happens.
  36 *
  37 * The Switcher code must be at the same virtual address in the Guest as the
  38 * Host since it will be running as the switchover occurs.
  39 *
  40 * Trying to map memory at a particular address is an unusual thing to do, so
  41 * it's not a simple one-liner.
  42 */
  43static __init int map_switcher(void)
  44{
  45        int i, err;
  46
  47        /*
  48         * Map the Switcher in to high memory.
  49         *
  50         * It turns out that if we choose the address 0xFFC00000 (4MB under the
  51         * top virtual address), it makes setting up the page tables really
  52         * easy.
  53         */
  54
  55        /* We assume Switcher text fits into a single page. */
  56        if (end_switcher_text - start_switcher_text > PAGE_SIZE) {
  57                printk(KERN_ERR "lguest: switcher text too large (%zu)\n",
  58                       end_switcher_text - start_switcher_text);
  59                return -EINVAL;
  60        }
  61
  62        /*
  63         * We allocate an array of struct page pointers.  map_vm_area() wants
  64         * this, rather than just an array of pages.
  65         */
  66        lg_switcher_pages = kmalloc(sizeof(lg_switcher_pages[0])
  67                                    * TOTAL_SWITCHER_PAGES,
  68                                    GFP_KERNEL);
  69        if (!lg_switcher_pages) {
  70                err = -ENOMEM;
  71                goto out;
  72        }
  73
  74        /*
  75         * Now we actually allocate the pages.  The Guest will see these pages,
  76         * so we make sure they're zeroed.
  77         */
  78        for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
  79                lg_switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
  80                if (!lg_switcher_pages[i]) {
  81                        err = -ENOMEM;
  82                        goto free_some_pages;
  83                }
  84        }
  85
  86        /*
  87         * Copy in the compiled-in Switcher code (from x86/switcher_32.S).
  88         * It goes in the first page, which we map in momentarily.
  89         */
  90        memcpy(kmap(lg_switcher_pages[0]), start_switcher_text,
  91               end_switcher_text - start_switcher_text);
  92        kunmap(lg_switcher_pages[0]);
  93
  94        /*
  95         * We place the Switcher underneath the fixmap area, which is the
  96         * highest virtual address we can get.  This is important, since we
  97         * tell the Guest it can't access this memory, so we want its ceiling
  98         * as high as possible.
  99         */
 100        switcher_addr = FIXADDR_START - TOTAL_SWITCHER_PAGES*PAGE_SIZE;
 101
 102        /*
 103         * Now we reserve the "virtual memory area"s we want.  We might
 104         * not get them in theory, but in practice it's worked so far.
 105         *
 106         * We want the switcher text to be read-only and executable, and
 107         * the stacks to be read-write and non-executable.
 108         */
 109        switcher_text_vma = __get_vm_area(PAGE_SIZE, VM_ALLOC|VM_NO_GUARD,
 110                                          switcher_addr,
 111                                          switcher_addr + PAGE_SIZE);
 112
 113        if (!switcher_text_vma) {
 114                err = -ENOMEM;
 115                printk("lguest: could not map switcher pages high\n");
 116                goto free_pages;
 117        }
 118
 119        switcher_stacks_vma = __get_vm_area(SWITCHER_STACK_PAGES * PAGE_SIZE,
 120                                            VM_ALLOC|VM_NO_GUARD,
 121                                            switcher_addr + PAGE_SIZE,
 122                                            switcher_addr + TOTAL_SWITCHER_PAGES * PAGE_SIZE);
 123        if (!switcher_stacks_vma) {
 124                err = -ENOMEM;
 125                printk("lguest: could not map switcher pages high\n");
 126                goto free_text_vma;
 127        }
 128
 129        /*
 130         * This code actually sets up the pages we've allocated to appear at
 131         * switcher_addr.  map_vm_area() takes the vma we allocated above, the
 132         * kind of pages we're mapping (kernel text pages and kernel writable
 133         * pages respectively), and a pointer to our array of struct pages.
 134         */
 135        err = map_vm_area(switcher_text_vma, PAGE_KERNEL_RX, lg_switcher_pages);
 136        if (err) {
 137                printk("lguest: text map_vm_area failed: %i\n", err);
 138                goto free_vmas;
 139        }
 140
 141        err = map_vm_area(switcher_stacks_vma, PAGE_KERNEL,
 142                          lg_switcher_pages + SWITCHER_TEXT_PAGES);
 143        if (err) {
 144                printk("lguest: stacks map_vm_area failed: %i\n", err);
 145                goto free_vmas;
 146        }
 147
 148        /*
 149         * Now the Switcher is mapped at the right address, we can't fail!
 150         */
 151        printk(KERN_INFO "lguest: mapped switcher at %p\n",
 152               switcher_text_vma->addr);
 153        /* And we succeeded... */
 154        return 0;
 155
 156free_vmas:
 157        /* Undoes map_vm_area and __get_vm_area */
 158        vunmap(switcher_stacks_vma->addr);
 159free_text_vma:
 160        vunmap(switcher_text_vma->addr);
 161free_pages:
 162        i = TOTAL_SWITCHER_PAGES;
 163free_some_pages:
 164        for (--i; i >= 0; i--)
 165                __free_pages(lg_switcher_pages[i], 0);
 166        kfree(lg_switcher_pages);
 167out:
 168        return err;
 169}
 170/*:*/
 171
 172/* Cleaning up the mapping when the module is unloaded is almost... too easy. */
 173static void unmap_switcher(void)
 174{
 175        unsigned int i;
 176
 177        /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */
 178        vunmap(switcher_text_vma->addr);
 179        vunmap(switcher_stacks_vma->addr);
 180        /* Now we just need to free the pages we copied the switcher into */
 181        for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
 182                __free_pages(lg_switcher_pages[i], 0);
 183        kfree(lg_switcher_pages);
 184}
 185
 186/*H:032
 187 * Dealing With Guest Memory.
 188 *
 189 * Before we go too much further into the Host, we need to grok the routines
 190 * we use to deal with Guest memory.
 191 *
 192 * When the Guest gives us (what it thinks is) a physical address, we can use
 193 * the normal copy_from_user() & copy_to_user() on the corresponding place in
 194 * the memory region allocated by the Launcher.
 195 *
 196 * But we can't trust the Guest: it might be trying to access the Launcher
 197 * code.  We have to check that the range is below the pfn_limit the Launcher
 198 * gave us.  We have to make sure that addr + len doesn't give us a false
 199 * positive by overflowing, too.
 200 */
 201bool lguest_address_ok(const struct lguest *lg,
 202                       unsigned long addr, unsigned long len)
 203{
 204        return addr+len <= lg->pfn_limit * PAGE_SIZE && (addr+len >= addr);
 205}
 206
 207/*
 208 * This routine copies memory from the Guest.  Here we can see how useful the
 209 * kill_lguest() routine we met in the Launcher can be: we return a random
 210 * value (all zeroes) instead of needing to return an error.
 211 */
 212void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
 213{
 214        if (!lguest_address_ok(cpu->lg, addr, bytes)
 215            || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
 216                /* copy_from_user should do this, but as we rely on it... */
 217                memset(b, 0, bytes);
 218                kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
 219        }
 220}
 221
 222/* This is the write (copy into Guest) version. */
 223void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
 224               unsigned bytes)
 225{
 226        if (!lguest_address_ok(cpu->lg, addr, bytes)
 227            || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
 228                kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
 229}
 230/*:*/
 231
 232/*H:030
 233 * Let's jump straight to the the main loop which runs the Guest.
 234 * Remember, this is called by the Launcher reading /dev/lguest, and we keep
 235 * going around and around until something interesting happens.
 236 */
 237int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 238{
 239        /* If the launcher asked for a register with LHREQ_GETREG */
 240        if (cpu->reg_read) {
 241                if (put_user(*cpu->reg_read, user))
 242                        return -EFAULT;
 243                cpu->reg_read = NULL;
 244                return sizeof(*cpu->reg_read);
 245        }
 246
 247        /* We stop running once the Guest is dead. */
 248        while (!cpu->lg->dead) {
 249                unsigned int irq;
 250                bool more;
 251
 252                /* First we run any hypercalls the Guest wants done. */
 253                if (cpu->hcall)
 254                        do_hypercalls(cpu);
 255
 256                /* Do we have to tell the Launcher about a trap? */
 257                if (cpu->pending.trap) {
 258                        if (copy_to_user(user, &cpu->pending,
 259                                         sizeof(cpu->pending)))
 260                                return -EFAULT;
 261                        return sizeof(cpu->pending);
 262                }
 263
 264                /*
 265                 * All long-lived kernel loops need to check with this horrible
 266                 * thing called the freezer.  If the Host is trying to suspend,
 267                 * it stops us.
 268                 */
 269                try_to_freeze();
 270
 271                /* Check for signals */
 272                if (signal_pending(current))
 273                        return -ERESTARTSYS;
 274
 275                /*
 276                 * Check if there are any interrupts which can be delivered now:
 277                 * if so, this sets up the hander to be executed when we next
 278                 * run the Guest.
 279                 */
 280                irq = interrupt_pending(cpu, &more);
 281                if (irq < LGUEST_IRQS)
 282                        try_deliver_interrupt(cpu, irq, more);
 283
 284                /*
 285                 * Just make absolutely sure the Guest is still alive.  One of
 286                 * those hypercalls could have been fatal, for example.
 287                 */
 288                if (cpu->lg->dead)
 289                        break;
 290
 291                /*
 292                 * If the Guest asked to be stopped, we sleep.  The Guest's
 293                 * clock timer will wake us.
 294                 */
 295                if (cpu->halted) {
 296                        set_current_state(TASK_INTERRUPTIBLE);
 297                        /*
 298                         * Just before we sleep, make sure no interrupt snuck in
 299                         * which we should be doing.
 300                         */
 301                        if (interrupt_pending(cpu, &more) < LGUEST_IRQS)
 302                                set_current_state(TASK_RUNNING);
 303                        else
 304                                schedule();
 305                        continue;
 306                }
 307
 308                /*
 309                 * OK, now we're ready to jump into the Guest.  First we put up
 310                 * the "Do Not Disturb" sign:
 311                 */
 312                local_irq_disable();
 313
 314                /* Actually run the Guest until something happens. */
 315                lguest_arch_run_guest(cpu);
 316
 317                /* Now we're ready to be interrupted or moved to other CPUs */
 318                local_irq_enable();
 319
 320                /* Now we deal with whatever happened to the Guest. */
 321                lguest_arch_handle_trap(cpu);
 322        }
 323
 324        /* Special case: Guest is 'dead' but wants a reboot. */
 325        if (cpu->lg->dead == ERR_PTR(-ERESTART))
 326                return -ERESTART;
 327
 328        /* The Guest is dead => "No such file or directory" */
 329        return -ENOENT;
 330}
 331
 332/*H:000
 333 * Welcome to the Host!
 334 *
 335 * By this point your brain has been tickled by the Guest code and numbed by
 336 * the Launcher code; prepare for it to be stretched by the Host code.  This is
 337 * the heart.  Let's begin at the initialization routine for the Host's lg
 338 * module.
 339 */
 340static int __init init(void)
 341{
 342        int err;
 343
 344        /* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
 345        if (get_kernel_rpl() != 0) {
 346                printk("lguest is afraid of being a guest\n");
 347                return -EPERM;
 348        }
 349
 350        /* First we put the Switcher up in very high virtual memory. */
 351        err = map_switcher();
 352        if (err)
 353                goto out;
 354
 355        /* We might need to reserve an interrupt vector. */
 356        err = init_interrupts();
 357        if (err)
 358                goto unmap;
 359
 360        /* /dev/lguest needs to be registered. */
 361        err = lguest_device_init();
 362        if (err)
 363                goto free_interrupts;
 364
 365        /* Finally we do some architecture-specific setup. */
 366        lguest_arch_host_init();
 367
 368        /* All good! */
 369        return 0;
 370
 371free_interrupts:
 372        free_interrupts();
 373unmap:
 374        unmap_switcher();
 375out:
 376        return err;
 377}
 378
 379/* Cleaning up is just the same code, backwards.  With a little French. */
 380static void __exit fini(void)
 381{
            /*
             * Module unload: release what init() set up.  The first three
             * calls mirror init()'s own error unwinding (device, interrupts,
             * Switcher mapping).
             */
 382        lguest_device_remove();
 383        free_interrupts();
 384        unmap_switcher();
 385
            /*
             * NOTE(review): lguest_arch_host_init() was the *last* step of
             * init(), yet its counterpart runs last here rather than first --
             * presumably it does not depend on the Switcher mapping; confirm.
             */
 386        lguest_arch_host_fini();
 387}
 388/*:*/
 389
 390/*
 391 * The Host side of lguest can be a module.  This is a nice way for people to
 392 * play with it.
 393 */
 394module_init(init);   /* init() is the module's load-time entry point */
 395module_exit(fini);   /* fini() is its unload-time counterpart */
 396MODULE_LICENSE("GPL");
 397MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
 398