linux/drivers/lguest/x86/switcher_32.S
<<
>>
Prefs
   1/*P:900
   2 * This is the Switcher: code which sits at 0xFFC00000 (or 0xFFE00000) astride
   3 * both the Host and Guest to do the low-level Guest<->Host switch.  It is as
   4 * simple as it can be made, but it's naturally very specific to x86.
   5 *
   6 * You have now completed Preparation.  If this has whetted your appetite; if you
   7 * are feeling invigorated and refreshed then the next, more challenging stage
   8 * can be found in "make Guest".
   9 :*/
  10
  11/*M:012
  12 * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must
  13 * gain at least 1% more performance.  Since neither LOC nor performance can be
  14 * measured beforehand, it generally means implementing a feature then deciding
  15 * if it's worth it.  And once it's implemented, who can say no?
  16 *
  17 * This is why I haven't implemented this idea myself.  I want to, but I
  18 * haven't.  You could, though.
  19 *
  20 * The main place where lguest performance sucks is Guest page faulting.  When
  21 * a Guest userspace process hits an unmapped page we switch back to the Host,
  22 * walk the page tables, find it's not mapped, switch back to the Guest page
  23 * fault handler, which calls a hypercall to set the page table entry, then
  24 * finally returns to userspace.  That's two round-trips.
  25 *
  26 * If we had a small walker in the Switcher, we could quickly check the Guest
  27 * page table and if the page isn't mapped, immediately reflect the fault back
  28 * into the Guest.  This means the Switcher would have to know the top of the
  29 * Guest page table and the page fault handler address.
  30 *
  31 * For simplicity, the Guest should only handle the case where the privilege
  32 * level of the fault is 3 and probably only not present or write faults.  It
  33 * should also detect recursive faults, and hand the original fault to the
  34 * Host (which is actually really easy).
  35 *
  36 * Two questions remain.  Would the performance gain outweigh the complexity?
  37 * And who would write the verse documenting it?
  38:*/
  39
  40/*M:011
  41 * Lguest64 handles NMI.  This gave me NMI envy (until I looked at their
  42 * code).  It's worth doing though, since it would let us use oprofile in the
  43 * Host when a Guest is running.
  44:*/
  45
  46/*S:100
  47 * Welcome to the Switcher itself!
  48 *
  49 * This file contains the low-level code which changes the CPU to run the Guest
  50 * code, and returns to the Host when something happens.  Understand this, and
  51 * you understand the heart of our journey.
  52 *
  53 * Because this is in assembler rather than C, our tale switches from prose to
  54 * verse.  First I tried limericks:
  55 *
  56 *      There once was an eax reg,
  57 *      To which our pointer was fed,
  58 *      It needed an add,
  59 *      Which asm-offsets.h had
  60 *      But this limerick is hurting my head.
  61 *
  62 * Next I tried haikus, but fitting the required reference to the seasons in
  63 * every stanza was quickly becoming tiresome:
  64 *
  65 *      The %eax reg
  66 *      Holds "struct lguest_pages" now:
  67 *      Cherry blossoms fall.
  68 *
  69 * Then I started with Heroic Verse, but the rhyming requirement leeched away
  70 * the content density and led to some uniquely awful oblique rhymes:
  71 *
  72 *      These constants are coming from struct offsets
  73 *      For use within the asm switcher text.
  74 *
  75 * Finally, I settled for something between heroic hexameter, and normal prose
  76 * with inappropriate linebreaks.  Anyway, it ain't no Shakespeare.
  77 */
  78
  79// Not all kernel headers work from assembler
  80// But these ones are needed: the ENTRY() define
  81// And constants extracted from struct offsets
  82// To avoid magic numbers and breakage:
  83// Should they change the compiler can't save us
  84// Down here in the depths of assembler code.
  85#include <linux/linkage.h>
  86#include <asm/asm-offsets.h>
  87#include <asm/page.h>
  88#include <asm/segment.h>
  89#include <asm/lguest.h>
  90
  91// We mark the start of the code to copy
  92// It's placed in .text though it's never run here
  93// You'll see the trick macro at the end
  94// Which interleaves data and text to effect.
    // (start_switcher_text pairs with end_switcher_text, the last symbol in
    // this file: the two labels bracket everything this file emits to .text.)
  95.text
  96ENTRY(start_switcher_text)
  97
  98// When we reach switch_to_guest we have just left
  99// The safe and comforting shores of C code
 100// %eax has the "struct lguest_pages" to use
 101// Where we save state and still see it from the Guest
 102// And %ebx holds the Guest shadow pagetable:
 103// Once set we have truly left Host behind.
    //
    // The same contract in plain prose:
    //   In:   %eax = address of the "struct lguest_pages"
    //         %ebx = value loaded into %cr3 (the Guest shadow pagetable)
    //   Pushes %es, %ds, %gs, %fs and %ebp on the Host stack and records the
    //   resulting %esp at LGUEST_PAGES_host_sp(%eax); SWITCH_TO_HOST later
    //   pops them in the opposite order.
    //   Every general register is overwritten with a Guest value, and the
    //   routine ends in "iret" rather than "ret".
 104ENTRY(switch_to_guest)
 105        // We told gcc all its regs could fade,
 106        // Clobbered by our journey into the Guest
 107        // We could have saved them, if we tried
 108        // But time is our master and cycles count.
 109
 110        // Segment registers must be saved for the Host
 111        // We push them on the Host stack for later
 112        pushl   %es
 113        pushl   %ds
 114        pushl   %gs
 115        pushl   %fs
 116        // But the compiler is fickle, and heeds
 117        // No warning of %ebp clobbers
 118        // When frame pointers are used.  That register
 119        // Must be saved and restored or chaos strikes.
 120        pushl   %ebp
 121        // The Host's stack is done, now save it away
 122        // In our "struct lguest_pages" at offset
 123        // Distilled into asm-offsets.h
 124        movl    %esp, LGUEST_PAGES_host_sp(%eax)
 125
 126        // All saved and there's now five steps before us:
 127        // Stack, GDT, IDT, TSS
 128        // Then last of all the page tables are flipped.
 129
 130        // Yet beware that our stack pointer must be
 131        // Always valid lest an NMI hits
 132        // %edx does the duty here as we juggle
 133        // %eax is lguest_pages: our stack lies within.
            // (The sum is formed in %edx first so that %esp changes in a
            // single move, never holding a half-computed value.)
 134        movl    %eax, %edx
 135        addl    $LGUEST_PAGES_regs, %edx
 136        movl    %edx, %esp
 137
 138        // The Guest's GDT we so carefully
 139        // Placed in the "struct lguest_pages" before
 140        lgdt    LGUEST_PAGES_guest_gdt_desc(%eax)
 141
 142        // The Guest's IDT we did partially
 143        // Copy to "struct lguest_pages" as well.
 144        lidt    LGUEST_PAGES_guest_idt_desc(%eax)
 145
 146        // The TSS entry which controls traps
 147        // Must be loaded up with "ltr" now:
 148        // The GDT entry that TSS uses
 149        // Changes type when we load it: damn Intel!
 150        // For after we switch over our page tables
 151        // That entry will be read-only: we'd crash.
            // (GDT_ENTRY_TSS*8 is the selector: each GDT entry is 8 bytes.)
 152        movl    $(GDT_ENTRY_TSS*8), %edx
 153        ltr     %dx
 154
 155        // Look back now, before we take this last step!
 156        // The Host's TSS entry was also marked used;
 157        // Let's clear it again for our return.
 158        // The GDT descriptor of the Host
 159        // Points to the table after two "size" bytes
 160        movl    (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx
 161        // Clear "used" from type field (byte 5, bit 2)
            // (The mask 0xFD keeps every bit except bit 1, counting from zero,
            // which is the "bit 2" above when counting from one.)
 162        andb    $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx)
 163
 164        // Once our page table's switched, the Guest is live!
 165        // The Host fades as we run this final step.
 166        // Our "struct lguest_pages" is now read-only.
 167        movl    %ebx, %cr3
 168
 169        // The page table change did one tricky thing:
 170        // The Guest's register page has been mapped
 171        // Writable under our %esp (stack) --
 172        // We can simply pop off all Guest regs.
            // (The pop order is exactly the reverse of the pushes made at the
            // top of SWITCH_TO_HOST, so both walk the same register block.)
 173        popl    %eax
 174        popl    %ebx
 175        popl    %ecx
 176        popl    %edx
 177        popl    %esi
 178        popl    %edi
 179        popl    %ebp
 180        popl    %gs
 181        popl    %fs
 182        popl    %ds
 183        popl    %es
 184
 185        // Near the base of the stack lurk two strange fields
 186        // Which we fill as we exit the Guest
 187        // These are the trap number and its error
 188        // We can simply step past them on our way.
            // (Two 32-bit slots, hence 8 bytes.)
 189        addl    $8, %esp
 190
 191        // The last five stack slots hold return address
 192        // And everything needed to switch privilege
 193        // From Switcher's level 0 to Guest's 1,
 194        // And the stack where the Guest had last left it.
 195        // Interrupts are turned back on: we are Guest.
 196        iret
 197
 198// We tread two paths to switch back to the Host
 199// Yet both must save Guest state and restore Host
 200// So we put the routine in a macro.
    //
    // What the macro leaves behind, in plain prose:
    //   %eax = start of "struct lguest_pages" (the stack pointer rounded
    //          down to a page boundary)
    //   %ebx = trap number read from LGUEST_PAGES_regs_trapnum
    //   %edx = clobbered (it last holds the TSS selector)
    //   %esp = Host stack, with %ebp, %fs, %gs, %ds and %es already popped
    // The Guest registers are pushed in the reverse of the order in which
    // switch_to_guest pops them.
 201#define SWITCH_TO_HOST                                                  \
 202        /* We save the Guest state: all registers first                 \
 203         * Laid out just as "struct lguest_regs" defines */             \
 204        pushl   %es;                                                    \
 205        pushl   %ds;                                                    \
 206        pushl   %fs;                                                    \
 207        pushl   %gs;                                                    \
 208        pushl   %ebp;                                                   \
 209        pushl   %edi;                                                   \
 210        pushl   %esi;                                                   \
 211        pushl   %edx;                                                   \
 212        pushl   %ecx;                                                   \
 213        pushl   %ebx;                                                   \
 214        pushl   %eax;                                                   \
 215        /* Our stack and our code are using segments                    \
 216         * Set in the TSS and IDT                                       \
 217         * Yet if we were to touch data we'd use                        \
 218         * Whatever data segment the Guest had.                         \
 219         * Load the lguest ds segment for now. */                       \
 220        movl    $(LGUEST_DS), %eax;                                     \
 221        movl    %eax, %ds;                                              \
 222        /* So where are we?  Which CPU, which struct?                   \
 223         * The stack is our clue: our TSS starts                        \
 224         * It at the end of "struct lguest_pages".                      \
 225         * Or we may have stumbled while restoring                      \
 226         * Our Guest segment regs while in switch_to_guest,             \
 227         * The fault pushed atop that part-unwound stack.               \
 228         * If we round the stack down to the page start                 \
 229         * We're at the start of "struct lguest_pages". */              \
 230        movl    %esp, %eax;                                             \
            /* The mask is parenthesised in full: gas ranks "<<" above      \
             * "-" (the reverse of C), so we spell out the grouping         \
             * rather than lean on the assembler's precedence table. */     \
 231        andl    $(~((1 << PAGE_SHIFT) - 1)), %eax;                      \
 232        /* Save our trap number: the switch will obscure it             \
 233         * (In the Host the Guest regs are not mapped here)             \
 234         * %ebx holds it safe for deliver_to_host */                    \
 235        movl    LGUEST_PAGES_regs_trapnum(%eax), %ebx;                  \
 236        /* The Host GDT, IDT and stack!                                 \
 237         * All these lie safely hidden from the Guest:                  \
 238         * We must return to the Host page tables                       \
 239         * (Hence that was saved in struct lguest_pages) */             \
 240        movl    LGUEST_PAGES_host_cr3(%eax), %edx;                      \
 241        movl    %edx, %cr3;                                             \
 242        /* As before, when we looked back at the Host                   \
 243         * As we left and marked TSS unused                             \
 244         * So must we now for the Guest left behind. */                 \
 245        andb    $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \
 246        /* Switch to Host's GDT, IDT. */                                \
 247        lgdt    LGUEST_PAGES_host_gdt_desc(%eax);                       \
 248        lidt    LGUEST_PAGES_host_idt_desc(%eax);                       \
 249        /* Restore the Host's stack where its saved regs lie */         \
 250        movl    LGUEST_PAGES_host_sp(%eax), %esp;                       \
 251        /* Last the TSS: our Host is returned */                        \
 252        movl    $(GDT_ENTRY_TSS*8), %edx;                               \
 253        ltr     %dx;                                                    \
 254        /* Restore now the regs saved right at the first. */            \
 255        popl    %ebp;                                                   \
 256        popl    %fs;                                                    \
 257        popl    %gs;                                                    \
 258        popl    %ds;                                                    \
 259        popl    %es
 260
 261// The first path is trod when the Guest has trapped:
 262// (Which trap it was has been pushed on the stack).
 263// We need only switch back, and the Host will decode
 264// Why we came home, and what needs to be done.
    // (The stubs for vectors 0-1, 3-31 and 128 jump here.)
 265return_to_host:
 266        SWITCH_TO_HOST
            // Now on the Host stack: "iret" consumes the frame waiting there.
 267        iret
 268
 269// We are led to the second path like so:
 270// An interrupt, with some cause external
 271// Has ajerked us rudely from the Guest's code
 272// Again we must return home to the Host
    // (The stubs for vectors 32-127 and 129-255 jump here.)
 273deliver_to_host:
 274        SWITCH_TO_HOST
 275        // But now we must go home via that place
 276        // Where that interrupt was supposed to go
 277        // Had we not been ensconced, running the Guest.
 278        // Here we see the trickiness of run_guest_once():
 279        // The Host stack is formed like an interrupt
 280        // With EIP, CS and EFLAGS layered.
 281        // Interrupt handlers end with "iret"
 282        // And that will take us home at long long last.
 283
 284        // But first we must find the handler to call!
 285        // The IDT descriptor for the Host
 286        // Has two bytes for size, and four for address:
 287        // %edx will hold it for us for now.
            // (%eax still points at "struct lguest_pages", as the macro left it.)
 288        movl    (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx
 289        // We now know the table address we need,
 290        // And saved the trap's number inside %ebx.
 291        // Yet the pointer to the handler is smeared
 292        // Across the bits of the table entry.
 293        // What oracle can tell us how to extract
 294        // From such a convoluted encoding?
 295        // I consulted gcc, and it gave
 296        // These instructions, which I gladly credit:
            // Step by step: %eax = table base + 8 * trap number (the entry);
            // %edx = the entry's first 16 bits, zero-extended;
            // %eax = the entry's second 32-bit word, low 16 bits then cleared;
            // the final "orl" joins the two halves in %edx.
 297        leal    (%edx,%ebx,8), %eax
 298        movzwl  (%eax),%edx
 299        movl    4(%eax), %eax
 300        xorw    %ax, %ax
 301        orl     %eax, %edx
 302        // Now the address of the handler's in %edx
 303        // We call it now: its "iret" drops us home.
            // (A jump, not a call: no return address is pushed.)
 304        jmp     *%edx
 305
 306// Every interrupt can come to us here
 307// But we must truly tell each apart.
 308// They number two hundred and fifty six
 309// And each must land in a different spot,
 310// Push its number on stack, and join the stream.
 311
 312// And worse, a mere seven of the traps stand apart
 313// And push on their stack an addition:
 314// An error number, thirty two bits long
 315// So we punish the other two fifty
 316// And make them push a zero so they match.
 317
 318// Yet two fifty six entries is long
 319// And all will look most the same as the last
 320// So we create a macro which can make
 321// As many entries as we need to fill.
 322
 323// Note the change to .data then .text:
 324// We plant the address of each entry
 325// Into a (data) table for the Host
 326// To know where each Guest interrupt should go.
 327.macro IRQ_STUB N TARGET
            // N is the vector number, TARGET the label this stub jumps to.
            // First the stub's own address (local label 1, defined just
            // after) is appended to .data; then we return to .text, where
            // the stub's code is emitted.
 328        .data; .long 1f; .text; 1:
 329 // Trap eight, ten through fourteen and seventeen
 330 // Supply an error number.  Else zero.
     // (So every stub leaves the same two words on the stack: an error
     // number, real or zero, and above it the vector number.)
 331 .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17)
 332        pushl   $0
 333 .endif
 334        pushl   $\N
 335        jmp     \TARGET
            // ALIGN is not defined in this file; presumably it comes from
            // linux/linkage.h and pads up to the start of the next stub.
 336        ALIGN
 337.endm
 338
 339// This macro creates numerous entries
 340// Using GAS macros which out-power C's.
    // In prose: IRQ_STUBS FIRST LAST TARGET expands IRQ_STUB once for each
    // vector from FIRST to LAST inclusive, every one jumping to TARGET.
    // The assembler symbol "irq" serves as the counter and is left set to
    // LAST+1 afterwards.
 341.macro IRQ_STUBS FIRST LAST TARGET
 342 irq=\FIRST
 343 .rept \LAST-\FIRST+1
 344        IRQ_STUB irq \TARGET
 345  irq=irq+1
 346 .endr
 347.endm
 348
 349// Here's the marker for our pointer table
 350// Laid in the data section just before
 351// Each macro places the address of code
 352// Forming an array: each one points to text
 353// Which handles interrupt in its turn.
    // (The stubs below are emitted in vector order, 0 through 255, so the
    // i-th 32-bit word after default_idt_entries is the stub for vector i.)
 354.data
 355.global default_idt_entries
 356default_idt_entries:
 357.text
 358        // The first two traps go straight back to the Host
 359        IRQ_STUBS 0 1 return_to_host
 360        // We'll say nothing, yet, about NMI
 361        IRQ_STUB 2 handle_nmi
 362        // Other traps also return to the Host
 363        IRQ_STUBS 3 31 return_to_host
 364        // All interrupts go via their handlers
 365        IRQ_STUBS 32 127 deliver_to_host
 366        // 'Cept system calls coming from userspace
 367        // Are to go to the Guest, never the Host.
            // (Vector 128 takes return_to_host, not deliver_to_host, so the
            // Host's own handler for that vector is never jumped to from here.)
 368        IRQ_STUB 128 return_to_host
 369        IRQ_STUBS 129 255 deliver_to_host
 370
 371// The NMI, what a fabulous beast
 372// Which swoops in and stops us no matter that
 373// We're suspended between heaven and hell,
 374// (Or more likely between the Host and Guest)
 375// When in it comes!  We are dazed and confused
 376// So we do the simplest thing which one can.
 377// Though we've pushed the trap number and zero
 378// We discard them, return, and hope we live.
 379handle_nmi:
            // Drop the two 32-bit words pushed by the vector 2 stub
            // (the zero and the trap number), then return at once.
 380        addl    $8, %esp
 381        iret
 382
 383// We are done; all that's left is Mastery
 384// And "make Mastery" is a journey long
 385// Designed to make your fingers itch to code.
 386
 387// Here ends the text, the file and poem.
    // (end_switcher_text is the closing bracket to start_switcher_text.)
 388ENTRY(end_switcher_text)
 389