Signed-off-by: Rusty Russell --- drivers/lguest/Makefile | 6 drivers/lguest/i386_switcher.S | 350 ++++++++++++++++++++++++++++++++++++++++ drivers/lguest/switcher.S | 350 ---------------------------------------- 3 files changed, 354 insertions(+), 352 deletions(-) =================================================================== --- a/drivers/lguest/Makefile +++ b/drivers/lguest/Makefile @@ -1,8 +1,10 @@ # Host requires the other files, which can be a module. obj-$(CONFIG_LGUEST) += lg.o -lg-y := core.o hypercalls.o page_tables.o interrupts_and_traps.o \ - segments.o io.o lguest_user.o switcher.o +lg-y = core.o hypercalls.o page_tables.o interrupts_and_traps.o \ + segments.o io.o lguest_user.o + +lg-$(CONFIG_X86_32) += i386_switcher.o Preparation Preparation!: PREFIX=P Guest: PREFIX=G =================================================================== --- /dev/null +++ b/drivers/lguest/i386_switcher.S @@ -0,0 +1,350 @@ +/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level + * Guest<->Host switch. It is as simple as it can be made, but it's naturally + * very specific to x86. + * + * You have now completed Preparation. If this has whet your appetite; if you + * are feeling invigorated and refreshed then the next, more challenging stage + * can be found in "make Guest". :*/ + +/*S:100 + * Welcome to the Switcher itself! + * + * This file contains the low-level code which changes the CPU to run the Guest + * code, and returns to the Host when something happens. Understand this, and + * you understand the heart of our journey. + * + * Because this is in assembler rather than C, our tale switches from prose to + * verse. First I tried limericks: + * + * There once was an eax reg, + * To which our pointer was fed, + * It needed an add, + * Which asm-offsets.h had + * But this limerick is hurting my head. + * + * Next I tried haikus, but fitting the required reference to the seasons in + * every stanza was quickly becoming tiresome: + * + * The %eax reg + * Holds "struct lguest_pages" now: + * Cherry blossoms fall. + * + * Then I started with Heroic Verse, but the rhyming requirement leeched away + * the content density and led to some uniquely awful oblique rhymes: + * + * These constants are coming from struct offsets + * For use within the asm switcher text. + * + * Finally, I settled for something between heroic hexameter, and normal prose + * with inappropriate linebreaks. Anyway, it aint no Shakespeare. + */ + +// Not all kernel headers work from assembler +// But these ones are needed: the ENTRY() define +// And constants extracted from struct offsets +// To avoid magic numbers and breakage: +// Should they change the compiler can't save us +// Down here in the depths of assembler code. +#include +#include +#include +#include "lg.h" + +// We mark the start of the code to copy +// It's placed in .text tho it's never run here +// You'll see the trick macro at the end +// Which interleaves data and text to effect. +.text +ENTRY(start_switcher_text) + +// When we reach switch_to_guest we have just left +// The safe and comforting shores of C code +// %eax has the "struct lguest_pages" to use +// Where we save state and still see it from the Guest +// And %ebx holds the Guest shadow pagetable: +// Once set we have truly left Host behind. +ENTRY(switch_to_guest) + // We told gcc all its regs could fade, + // Clobbered by our journey into the Guest + // We could have saved them, if we tried + // But time is our master and cycles count. + + // Segment registers must be saved for the Host + // We push them on the Host stack for later + pushl %es + pushl %ds + pushl %gs + pushl %fs + // But the compiler is fickle, and heeds + // No warning of %ebp clobbers + // When frame pointers are used. That register + // Must be saved and restored or chaos strikes. + pushl %ebp + // The Host's stack is done, now save it away + // In our "struct lguest_pages" at offset + // Distilled into asm-offsets.h + movl %esp, LGUEST_PAGES_host_sp(%eax) + + // All saved and there's now five steps before us: + // Stack, GDT, IDT, TSS + // And last of all the page tables are flipped. + + // Yet beware that our stack pointer must be + // Always valid lest an NMI hits + // %edx does the duty here as we juggle + // %eax is lguest_pages: our stack lies within. + movl %eax, %edx + addl $LGUEST_PAGES_regs, %edx + movl %edx, %esp + + // The Guest's GDT we so carefully + // Placed in the "struct lguest_pages" before + lgdt LGUEST_PAGES_guest_gdt_desc(%eax) + + // The Guest's IDT we did partially + // Move to the "struct lguest_pages" as well. + lidt LGUEST_PAGES_guest_idt_desc(%eax) + + // The TSS entry which controls traps + // Must be loaded up with "ltr" now: + // For after we switch over our page tables + // It (as the rest) will be writable no more. + // (The GDT entry TSS needs + // Changes type when we load it: damn Intel!) + movl $(GDT_ENTRY_TSS*8), %edx + ltr %dx + + // Look back now, before we take this last step! + // The Host's TSS entry was also marked used; + // Let's clear it again, ere we return. + // The GDT descriptor of the Host + // Points to the table after two "size" bytes + movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx + // Clear the type field of "used" (byte 5, bit 2) + andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) + + // Once our page table's switched, the Guest is live! + // The Host fades as we run this final step. + // Our "struct lguest_pages" is now read-only. + movl %ebx, %cr3 + + // The page table change did one tricky thing: + // The Guest's register page has been mapped + // Writable onto our %esp (stack) -- + // We can simply pop off all Guest regs. + popl %ebx + popl %ecx + popl %edx + popl %esi + popl %edi + popl %ebp + popl %gs + popl %eax + popl %fs + popl %ds + popl %es + + // Near the base of the stack lurk two strange fields + // Which we fill as we exit the Guest + // These are the trap number and its error + // We can simply step past them on our way. + addl $8, %esp + + // The last five stack slots hold return address + // And everything needed to change privilege + // Into the Guest privilege level of 1, + // And the stack where the Guest had last left it. + // Interrupts are turned back on: we are Guest. + iret + +// There are two paths where we switch to the Host +// So we put the routine in a macro. +// We are on our way home, back to the Host +// Interrupted out of the Guest, we come here. +#define SWITCH_TO_HOST \ + /* We save the Guest state: all registers first \ + * Laid out just as "struct lguest_regs" defines */ \ + pushl %es; \ + pushl %ds; \ + pushl %fs; \ + pushl %eax; \ + pushl %gs; \ + pushl %ebp; \ + pushl %edi; \ + pushl %esi; \ + pushl %edx; \ + pushl %ecx; \ + pushl %ebx; \ + /* Our stack and our code are using segments \ + * Set in the TSS and IDT \ + * Yet if we were to touch data we'd use \ + * Whatever data segment the Guest had. \ + * Load the lguest ds segment for now. */ \ + movl $(LGUEST_DS), %eax; \ + movl %eax, %ds; \ + /* So where are we? Which CPU, which struct? \ + * The stack is our clue: our TSS starts \ + * It at the end of "struct lguest_pages". \ + * Or we may have stumbled while restoring \ + * Our Guest segment regs while in switch_to_guest, \ + * The fault pushed atop that part-unwound stack. \ + * If we round the stack down to the page start \ + * We're at the start of "struct lguest_pages". */ \ + movl %esp, %eax; \ + andl $(~(1 << PAGE_SHIFT - 1)), %eax; \ + /* Save our trap number: the switch will obscure it \ + * (The Guest regs are not mapped here in the Host) \ + * %ebx holds it safe for deliver_to_host */ \ + movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ + /* The Host GDT, IDT and stack! \ + * All these lie safely hidden from the Guest: \ + * We must return to the Host page tables \ + * (Hence that was saved in struct lguest_pages) */ \ + movl LGUEST_PAGES_host_cr3(%eax), %edx; \ + movl %edx, %cr3; \ + /* As before, when we looked back at the Host \ + * As we left and marked TSS unused \ + * So must we now for the Guest left behind. */ \ + andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ + /* Switch to Host's GDT, IDT. */ \ + lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ + lidt LGUEST_PAGES_host_idt_desc(%eax); \ + /* Restore the Host's stack where it's saved regs lie */ \ + movl LGUEST_PAGES_host_sp(%eax), %esp; \ + /* Last the TSS: our Host is complete */ \ + movl $(GDT_ENTRY_TSS*8), %edx; \ + ltr %dx; \ + /* Restore now the regs saved right at the first. */ \ + popl %ebp; \ + popl %fs; \ + popl %gs; \ + popl %ds; \ + popl %es + +// Here's where we come when the Guest has just trapped: +// (Which trap we'll see has been pushed on the stack). +// We need only switch back, and the Host will decode +// Why we came home, and what needs to be done. +return_to_host: + SWITCH_TO_HOST + iret + +// An interrupt, with some cause external +// Has ajerked us rudely from the Guest's code +// Again we must return home to the Host +deliver_to_host: + SWITCH_TO_HOST + // But now we must go home via that place + // Where that interrupt was supposed to go + // Had we not been ensconced, running the Guest. + // Here we see the cleverness of our stack: + // The Host stack is formed like an interrupt + // With EIP, CS and EFLAGS layered. + // Interrupt handlers end with "iret" + // And that will take us home at long long last. + + // But first we must find the handler to call! + // The IDT descriptor for the Host + // Has two bytes for size, and four for address: + // %edx will hold it for us for now. + movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx + // We now know the table address we need, + // And saved the trap's number inside %ebx. + // Yet the pointer to the handler is smeared + // Across the bits of the table entry. + // What oracle can tell us how to extract + // From such a convoluted encoding? + // I consulted gcc, and it gave + // These instructions, which I gladly credit: + leal (%edx,%ebx,8), %eax + movzwl (%eax),%edx + movl 4(%eax), %eax + xorw %ax, %ax + orl %eax, %edx + // Now the address of the handler's in %edx + // We call it now: its "iret" takes us home. + jmp *%edx + +// Every interrupt can come to us here +// But we must truly tell each apart. +// They number two hundred and fifty six +// And each must land in a different spot, +// Push its number on stack, and join the stream. + +// And worse, a mere six of the traps stand apart +// And push on their stack an addition: +// An error number, thirty two bits long +// So we punish the other two fifty +// And make them push a zero so they match. + +// Yet two fifty six entries is long +// And all will look most the same as the last +// So we create a macro which can make +// As many entries as we need to fill. + +// Note the change to .data then .text: +// We plant the address of each entry +// Into a (data) table for the Host +// To know where each Guest interrupt should go. +.macro IRQ_STUB N TARGET + .data; .long 1f; .text; 1: + // Trap eight, ten through fourteen and seventeen + // Supply an error number. Else zero. + .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) + pushl $0 + .endif + pushl $\N + jmp \TARGET + ALIGN +.endm + +// This macro creates numerous entries +// Using GAS macros which out-power C's. +.macro IRQ_STUBS FIRST LAST TARGET + irq=\FIRST + .rept \LAST-\FIRST+1 + IRQ_STUB irq \TARGET + irq=irq+1 + .endr +.endm + +// Here's the marker for our pointer table +// Laid in the data section just before +// Each macro places the address of code +// Forming an array: each one points to text +// Which handles interrupt in its turn. +.data +.global default_idt_entries +default_idt_entries: +.text + // The first two traps go straight back to the Host + IRQ_STUBS 0 1 return_to_host + // We'll say nothing, yet, about NMI + IRQ_STUB 2 handle_nmi + // Other traps also return to the Host + IRQ_STUBS 3 31 return_to_host + // All interrupts go via their handlers + IRQ_STUBS 32 127 deliver_to_host + // 'Cept system calls coming from userspace + // Are to go to the Guest, never the Host. + IRQ_STUB 128 return_to_host + IRQ_STUBS 129 255 deliver_to_host + +// The NMI, what a fabulous beast +// Which swoops in and stops us no matter that +// We're suspended between heaven and hell, +// (Or more likely between the Host and Guest) +// When in it comes! We are dazed and confused +// So we do the simplest thing which one can. +// Though we've pushed the trap number and zero +// We discard them, return, and hope we live. +handle_nmi: + addl $8, %esp + iret + +// We are done; all that's left is Mastery +// And "make Mastery" is a journey long +// Designed to make your fingers itch to code. + +// Here ends the text, the file and poem. +ENTRY(end_switcher_text) =================================================================== --- a/drivers/lguest/switcher.S +++ /dev/null @@ -1,350 +0,0 @@ -/*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level - * Guest<->Host switch. It is as simple as it can be made, but it's naturally - * very specific to x86. - * - * You have now completed Preparation. If this has whet your appetite; if you - * are feeling invigorated and refreshed then the next, more challenging stage - * can be found in "make Guest". :*/ - -/*S:100 - * Welcome to the Switcher itself! - * - * This file contains the low-level code which changes the CPU to run the Guest - * code, and returns to the Host when something happens. Understand this, and - * you understand the heart of our journey. - * - * Because this is in assembler rather than C, our tale switches from prose to - * verse. First I tried limericks: - * - * There once was an eax reg, - * To which our pointer was fed, - * It needed an add, - * Which asm-offsets.h had - * But this limerick is hurting my head. - * - * Next I tried haikus, but fitting the required reference to the seasons in - * every stanza was quickly becoming tiresome: - * - * The %eax reg - * Holds "struct lguest_pages" now: - * Cherry blossoms fall. - * - * Then I started with Heroic Verse, but the rhyming requirement leeched away - * the content density and led to some uniquely awful oblique rhymes: - * - * These constants are coming from struct offsets - * For use within the asm switcher text. - * - * Finally, I settled for something between heroic hexameter, and normal prose - * with inappropriate linebreaks. Anyway, it aint no Shakespeare. - */ - -// Not all kernel headers work from assembler -// But these ones are needed: the ENTRY() define -// And constants extracted from struct offsets -// To avoid magic numbers and breakage: -// Should they change the compiler can't save us -// Down here in the depths of assembler code. -#include -#include -#include -#include "lg.h" - -// We mark the start of the code to copy -// It's placed in .text tho it's never run here -// You'll see the trick macro at the end -// Which interleaves data and text to effect. -.text -ENTRY(start_switcher_text) - -// When we reach switch_to_guest we have just left -// The safe and comforting shores of C code -// %eax has the "struct lguest_pages" to use -// Where we save state and still see it from the Guest -// And %ebx holds the Guest shadow pagetable: -// Once set we have truly left Host behind. -ENTRY(switch_to_guest) - // We told gcc all its regs could fade, - // Clobbered by our journey into the Guest - // We could have saved them, if we tried - // But time is our master and cycles count. - - // Segment registers must be saved for the Host - // We push them on the Host stack for later - pushl %es - pushl %ds - pushl %gs - pushl %fs - // But the compiler is fickle, and heeds - // No warning of %ebp clobbers - // When frame pointers are used. That register - // Must be saved and restored or chaos strikes. - pushl %ebp - // The Host's stack is done, now save it away - // In our "struct lguest_pages" at offset - // Distilled into asm-offsets.h - movl %esp, LGUEST_PAGES_host_sp(%eax) - - // All saved and there's now five steps before us: - // Stack, GDT, IDT, TSS - // And last of all the page tables are flipped. - - // Yet beware that our stack pointer must be - // Always valid lest an NMI hits - // %edx does the duty here as we juggle - // %eax is lguest_pages: our stack lies within. - movl %eax, %edx - addl $LGUEST_PAGES_regs, %edx - movl %edx, %esp - - // The Guest's GDT we so carefully - // Placed in the "struct lguest_pages" before - lgdt LGUEST_PAGES_guest_gdt_desc(%eax) - - // The Guest's IDT we did partially - // Move to the "struct lguest_pages" as well. - lidt LGUEST_PAGES_guest_idt_desc(%eax) - - // The TSS entry which controls traps - // Must be loaded up with "ltr" now: - // For after we switch over our page tables - // It (as the rest) will be writable no more. - // (The GDT entry TSS needs - // Changes type when we load it: damn Intel!) - movl $(GDT_ENTRY_TSS*8), %edx - ltr %dx - - // Look back now, before we take this last step! - // The Host's TSS entry was also marked used; - // Let's clear it again, ere we return. - // The GDT descriptor of the Host - // Points to the table after two "size" bytes - movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx - // Clear the type field of "used" (byte 5, bit 2) - andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) - - // Once our page table's switched, the Guest is live! - // The Host fades as we run this final step. - // Our "struct lguest_pages" is now read-only. - movl %ebx, %cr3 - - // The page table change did one tricky thing: - // The Guest's register page has been mapped - // Writable onto our %esp (stack) -- - // We can simply pop off all Guest regs. - popl %ebx - popl %ecx - popl %edx - popl %esi - popl %edi - popl %ebp - popl %gs - popl %eax - popl %fs - popl %ds - popl %es - - // Near the base of the stack lurk two strange fields - // Which we fill as we exit the Guest - // These are the trap number and its error - // We can simply step past them on our way. - addl $8, %esp - - // The last five stack slots hold return address - // And everything needed to change privilege - // Into the Guest privilege level of 1, - // And the stack where the Guest had last left it. - // Interrupts are turned back on: we are Guest. - iret - -// There are two paths where we switch to the Host -// So we put the routine in a macro. -// We are on our way home, back to the Host -// Interrupted out of the Guest, we come here. -#define SWITCH_TO_HOST \ - /* We save the Guest state: all registers first \ - * Laid out just as "struct lguest_regs" defines */ \ - pushl %es; \ - pushl %ds; \ - pushl %fs; \ - pushl %eax; \ - pushl %gs; \ - pushl %ebp; \ - pushl %edi; \ - pushl %esi; \ - pushl %edx; \ - pushl %ecx; \ - pushl %ebx; \ - /* Our stack and our code are using segments \ - * Set in the TSS and IDT \ - * Yet if we were to touch data we'd use \ - * Whatever data segment the Guest had. \ - * Load the lguest ds segment for now. */ \ - movl $(LGUEST_DS), %eax; \ - movl %eax, %ds; \ - /* So where are we? Which CPU, which struct? \ - * The stack is our clue: our TSS starts \ - * It at the end of "struct lguest_pages". \ - * Or we may have stumbled while restoring \ - * Our Guest segment regs while in switch_to_guest, \ - * The fault pushed atop that part-unwound stack. \ - * If we round the stack down to the page start \ - * We're at the start of "struct lguest_pages". */ \ - movl %esp, %eax; \ - andl $(~(1 << PAGE_SHIFT - 1)), %eax; \ - /* Save our trap number: the switch will obscure it \ - * (The Guest regs are not mapped here in the Host) \ - * %ebx holds it safe for deliver_to_host */ \ - movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ - /* The Host GDT, IDT and stack! \ - * All these lie safely hidden from the Guest: \ - * We must return to the Host page tables \ - * (Hence that was saved in struct lguest_pages) */ \ - movl LGUEST_PAGES_host_cr3(%eax), %edx; \ - movl %edx, %cr3; \ - /* As before, when we looked back at the Host \ - * As we left and marked TSS unused \ - * So must we now for the Guest left behind. */ \ - andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ - /* Switch to Host's GDT, IDT. */ \ - lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ - lidt LGUEST_PAGES_host_idt_desc(%eax); \ - /* Restore the Host's stack where it's saved regs lie */ \ - movl LGUEST_PAGES_host_sp(%eax), %esp; \ - /* Last the TSS: our Host is complete */ \ - movl $(GDT_ENTRY_TSS*8), %edx; \ - ltr %dx; \ - /* Restore now the regs saved right at the first. */ \ - popl %ebp; \ - popl %fs; \ - popl %gs; \ - popl %ds; \ - popl %es - -// Here's where we come when the Guest has just trapped: -// (Which trap we'll see has been pushed on the stack). -// We need only switch back, and the Host will decode -// Why we came home, and what needs to be done. -return_to_host: - SWITCH_TO_HOST - iret - -// An interrupt, with some cause external -// Has ajerked us rudely from the Guest's code -// Again we must return home to the Host -deliver_to_host: - SWITCH_TO_HOST - // But now we must go home via that place - // Where that interrupt was supposed to go - // Had we not been ensconced, running the Guest. - // Here we see the cleverness of our stack: - // The Host stack is formed like an interrupt - // With EIP, CS and EFLAGS layered. - // Interrupt handlers end with "iret" - // And that will take us home at long long last. - - // But first we must find the handler to call! - // The IDT descriptor for the Host - // Has two bytes for size, and four for address: - // %edx will hold it for us for now. - movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx - // We now know the table address we need, - // And saved the trap's number inside %ebx. - // Yet the pointer to the handler is smeared - // Across the bits of the table entry. - // What oracle can tell us how to extract - // From such a convoluted encoding? - // I consulted gcc, and it gave - // These instructions, which I gladly credit: - leal (%edx,%ebx,8), %eax - movzwl (%eax),%edx - movl 4(%eax), %eax - xorw %ax, %ax - orl %eax, %edx - // Now the address of the handler's in %edx - // We call it now: its "iret" takes us home. - jmp *%edx - -// Every interrupt can come to us here -// But we must truly tell each apart. -// They number two hundred and fifty six -// And each must land in a different spot, -// Push its number on stack, and join the stream. - -// And worse, a mere six of the traps stand apart -// And push on their stack an addition: -// An error number, thirty two bits long -// So we punish the other two fifty -// And make them push a zero so they match. - -// Yet two fifty six entries is long -// And all will look most the same as the last -// So we create a macro which can make -// As many entries as we need to fill. - -// Note the change to .data then .text: -// We plant the address of each entry -// Into a (data) table for the Host -// To know where each Guest interrupt should go. -.macro IRQ_STUB N TARGET - .data; .long 1f; .text; 1: - // Trap eight, ten through fourteen and seventeen - // Supply an error number. Else zero. - .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) - pushl $0 - .endif - pushl $\N - jmp \TARGET - ALIGN -.endm - -// This macro creates numerous entries -// Using GAS macros which out-power C's. -.macro IRQ_STUBS FIRST LAST TARGET - irq=\FIRST - .rept \LAST-\FIRST+1 - IRQ_STUB irq \TARGET - irq=irq+1 - .endr -.endm - -// Here's the marker for our pointer table -// Laid in the data section just before -// Each macro places the address of code -// Forming an array: each one points to text -// Which handles interrupt in its turn. -.data -.global default_idt_entries -default_idt_entries: -.text - // The first two traps go straight back to the Host - IRQ_STUBS 0 1 return_to_host - // We'll say nothing, yet, about NMI - IRQ_STUB 2 handle_nmi - // Other traps also return to the Host - IRQ_STUBS 3 31 return_to_host - // All interrupts go via their handlers - IRQ_STUBS 32 127 deliver_to_host - // 'Cept system calls coming from userspace - // Are to go to the Guest, never the Host. - IRQ_STUB 128 return_to_host - IRQ_STUBS 129 255 deliver_to_host - -// The NMI, what a fabulous beast -// Which swoops in and stops us no matter that -// We're suspended between heaven and hell, -// (Or more likely between the Host and Guest) -// When in it comes! We are dazed and confused -// So we do the simplest thing which one can. -// Though we've pushed the trap number and zero -// We discard them, return, and hope we live. -handle_nmi: - addl $8, %esp - iret - -// We are done; all that's left is Mastery -// And "make Mastery" is a journey long -// Designed to make your fingers itch to code. - -// Here ends the text, the file and poem. -ENTRY(end_switcher_text) -- there are those who do and those who hang on and you don't see too many doers quoting their contemporaries. -- Larry McVoy