[Lguest] [patch] core.c i386 disentangle

Steven Rostedt rostedt at goodmis.org
Tue Aug 14 23:35:53 EST 2007


On Tue, 2007-08-14 at 14:54 +0200, Jes Sorensen wrote:
> Hi,

> plain text document attachment (lg-core-i386-seperate.diff)
> Separate i386 architecture-specific code from core.c and move it to
> i386_guest.c, and add header file entries to match.
> 
> Signed-off-by: Jes Sorensen <jes at sgi.com>
> 
> ---
>  drivers/lguest/core.c                 |  497 ---------------------------------
>  drivers/lguest/i386_guest.c           |  510 ++++++++++++++++++++++++++++++++++
>  drivers/lguest/interrupts_and_traps.c |   18 -
>  drivers/lguest/lg.h                   |   55 ---
>  drivers/lguest/segments.c             |   26 -
>  include/asm-i386/lguest.h             |   54 +++
>  6 files changed, 600 insertions(+), 560 deletions(-)
> 
> Index: linux-2.6.23-rc3/drivers/lguest/core.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/drivers/lguest/core.c
> +++ linux-2.6.23-rc3/drivers/lguest/core.c
> @@ -12,13 +12,13 @@
>  #include <linux/cpu.h>
>  #include <linux/freezer.h>
>  #include <asm/paravirt.h>
> -#include <asm/desc.h>
>  #include <asm/pgtable.h>
>  #include <asm/uaccess.h>
>  #include <asm/poll.h>
> +#ifdef CONFIG_HIGHMEM
>  #include <asm/highmem.h>
> +#endif

You should be able to just include <linux/highmem.h> instead of
<asm/highmem.h> and get rid of the ugly ifdefs.

>  #include <asm/asm-offsets.h>
> -#include <asm/i387.h>
>  #include "lg.h"
>  
>  /* Found in switcher.S */
> @@ -37,28 +37,8 @@ extern unsigned long default_idt_entries
>  static struct vm_struct *switcher_vma;
>  static struct page **switcher_page;
>  
> -static int cpu_had_pge;
> -static struct {
> -	unsigned long offset;
> -	unsigned short segment;
> -} lguest_entry;
> -
>  /* This One Big lock protects all inter-guest data structures. */
>  DEFINE_MUTEX(lguest_lock);
> -static DEFINE_PER_CPU(struct lguest *, last_guest);
> -
> -/* Offset from where switcher.S was compiled to where we've copied it */
> -static unsigned long switcher_offset(void)
> -{
> -	return SWITCHER_ADDR - (unsigned long)start_switcher_text;
> -}
> -
> -/* This cpu's struct lguest_pages. */
> -static struct lguest_pages *lguest_pages(unsigned int cpu)
> -{
> -	return &(((struct lguest_pages *)
> -		  (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
> -}
>  
>  /*H:010 We need to set up the Switcher at a high virtual address.  Remember the
>   * Switcher is a few hundred bytes of assembler code which actually changes the
> @@ -133,84 +113,8 @@ static __init int map_switcher(void)
>  	memcpy(switcher_vma->addr, start_switcher_text,
>  	       end_switcher_text - start_switcher_text);
>  
> -	/* Most of the switcher.S doesn't care that it's been moved; on Intel,
> -	 * jumps are relative, and it doesn't access any references to external
> -	 * code or data.
> -	 *
> -	 * The only exception is the interrupt handlers in switcher.S: their
> -	 * addresses are placed in a table (default_idt_entries), so we need to
> -	 * update the table with the new addresses.  switcher_offset() is a
> -	 * convenience function which returns the distance between the builtin
> -	 * switcher code and the high-mapped copy we just made. */
> -	for (i = 0; i < IDT_ENTRIES; i++)
> -		default_idt_entries[i] += switcher_offset();
> -
> -	/*
> -	 * Set up the Switcher's per-cpu areas.
> -	 *
> -	 * Each CPU gets two pages of its own within the high-mapped region
> -	 * (aka. "struct lguest_pages").  Much of this can be initialized now,
> -	 * but some depends on what Guest we are running (which is set up in
> -	 * copy_in_guest_info()).
> -	 */
> -	for_each_possible_cpu(i) {
> -		/* lguest_pages() returns this CPU's two pages. */
> -		struct lguest_pages *pages = lguest_pages(i);
> -		/* This is a convenience pointer to make the code fit one
> -		 * statement to a line. */
> -		struct lguest_ro_state *state = &pages->state;
> -
> -		/* The Global Descriptor Table: the Host has a different one
> -		 * for each CPU.  We keep a descriptor for the GDT which says
> -		 * where it is and how big it is (the size is actually the last
> -		 * byte, not the size, hence the "-1"). */
> -		state->host_gdt_desc.size = GDT_SIZE-1;
> -		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
> -
> -		/* All CPUs on the Host use the same Interrupt Descriptor
> -		 * Table, so we just use store_idt(), which gets this CPU's IDT
> -		 * descriptor. */
> -		store_idt(&state->host_idt_desc);
> -
> -		/* The descriptors for the Guest's GDT and IDT can be filled
> -		 * out now, too.  We copy the GDT & IDT into ->guest_gdt and
> -		 * ->guest_idt before actually running the Guest. */
> -		state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
> -		state->guest_idt_desc.address = (long)&state->guest_idt;
> -		state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
> -		state->guest_gdt_desc.address = (long)&state->guest_gdt;
> -
> -		/* We know where we want the stack to be when the Guest enters
> -		 * the switcher: in pages->regs.  The stack grows upwards, so
> -		 * we start it at the end of that structure. */
> -		state->guest_tss.esp0 = (long)(&pages->regs + 1);
> -		/* And this is the GDT entry to use for the stack: we keep a
> -		 * couple of special LGUEST entries. */
> -		state->guest_tss.ss0 = LGUEST_DS;
> -
> -		/* x86 can have a finegrained bitmap which indicates what I/O
> -		 * ports the process can use.  We set it to the end of our
> -		 * structure, meaning "none". */
> -		state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
> -
> -		/* Some GDT entries are the same across all Guests, so we can
> -		 * set them up now. */
> -		setup_default_gdt_entries(state);
> -		/* Most IDT entries are the same for all Guests, too.*/
> -		setup_default_idt_entries(state, default_idt_entries);
> -
> -		/* The Host needs to be able to use the LGUEST segments on this
> -		 * CPU, too, so put them in the Host GDT. */
> -		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
> -		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
> -	}
> -
> -	/* In the Switcher, we want the %cs segment register to use the
> -	 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
> -	 * it will be undisturbed when we switch.  To change %cs and jump we
> -	 * need this structure to feed to Intel's "lcall" instruction. */
> -	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
> -	lguest_entry.segment = LGUEST_CS;
> +	/* Call the architecture specific portion for mapping the switcher. */
> +	lguest_arch_map_switcher();
>  
>  	printk(KERN_INFO "lguest: mapped switcher at %p\n",
>  	       switcher_vma->addr);
> @@ -243,80 +147,6 @@ static void unmap_switcher(void)
>  		__free_pages(switcher_page[i], 0);
>  }
>  
> -/*H:130 Our Guest is usually so well behaved; it never tries to do things it
> - * isn't allowed to.  Unfortunately, "struct paravirt_ops" isn't quite
> - * complete, because it doesn't contain replacements for the Intel I/O
> - * instructions.  As a result, the Guest sometimes fumbles across one during
> - * the boot process as it probes for various things which are usually attached
> - * to a PC.
> - *
> - * When the Guest uses one of these instructions, we get trap #13 (General
> - * Protection Fault) and come here.  We see if it's one of those troublesome
> - * instructions and skip over it.  We return true if we did. */
> -static int emulate_insn(struct lguest *lg)
> -{
> -	u8 insn;
> -	unsigned int insnlen = 0, in = 0, shift = 0;
> -	/* The eip contains the *virtual* address of the Guest's instruction:
> -	 * guest_pa just subtracts the Guest's page_offset. */
> -	unsigned long physaddr = guest_pa(lg, lg->regs->eip);
> -
> -	/* The guest_pa() function only works for Guest kernel addresses, but
> -	 * that's all we're trying to do anyway. */
> -	if (lg->regs->eip < lg->page_offset)
> -		return 0;
> -
> -	/* Decoding x86 instructions is icky. */
> -	lgread(lg, &insn, physaddr, 1);
> -
> -	/* 0x66 is an "operand prefix".  It means it's using the upper 16 bits
> -	   of the eax register. */
> -	if (insn == 0x66) {
> -		shift = 16;
> -		/* The instruction is 1 byte so far, read the next byte. */
> -		insnlen = 1;
> -		lgread(lg, &insn, physaddr + insnlen, 1);
> -	}
> -
> -	/* We can ignore the lower bit for the moment and decode the 4 opcodes
> -	 * we need to emulate. */
> -	switch (insn & 0xFE) {
> -	case 0xE4: /* in     <next byte>,%al */
> -		insnlen += 2;
> -		in = 1;
> -		break;
> -	case 0xEC: /* in     (%dx),%al */
> -		insnlen += 1;
> -		in = 1;
> -		break;
> -	case 0xE6: /* out    %al,<next byte> */
> -		insnlen += 2;
> -		break;
> -	case 0xEE: /* out    %al,(%dx) */
> -		insnlen += 1;
> -		break;
> -	default:
> -		/* OK, we don't know what this is, can't emulate. */
> -		return 0;
> -	}
> -
> -	/* If it was an "IN" instruction, they expect the result to be read
> -	 * into %eax, so we change %eax.  We always return all-ones, which
> -	 * traditionally means "there's nothing there". */
> -	if (in) {
> -		/* Lower bit tells is whether it's a 16 or 32 bit access */
> -		if (insn & 0x1)
> -			lg->regs->eax = 0xFFFFFFFF;
> -		else
> -			lg->regs->eax |= (0xFFFF << shift);
> -	}
> -	/* Finally, we've "done" the instruction, so move past it. */
> -	lg->regs->eip += insnlen;
> -	/* Success! */
> -	return 1;
> -}
> -/*:*/
> -
>  /*L:305
>   * Dealing With Guest Memory.
>   *
> @@ -380,290 +210,6 @@ void lgwrite(struct lguest *lg, unsigned
>  }
>  /* (end of memory access helper routines) :*/
>  
> -static void set_ts(void)
> -{
> -	u32 cr0;
> -
> -	cr0 = read_cr0();
> -	if (!(cr0 & 8))
> -		write_cr0(cr0|8);
> -}
> -
> -/*S:010
> - * We are getting close to the Switcher.
> - *
> - * Remember that each CPU has two pages which are visible to the Guest when it
> - * runs on that CPU.  This has to contain the state for that Guest: we copy the
> - * state in just before we run the Guest.
> - *
> - * Each Guest has "changed" flags which indicate what has changed in the Guest
> - * since it last ran.  We saw this set in interrupts_and_traps.c and
> - * segments.c.
> - */
> -static void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
> -{
> -	/* Copying all this data can be quite expensive.  We usually run the
> -	 * same Guest we ran last time (and that Guest hasn't run anywhere else
> -	 * meanwhile).  If that's not the case, we pretend everything in the
> -	 * Guest has changed. */
> -	if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
> -		__get_cpu_var(last_guest) = lg;
> -		lg->last_pages = pages;
> -		lg->changed = CHANGED_ALL;
> -	}
> -
> -	/* These copies are pretty cheap, so we do them unconditionally: */
> -	/* Save the current Host top-level page directory. */
> -	pages->state.host_cr3 = __pa(current->mm->pgd);
> -	/* Set up the Guest's page tables to see this CPU's pages (and no
> -	 * other CPU's pages). */
> -	map_switcher_in_guest(lg, pages);
> -	/* Set up the two "TSS" members which tell the CPU what stack to use
> -	 * for traps which do directly into the Guest (ie. traps at privilege
> -	 * level 1). */
> -	pages->state.guest_tss.esp1 = lg->esp1;
> -	pages->state.guest_tss.ss1 = lg->ss1;
> -
> -	/* Copy direct-to-Guest trap entries. */
> -	if (lg->changed & CHANGED_IDT)
> -		copy_traps(lg, pages->state.guest_idt, default_idt_entries);
> -
> -	/* Copy all GDT entries which the Guest can change. */
> -	if (lg->changed & CHANGED_GDT)
> -		copy_gdt(lg, pages->state.guest_gdt);
> -	/* If only the TLS entries have changed, copy them. */
> -	else if (lg->changed & CHANGED_GDT_TLS)
> -		copy_gdt_tls(lg, pages->state.guest_gdt);
> -
> -	/* Mark the Guest as unchanged for next time. */
> -	lg->changed = 0;
> -}
> -
> -/* Finally: the code to actually call into the Switcher to run the Guest. */
> -static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
> -{
> -	/* This is a dummy value we need for GCC's sake. */
> -	unsigned int clobber;
> -
> -	/* Copy the guest-specific information into this CPU's "struct
> -	 * lguest_pages". */
> -	copy_in_guest_info(lg, pages);
> -
> -	/* Set the trap number to 256 (impossible value).  If we fault while
> -	 * switching to the Guest (bad segment registers or bug), this will
> -	 * cause us to abort the Guest. */
> -	lg->regs->trapnum = 256;
> -
> -	/* Now: we push the "eflags" register on the stack, then do an "lcall".
> -	 * This is how we change from using the kernel code segment to using
> -	 * the dedicated lguest code segment, as well as jumping into the
> -	 * Switcher.
> -	 *
> -	 * The lcall also pushes the old code segment (KERNEL_CS) onto the
> -	 * stack, then the address of this call.  This stack layout happens to
> -	 * exactly match the stack of an interrupt... */
> -	asm volatile("pushf; lcall *lguest_entry"
> -		     /* This is how we tell GCC that %eax ("a") and %ebx ("b")
> -		      * are changed by this routine.  The "=" means output. */
> -		     : "=a"(clobber), "=b"(clobber)
> -		     /* %eax contains the pages pointer.  ("0" refers to the
> -		      * 0-th argument above, ie "a").  %ebx contains the
> -		      * physical address of the Guest's top-level page
> -		      * directory. */
> -		     : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
> -		     /* We tell gcc that all these registers could change,
> -		      * which means we don't have to save and restore them in
> -		      * the Switcher. */
> -		     : "memory", "%edx", "%ecx", "%edi", "%esi");
> -}
> -/*:*/
> -
> -/*H:030 Let's jump straight to the the main loop which runs the Guest.
> - * Remember, this is called by the Launcher reading /dev/lguest, and we keep
> - * going around and around until something interesting happens. */
> -int run_guest(struct lguest *lg, unsigned long __user *user)
> -{
> -	/* We stop running once the Guest is dead. */
> -	while (!lg->dead) {
> -		/* We need to initialize this, otherwise gcc complains.  It's
> -		 * not (yet) clever enough to see that it's initialized when we
> -		 * need it. */
> -		unsigned int cr2 = 0; /* Damn gcc */
> -
> -		/* First we run any hypercalls the Guest wants done: either in
> -		 * the hypercall ring in "struct lguest_data", or directly by
> -		 * using int 31 (LGUEST_TRAP_ENTRY). */
> -		do_hypercalls(lg);
> -		/* It's possible the Guest did a SEND_DMA hypercall to the
> -		 * Launcher, in which case we return from the read() now. */
> -		if (lg->dma_is_pending) {
> -			if (put_user(lg->pending_dma, user) ||
> -			    put_user(lg->pending_key, user+1))
> -				return -EFAULT;
> -			return sizeof(unsigned long)*2;
> -		}
> -
> -		/* Check for signals */
> -		if (signal_pending(current))
> -			return -ERESTARTSYS;
> -
> -		/* If Waker set break_out, return to Launcher. */
> -		if (lg->break_out)
> -			return -EAGAIN;
> -
> -		/* Check if there are any interrupts which can be delivered
> -		 * now: if so, this sets up the hander to be executed when we
> -		 * next run the Guest. */
> -		maybe_do_interrupt(lg);
> -
> -		/* All long-lived kernel loops need to check with this horrible
> -		 * thing called the freezer.  If the Host is trying to suspend,
> -		 * it stops us. */
> -		try_to_freeze();
> -
> -		/* Just make absolutely sure the Guest is still alive.  One of
> -		 * those hypercalls could have been fatal, for example. */
> -		if (lg->dead)
> -			break;
> -
> -		/* If the Guest asked to be stopped, we sleep.  The Guest's
> -		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
> -		if (lg->halted) {
> -			set_current_state(TASK_INTERRUPTIBLE);
> -			schedule();
> -			continue;
> -		}
> -
> -		/* OK, now we're ready to jump into the Guest.  First we put up
> -		 * the "Do Not Disturb" sign: */
> -		local_irq_disable();
> -
> -		/* Remember the awfully-named TS bit?  If the Guest has asked
> -		 * to set it we set it now, so we can trap and pass that trap
> -		 * to the Guest if it uses the FPU. */
> -		if (lg->ts)
> -			set_ts();
> -
> -		/* SYSENTER is an optimized way of doing system calls.  We
> -		 * can't allow it because it always jumps to privilege level 0.
> -		 * A normal Guest won't try it because we don't advertise it in
> -		 * CPUID, but a malicious Guest (or malicious Guest userspace
> -		 * program) could, so we tell the CPU to disable it before
> -		 * running the Guest. */
> -		if (boot_cpu_has(X86_FEATURE_SEP))
> -			wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
> -
> -		/* Now we actually run the Guest.  It will pop back out when
> -		 * something interesting happens, and we can examine its
> -		 * registers to see what it was doing. */
> -		run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
> -
> -		/* The "regs" pointer contains two extra entries which are not
> -		 * really registers: a trap number which says what interrupt or
> -		 * trap made the switcher code come back, and an error code
> -		 * which some traps set.  */
> -
> -		/* If the Guest page faulted, then the cr2 register will tell
> -		 * us the bad virtual address.  We have to grab this now,
> -		 * because once we re-enable interrupts an interrupt could
> -		 * fault and thus overwrite cr2, or we could even move off to a
> -		 * different CPU. */
> -		if (lg->regs->trapnum == 14)
> -			cr2 = read_cr2();
> -		/* Similarly, if we took a trap because the Guest used the FPU,
> -		 * we have to restore the FPU it expects to see. */
> -		else if (lg->regs->trapnum == 7)
> -			math_state_restore();
> -
> -		/* Restore SYSENTER if it's supposed to be on. */
> -		if (boot_cpu_has(X86_FEATURE_SEP))
> -			wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
> -
> -		/* Now we're ready to be interrupted or moved to other CPUs */
> -		local_irq_enable();
> -
> -		/* OK, so what happened? */
> -		switch (lg->regs->trapnum) {
> -		case 13: /* We've intercepted a GPF. */
> -			/* Check if this was one of those annoying IN or OUT
> -			 * instructions which we need to emulate.  If so, we
> -			 * just go back into the Guest after we've done it. */
> -			if (lg->regs->errcode == 0) {
> -				if (emulate_insn(lg))
> -					continue;
> -			}
> -			break;
> -		case 14: /* We've intercepted a page fault. */
> -			/* The Guest accessed a virtual address that wasn't
> -			 * mapped.  This happens a lot: we don't actually set
> -			 * up most of the page tables for the Guest at all when
> -			 * we start: as it runs it asks for more and more, and
> -			 * we set them up as required. In this case, we don't
> -			 * even tell the Guest that the fault happened.
> -			 *
> -			 * The errcode tells whether this was a read or a
> -			 * write, and whether kernel or userspace code. */
> -			if (demand_page(lg, cr2, lg->regs->errcode))
> -				continue;
> -
> -			/* OK, it's really not there (or not OK): the Guest
> -			 * needs to know.  We write out the cr2 value so it
> -			 * knows where the fault occurred.
> -			 *
> -			 * Note that if the Guest were really messed up, this
> -			 * could happen before it's done the INITIALIZE
> -			 * hypercall, so lg->lguest_data will be NULL */
> -			if (lg->lguest_data
> -			    && put_user(cr2, &lg->lguest_data->cr2))
> -				kill_guest(lg, "Writing cr2");
> -			break;
> -		case 7: /* We've intercepted a Device Not Available fault. */
> -			/* If the Guest doesn't want to know, we already
> -			 * restored the Floating Point Unit, so we just
> -			 * continue without telling it. */
> -			if (!lg->ts)
> -				continue;
> -			break;
> -		case 32 ... 255:
> -			/* These values mean a real interrupt occurred, in
> -			 * which case the Host handler has already been run.
> -			 * We just do a friendly check if another process
> -			 * should now be run, then fall through to loop
> -			 * around: */
> -			cond_resched();
> -		case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
> -			continue;
> -		}
> -
> -		/* If we get here, it's a trap the Guest wants to know
> -		 * about. */
> -		if (deliver_trap(lg, lg->regs->trapnum))
> -			continue;
> -
> -		/* If the Guest doesn't have a handler (either it hasn't
> -		 * registered any yet, or it's one of the faults we don't let
> -		 * it handle), it dies with a cryptic error message. */
> -		kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
> -			   lg->regs->trapnum, lg->regs->eip,
> -			   lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
> -	}
> -	/* The Guest is dead => "No such file or directory" */
> -	return -ENOENT;
> -}
> -
> -/* Now we can look at each of the routines this calls, in increasing order of
> - * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
> - * deliver_trap() and demand_page().  After all those, we'll be ready to
> - * examine the Switcher, and our philosophical understanding of the Host/Guest
> - * duality will be complete. :*/
> -static void adjust_pge(void *on)
> -{
> -	if (on)
> -		write_cr4(read_cr4() | X86_CR4_PGE);
> -	else
> -		write_cr4(read_cr4() & ~X86_CR4_PGE);
> -}
> -
>  /*H:000
>   * Welcome to the Host!
>   *
> @@ -705,31 +251,7 @@ static int __init init(void)
>  		return err;
>  	}
>  
> -	/* Finally, we need to turn off "Page Global Enable".  PGE is an
> -	 * optimization where page table entries are specially marked to show
> -	 * they never change.  The Host kernel marks all the kernel pages this
> -	 * way because it's always present, even when userspace is running.
> -	 *
> -	 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
> -	 * switch to the Guest kernel.  If you don't disable this on all CPUs,
> -	 * you'll get really weird bugs that you'll chase for two days.
> -	 *
> -	 * I used to turn PGE off every time we switched to the Guest and back
> -	 * on when we return, but that slowed the Switcher down noticibly. */
> -
> -	/* We don't need the complexity of CPUs coming and going while we're
> -	 * doing this. */
> -	lock_cpu_hotplug();
> -	if (cpu_has_pge) { /* We have a broader idea of "global". */
> -		/* Remember that this was originally set (for cleanup). */
> -		cpu_had_pge = 1;
> -		/* adjust_pge is a helper function which sets or unsets the PGE
> -		 * bit on its CPU, depending on the argument (0 == unset). */
> -		on_each_cpu(adjust_pge, (void *)0, 0, 1);
> -		/* Turn off the feature in the global feature set. */
> -		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
> -	}
> -	unlock_cpu_hotplug();
> +	lguest_arch_host_init();
>  
>  	/* All good! */
>  	return 0;
> @@ -743,14 +265,7 @@ static void __exit fini(void)
>  	free_pagetables();
>  	unmap_switcher();
>  
> -	/* If we had PGE before we started, turn it back on now. */
> -	lock_cpu_hotplug();
> -	if (cpu_had_pge) {
> -		set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
> -		/* adjust_pge's argument "1" means set PGE. */
> -		on_each_cpu(adjust_pge, (void *)1, 0, 1);
> -	}
> -	unlock_cpu_hotplug();
> +	lguest_arch_host_fini();
>  }
>  
>  /* The Host side of lguest can be a module.  This is a nice way for people to
> Index: linux-2.6.23-rc3/drivers/lguest/i386_guest.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/drivers/lguest/i386_guest.c
> +++ linux-2.6.23-rc3/drivers/lguest/i386_guest.c
> @@ -65,6 +65,7 @@
>  #include <asm/e820.h>
>  #include <asm/mce.h>
>  #include <asm/io.h>
> +#include <asm/i387.h>
>  
>  /*G:010 Welcome to the Guest!
>   *
> @@ -91,6 +92,28 @@ struct lguest_data lguest_data = {
>  struct lguest_device_desc *lguest_devices;
>  static cycle_t clock_base;
>  
> +static int cpu_had_pge;
> +
> +static struct {
> +	unsigned long offset;
> +	unsigned short segment;
> +} lguest_entry;
> +
> +/* Offset from where switcher.S was compiled to where we've copied it */
> +static unsigned long switcher_offset(void)
> +{
> +	return SWITCHER_ADDR - (unsigned long)start_switcher_text;
> +}
> +
> +/* This cpu's struct lguest_pages. */
> +static struct lguest_pages *lguest_pages(unsigned int cpu)
> +{
> +	return &(((struct lguest_pages *)
> +		  (SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
> +}
> +
> +static DEFINE_PER_CPU(struct lguest *, last_guest);
> +
>  /*G:035 Notice the lazy_hcall() above, rather than hcall().  This is our first
>   * real optimization trick!
>   *
> @@ -1060,3 +1083,490 @@ __init void lguest_init(void *boot)
>   * It is now time for us to explore the nooks and crannies of the three Guest
>   * devices and complete our understanding of the Guest in "make Drivers".
>   */
> +
> +__init int lguest_arch_map_switcher(void)
> +{
> +	int i, err;
> +
> +	/* Most of the switcher.S doesn't care that it's been moved; on Intel,
> +	 * jumps are relative, and it doesn't access any references to external
> +	 * code or data.
> +	 *
> +	 * The only exception is the interrupt handlers in switcher.S: their
> +	 * addresses are placed in a table (default_idt_entries), so we need to
> +	 * update the table with the new addresses.  switcher_offset() is a
> +	 * convenience function which returns the distance between the builtin
> +	 * switcher code and the high-mapped copy we just made. */
> +	for (i = 0; i < IDT_ENTRIES; i++)
> +		default_idt_entries[i] += switcher_offset();
> +
> +	/*
> +	 * Set up the Switcher's per-cpu areas.
> +	 *
> +	 * Each CPU gets two pages of its own within the high-mapped region
> +	 * (aka. "struct lguest_pages").  Much of this can be initialized now,
> +	 * but some depends on what Guest we are running (which is set up in
> +	 * copy_in_guest_info()).
> +	 */
> +	for_each_possible_cpu(i) {
> +		/* lguest_pages() returns this CPU's two pages. */
> +		struct lguest_pages *pages = lguest_pages(i);
> +		/* This is a convenience pointer to make the code fit one
> +		 * statement to a line. */
> +		struct lguest_ro_state *state = &pages->state;
> +
> +		/* The Global Descriptor Table: the Host has a different one
> +		 * for each CPU.  We keep a descriptor for the GDT which says
> +		 * where it is and how big it is (the size is actually the last
> +		 * byte, not the size, hence the "-1"). */
> +		state->host_gdt_desc.size = GDT_SIZE-1;
> +		state->host_gdt_desc.address = (long)get_cpu_gdt_table(i);
> +
> +		/* All CPUs on the Host use the same Interrupt Descriptor
> +		 * Table, so we just use store_idt(), which gets this CPU's IDT
> +		 * descriptor. */
> +		store_idt(&state->host_idt_desc);
> +
> +		/* The descriptors for the Guest's GDT and IDT can be filled
> +		 * out now, too.  We copy the GDT & IDT into ->guest_gdt and
> +		 * ->guest_idt before actually running the Guest. */
> +		state->guest_idt_desc.size = sizeof(state->guest_idt)-1;
> +		state->guest_idt_desc.address = (long)&state->guest_idt;
> +		state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1;
> +		state->guest_gdt_desc.address = (long)&state->guest_gdt;
> +
> +		/* We know where we want the stack to be when the Guest enters
> +		 * the switcher: in pages->regs.  The stack grows upwards, so
> +		 * we start it at the end of that structure. */
> +		state->guest_tss.esp0 = (long)(&pages->regs + 1);
> +		/* And this is the GDT entry to use for the stack: we keep a
> +		 * couple of special LGUEST entries. */
> +		state->guest_tss.ss0 = LGUEST_DS;
> +
> +		/* x86 can have a finegrained bitmap which indicates what I/O
> +		 * ports the process can use.  We set it to the end of our
> +		 * structure, meaning "none". */
> +		state->guest_tss.io_bitmap_base = sizeof(state->guest_tss);
> +
> +		/* Some GDT entries are the same across all Guests, so we can
> +		 * set them up now. */
> +		setup_default_gdt_entries(state);
> +		/* Most IDT entries are the same for all Guests, too.*/
> +		setup_default_idt_entries(state, default_idt_entries);
> +
> +		/* The Host needs to be able to use the LGUEST segments on this
> +		 * CPU, too, so put them in the Host GDT. */
> +		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT;
> +		get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT;
> +	}
> +
> +	/* In the Switcher, we want the %cs segment register to use the
> +	 * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so
> +	 * it will be undisturbed when we switch.  To change %cs and jump we
> +	 * need this structure to feed to Intel's "lcall" instruction. */
> +	lguest_entry.offset = (long)switch_to_guest + switcher_offset();
> +	lguest_entry.segment = LGUEST_CS;
> +
> +	return 0;
> +}
> +
> +/*S:010
> + * We are getting close to the Switcher.
> + *
> + * Remember that each CPU has two pages which are visible to the Guest when it
> + * runs on that CPU.  This has to contain the state for that Guest: we copy the
> + * state in just before we run the Guest.
> + *
> + * Each Guest has "changed" flags which indicate what has changed in the Guest
> + * since it last ran.  We saw this set in interrupts_and_traps.c and
> + * segments.c.
> + */
> +void copy_in_guest_info(struct lguest *lg, struct lguest_pages *pages)
> +{
> +	/* Copying all this data can be quite expensive.  We usually run the
> +	 * same Guest we ran last time (and that Guest hasn't run anywhere else
> +	 * meanwhile).  If that's not the case, we pretend everything in the
> +	 * Guest has changed. */
> +	if (__get_cpu_var(last_guest) != lg || lg->last_pages != pages) {
> +		__get_cpu_var(last_guest) = lg;
> +		lg->last_pages = pages;
> +		lg->changed = CHANGED_ALL;
> +	}
> +
> +	/* These copies are pretty cheap, so we do them unconditionally: */
> +	/* Save the current Host top-level page directory. */
> +	pages->state.host_cr3 = __pa(current->mm->pgd);
> +	/* Set up the Guest's page tables to see this CPU's pages (and no
> +	 * other CPU's pages). */
> +	map_switcher_in_guest(lg, pages);
> +	/* Set up the two "TSS" members which tell the CPU what stack to use
> +	 * for traps which do directly into the Guest (ie. traps at privilege
> +	 * level 1). */
> +	pages->state.guest_tss.esp1 = lg->esp1;
> +	pages->state.guest_tss.ss1 = lg->ss1;
> +
> +	/* Copy direct-to-Guest trap entries. */
> +	if (lg->changed & CHANGED_IDT)
> +		copy_traps(lg, pages->state.guest_idt, default_idt_entries);
> +
> +	/* Copy all GDT entries which the Guest can change. */
> +	if (lg->changed & CHANGED_GDT)
> +		copy_gdt(lg, pages->state.guest_gdt);
> +	/* If only the TLS entries have changed, copy them. */
> +	else if (lg->changed & CHANGED_GDT_TLS)
> +		copy_gdt_tls(lg, pages->state.guest_gdt);
> +
> +	/* Mark the Guest as unchanged for next time. */
> +	lg->changed = 0;
> +}
> +
> +/* Finally: the code to actually call into the Switcher to run the Guest. */
> +static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
> +{
> +	/* This is a dummy value we need for GCC's sake. */
> +	unsigned int clobber;
> +
> +	/* Copy the guest-specific information into this CPU's "struct
> +	 * lguest_pages". */
> +	copy_in_guest_info(lg, pages);
> +
> +	/* Set the trap number to 256 (impossible value).  If we fault while
> +	 * switching to the Guest (bad segment registers or bug), this will
> +	 * cause us to abort the Guest. */
> +	lg->regs->trapnum = 256;
> +
> +	/* Now: we push the "eflags" register on the stack, then do an "lcall".
> +	 * This is how we change from using the kernel code segment to using
> +	 * the dedicated lguest code segment, as well as jumping into the
> +	 * Switcher.
> +	 *
> +	 * The lcall also pushes the old code segment (KERNEL_CS) onto the
> +	 * stack, then the address of this call.  This stack layout happens to
> +	 * exactly match the stack of an interrupt... */
> +	asm volatile("pushf; lcall *lguest_entry"
> +		     /* This is how we tell GCC that %eax ("a") and %ebx ("b")
> +		      * are changed by this routine.  The "=" means output. */
> +		     : "=a"(clobber), "=b"(clobber)
> +		     /* %eax contains the pages pointer.  ("0" refers to the
> +		      * 0-th argument above, ie "a").  %ebx contains the
> +		      * physical address of the Guest's top-level page
> +		      * directory. */
> +		     : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
> +		     /* We tell gcc that all these registers could change,
> +		      * which means we don't have to save and restore them in
> +		      * the Switcher. */
> +		     : "memory", "%edx", "%ecx", "%edi", "%esi");
> +}
> +/*:*/
> +
> +static void set_ts(void)
> +{
> +	u32 cr0;
> +
> +	cr0 = read_cr0();
> +	if (!(cr0 & 8))
> +		write_cr0(cr0|8);
> +}
> +
> +/*H:030 Let's jump straight to the main loop which runs the Guest.
> + * Remember, this is called by the Launcher reading /dev/lguest, and we keep
> + * going around and around until something interesting happens. */
> +int run_guest(struct lguest *lg, unsigned long __user *user)
> +{
> +	/* We stop running once the Guest is dead. */
> +	while (!lg->dead) {
> +		/* We need to initialize this, otherwise gcc complains.  It's
> +		 * not (yet) clever enough to see that it's initialized when we
> +		 * need it. */
> +		unsigned int cr2 = 0; /* Damn gcc */
> +
> +		/* First we run any hypercalls the Guest wants done: either in
> +		 * the hypercall ring in "struct lguest_data", or directly by
> +		 * using int 31 (LGUEST_TRAP_ENTRY). */
> +		do_hypercalls(lg);
> +		/* It's possible the Guest did a SEND_DMA hypercall to the
> +		 * Launcher, in which case we return from the read() now. */
> +		if (lg->dma_is_pending) {
> +			if (put_user(lg->pending_dma, user) ||
> +			    put_user(lg->pending_key, user+1))
> +				return -EFAULT;
> +			return sizeof(unsigned long)*2;
> +		}
> +
> +		/* Check for signals */
> +		if (signal_pending(current))
> +			return -ERESTARTSYS;
> +
> +		/* If Waker set break_out, return to Launcher. */
> +		if (lg->break_out)
> +			return -EAGAIN;
> +
> +		/* Check if there are any interrupts which can be delivered
> +		 * now: if so, this sets up the handler to be executed when we
> +		 * next run the Guest. */
> +		maybe_do_interrupt(lg);
> +
> +		/* All long-lived kernel loops need to check with this horrible
> +		 * thing called the freezer.  If the Host is trying to suspend,
> +		 * it stops us. */
> +		try_to_freeze();
> +
> +		/* Just make absolutely sure the Guest is still alive.  One of
> +		 * those hypercalls could have been fatal, for example. */
> +		if (lg->dead)
> +			break;
> +
> +		/* If the Guest asked to be stopped, we sleep.  The Guest's
> +		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
> +		if (lg->halted) {
> +			set_current_state(TASK_INTERRUPTIBLE);
> +			schedule();
> +			continue;
> +		}
> +
> +		/* OK, now we're ready to jump into the Guest.  First we put up
> +		 * the "Do Not Disturb" sign: */
> +		local_irq_disable();
> +
> +		/* Remember the awfully-named TS bit?  If the Guest has asked
> +		 * to set it we set it now, so we can trap and pass that trap
> +		 * to the Guest if it uses the FPU. */
> +		if (lg->ts)
> +			set_ts();
> +
> +		/* SYSENTER is an optimized way of doing system calls.  We
> +		 * can't allow it because it always jumps to privilege level 0.
> +		 * A normal Guest won't try it because we don't advertise it in
> +		 * CPUID, but a malicious Guest (or malicious Guest userspace
> +		 * program) could, so we tell the CPU to disable it before
> +		 * running the Guest. */
> +		if (boot_cpu_has(X86_FEATURE_SEP))
> +			wrmsr(MSR_IA32_SYSENTER_CS, 0, 0);
> +
> +		/* Now we actually run the Guest.  It will pop back out when
> +		 * something interesting happens, and we can examine its
> +		 * registers to see what it was doing. */
> +		run_guest_once(lg, lguest_pages(raw_smp_processor_id()));
> +
> +		/* The "regs" pointer contains two extra entries which are not
> +		 * really registers: a trap number which says what interrupt or
> +		 * trap made the switcher code come back, and an error code
> +		 * which some traps set.  */
> +
> +		/* If the Guest page faulted, then the cr2 register will tell
> +		 * us the bad virtual address.  We have to grab this now,
> +		 * because once we re-enable interrupts an interrupt could
> +		 * fault and thus overwrite cr2, or we could even move off to a
> +		 * different CPU. */
> +		if (lg->regs->trapnum == 14)
> +			cr2 = read_cr2();
> +		/* Similarly, if we took a trap because the Guest used the FPU,
> +		 * we have to restore the FPU it expects to see. */
> +		else if (lg->regs->trapnum == 7)
> +			math_state_restore();
> +
> +		/* Restore SYSENTER if it's supposed to be on. */
> +		if (boot_cpu_has(X86_FEATURE_SEP))
> +			wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
> +
> +		/* Now we're ready to be interrupted or moved to other CPUs */
> +		local_irq_enable();
> +
> +		/* OK, so what happened? */
> +		switch (lg->regs->trapnum) {
> +		case 13: /* We've intercepted a GPF. */
> +			/* Check if this was one of those annoying IN or OUT
> +			 * instructions which we need to emulate.  If so, we
> +			 * just go back into the Guest after we've done it. */
> +			if (lg->regs->errcode == 0) {
> +				if (emulate_insn(lg))
> +					continue;
> +			}
> +			break;
> +		case 14: /* We've intercepted a page fault. */
> +			/* The Guest accessed a virtual address that wasn't
> +			 * mapped.  This happens a lot: we don't actually set
> +			 * up most of the page tables for the Guest at all when
> +			 * we start: as it runs it asks for more and more, and
> +			 * we set them up as required. In this case, we don't
> +			 * even tell the Guest that the fault happened.
> +			 *
> +			 * The errcode tells whether this was a read or a
> +			 * write, and whether kernel or userspace code. */
> +			if (demand_page(lg, cr2, lg->regs->errcode))
> +				continue;
> +
> +			/* OK, it's really not there (or not OK): the Guest
> +			 * needs to know.  We write out the cr2 value so it
> +			 * knows where the fault occurred.
> +			 *
> +			 * Note that if the Guest were really messed up, this
> +			 * could happen before it's done the INITIALIZE
> +			 * hypercall, so lg->lguest_data will be NULL */
> +			if (lg->lguest_data
> +			    && put_user(cr2, &lg->lguest_data->cr2))
> +				kill_guest(lg, "Writing cr2");
> +			break;
> +		case 7: /* We've intercepted a Device Not Available fault. */
> +			/* If the Guest doesn't want to know, we already
> +			 * restored the Floating Point Unit, so we just
> +			 * continue without telling it. */
> +			if (!lg->ts)
> +				continue;
> +			break;
> +		case 32 ... 255:
> +			/* These values mean a real interrupt occurred, in
> +			 * which case the Host handler has already been run.
> +			 * We just do a friendly check if another process
> +			 * should now be run, then fall through to loop
> +			 * around: */
> +			cond_resched();
> +		case LGUEST_TRAP_ENTRY: /* Handled at top of loop */
> +			continue;
> +		}
> +
> +		/* If we get here, it's a trap the Guest wants to know
> +		 * about. */
> +		if (deliver_trap(lg, lg->regs->trapnum))
> +			continue;
> +
> +		/* If the Guest doesn't have a handler (either it hasn't
> +		 * registered any yet, or it's one of the faults we don't let
> +		 * it handle), it dies with a cryptic error message. */
> +		kill_guest(lg, "unhandled trap %li at %#lx (%#lx)",
> +			   lg->regs->trapnum, lg->regs->eip,
> +			   lg->regs->trapnum == 14 ? cr2 : lg->regs->errcode);
> +	}
> +	/* The Guest is dead => "No such file or directory" */
> +	return -ENOENT;
> +}
> +
> +/* Now we can look at each of the routines this calls, in increasing order of
> + * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(),
> + * deliver_trap() and demand_page().  After all those, we'll be ready to
> + * examine the Switcher, and our philosophical understanding of the Host/Guest
> + * duality will be complete. :*/
> +static void adjust_pge(void *on)
> +{
> +	if (on)
> +		write_cr4(read_cr4() | X86_CR4_PGE);
> +	else
> +		write_cr4(read_cr4() & ~X86_CR4_PGE);
> +}
> +
> +int __init lguest_arch_host_init(void)
> +{
> +
> +	/* Finally, we need to turn off "Page Global Enable".  PGE is an
> +	 * optimization where page table entries are specially marked to show
> +	 * they never change.  The Host kernel marks all the kernel pages this
> +	 * way because it's always present, even when userspace is running.
> +	 *
> +	 * Lguest breaks this: unbeknownst to the rest of the Host kernel, we
> +	 * switch to the Guest kernel.  If you don't disable this on all CPUs,
> +	 * you'll get really weird bugs that you'll chase for two days.
> +	 *
> +	 * I used to turn PGE off every time we switched to the Guest and back
> +	 * on when we return, but that slowed the Switcher down noticeably. */
> +
> +	/* We don't need the complexity of CPUs coming and going while we're
> +	 * doing this. */
> +	lock_cpu_hotplug();
> +	if (cpu_has_pge) { /* We have a broader idea of "global". */
> +		/* Remember that this was originally set (for cleanup). */
> +		cpu_had_pge = 1;
> +		/* adjust_pge is a helper function which sets or unsets the PGE
> +		 * bit on its CPU, depending on the argument (0 == unset). */
> +		on_each_cpu(adjust_pge, (void *)0, 0, 1);
> +		/* Turn off the feature in the global feature set. */
> +		clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
> +	}
> +	unlock_cpu_hotplug();
> +	return 0;
> +};
> +
> +void __exit lguest_arch_host_fini(void)
> +{
> +	/* If we had PGE before we started, turn it back on now. */
> +	lock_cpu_hotplug();
> +	if (cpu_had_pge) {
> +		set_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability);
> +		/* adjust_pge's argument "1" means set PGE. */
> +		on_each_cpu(adjust_pge, (void *)1, 0, 1);
> +	}
> +	unlock_cpu_hotplug();
> +}
> +
> +/*H:130 Our Guest is usually so well behaved; it never tries to do things it
> + * isn't allowed to.  Unfortunately, "struct paravirt_ops" isn't quite
> + * complete, because it doesn't contain replacements for the Intel I/O
> + * instructions.  As a result, the Guest sometimes fumbles across one during
> + * the boot process as it probes for various things which are usually attached
> + * to a PC.
> + *
> + * When the Guest uses one of these instructions, we get trap #13 (General
> + * Protection Fault) and come here.  We see if it's one of those troublesome
> + * instructions and skip over it.  We return true if we did. */
> +static int emulate_insn(struct lguest *lg)
> +{
> +	u8 insn;
> +	unsigned int insnlen = 0, in = 0, shift = 0;
> +	/* The eip contains the *virtual* address of the Guest's instruction:
> +	 * guest_pa just subtracts the Guest's page_offset. */
> +	unsigned long physaddr = guest_pa(lg, lg->regs->eip);
> +
> +	/* The guest_pa() function only works for Guest kernel addresses, but
> +	 * that's all we're trying to do anyway. */
> +	if (lg->regs->eip < lg->page_offset)
> +		return 0;
> +
> +	/* Decoding x86 instructions is icky. */
> +	lgread(lg, &insn, physaddr, 1);
> +
> +	/* 0x66 is an "operand prefix".  It means it's using the upper 16 bits
> +	   of the eax register. */
> +	if (insn == 0x66) {
> +		shift = 16;
> +		/* The instruction is 1 byte so far, read the next byte. */
> +		insnlen = 1;
> +		lgread(lg, &insn, physaddr + insnlen, 1);
> +	}
> +
> +	/* We can ignore the lower bit for the moment and decode the 4 opcodes
> +	 * we need to emulate. */
> +	switch (insn & 0xFE) {
> +	case 0xE4: /* in     <next byte>,%al */
> +		insnlen += 2;
> +		in = 1;
> +		break;
> +	case 0xEC: /* in     (%dx),%al */
> +		insnlen += 1;
> +		in = 1;
> +		break;
> +	case 0xE6: /* out    %al,<next byte> */
> +		insnlen += 2;
> +		break;
> +	case 0xEE: /* out    %al,(%dx) */
> +		insnlen += 1;
> +		break;
> +	default:
> +		/* OK, we don't know what this is, can't emulate. */
> +		return 0;
> +	}
> +
> +	/* If it was an "IN" instruction, they expect the result to be read
> +	 * into %eax, so we change %eax.  We always return all-ones, which
> +	 * traditionally means "there's nothing there". */
> +	if (in) {
> +		/* Lower bit tells us whether it's a 16 or 32 bit access */
> +		if (insn & 0x1)
> +			lg->regs->eax = 0xFFFFFFFF;
> +		else
> +			lg->regs->eax |= (0xFFFF << shift);
> +	}
> +	/* Finally, we've "done" the instruction, so move past it. */
> +	lg->regs->eip += insnlen;
> +	/* Success! */
> +	return 1;
> +}
> +/*:*/
> Index: linux-2.6.23-rc3/drivers/lguest/interrupts_and_traps.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/drivers/lguest/interrupts_and_traps.c
> +++ linux-2.6.23-rc3/drivers/lguest/interrupts_and_traps.c
> @@ -170,7 +170,7 @@ void maybe_do_interrupt(struct lguest *l
>  	/* Look at the IDT entry the Guest gave us for this interrupt.  The
>  	 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
>  	 * over them. */
> -	idt = &lg->idt[FIRST_EXTERNAL_VECTOR+irq];
> +	idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
>  	/* If they don't have a handler (yet?), we just ignore it */
>  	if (idt_present(idt->a, idt->b)) {
>  		/* OK, mark it no longer pending and deliver it. */
> @@ -247,14 +247,14 @@ int deliver_trap(struct lguest *lg, unsi
>  {
>  	/* Trap numbers are always 8 bit, but we set an impossible trap number
>  	 * for traps inside the Switcher, so check that here. */
> -	if (num >= ARRAY_SIZE(lg->idt))
> +	if (num >= ARRAY_SIZE(lg->arch.idt))
>  		return 0;
>  
>  	/* Early on the Guest hasn't set the IDT entries (or maybe it put a
>  	 * bogus one in): if we fail here, the Guest will be killed. */
> -	if (!idt_present(lg->idt[num].a, lg->idt[num].b))
> +	if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
>  		return 0;
> -	set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num));
> +	set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b, has_err(num));
>  	return 1;
>  }
>  
> @@ -388,10 +388,10 @@ void load_guest_idt_entry(struct lguest 
>  	lg->changed |= CHANGED_IDT;
>  
>  	/* Check that the Guest doesn't try to step outside the bounds. */
> -	if (num >= ARRAY_SIZE(lg->idt))
> +	if (num >= ARRAY_SIZE(lg->arch.idt))
>  		kill_guest(lg, "Setting idt entry %u", num);
>  	else
> -		set_trap(lg, &lg->idt[num], num, lo, hi);
> +		set_trap(lg, &lg->arch.idt[num], num, lo, hi);
>  }
>  
>  /* The default entry for each interrupt points into the Switcher routines which
> @@ -434,7 +434,7 @@ void copy_traps(const struct lguest *lg,
>  
>  	/* We can simply copy the direct traps, otherwise we use the default
>  	 * ones in the Switcher: they will return to the Host. */
> -	for (i = 0; i < ARRAY_SIZE(lg->idt); i++) {
> +	for (i = 0; i < ARRAY_SIZE(lg->arch.idt); i++) {
>  		/* If no Guest can ever override this trap, leave it alone. */
>  		if (!direct_trap(i))
>  			continue;
> @@ -443,8 +443,8 @@ void copy_traps(const struct lguest *lg,
>  		 * Interrupt gates (type 14) disable interrupts as they are
>  		 * entered, which we never let the Guest do.  Not present
>  		 * entries (type 0x0) also can't go direct, of course. */
> -		if (idt_type(lg->idt[i].a, lg->idt[i].b) == 0xF)
> -			idt[i] = lg->idt[i];
> +		if (idt_type(lg->arch.idt[i].a, lg->arch.idt[i].b) == 0xF)
> +			idt[i] = lg->arch.idt[i];
>  		else
>  			/* Reset it to the default. */
>  			default_idt_entry(&idt[i], i, def[i]);
> Index: linux-2.6.23-rc3/drivers/lguest/lg.h
> ===================================================================
> --- linux-2.6.23-rc3.orig/drivers/lguest/lg.h
> +++ linux-2.6.23-rc3/drivers/lguest/lg.h
> @@ -1,13 +1,6 @@
>  #ifndef _LGUEST_H
>  #define _LGUEST_H
>  
> -#include <asm/desc.h>
> -
> -#define GDT_ENTRY_LGUEST_CS	10
> -#define GDT_ENTRY_LGUEST_DS	11
> -#define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8)
> -#define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8)
> -
>  #ifndef __ASSEMBLY__
>  #include <linux/types.h>
>  #include <linux/init.h>
> @@ -19,26 +12,10 @@
>  #include <linux/wait.h>
>  #include <linux/err.h>
>  #include <asm/semaphore.h>
> -#include "irq_vectors.h"
>  
> -#define GUEST_PL 1
> +#include <asm/lguest.h>
>  
> -struct lguest_regs
> -{
> -	/* Manually saved part. */
> -	unsigned long ebx, ecx, edx;
> -	unsigned long esi, edi, ebp;
> -	unsigned long gs;
> -	unsigned long eax;
> -	unsigned long fs, ds, es;
> -	unsigned long trapnum, errcode;
> -	/* Trap pushed part */
> -	unsigned long eip;
> -	unsigned long cs;
> -	unsigned long eflags;
> -	unsigned long esp;
> -	unsigned long ss;
> -};
> +#define GUEST_PL 1
>  
>  void free_pagetables(void);
>  int init_pagetables(struct page **switcher_page, unsigned int pages);
> @@ -99,23 +76,6 @@ struct pgdir
>  	spgd_t *pgdir;
>  };
>  
> -/* This is a guest-specific page (mapped ro) into the guest. */
> -struct lguest_ro_state
> -{
> -	/* Host information we need to restore when we switch back. */
> -	u32 host_cr3;
> -	struct Xgt_desc_struct host_idt_desc;
> -	struct Xgt_desc_struct host_gdt_desc;
> -	u32 host_sp;
> -
> -	/* Fields which are used when guest is running. */
> -	struct Xgt_desc_struct guest_idt_desc;
> -	struct Xgt_desc_struct guest_gdt_desc;
> -	struct i386_hw_tss guest_tss;
> -	struct desc_struct guest_idt[IDT_ENTRIES];
> -	struct desc_struct guest_gdt[GDT_ENTRIES];
> -};
> -
>  /* We have two pages shared with guests, per cpu.  */
>  struct lguest_pages
>  {
> @@ -181,11 +141,7 @@ struct lguest
>  	/* Dead? */
>  	const char *dead;
>  
> -	/* The GDT entries copied into lguest_ro_state when running. */
> -	struct desc_struct gdt[GDT_ENTRIES];
> -
> -	/* The IDT entries: some copied into lguest_ro_state when running. */
> -	struct desc_struct idt[IDT_ENTRIES];
> +	struct lguest_arch arch;
>  
>  	/* Virtual clock device */
>  	struct hrtimer hrt;
> @@ -242,6 +198,11 @@ void map_switcher_in_guest(struct lguest
>  int demand_page(struct lguest *info, unsigned long cr2, int errcode);
>  void pin_page(struct lguest *lg, unsigned long vaddr);
>  
> +/* <arch>_guest.c: */
> +int lguest_arch_map_switcher(void);
> +int lguest_arch_host_init(void);
> +int lguest_arch_host_fini(void);
> +
>  /* lguest_user.c: */
>  int lguest_device_init(void);
>  void lguest_device_remove(void);
> Index: linux-2.6.23-rc3/drivers/lguest/segments.c
> ===================================================================
> --- linux-2.6.23-rc3.orig/drivers/lguest/segments.c
> +++ linux-2.6.23-rc3/drivers/lguest/segments.c
> @@ -73,14 +73,14 @@ static void fixup_gdt_table(struct lgues
>  		/* Segment descriptors contain a privilege level: the Guest is
>  		 * sometimes careless and leaves this as 0, even though it's
>  		 * running at privilege level 1.  If so, we fix it here. */
> -		if ((lg->gdt[i].b & 0x00006000) == 0)
> -			lg->gdt[i].b |= (GUEST_PL << 13);
> +		if ((lg->arch.gdt[i].b & 0x00006000) == 0)
> +			lg->arch.gdt[i].b |= (GUEST_PL << 13);
>  
>  		/* Each descriptor has an "accessed" bit.  If we don't set it
>  		 * now, the CPU will try to set it when the Guest first loads
>  		 * that entry into a segment register.  But the GDT isn't
>  		 * writable by the Guest, so bad things can happen. */
> -		lg->gdt[i].b |= 0x00000100;
> +		lg->arch.gdt[i].b |= 0x00000100;

Does ia64 use segments?  I actually didn't replace this file for
lguest64.

>  	}
>  }
>  
> @@ -106,12 +106,12 @@ void setup_default_gdt_entries(struct lg
>  void setup_guest_gdt(struct lguest *lg)
>  {
>  	/* Start with full 0-4G segments... */
> -	lg->gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
> -	lg->gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
> +	lg->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT;
> +	lg->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT;
>  	/* ...except the Guest is allowed to use them, so set the privilege
>  	 * level appropriately in the flags. */
> -	lg->gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
> -	lg->gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
> +	lg->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13);
> +	lg->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13);
>  }
>  
>  /* Like the IDT, we never simply use the GDT the Guest gives us.  We set up the
> @@ -126,7 +126,7 @@ void copy_gdt_tls(const struct lguest *l
>  	unsigned int i;
>  
>  	for (i = GDT_ENTRY_TLS_MIN; i <= GDT_ENTRY_TLS_MAX; i++)
> -		gdt[i] = lg->gdt[i];
> +		gdt[i] = lg->arch.gdt[i];
>  }
>  
>  /* This is the full version */
> @@ -138,7 +138,7 @@ void copy_gdt(const struct lguest *lg, s
>  	 * replaced.  See ignored_gdt() above. */
>  	for (i = 0; i < GDT_ENTRIES; i++)
>  		if (!ignored_gdt(i))
> -			gdt[i] = lg->gdt[i];
> +			gdt[i] = lg->arch.gdt[i];
>  }
>  
>  /* This is where the Guest asks us to load a new GDT (LHCALL_LOAD_GDT). */
> @@ -146,12 +146,12 @@ void load_guest_gdt(struct lguest *lg, u
>  {
>  	/* We assume the Guest has the same number of GDT entries as the
>  	 * Host, otherwise we'd have to dynamically allocate the Guest GDT. */
> -	if (num > ARRAY_SIZE(lg->gdt))
> +	if (num > ARRAY_SIZE(lg->arch.gdt))
>  		kill_guest(lg, "too many gdt entries %i", num);
>  
>  	/* We read the whole thing in, then fix it up. */
> -	lgread(lg, lg->gdt, table, num * sizeof(lg->gdt[0]));
> -	fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->gdt));
> +	lgread(lg, lg->arch.gdt, table, num * sizeof(lg->arch.gdt[0]));
> +	fixup_gdt_table(lg, 0, ARRAY_SIZE(lg->arch.gdt));
>  	/* Mark that the GDT changed so the core knows it has to copy it again,
>  	 * even if the Guest is run on the same CPU. */
>  	lg->changed |= CHANGED_GDT;
> @@ -159,7 +159,7 @@ void load_guest_gdt(struct lguest *lg, u
>  
>  void guest_load_tls(struct lguest *lg, unsigned long gtls)
>  {
> -	struct desc_struct *tls = &lg->gdt[GDT_ENTRY_TLS_MIN];
> +	struct desc_struct *tls = &lg->arch.gdt[GDT_ENTRY_TLS_MIN];
>  
>  	lgread(lg, tls, gtls, sizeof(*tls)*GDT_ENTRY_TLS_ENTRIES);
>  	fixup_gdt_table(lg, GDT_ENTRY_TLS_MIN, GDT_ENTRY_TLS_MAX+1);
> Index: linux-2.6.23-rc3/include/asm-i386/lguest.h
> ===================================================================
> --- /dev/null
> +++ linux-2.6.23-rc3/include/asm-i386/lguest.h
> @@ -0,0 +1,54 @@
> +#ifndef _I386_LGUEST_H
> +#define _I386_LGUEST_H
> +
> +#include <asm/desc.h>
> +
> +#define GDT_ENTRY_LGUEST_CS	10
> +#define GDT_ENTRY_LGUEST_DS	11
> +#define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8)
> +#define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8)
> +
> +struct lguest_regs
> +{
> +	/* Manually saved part. */
> +	unsigned long ebx, ecx, edx;
> +	unsigned long esi, edi, ebp;
> +	unsigned long gs;
> +	unsigned long eax;
> +	unsigned long fs, ds, es;
> +	unsigned long trapnum, errcode;
> +	/* Trap pushed part */
> +	unsigned long eip;
> +	unsigned long cs;
> +	unsigned long eflags;
> +	unsigned long esp;
> +	unsigned long ss;
> +};
> +
> +/* This is a guest-specific page (mapped ro) into the guest. */
> +struct lguest_ro_state
> +{
> +	/* Host information we need to restore when we switch back. */
> +	u32 host_cr3;
> +	struct Xgt_desc_struct host_idt_desc;
> +	struct Xgt_desc_struct host_gdt_desc;
> +	u32 host_sp;
> +
> +	/* Fields which are used when guest is running. */
> +	struct Xgt_desc_struct guest_idt_desc;
> +	struct Xgt_desc_struct guest_gdt_desc;
> +	struct i386_hw_tss guest_tss;
> +	struct desc_struct guest_idt[IDT_ENTRIES];
> +	struct desc_struct guest_gdt[GDT_ENTRIES];
> +};
> +
> +struct lguest_arch
> +{
> +	/* The GDT entries copied into lguest_ro_state when running. */
> +	struct desc_struct gdt[GDT_ENTRIES];
> +
> +	/* The IDT entries: some copied into lguest_ro_state when running. */
> +	struct desc_struct idt[IDT_ENTRIES];
> +};
> +
> +#endif


-- Steve





More information about the Lguest mailing list