[Lguest] [PATCH 2/2] Allow guest to specify syscall vector to use.

Rusty Russell rusty at rustcorp.com.au
Mon Aug 6 17:22:01 EST 2007


(Inspired by Ron Minnich's LGUEST_PLAN9_SYSCALL patch).

This patch allows a Guest to specify which system call vector it wants
to use, and the Host tries to reserve that vector.  Only one non-Linux
system call vector can be reserved, to try to avoid DoS on the Host.

Signed-off-by: Rusty Russell <rusty at rustcorp.com.au>

diff -r 0d6d7b88c3f8 drivers/lguest/core.c
--- a/drivers/lguest/core.c	Mon Aug 06 17:02:12 2007 +1000
+++ b/drivers/lguest/core.c	Mon Aug 06 17:14:38 2007 +1000
@@ -749,6 +749,7 @@ static int __init init(void)
 /* Cleaning up is just the same code, backwards.  With a little French. */
 static void __exit fini(void)
 {
+	free_interrupts();
 	lguest_device_remove();
 	free_pagetables();
 	unmap_switcher();
diff -r 0d6d7b88c3f8 drivers/lguest/hypercalls.c
--- a/drivers/lguest/hypercalls.c	Mon Aug 06 17:02:12 2007 +1000
+++ b/drivers/lguest/hypercalls.c	Mon Aug 06 17:15:00 2007 +1000
@@ -182,7 +182,7 @@ static void do_async_hcalls(struct lgues
  * Guest makes a hypercall, we end up here to set things up: */
 static void initialize(struct lguest *lg)
 {
-	u32 tsc_speed;
+	u32 tsc_speed, syscall_vec = 0;
 
 	/* You can't do anything until you're initialized.  The Guest knows the
 	 * rules, so we're unforgiving here. */
@@ -219,6 +219,8 @@ static void initialize(struct lguest *lg
 	 * the range of addresses into "struct lguest_data". */
 	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
 	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end)
+	    /* A Plan 9 Guest tells us it wants to use 0x40 for system calls. */
+	    || get_user(syscall_vec, &lg->lguest_data->syscall_vec)
 	    /* We tell the Guest that it can't use the top 4MB of virtual
 	     * addresses used by the Switcher. */
 	    || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
@@ -226,6 +228,9 @@ static void initialize(struct lguest *lg
 	    /* We also give the Guest a unique id, as used in lguest_net.c. */
 	    || put_user(lg->guestid, &lg->lguest_data->guestid))
 		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+
+	/* The interrupt code might not like this system call vector. */
+	check_syscall_vector(lg, syscall_vec);
 
 	/* We write the current time into the Guest's data page once now. */
 	write_timestamp(lg);
diff -r 0d6d7b88c3f8 drivers/lguest/interrupts_and_traps.c
--- a/drivers/lguest/interrupts_and_traps.c	Mon Aug 06 17:02:12 2007 +1000
+++ b/drivers/lguest/interrupts_and_traps.c	Mon Aug 06 17:10:50 2007 +1000
@@ -12,7 +12,12 @@
  * them first, so we also have a way of "reflecting" them into the Guest as if
  * they had been delivered to it directly. :*/
 #include <linux/uaccess.h>
+#include <linux/interrupt.h>
 #include "lg.h"
+
+/* The system call vector we reserved via request_irq (0 if none).  We only
+ * let Guests reserve one, to prevent denial of service. */
+static int reserved_vec;
 
 /* The address of the interrupt handler is split into two bits: */
 static unsigned long idt_address(u32 lo, u32 hi)
@@ -183,6 +188,51 @@ void maybe_do_interrupt(struct lguest *l
 	 * timer interrupt. */
 	write_timestamp(lg);
 }
+/*:*/
+
+/* Linux uses trap 128 for system calls.  Plan 9 uses 64, and Ron Minnich sent
+ * me a patch, so we support that too.  It'd be a big step for lguest if half
+ * the Plan 9 user base were to start using it.
+ *
+ * Actually now I think of it, it's possible that Ron *is* half the Plan 9
+ * user base.  Oh well. */
+static bool could_be_syscall(unsigned int num)
+{
+	return num == SYSCALL_VECTOR || (reserved_vec && num == reserved_vec);
+}
+
+static irqreturn_t unhandled(int irq, void *unused)
+{
+	/* This should never happen: we've reserved it. */
+	return IRQ_NONE;
+}
+
+void check_syscall_vector(struct lguest *lg, unsigned int vector)
+{
+	/* Normal Linux SYSCALL_VECTOR or already reserved? */
+	if (could_be_syscall(vector))
+		return;
+
+	mutex_lock(&lguest_lock);
+	if (!reserved_vec) {
+		/* Make sure that the Host doesn't try to use the Plan 9 system
+		 * call interrupt for a real device (it won't use the normal
+		 * Linux system call interrupt, so that case is hardcoded in
+		 * could_be_syscall). */
+		if (request_irq(vector, unhandled, 0, "lguest", NULL))
+			kill_guest(lg, "could not reserve syscall %u", vector);
+		else
+			reserved_vec = vector;
+	} else
+		kill_guest(lg, "syscall %u is not %u", vector, reserved_vec);
+	mutex_unlock(&lguest_lock);
+}
+
+void free_interrupts(void)
+{
+	if (reserved_vec)
+		free_irq(reserved_vec, NULL);
+}
 
 /*H:220 Now we've got the routines to deliver interrupts, delivering traps
  * like page fault is easy.  The only trick is that Intel decided that some
@@ -221,7 +271,7 @@ static int direct_trap(unsigned int num)
 {
 	/* Hardware interrupts don't go to the Guest at all (except system
 	 * call). */
-	if (num >= FIRST_EXTERNAL_VECTOR && num != SYSCALL_VECTOR)
+	if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num))
 		return 0;
 
 	/* The Host needs to see page faults (for shadow paging and to save the
diff -r 0d6d7b88c3f8 drivers/lguest/lg.h
--- a/drivers/lguest/lg.h	Mon Aug 06 17:02:12 2007 +1000
+++ b/drivers/lguest/lg.h	Mon Aug 06 17:14:18 2007 +1000
@@ -218,6 +218,8 @@ void copy_traps(const struct lguest *lg,
 		const unsigned long *def);
 void guest_set_clockevent(struct lguest *lg, unsigned long delta);
 void init_clockdev(struct lguest *lg);
+void check_syscall_vector(struct lguest *lg, unsigned int vector);
+void free_interrupts(void);
 
 /* segments.c: */
 void setup_default_gdt_entries(struct lguest_ro_state *state);
diff -r 0d6d7b88c3f8 drivers/lguest/lguest.c
--- a/drivers/lguest/lguest.c	Mon Aug 06 17:02:12 2007 +1000
+++ b/drivers/lguest/lguest.c	Mon Aug 06 17:12:02 2007 +1000
@@ -86,6 +86,7 @@ struct lguest_data lguest_data = {
 	.noirq_start = (u32)lguest_noirq_start,
 	.noirq_end = (u32)lguest_noirq_end,
 	.blocked_interrupts = { 1 }, /* Block timer interrupts */
+	.syscall_vec = SYSCALL_VECTOR,
 };
 struct lguest_device_desc *lguest_devices;
 static cycle_t clock_base;
diff -r 0d6d7b88c3f8 include/linux/lguest.h
--- a/include/linux/lguest.h	Mon Aug 06 17:02:12 2007 +1000
+++ b/include/linux/lguest.h	Mon Aug 06 17:02:17 2007 +1000
@@ -110,6 +110,9 @@ struct lguest_data
 /* Fields initialized by the Guest at boot: */
 	/* Instruction range to suppress interrupts even if enabled */
 	unsigned long noirq_start, noirq_end;
+
+	/* The vector to try to use for system calls (0x40 or 0x80). */
+	unsigned int syscall_vec;
 };
 extern struct lguest_data lguest_data;
 #endif /* __ASSEMBLY__ */





More information about the Lguest mailing list