[Lguest] [PATCH 1/1][V2] Least Recently Used pgdirs

Jason Yeh jasonyeh0908 at gmail.com
Mon Aug 20 04:08:26 EST 2007


hi all,

This is the update to the previous patch I send under the same title.
The main difference between the two versions are:
	1. flush_user_mapping() no longer always flush the current
	pgdir. It takes an a struct pgdir * parameter and flush the
	pgdir.

	2. The Least Recently Used list is updated only when the pgdir
	is about to be used.

	3. Formatting changes.

I also measured the time it takes to compile a kernel with "-j 4" flag
under the Guest. I am not sure if it is the best way to measure
the impact of the patch. If you have better idea, please let me know.
The average of five runs with the patch is:
	real    9m26.849s
	user    7m13.130s
	sys     2m11.330s

without the patch, the number looks like:
	real    9m46.572s
	user    7m33.140s
	sys     2m11.260s

Suggestions/comments are welcomed.

Jason

signed-off-by: Jason Yeh <jasonyeh0908 at gmail.com>
---
  core.c        |    2
  lg.h          |   12 ++-
  page_tables.c |  193 
+++++++++++++++++++++++++++++++++++-----------------------
  3 files changed, 129 insertions(+), 78 deletions(-)
	
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4a315f0..18a9bf1 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -474,7 +474,7 @@ static void run_guest_once(struct lguest *lg, struct 
lguest_pages *pages)
  		      * 0-th argument above, ie "a").  %ebx contains the
  		      * physical address of the Guest's top-level page
  		      * directory. */
-		     : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
+			: "0"(pages), "1"(__pa(lg->pgdir_cur->pgdir))
  		     /* We tell gcc that all these registers could change,
  		      * which means we don't have to save and restore them in
  		      * the Switcher. */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 64f0abe..eab6a13 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -7,6 +7,8 @@
  #define GDT_ENTRY_LGUEST_DS	11
  #define LGUEST_CS		(GDT_ENTRY_LGUEST_CS * 8)
  #define LGUEST_DS		(GDT_ENTRY_LGUEST_DS * 8)
+/* Number of PGDIR we keep for each guest */
+#define PGDIR_MAX 4

  #ifndef __ASSEMBLY__
  #include <linux/types.h>
@@ -97,6 +99,7 @@ struct pgdir
  {
  	unsigned long cr3;
  	spgd_t *pgdir;
+	struct list_head list;
  };

  /* This is a guest-specific page (mapped ro) into the guest. */
@@ -159,9 +162,12 @@ struct lguest
  	int changed;
  	struct lguest_pages *last_pages;

-	/* We keep a small number of these. */
-	u32 pgdidx;
-	struct pgdir pgdirs[4];
+	/* pgdir LRU
+	 * Newest pgdir is added to the head of the list.
+	 * pgdir_list is the tail of the list */
+	struct list_head pgdir_list;
+	struct pgdir *pgdir_cur;
+	unsigned int num_pgdirs;

  	/* Cached wakeup: we hold a reference to this task. */
  	struct task_struct *wake;
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index b7a924a..3dc991d 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -77,9 +77,10 @@ static unsigned vaddr_to_pgd_index(unsigned long vaddr)
   *
   * spgd_addr() takes the virtual address and returns a pointer to the 
top-level
   * page directory entry for that address.  Since we keep track of 
several page
- * tables, the "i" argument tells us which one we're interested in (it's
+ * tables, the pgdir argument tells us which one we're interested in (it's
   * usually the current one). */
-static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
+static spgd_t *spgd_addr(struct lguest *lg, struct pgdir *pgdir,
+	unsigned long vaddr)
  {
  	unsigned int index = vaddr_to_pgd_index(vaddr);

@@ -89,7 +90,7 @@ static spgd_t *spgd_addr(struct lguest *lg, u32 i, 
unsigned long vaddr)
  		index = 0;
  	}
  	/* Return a pointer index'th pgd entry for the i'th page table. */
-	return &lg->pgdirs[i].pgdir[index];
+	return &pgdir->pgdir[index];
  }

  /* This routine then takes the PGD entry given above, which contains the
@@ -108,7 +109,7 @@ static spte_t *spte_addr(struct lguest *lg, spgd_t 
spgd, unsigned long vaddr)
  static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
  {
  	unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
-	return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t);
+	return lg->pgdir_cur->cr3 + index * sizeof(gpgd_t);
  }

  static unsigned long gpte_addr(struct lguest *lg,
@@ -224,7 +225,7 @@ int demand_page(struct lguest *lg, unsigned long 
vaddr, int errcode)
  		return 0;

  	/* Now look at the matching shadow entry. */
-	spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+	spgd = spgd_addr(lg, lg->pgdir_cur, vaddr);
  	if (!(spgd->flags & _PAGE_PRESENT)) {
  		/* No shadow entry: allocate a new shadow PTE page. */
  		unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
@@ -309,7 +310,7 @@ static int page_writable(struct lguest *lg, unsigned 
long vaddr)
  	unsigned long flags;

  	/* Look at the top level entry: is it present? */
-	spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+	spgd = spgd_addr(lg, lg->pgdir_cur, vaddr);
  	if (!(spgd->flags & _PAGE_PRESENT))
  		return 0;

@@ -352,12 +353,12 @@ static void release_pgd(struct lguest *lg, spgd_t 
*spgd)
   *
   * We saw flush_user_mappings() called when we re-used a top-level 
pgdir page.
   * It simply releases every PTE page from 0 up to the kernel address. */
-static void flush_user_mappings(struct lguest *lg, int idx)
+static void flush_user_mappings(struct lguest *lg, struct pgdir *pgdir)
  {
  	unsigned int i;
  	/* Release every pgd entry up to the kernel's address. */
  	for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
-		release_pgd(lg, lg->pgdirs[idx].pgdir + i);
+		release_pgd(lg, pgdir->pgdir + i);
  }

  /* The Guest also has a hypercall to do this manually: it's used when 
a large
@@ -365,51 +366,76 @@ static void flush_user_mappings(struct lguest *lg, 
int idx)
  void guest_pagetable_flush_user(struct lguest *lg)
  {
  	/* Drop the userspace part of the current page table. */
-	flush_user_mappings(lg, lg->pgdidx);
+	flush_user_mappings(lg, lg->pgdir_cur);
  }
  /*:*/

  /* We keep several page tables.  This is a simple routine to find the page
   * table (if any) corresponding to this top-level address the Guest 
has given
   * us. */
-static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
+static struct pgdir * find_pgdir_from_list(struct lguest *lg, unsigned 
long pgtable)
  {
-	unsigned int i;
-	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].cr3 == pgtable)
-			break;
-	return i;
+	struct pgdir *p;
+	list_for_each_entry(p, &lg->pgdir_list, list) {
+		if (p->cr3 == pgtable)
+			return p;
+	}
+
+	return NULL;
+}
+
+/* Helper to allocate new pgdir */
+struct pgdir * alloc_newpgdir(void)
+{
+	struct pgdir *p;
+
+	p = kzalloc(sizeof(struct pgdir), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	p->pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
+	if (!p->pgdir) {
+		kfree(p);
+		return NULL;
+	}
+
+	return p;
  }

  /*H:435 And this is us, creating the new page directory.  If we really do
   * allocate a new one (and so the kernel parts are not there), we set
   * blank_pgdir. */
-static unsigned int new_pgdir(struct lguest *lg,
+static struct pgdir * new_pgdir(struct lguest *lg,
  			      unsigned long cr3,
  			      int *blank_pgdir)
  {
-	unsigned int next;
-
-	/* We pick one entry at random to throw out.  Choosing the Least
-	 * Recently Used might be better, but this is easy. */
-	next = random32() % ARRAY_SIZE(lg->pgdirs);
-	/* If it's never been allocated at all before, try now. */
-	if (!lg->pgdirs[next].pgdir) {
-		lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
-		/* If the allocation fails, just keep using the one we have */
-		if (!lg->pgdirs[next].pgdir)
-			next = lg->pgdidx;
-		else
-			/* This is a blank page, so there are no kernel
-			 * mappings: caller must map the stack! */
-			*blank_pgdir = 1;
+	struct pgdir *newpgdir;
+
+	/* Check if we reach the maximum number of pgdirs we can hold.
+	 * Reuse the Least Recently Used one if we have. */
+	if (lg->num_pgdirs > PGDIR_MAX)  {
+		newpgdir= list_entry(lg->pgdir_list.prev, struct pgdir, list);
+		/* If we reuse a pgdir, update its position to be the Most
+		 * Recently Used. */
+		list_move(&newpgdir->list, &lg->pgdir_list);
+	} else {
+		/* If it's never been allocated at all before, try now. */
+		newpgdir = alloc_newpgdir();
+		if (!newpgdir)
+			kill_guest(lg, "failed to get zeroed page");
+
+		list_add(&newpgdir->list, &lg->pgdir_list);
+		lg->num_pgdirs++;
+		/* This is a blank page, so there are no kernel
+		 * mappings: caller must map the stack! */
+		*blank_pgdir = 1;
  	}
+
  	/* Record which Guest toplevel this shadows. */
-	lg->pgdirs[next].cr3 = cr3;
+	newpgdir->cr3 = cr3;
  	/* Release all the non-kernel mappings. */
-	flush_user_mappings(lg, next);
-
-	return next;
+	flush_user_mappings(lg, newpgdir);
+	return newpgdir;
  }

  /*H:430 (iv) Switching page tables
@@ -418,33 +444,39 @@ static unsigned int new_pgdir(struct lguest *lg,
   * top-level pgdir).  This happens on almost every context switch. */
  void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
  {
-	int newpgdir, repin = 0;
+	int repin = 0;
+	struct pgdir * newpgdir;

  	/* Look to see if we have this one already. */
-	newpgdir = find_pgdir(lg, pgtable);
-	/* If not, we allocate or mug an existing one: if it's a fresh one,
-	 * repin gets set to 1. */
-	if (newpgdir == ARRAY_SIZE(lg->pgdirs))
+	newpgdir = find_pgdir_from_list(lg, pgtable);
+	if (!newpgdir) {
+		/* If not, we allocate or mug an existing one: if it's a fresh one,
+		 * repin gets set to 1. */
  		newpgdir = new_pgdir(lg, pgtable, &repin);
-	/* Change the current pgd index to the new one. */
-	lg->pgdidx = newpgdir;
-	/* If it was completely blank, we map in the Guest kernel stack */
-	if (repin)
-		pin_stack_pages(lg);
+
+		/* If it was completely blank, we map in the Guest kernel stack */
+		if (repin)
+			pin_stack_pages(lg);
+	}
+
+	/* Change the current pgd pointer to the new one. */
+	lg->pgdir_cur = newpgdir;
  }

  /*H:470 Finally, a routine which throws away everything: all PGD 
entries in all
   * the shadow page tables.  This is used when we destroy the Guest. */
  static void release_all_pagetables(struct lguest *lg)
  {
-	unsigned int i, j;
+	unsigned int i;
+	struct pgdir *p;

  	/* Every shadow pagetable this Guest has */
-	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		if (lg->pgdirs[i].pgdir)
-			/* Every PGD entry except the Switcher at the top */
-			for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-				release_pgd(lg, lg->pgdirs[i].pgdir + j);
+	list_for_each_entry(p, &lg->pgdir_list, list) {
+		/* Every PGD entry except the Switcher at the top */
+		if (p->pgdir)
+			for (i = 0; i < SWITCHER_PGD_INDEX; i++)
+				release_pgd(lg, p->pgdir + i);
+	}
  }

  /* We also throw away everything when a Guest tells us it's changed a 
kernel
@@ -471,11 +503,11 @@ void guest_pagetable_clear_all(struct lguest *lg)
   * _PAGE_ACCESSED then we can put a read-only PTE entry in 
immediately, and if
   * they set _PAGE_DIRTY then we can put a writable PTE entry in 
immediately.
   */
-static void do_set_pte(struct lguest *lg, int idx,
+static void do_set_pte(struct lguest *lg, struct pgdir *pgdir,
  		       unsigned long vaddr, gpte_t gpte)
  {
  	/* Look up the matching shadow page directot entry. */
-	spgd_t *spgd = spgd_addr(lg, idx, vaddr);
+	spgd_t *spgd = spgd_addr(lg, pgdir, vaddr);

  	/* If the top level isn't present, there's no entry to update. */
  	if (spgd->flags & _PAGE_PRESENT) {
@@ -508,19 +540,20 @@ static void do_set_pte(struct lguest *lg, int idx,
  void guest_set_pte(struct lguest *lg,
  		   unsigned long cr3, unsigned long vaddr, gpte_t gpte)
  {
+	struct pgdir *p;
+
  	/* Kernel mappings must be changed on all top levels.  Slow, but
  	 * doesn't happen often. */
  	if (vaddr >= lg->page_offset) {
-		unsigned int i;
-		for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-			if (lg->pgdirs[i].pgdir)
-				do_set_pte(lg, i, vaddr, gpte);
+		list_for_each_entry(p, &lg->pgdir_list, list) {
+			if (p->pgdir)
+				do_set_pte(lg, p, vaddr, gpte);
+		}
  	} else {
  		/* Is this page table one we have a shadow for? */
-		int pgdir = find_pgdir(lg, cr3);
-		if (pgdir != ARRAY_SIZE(lg->pgdirs))
-			/* If so, do the update. */
-			do_set_pte(lg, pgdir, vaddr, gpte);
+		p = find_pgdir_from_list(lg, cr3);
+		/* If so, do the update. */
+		do_set_pte(lg, p, vaddr, gpte);
  	}
  }

@@ -540,7 +573,7 @@ void guest_set_pte(struct lguest *lg,
   */
  void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
  {
-	int pgdir;
+	struct pgdir * pgdir;

  	/* The kernel seems to try to initialize this early on: we ignore its
  	 * attempts to map over the Switcher. */
@@ -548,10 +581,10 @@ void guest_set_pmd(struct lguest *lg, unsigned 
long cr3, u32 idx)
  		return;

  	/* If they're talking about a page table we have a shadow for... */
-	pgdir = find_pgdir(lg, cr3);
-	if (pgdir < ARRAY_SIZE(lg->pgdirs))
+	pgdir = find_pgdir_from_list(lg, cr3);
+	if (pgdir)
  		/* ... throw it away. */
-		release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
+		release_pgd(lg, pgdir->pgdir + idx);
  }

  /*H:500 (vii) Setting up the page tables initially.
@@ -560,31 +593,43 @@ void guest_set_pmd(struct lguest *lg, unsigned 
long cr3, u32 idx)
   * its first page table is.  We set some things up here: */
  int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
  {
+	struct pgdir * newpgdir;
+
  	/* In flush_user_mappings() we loop from 0 to
  	 * "vaddr_to_pgd_index(lg->page_offset)".  This assumes it won't hit
  	 * the Switcher mappings, so check that now. */
  	if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
  		return -EINVAL;
-	/* We start on the first shadow page table, and give it a blank PGD
-	 * page. */
-	lg->pgdidx = 0;
-	lg->pgdirs[lg->pgdidx].cr3 = pgtable;
-	lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
-	if (!lg->pgdirs[lg->pgdidx].pgdir)
+
+	INIT_LIST_HEAD(&lg->pgdir_list);
+
+	newpgdir = alloc_newpgdir();
+	if (!newpgdir)
  		return -ENOMEM;
+
+	lg->pgdir_cur = newpgdir;
+	newpgdir->cr3 = pgtable;
+	list_add(&newpgdir->list, &lg->pgdir_list);
+	lg->num_pgdirs = 1;
+
  	return 0;
  }

  /* When a Guest dies, our cleanup is fairly simple. */
  void free_guest_pagetable(struct lguest *lg)
  {
-	unsigned int i;
+	struct pgdir *p;

  	/* Throw away all page table pages. */
  	release_all_pagetables(lg);
  	/* Now free the top levels: free_page() can handle 0 just fine. */
-	for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-		free_page((long)lg->pgdirs[i].pgdir);
+	while (!list_empty(&lg->pgdir_list)) {
+		p = list_first_entry(&lg->pgdir_list, struct pgdir, list);
+		free_page((long)p->pgdir);
+		list_del(&p->list);
+		kfree(p);
+		lg->num_pgdirs--;
+	}
  }

  /*H:480 (vi) Mapping the Switcher when the Guest is about to run.
@@ -602,7 +647,7 @@ void map_switcher_in_guest(struct lguest *lg, struct 
lguest_pages *pages)
  	 * page for this CPU (with appropriate flags). */
  	switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
  	switcher_pgd.flags = _PAGE_KERNEL;
-	lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+	lg->pgdir_cur->pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;

  	/* We also change the Switcher PTE page.  When we're running the Guest,
  	 * we want the Guest's "regs" page to appear where the first Switcher



More information about the Lguest mailing list