[Lguest] [PATCH 1/1] Least Recently Used pgdirs

Jason Yeh jasonyeh0908 at gmail.com
Tue Aug 14 15:06:43 EST 2007


Hi all,

After seeing the code comment "Choosing the Least Recently Used might be 
better, but this is easy" in function new_pgdir, I decided to give it a 
try.

This is a naive implementation of Least Recently Used list replacing 
pgdirs array of struct lg.  pgdirs are inserted into the list, and moved 
in the way that the least recently used pgdir is at the tail of the 
list. pgdir_cur replaces pgdidx pointing to the current pgdir. I have 
not done any measurements on the code yet other than verify that it 
boots the image. I would like to get some feedbacks and making more 
changes before doing any benchmarks. All comments, criticism, and 
suggestions are welcomed.

The patch is made against Linus git tree. If there's a need to make the 
patch agains the lguest mercurial server, please let me know.


Signed-Off-By: Jason Yeh<jasonyeh0908 at gmail.com>
---
core.c              |    2
lg.h                  |   11 ++-
page_tables.c |  188 
+++++++++++++++++++++++++++++++++++++---------------------
3 files changed, 130 insertions(+), 71 deletions(-)

diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 4a315f0..18a9bf1 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -474,7 +474,7 @@ static void run_guest_once(struct lguest *lg, struct 
lguest_pages *pages)
              * 0-th argument above, ie "a").  %ebx contains the
              * physical address of the Guest's top-level page
              * directory. */
-             : "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir))
+            : "0"(pages), "1"(__pa(lg->pgdir_cur->pgdir))
             /* We tell gcc that all these registers could change,
              * which means we don't have to save and restore them in
              * the Switcher. */
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 64f0abe..75ac222 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -7,6 +7,8 @@
#define GDT_ENTRY_LGUEST_DS    11
#define LGUEST_CS        (GDT_ENTRY_LGUEST_CS * 8)
#define LGUEST_DS        (GDT_ENTRY_LGUEST_DS * 8)
+/* Number of PGDIR we keep for each guest */
+#define PGDIR_MAX 4

#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -97,6 +99,7 @@ struct pgdir
{
    unsigned long cr3;
    spgd_t *pgdir;
+    struct list_head list;       };

/* This is a guest-specific page (mapped ro) into the guest. */
@@ -159,9 +162,11 @@ struct lguest
    int changed;
    struct lguest_pages *last_pages;

-    /* We keep a small number of these. */
-    u32 pgdidx;
-    struct pgdir pgdirs[4];
+    /* pgdir LRU
+     * Newest pgdir is added to the head of the list.
+     * pgdir_list is the tail of the list */
+    struct list_head pgdir_list;
+    struct pgdir *pgdir_cur;

    /* Cached wakeup: we hold a reference to this task. */
    struct task_struct *wake;
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index b7a924a..6909950 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -15,6 +15,7 @@
#include <asm/tlbflush.h>
#include "lg.h"

+
/*M:008 We hold reference to pages, which prevents them from being swapped.
 * It'd be nice to have a callback in the "struct mm_struct" when Linux 
wants
 * to swap out.  If we had this, and a shrinker callback to trim PTE 
pages, we
@@ -77,9 +78,10 @@ static unsigned vaddr_to_pgd_index(unsigned long vaddr)
 *
 * spgd_addr() takes the virtual address and returns a pointer to the 
top-level
 * page directory entry for that address.  Since we keep track of 
several page
- * tables, the "i" argument tells us which one we're interested in (it's
+ * tables, the pgdir argument tells us which one we're interested in (it's
 * usually the current one). */
-static spgd_t *spgd_addr(struct lguest *lg, u32 i, unsigned long vaddr)
+static spgd_t *spgd_addr(struct lguest *lg, struct pgdir *pgdir,
+    unsigned long vaddr)
{
    unsigned int index = vaddr_to_pgd_index(vaddr);

@@ -88,8 +90,9 @@ static spgd_t *spgd_addr(struct lguest *lg, u32 i, 
unsigned long vaddr)
        kill_guest(lg, "attempt to access switcher pages");
        index = 0;
    }
+       /* Return a pointer index'th pgd entry for the i'th page table. */
-    return &lg->pgdirs[i].pgdir[index];
+    return &pgdir->pgdir[index];
}

/* This routine then takes the PGD entry given above, which contains the
@@ -108,7 +111,8 @@ static spte_t *spte_addr(struct lguest *lg, spgd_t 
spgd, unsigned long vaddr)
static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr)
{
    unsigned int index = vaddr >> (PAGE_SHIFT + PTES_PER_PAGE_SHIFT);
-    return lg->pgdirs[lg->pgdidx].cr3 + index * sizeof(gpgd_t);
+
+    return lg->pgdir_cur->cr3 + index * sizeof(gpgd_t);
}

static unsigned long gpte_addr(struct lguest *lg,
@@ -224,7 +228,7 @@ int demand_page(struct lguest *lg, unsigned long 
vaddr, int errcode)
        return 0;

    /* Now look at the matching shadow entry. */
-    spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+    spgd = spgd_addr(lg, lg->pgdir_cur, vaddr);
    if (!(spgd->flags & _PAGE_PRESENT)) {
        /* No shadow entry: allocate a new shadow PTE page. */
        unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
@@ -309,7 +313,7 @@ static int page_writable(struct lguest *lg, unsigned 
long vaddr)
    unsigned long flags;

    /* Look at the top level entry: is it present? */
-    spgd = spgd_addr(lg, lg->pgdidx, vaddr);
+    spgd = spgd_addr(lg, lg->pgdir_cur, vaddr);
    if (!(spgd->flags & _PAGE_PRESENT))
        return 0;

@@ -357,7 +361,7 @@ static void flush_user_mappings(struct lguest *lg, 
int idx)
    unsigned int i;
    /* Release every pgd entry up to the kernel's address. */
    for (i = 0; i < vaddr_to_pgd_index(lg->page_offset); i++)
-        release_pgd(lg, lg->pgdirs[idx].pgdir + i);
+        release_pgd(lg, lg->pgdir_cur->pgdir + i);   }

/* The Guest also has a hypercall to do this manually: it's used when a 
large
@@ -365,51 +369,68 @@ static void flush_user_mappings(struct lguest *lg, 
int idx)
void guest_pagetable_flush_user(struct lguest *lg)
{
    /* Drop the userspace part of the current page table. */
-    flush_user_mappings(lg, lg->pgdidx);
+    flush_user_mappings(lg, 0);
}
/*:*/

/* We keep several page tables.  This is a simple routine to find the page
 * table (if any) corresponding to this top-level address the Guest has 
given
 * us. */
-static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
+static struct pgdir * find_pgdir_from_list(struct lguest *lg, unsigned 
long pgtable)
{
-    unsigned int i;
-    for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-        if (lg->pgdirs[i].cr3 == pgtable)
+    struct pgdir *p, *rpgdir = NULL;
+   +    list_for_each_entry(p, &lg->pgdir_list, list) {
+        if (p->cr3 == pgtable) {
+            rpgdir  = p;
+            list_move(&rpgdir->list, &lg->pgdir_list);
            break;
-    return i;
+        }
+    }
+    return rpgdir;
}

+
/*H:435 And this is us, creating the new page directory.  If we really do
 * allocate a new one (and so the kernel parts are not there), we set
 * blank_pgdir. */
-static unsigned int new_pgdir(struct lguest *lg,
+static struct pgdir * new_pgdir(struct lguest *lg,
                  unsigned long cr3,
                  int *blank_pgdir)
{
    unsigned int next;
+    struct pgdir *newpgdir, *tmp;

-    /* We pick one entry at random to throw out.  Choosing the Least
-     * Recently Used might be better, but this is easy. */
-    next = random32() % ARRAY_SIZE(lg->pgdirs);
-    /* If it's never been allocated at all before, try now. */
-    if (!lg->pgdirs[next].pgdir) {
-        lg->pgdirs[next].pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
-        /* If the allocation fails, just keep using the one we have */
-        if (!lg->pgdirs[next].pgdir)
-            next = lg->pgdidx;
-        else
-            /* This is a blank page, so there are no kernel
-             * mappings: caller must map the stack! */
-            *blank_pgdir = 1;
+    /* Check if we reach the maximum number of pgdirs we can hold.
+     * Reuse the Least Recently Used one if we have. */
+    if (lg->pgdir_list_size > PGDIR_MAX)  {
+        newpgdir= list_entry(lg->pgdir_list.prev, struct pgdir, list);
+        list_move(&newpgdir->list, &lg->pgdir_list);
+    } else {
+        /* If it's never been allocated at all before, try now. */
+        newpgdir = kmalloc(sizeof(struct pgdir), GFP_KERNEL);
+        if (!newpgdir)
+            goto out;
+
+        newpgdir->pgdir = (spgd_t *)get_zeroed_page(GFP_KERNEL);
+        if (!newpgdir)
+            kill_guest(lg, "failed to get zeroed page");
+   +        list_add(&newpgdir->list, &lg->pgdir_list);
+        lg->pgdir_list_size++;
+        /* This is a blank page, so there are no kernel
+         * mappings: caller must map the stack! */
+        *blank_pgdir = 1;
    }
+
+    /* Change the current pgd pointer to the new one. */
+    lg->pgdir_cur = newpgdir;
    /* Record which Guest toplevel this shadows. */
-    lg->pgdirs[next].cr3 = cr3;
+    lg->pgdir_cur->cr3 = cr3;
    /* Release all the non-kernel mappings. */
-    flush_user_mappings(lg, next);
-
-    return next;
+    flush_user_mappings(lg, 0);
+out:
+    return newpgdir;
}

/*H:430 (iv) Switching page tables
@@ -418,19 +439,23 @@ static unsigned int new_pgdir(struct lguest *lg,
 * top-level pgdir).  This happens on almost every context switch. */
void guest_new_pagetable(struct lguest *lg, unsigned long pgtable)
{
-    int newpgdir, repin = 0;
+    int repin = 0;
+    struct pgdir * newpgdir;   
    /* Look to see if we have this one already. */
-    newpgdir = find_pgdir(lg, pgtable);
-    /* If not, we allocate or mug an existing one: if it's a fresh one,
-     * repin gets set to 1. */
-    if (newpgdir == ARRAY_SIZE(lg->pgdirs))
+    newpgdir = find_pgdir_from_list(lg, pgtable);
+    if (!newpgdir) {
+        /* If not, we allocate or mug an existing one: if it's a fresh 
one,
+         * repin gets set to 1. */
        newpgdir = new_pgdir(lg, pgtable, &repin);
-    /* Change the current pgd index to the new one. */
-    lg->pgdidx = newpgdir;
-    /* If it was completely blank, we map in the Guest kernel stack */
-    if (repin)
-        pin_stack_pages(lg);
+       +        /* If it was completely blank, we map in the Guest 
kernel stack */
+        if (repin)
+            pin_stack_pages(lg);
+    }
+
+    /* Change the current pgd pointer to the new one. */
+    lg->pgdir_cur = newpgdir;
}

/*H:470 Finally, a routine which throws away everything: all PGD entries 
in all
@@ -438,13 +463,17 @@ void guest_new_pagetable(struct lguest *lg, 
unsigned long pgtable)
static void release_all_pagetables(struct lguest *lg)
{
    unsigned int i, j;
+    struct pgdir *tmp;
+    struct list_head *pos, *q;  
    /* Every shadow pagetable this Guest has */
-    for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-        if (lg->pgdirs[i].pgdir)
+    list_for_each_safe (pos, q, &lg->pgdir_list) {
+        tmp = list_entry (pos, struct pgdir, list);       +        if 
(tmp->pgdir)
            /* Every PGD entry except the Switcher at the top */
            for (j = 0; j < SWITCHER_PGD_INDEX; j++)
-                release_pgd(lg, lg->pgdirs[i].pgdir + j);
+                release_pgd(lg, tmp->pgdir + j);
+    }
}

/* We also throw away everything when a Guest tells us it's changed a 
kernel
@@ -471,11 +500,11 @@ void guest_pagetable_clear_all(struct lguest *lg)
 * _PAGE_ACCESSED then we can put a read-only PTE entry in immediately, 
and if
 * they set _PAGE_DIRTY then we can put a writable PTE entry in 
immediately.
 */
-static void do_set_pte(struct lguest *lg, int idx,
+static void do_set_pte_my(struct lguest *lg, struct pgdir *pgdir,
               unsigned long vaddr, gpte_t gpte)
{
    /* Look up the matching shadow page directot entry. */
-    spgd_t *spgd = spgd_addr(lg, idx, vaddr);
+    spgd_t *spgd = spgd_addr(lg, pgdir, vaddr);

    /* If the top level isn't present, there's no entry to update. */
    if (spgd->flags & _PAGE_PRESENT) {
@@ -508,20 +537,24 @@ static void do_set_pte(struct lguest *lg, int idx,
void guest_set_pte(struct lguest *lg,
           unsigned long cr3, unsigned long vaddr, gpte_t gpte)
{
+    struct pgdir *p;
+       /* Kernel mappings must be changed on all top levels.  Slow, but
     * doesn't happen often. */
    if (vaddr >= lg->page_offset) {
-        unsigned int i;
-        for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-            if (lg->pgdirs[i].pgdir)
-                do_set_pte(lg, i, vaddr, gpte);
+        list_for_each_entry(p, &lg->pgdir_list, list) {
+            if (p->pgdir)
+                do_set_pte_my(lg, p, vaddr, gpte);
+        }
    } else {
        /* Is this page table one we have a shadow for? */
-        int pgdir = find_pgdir(lg, cr3);
-        if (pgdir != ARRAY_SIZE(lg->pgdirs))
-            /* If so, do the update. */
-            do_set_pte(lg, pgdir, vaddr, gpte);
+        p = find_pgdir_from_list(lg, cr3);
+        /* If so, do the update. */
+        do_set_pte_my(lg, p, vaddr, gpte);
+
    }
+
+
}

/*H:400
@@ -540,7 +573,7 @@ void guest_set_pte(struct lguest *lg,
 */
void guest_set_pmd(struct lguest *lg, unsigned long cr3, u32 idx)
{
-    int pgdir;
+    struct pgdir * pgdir;

    /* The kernel seems to try to initialize this early on: we ignore its
     * attempts to map over the Switcher. */
@@ -548,10 +581,11 @@ void guest_set_pmd(struct lguest *lg, unsigned 
long cr3, u32 idx)
        return;

    /* If they're talking about a page table we have a shadow for... */
-    pgdir = find_pgdir(lg, cr3);
-    if (pgdir < ARRAY_SIZE(lg->pgdirs))
+    pgdir = find_pgdir_from_list(lg, cr3);
+    if (pgdir)
        /* ... throw it away. */
-        release_pgd(lg, lg->pgdirs[pgdir].pgdir + idx);
+        release_pgd(lg, pgdir->pgdir + idx);
+
}

/*H:500 (vii) Setting up the page tables initially.
@@ -560,18 +594,30 @@ void guest_set_pmd(struct lguest *lg, unsigned 
long cr3, u32 idx)
 * its first page table is.  We set some things up here: */
int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
{
+    struct pgdir * newpgdir;
+
    /* In flush_user_mappings() we loop from 0 to
     * "vaddr_to_pgd_index(lg->page_offset)".  This assumes it won't hit
     * the Switcher mappings, so check that now. */
    if (vaddr_to_pgd_index(lg->page_offset) >= SWITCHER_PGD_INDEX)
        return -EINVAL;
-    /* We start on the first shadow page table, and give it a blank PGD
-     * page. */
-    lg->pgdidx = 0;
-    lg->pgdirs[lg->pgdidx].cr3 = pgtable;
-    lg->pgdirs[lg->pgdidx].pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
-    if (!lg->pgdirs[lg->pgdidx].pgdir)
+
+    INIT_LIST_HEAD(&lg->pgdir_list);
+
+    newpgdir = kmalloc(sizeof(struct pgdir), GFP_KERNEL);
+    if (!newpgdir) {
        return -ENOMEM;
+    }
+
+    lg->pgdir_cur = newpgdir;
+    newpgdir->cr3 = pgtable;
+    newpgdir->pgdir = (spgd_t*)get_zeroed_page(GFP_KERNEL);
+    if (!newpgdir->pgdir)
+        return -ENOMEM;
+
+    list_add(&newpgdir->list, &lg->pgdir_list);
+    lg->pgdir_list_size = 1;
+
    return 0;
}

@@ -579,12 +625,19 @@ int init_guest_pagetable(struct lguest *lg, 
unsigned long pgtable)
void free_guest_pagetable(struct lguest *lg)
{
    unsigned int i;
+    struct list_head *pos, *q;   +    struct pgdir *tmp;

    /* Throw away all page table pages. */
    release_all_pagetables(lg);
    /* Now free the top levels: free_page() can handle 0 just fine. */
-    for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
-        free_page((long)lg->pgdirs[i].pgdir);
+    list_for_each_safe (pos, q, &lg->pgdir_list) {
+        tmp = list_entry (pos, struct pgdir, list);       +        
free_page((long)tmp->pgdir);
+        list_del(&tmp->list);
+        kfree(tmp);
+        lg->pgdir_list_size--;
+    }
}

/*H:480 (vi) Mapping the Switcher when the Guest is about to run.
@@ -602,7 +655,8 @@ void map_switcher_in_guest(struct lguest *lg, struct 
lguest_pages *pages)
     * page for this CPU (with appropriate flags). */
    switcher_pgd.pfn = __pa(switcher_pte_page) >> PAGE_SHIFT;
    switcher_pgd.flags = _PAGE_KERNEL;
-    lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+    lg->pgdir_cur->pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
+

    /* We also change the Switcher PTE page.  When we're running the Guest,
     * we want the Guest's "regs" page to appear where the first Switcher




More information about the Lguest mailing list