Improved ppc64 hugepage patch

Tue Jul 8 14:45:56 EST 2003

Hi David,

> The patch below is an improved version of the hugepage support for
> ppc64.  As well as fixing a couple of bugs it removes some
> restrictions on hugepage mappings:

Nice work; we've been testing this in the lab. Here is NUMA-local memory
allocation for large pages; it's basically the same as what wli did for x86.

With this patch we are able to show higher bandwidth to local nodes
vs remote ones on a POWER4 box.

Anton

--- ppc64-2.5//arch/ppc64/mm/hugetlbpage.c~	2003-07-08 02:31:04.000000000 +0000
+++ ppc64-2.5//arch/ppc64/mm/hugetlbpage.c	2003-07-08 02:48:26.000000000 +0000
@@ -37,7 +37,40 @@

 static int htlbpage_free; /* = 0 */
 static int htlbpage_total; /* = 0 */
-static LIST_HEAD(htlbpage_freelist);
+static struct list_head hugepage_freelists[MAX_NUMNODES];
+
+static void enqueue_huge_page(struct page *page)
+{	/* Put page on the free list selected by the node_id of its zone's pgdat. */
+	list_add(&page->list,
+		&hugepage_freelists[page_zone(page)->zone_pgdat->node_id]);	/* one list head per node */
+}	/* No locking here; the callers in this patch hold htlbpage_lock around the call. */
+
+static struct page *dequeue_huge_page(void)
+{	/* Remove and return one page from a per-node free list, or NULL if every list is empty. */
+	int nid = numa_node_id();	/* first choice: the list for the node numa_node_id() reports */
+	struct page *page = NULL;	/* stays NULL unless a non-empty list is found below */
+
+	if (list_empty(&hugepage_freelists[nid])) {
+		for (nid = 0; nid < MAX_NUMNODES; ++nid)	/* fallback: first non-empty list, scanning up from node 0 */
+			if (!list_empty(&hugepage_freelists[nid]))
+				break;
+	}	/* if the scan ran to completion, nid == MAX_NUMNODES and the check below fails */
+	/* XXX do we need nid >= 0? The scan above never yields a negative nid; numa_node_id() is the only other source. */
+	if (nid >= 0 && nid < MAX_NUMNODES && !list_empty(&hugepage_freelists[nid])) {
+		page = list_entry(hugepage_freelists[nid].next, struct page, list);	/* first entry on the chosen list */
+		list_del(&page->list);
+	}
+	return page;
+}
+
+static struct page *alloc_fresh_huge_page(void)
+{	/* Allocate one block of order HUGETLB_PAGE_ORDER, targeting a different node on each call. */
+	static int nid = 0;	/* node to request next; static, so it persists across calls */
+	struct page *page;
+	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);	/* result is returned unchecked; callers in this patch test for NULL */
+	nid = (nid + 1) % numnodes;	/* advance round-robin whether or not the allocation succeeded */
+	return page;
+}	/* NOTE(review): nid is updated without a lock; the callers shown invoke this outside htlbpage_lock -- confirm that is safe. */

 /* HugePTE layout:
  *
@@ -103,13 +136,12 @@
 	struct page *page;

 	spin_lock(&htlbpage_lock);
-	if (list_empty(&htlbpage_freelist)) {
+	page = dequeue_huge_page();
+	if (!page) {
 		spin_unlock(&htlbpage_lock);
 		return NULL;
 	}

-	page = list_entry(htlbpage_freelist.next, struct page, list);
-	list_del(&page->list);
 	htlbpage_free--;
 	spin_unlock(&htlbpage_lock);
 	set_page_count(page, 1);
@@ -365,7 +397,7 @@
 	INIT_LIST_HEAD(&page->list);

 	spin_lock(&htlbpage_lock);
-	list_add(&page->list, &htlbpage_freelist);
+	enqueue_huge_page(page);
 	htlbpage_free++;
 	spin_unlock(&htlbpage_lock);
 }
@@ -766,11 +798,11 @@
 		return htlbpage_total;
 	if (lcount > 0) {	/* Increase the mem size. */
 		while (lcount--) {
-			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
+			page = alloc_fresh_huge_page();
 			if (page == NULL)
 				break;
 			spin_lock(&htlbpage_lock);
-			list_add(&page->list, &htlbpage_freelist);
+			enqueue_huge_page(page);
 			htlbpage_free++;
 			htlbpage_total++;
 			spin_unlock(&htlbpage_lock);
@@ -811,12 +843,15 @@
 	struct page *page;

 	if (cur_cpu_spec->cpu_features & CPU_FTR_16M_PAGE) {
+		for (i = 0; i < MAX_NUMNODES; ++i)
+			INIT_LIST_HEAD(&hugepage_freelists[i]);
+
 		for (i = 0; i < htlbpage_max; ++i) {
-			page = alloc_pages(__GFP_HIGHMEM, HUGETLB_PAGE_ORDER);
+			page = alloc_fresh_huge_page();
 			if (!page)
 				break;
 			spin_lock(&htlbpage_lock);
-			list_add(&page->list, &htlbpage_freelist);
+			enqueue_huge_page(page);
 			spin_unlock(&htlbpage_lock);
 		}
 		htlbpage_max = htlbpage_free = htlbpage_total = i;
@@ -825,7 +860,7 @@
 		htlbpage_max = 0;
 		printk("CPU does not support HugeTLB\n");
 	}
-
+
 	return 0;
 }
 module_init(hugetlb_init);

** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/