[PATCH 32/49] mm/sparse-vmemmap: consolidate shared tail page allocation

Muchun Song songmuchun at bytedance.com
Sun Apr 5 22:52:23 AEST 2026


Currently, both HugeTLB and sparse-vmemmap have their own logic to get
or allocate the shared tail page for vmemmap optimization. The HugeTLB
version handles runtime concurrency using cmpxchg, while the
sparse-vmemmap version (used only at boot time) was simpler.

This patch unifies them into a single function in mm/sparse-vmemmap.c.

A new function, vmemmap_shared_tail_page(), is introduced: it returns
the shared page frame used to map the tail vmemmap pages of a compound
page.

Furthermore, vmemmap_alloc_block_zero() is used as the allocation
method, since it works safely in both contexts:

1. It calls alloc_pages_node() (via vmemmap_alloc_block()) when slab is
   available.

2. It falls back to bootmem allocation during early boot, making the function
   suitable for use in both early boot (sparse-vmemmap init) and runtime
   (HugeTLB HVO) contexts.

This reduces code duplication and ensures consistent behavior.

Signed-off-by: Muchun Song <songmuchun at bytedance.com>
---
 include/linux/mm.h   |  1 +
 mm/hugetlb_vmemmap.c | 28 +---------------------------
 mm/sparse-vmemmap.c  | 42 +++++++++++++++++++++---------------------
 3 files changed, 23 insertions(+), 48 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 93e447468131..15841829b7eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4880,6 +4880,7 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node,
 void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
 			  unsigned long headsize);
 void vmemmap_populate_print_last(void);
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone);
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end,
 		struct vmem_altmap *altmap);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index a190b9b94346..a7ea98fcc18e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -493,32 +493,6 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
 	return true;
 }
 
-static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
-{
-	const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
-	struct page *tail, *p;
-	int node = zone_to_nid(zone);
-
-	tail = READ_ONCE(zone->vmemmap_tails[idx]);
-	if (likely(tail))
-		return tail;
-
-	tail = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
-	if (!tail)
-		return NULL;
-
-	p = page_to_virt(tail);
-	for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
-		init_compound_tail(p + i, NULL, order, zone);
-
-	if (cmpxchg(&zone->vmemmap_tails[idx], NULL, tail)) {
-		__free_page(tail);
-		tail = READ_ONCE(zone->vmemmap_tails[idx]);
-	}
-
-	return tail;
-}
-
 static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 					    struct folio *folio,
 					    struct list_head *vmemmap_pages,
@@ -535,7 +509,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
 		return ret;
 
 	nid = folio_nid(folio);
-	vmemmap_tail = vmemmap_get_tail(h->order, folio_zone(folio));
+	vmemmap_tail = vmemmap_shared_tail_page(h->order, folio_zone(folio));
 	if (!vmemmap_tail)
 		return -ENOMEM;
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index c35d912a1fef..309d935fb05e 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -143,8 +143,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
 			start, end - 1);
 }
 
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone);
-
 static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
 					      struct vmem_altmap *altmap,
 					      unsigned long ptpfn)
@@ -160,8 +158,8 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, in
 			unsigned long pfn = page_to_pfn((struct page *)addr);
 			const struct mem_section *ms = __pfn_to_section(pfn);
 
-			page = vmemmap_get_tail(section_order(ms),
-						pfn_to_zone(pfn, node));
+			page = vmemmap_shared_tail_page(section_order(ms),
+							pfn_to_zone(pfn, node));
 			if (!page)
 				return NULL;
 			ptpfn = page_to_pfn(page);
@@ -338,32 +336,34 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end,
 	}
 }
 
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone)
 {
-	struct page *p, *tail;
-	unsigned int idx;
-	int node = zone_to_nid(zone);
+	void *addr;
+	struct page *page;
+	unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
 
-	if (WARN_ON_ONCE(order < OPTIMIZABLE_FOLIO_MIN_ORDER))
-		return NULL;
-	if (WARN_ON_ONCE(order > MAX_FOLIO_ORDER))
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(zone->vmemmap_tails)))
 		return NULL;
 
-	idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
-	tail = zone->vmemmap_tails[idx];
-	if (tail)
-		return tail;
+	page = READ_ONCE(zone->vmemmap_tails[idx]);
+	if (likely(page))
+		return page;
 
-	p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
-	if (!p)
+	addr = vmemmap_alloc_block_zero(PAGE_SIZE, zone_to_nid(zone));
+	if (!addr)
 		return NULL;
+
 	for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
-		init_compound_tail(p + i, NULL, order, zone);
+		init_compound_tail((struct page *)addr + i, NULL, order, zone);
 
-	tail = virt_to_page(p);
-	zone->vmemmap_tails[idx] = tail;
+	page = virt_to_page(addr);
+	if (cmpxchg(&zone->vmemmap_tails[idx], NULL, page) != NULL) {
+		VM_BUG_ON(!slab_is_available());
+		__free_page(page);
+		page = READ_ONCE(zone->vmemmap_tails[idx]);
+	}
 
-	return tail;
+	return page;
 }
 
 void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
-- 
2.20.1



More information about the Linuxppc-dev mailing list