[PATCH 32/49] mm/sparse-vmemmap: consolidate shared tail page allocation
Muchun Song
songmuchun at bytedance.com
Sun Apr 5 22:52:23 AEST 2026
Currently, both HugeTLB and sparse-vmemmap have their own logic to get
or allocate the shared tail page for vmemmap optimization. The HugeTLB
version handles runtime concurrency using cmpxchg, while the
sparse-vmemmap version (used only at boot time) was simpler.
This patch unifies them into a single function in mm/sparse-vmemmap.c.
A new function, vmemmap_shared_tail_page(), is introduced: it returns
the shared page frame used to map the tail vmemmap pages of a compound
page.
Furthermore, vmemmap_alloc_block_zero() is used as the common allocation
method because it works in both contexts:
1. It calls alloc_pages_node() (via vmemmap_alloc_block()) when the slab
allocator is available.
2. It falls back to bootmem allocation during early boot.
This makes the function suitable for both early boot (sparse-vmemmap
init) and runtime (HugeTLB HVO) use.
This reduces code duplication and ensures consistent behavior.
Signed-off-by: Muchun Song <songmuchun at bytedance.com>
---
include/linux/mm.h | 1 +
mm/hugetlb_vmemmap.c | 28 +---------------------------
mm/sparse-vmemmap.c | 42 +++++++++++++++++++++---------------------
3 files changed, 23 insertions(+), 48 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 93e447468131..15841829b7eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4880,6 +4880,7 @@ int vmemmap_populate(unsigned long start, unsigned long end, int node,
void vmemmap_wrprotect_hvo(unsigned long start, unsigned long end, int node,
unsigned long headsize);
void vmemmap_populate_print_last(void);
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone);
#ifdef CONFIG_MEMORY_HOTPLUG
void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap);
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index a190b9b94346..a7ea98fcc18e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -493,32 +493,6 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
return true;
}
-static struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
-{
- const unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- struct page *tail, *p;
- int node = zone_to_nid(zone);
-
- tail = READ_ONCE(zone->vmemmap_tails[idx]);
- if (likely(tail))
- return tail;
-
- tail = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
- if (!tail)
- return NULL;
-
- p = page_to_virt(tail);
- for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
- init_compound_tail(p + i, NULL, order, zone);
-
- if (cmpxchg(&zone->vmemmap_tails[idx], NULL, tail)) {
- __free_page(tail);
- tail = READ_ONCE(zone->vmemmap_tails[idx]);
- }
-
- return tail;
-}
-
static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
struct folio *folio,
struct list_head *vmemmap_pages,
@@ -535,7 +509,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
return ret;
nid = folio_nid(folio);
- vmemmap_tail = vmemmap_get_tail(h->order, folio_zone(folio));
+ vmemmap_tail = vmemmap_shared_tail_page(h->order, folio_zone(folio));
if (!vmemmap_tail)
return -ENOMEM;
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index c35d912a1fef..309d935fb05e 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -143,8 +143,6 @@ void __meminit vmemmap_verify(pte_t *pte, int node,
start, end - 1);
}
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone);
-
static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
struct vmem_altmap *altmap,
unsigned long ptpfn)
@@ -160,8 +158,8 @@ static pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, in
unsigned long pfn = page_to_pfn((struct page *)addr);
const struct mem_section *ms = __pfn_to_section(pfn);
- page = vmemmap_get_tail(section_order(ms),
- pfn_to_zone(pfn, node));
+ page = vmemmap_shared_tail_page(section_order(ms),
+ pfn_to_zone(pfn, node));
if (!page)
return NULL;
ptpfn = page_to_pfn(page);
@@ -338,32 +336,34 @@ void vmemmap_wrprotect_hvo(unsigned long addr, unsigned long end,
}
}
-static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *zone)
+struct page *vmemmap_shared_tail_page(unsigned int order, struct zone *zone)
{
- struct page *p, *tail;
- unsigned int idx;
- int node = zone_to_nid(zone);
+ void *addr;
+ struct page *page;
+ unsigned int idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- if (WARN_ON_ONCE(order < OPTIMIZABLE_FOLIO_MIN_ORDER))
- return NULL;
- if (WARN_ON_ONCE(order > MAX_FOLIO_ORDER))
+ if (WARN_ON_ONCE(idx >= ARRAY_SIZE(zone->vmemmap_tails)))
return NULL;
- idx = order - OPTIMIZABLE_FOLIO_MIN_ORDER;
- tail = zone->vmemmap_tails[idx];
- if (tail)
- return tail;
+ page = READ_ONCE(zone->vmemmap_tails[idx]);
+ if (likely(page))
+ return page;
- p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
- if (!p)
+ addr = vmemmap_alloc_block_zero(PAGE_SIZE, zone_to_nid(zone));
+ if (!addr)
return NULL;
+
for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
- init_compound_tail(p + i, NULL, order, zone);
+ init_compound_tail((struct page *)addr + i, NULL, order, zone);
- tail = virt_to_page(p);
- zone->vmemmap_tails[idx] = tail;
+ page = virt_to_page(addr);
+ if (cmpxchg(&zone->vmemmap_tails[idx], NULL, page) != NULL) {
+ VM_BUG_ON(!slab_is_available());
+ __free_page(page);
+ page = READ_ONCE(zone->vmemmap_tails[idx]);
+ }
- return tail;
+ return page;
}
void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
--
2.20.1
More information about the Linuxppc-dev
mailing list