[PATCH 23/49] mm/mm_init: skip initializing shared tail pages for compound pages

Muchun Song songmuchun at bytedance.com
Sun Apr 5 22:52:14 AEST 2026


Currently, memmap_init_range() unconditionally initializes all struct pages
within a section. However, when HugeTLB Vmemmap Optimization (HVO) is enabled,
shared vmemmap tail pages are allocated during the vmemmap population phase
(e.g., via vmemmap_get_tail()). These shared tail pages are left intentionally
uninitialized at that time because the subsequent memmap_init() would simply
overwrite them.

If memmap_init_range() continues to initialize these shared tail pages, it
will overwrite the carefully constructed HVO mappings and metadata. This forces
subsystems like HugeTLB to implement workarounds (like re-initializing or
compensating for the overwritten data in their own init routines, as seen
in hugetlb_vmemmap_init()).

Therefore, the primary motivation of this patch is to prevent memmap_init_range()
from incorrectly overwriting the shared vmemmap tail pages. By detecting whether a
page is an optimizable compound vmemmap page (using the newly introduced section
order), we can safely skip its redundant initialization.

As a significant side benefit, skipping the initialization of these shared tail
pages also saves substantial CPU cycles during early boot.

Signed-off-by: Muchun Song <songmuchun at bytedance.com>
---
 mm/internal.h | 11 +++++++++++
 mm/mm_init.c  | 19 +++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index a8acabcd1d93..1060d7c07f5b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1011,6 +1011,17 @@ static inline void sparse_init_subsection_map(void)
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
+static inline bool vmemmap_page_optimizable(const struct page *page)
+{
+	unsigned long pfn = page_to_pfn(page);
+	unsigned int order = section_order(__pfn_to_section(pfn));
+
+	if (!is_power_of_2(sizeof(struct page)))
+		return false;
+
+	return (pfn & ((1L << order) - 1)) >= OPTIMIZED_FOLIO_VMEMMAP_PAGE_STRUCTS;
+}
+
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 977a837b7ef6..7f5b326e9298 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -676,12 +676,13 @@ static inline void fixup_hashdist(void) {}
 
 static __meminit void pageblock_migratetype_init_range(unsigned long pfn,
 						       unsigned long nr_pages,
-						       int migratetype)
+						       int migratetype,
+						       bool isolate)
 {
 	unsigned long end = pfn + nr_pages;
 
 	for (pfn = pageblock_align(pfn); pfn < end; pfn += pageblock_nr_pages) {
-		init_pageblock_migratetype(pfn_to_page(pfn), migratetype, false);
+		init_pageblock_migratetype(pfn_to_page(pfn), migratetype, isolate);
 		cond_resched();
 	}
 }
@@ -912,6 +913,16 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
 		}
 
 		page = pfn_to_page(pfn);
+		if (vmemmap_page_optimizable(page)) {
+			struct mem_section *ms = __pfn_to_section(pfn);
+			unsigned long start = pfn;
+
+			pfn = min(ALIGN(start, 1L << section_order(ms)), end_pfn);
+			pageblock_migratetype_init_range(start, pfn - start, migratetype,
+							 isolate_pageblock);
+			continue;
+		}
+
 		__init_single_page(page, pfn, zone, nid);
 		if (context == MEMINIT_HOTPLUG) {
 #ifdef CONFIG_ZONE_DEVICE
@@ -1138,7 +1149,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 	 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
 	 * because this is done early in section_activate()
 	 */
-	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE);
+	pageblock_migratetype_init_range(start_pfn, nr_pages, MIGRATE_MOVABLE, false);
 
 	pr_debug("%s initialised %lu pages in %ums\n", __func__,
 		nr_pages, jiffies_to_msecs(jiffies - start));
@@ -1963,7 +1974,7 @@ static void __init deferred_free_pages(unsigned long pfn,
 	if (!nr_pages)
 		return;
 
-	pageblock_migratetype_init_range(pfn, nr_pages, MIGRATE_MOVABLE);
+	pageblock_migratetype_init_range(pfn, nr_pages, MIGRATE_MOVABLE, false);
 
 	page = pfn_to_page(pfn);
 
-- 
2.20.1



More information about the Linuxppc-dev mailing list