[PATCH 44/49] mm/sparse-vmemmap: drop ARCH_WANT_OPTIMIZE_DAX_VMEMMAP and simplify checks

Muchun Song songmuchun at bytedance.com
Sun Apr 5 22:52:35 AEST 2026


When device DAX vmemmap optimization was first introduced, it was implemented
as a generic feature within sparse-vmemmap.c. However,
it was later discovered that architectures with specific page table formats
(such as PowerPC with hash translation) would crash because the generic
vmemmap_populate_compound_pages() was unaware of their specific page table
setup (e.g., bolted table entries).

To address this, commit 87a7ae75d738 ("mm/vmemmap/devdax: fix kernel crash
when probing devdax devices") introduced a restrictive config option,
which eventually evolved into ARCH_WANT_OPTIMIZE_DAX_VMEMMAP (via commits
0b376f1e0ff5 and 0b6f15824cc7). This effectively turned a generic
optimization into an opt-in architectural feature.

However, the architecture landscape has evolved. The decision of whether
to apply DAX vmemmap optimization techniques for specific page table formats
is now fully delegated to the architecture-specific implementations (e.g.,
within vmemmap_populate()). The upper-level Kconfig restrictions and the
rigid generic wrapper functions are no longer necessary to prevent crashes,
as the architectures themselves handle the viability of the mappings. If an
architecture does not support DAX vmemmap optimization, it can simply
implement fallback logic similar to what PowerPC does in its
vmemmap_populate() routines.

If an architecture supports neither HugeTLB vmemmap optimization nor DAX
vmemmap optimization, it can now turn off SPARSEMEM_VMEMMAP_OPTIMIZATION
entirely to reduce code size. It is no longer a hidden option, but rather a
user-configurable boolean under the SPARSEMEM_VMEMMAP umbrella.

Therefore, this patch removes the redundant ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
and drops the complicated vmemmap_can_optimize() helper. Instead, we
unify SPARSEMEM_VMEMMAP_OPTIMIZATION as a fundamental core capability that
is enabled by default whenever SPARSEMEM_VMEMMAP is selected.

The check in sparse_add_section() is safely simplified to:
if (!altmap && pgmap && nr_pages == PAGES_PER_SECTION)
which succinctly reflects the prerequisites for the optimization without
unnecessary boilerplate; the finer-grained viability checks previously
performed by vmemmap_can_optimize() are now the responsibility of the
architecture-specific vmemmap_populate() implementations, as described above.

Signed-off-by: Muchun Song <songmuchun at bytedance.com>
---
 arch/powerpc/Kconfig |  1 -
 arch/riscv/Kconfig   |  1 -
 arch/x86/Kconfig     |  1 -
 include/linux/mm.h   | 34 ----------------------------------
 mm/Kconfig           | 14 ++++++++------
 mm/sparse-vmemmap.c  |  2 +-
 6 files changed, 9 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index da4e2ec2af20..8158d5d0c226 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -184,7 +184,6 @@ config PPC
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
 	select ARCH_WANT_LD_ORPHAN_WARN
-	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP	if PPC_RADIX_MMU
 	select ARCH_WANTS_MODULES_DATA_IN_VMALLOC	if PPC_BOOK3S_32 || PPC_8xx
 	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 61a9d8d3ea64..a8eccb828e7b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -85,7 +85,6 @@ config RISCV
 	select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
 	select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
-	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
 	select ARCH_WANTS_NO_INSTR
 	select ARCH_WANTS_THP_SWAP if HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f19625648f0f..83c55e286b40 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -146,7 +146,6 @@ config X86
 	select ARCH_WANT_GENERAL_HUGETLB
 	select ARCH_WANT_HUGE_PMD_SHARE		if X86_64
 	select ARCH_WANT_LD_ORPHAN_WARN
-	select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP	if X86_64
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP	if X86_64
 	select ARCH_WANTS_THP_SWAP		if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c36001c9d571..8baa224444be 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4910,40 +4910,6 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
 }
 #endif
 
-#define VMEMMAP_RESERVE_NR	OPTIMIZED_FOLIO_VMEMMAP_PAGES
-#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
-static inline bool __vmemmap_can_optimize(struct vmem_altmap *altmap,
-					  struct dev_pagemap *pgmap)
-{
-	unsigned long nr_pages;
-	unsigned long nr_vmemmap_pages;
-
-	if (!pgmap || !is_power_of_2(sizeof(struct page)))
-		return false;
-
-	nr_pages = pgmap_vmemmap_nr(pgmap);
-	nr_vmemmap_pages = ((nr_pages * sizeof(struct page)) >> PAGE_SHIFT);
-	/*
-	 * For vmemmap optimization with DAX we need minimum 2 vmemmap
-	 * pages. See layout diagram in Documentation/mm/vmemmap_dedup.rst
-	 */
-	return !altmap && (nr_vmemmap_pages > VMEMMAP_RESERVE_NR);
-}
-/*
- * If we don't have an architecture override, use the generic rule
- */
-#ifndef vmemmap_can_optimize
-#define vmemmap_can_optimize __vmemmap_can_optimize
-#endif
-
-#else
-static inline bool vmemmap_can_optimize(struct vmem_altmap *altmap,
-					   struct dev_pagemap *pgmap)
-{
-	return false;
-}
-#endif
-
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
 	MF_ACTION_REQUIRED = 1 << 1,
diff --git a/mm/Kconfig b/mm/Kconfig
index e81aa77182b2..166552d5d69a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -411,17 +411,19 @@ config SPARSEMEM_VMEMMAP
 	  efficient option when sufficient kernel resources are available.
 
 config SPARSEMEM_VMEMMAP_OPTIMIZATION
-	bool
+	bool "Enable Vmemmap Optimization Infrastructure"
+	default y
 	depends on SPARSEMEM_VMEMMAP
+	help
+	  This allows features like HugeTLB and DAX to map multiple contiguous
+	  vmemmap pages to a single underlying physical page to save memory.
+
+	  If unsure, say Y.
 
 #
 # Select this config option from the architecture Kconfig, if it is preferred
-# to enable the feature of HugeTLB/dev_dax vmemmap optimization.
+# to enable the feature of HugeTLB vmemmap optimization.
 #
-config ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
-	bool
-	select SPARSEMEM_VMEMMAP_OPTIMIZATION if SPARSEMEM_VMEMMAP
-
 config ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
 	bool
 
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index ac2efba9ef92..752a48112504 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -698,7 +698,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 		return ret;
 
 	ms = __nr_to_section(section_nr);
-	if (vmemmap_can_optimize(altmap, pgmap) && nr_pages == PAGES_PER_SECTION) {
+	if (!altmap && pgmap && nr_pages == PAGES_PER_SECTION) {
 		section_set_order(ms, pgmap->vmemmap_shift);
 #ifdef CONFIG_ZONE_DEVICE
 		section_set_zone(ms, ZONE_DEVICE);
-- 
2.20.1



More information about the Linuxppc-dev mailing list