[RFC PATCH 1/2] mm/pgtable: use ptdesc for pmd_huge_pte

alexs at kernel.org alexs at kernel.org
Sun Dec 14 17:55:45 AEDT 2025


From: Alex Shi <alexs at kernel.org>

The 'pmd_huge_pte' fields are pgtable variables, but the
pgtable_trans_huge_deposit/withdraw functions access them through
'pgtable->lru' instead of 'pgtable->pt_list'. That's a bit weird.

So let's convert pgtable_t to the precise 'struct ptdesc *' for
ptdesc->pmd_huge_pte and mm->pmd_huge_pte, then convert
pgtable_trans_huge_deposit() to use the correct ptdesc.

This conversion works for most architectures, but fails on
s390/sparc/powerpc since they use 'pte_t *' as pgtable_t. Are there any
suggestions for these archs? If we can find a solution, we may be able
to remove pgtable_t for the other archs as well.

Signed-off-by: Alex Shi <alexs at kernel.org>
Cc: linux-mm at kvack.org
Cc: sparclinux at vger.kernel.org
Cc: linux-s390 at vger.kernel.org
Cc: linuxppc-dev at lists.ozlabs.org
Cc: Magnus Lindholm <linmag7 at gmail.com>
Cc: Matthew Wilcox  <willy at infradead.org>
Cc: Will Deacon <will at kernel.org>
Cc: Thomas Huth <thuth at redhat.com>
Cc: Alistair Popple <apopple at nvidia.com>
Cc: Ying Huang <ying.huang at linux.alibaba.com>
Cc: Gregory Price <gourry at gourry.net>
Cc: Byungchul Park <byungchul at sk.com>
Cc: Rakie Kim <rakie.kim at sk.com>
Cc: Joshua Hahn <joshua.hahnjy at gmail.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: Lance Yang <lance.yang at linux.dev>
Cc: Barry Song <baohua at kernel.org>
Cc: Dev Jain <dev.jain at arm.com>
Cc: Ryan Roberts <ryan.roberts at arm.com>
Cc: Nico Pache <npache at redhat.com>
Cc: Baolin Wang <baolin.wang at linux.alibaba.com>
Cc: Zi Yan <ziy at nvidia.com>
Cc: Michal Hocko <mhocko at suse.com>
Cc: Suren Baghdasaryan <surenb at google.com>
Cc: Mike Rapoport <rppt at kernel.org>
Cc: Vlastimil Babka <vbabka at suse.cz>
Cc: Liam R. Howlett <Liam.Howlett at oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes at oracle.com>
Cc: David Hildenbrand <david at kernel.org>
Cc: Andrew Morton <akpm at linux-foundation.org>
Cc: Andreas Larsson <andreas at gaisler.com>
Cc: David S. Miller <davem at davemloft.net>
Cc: Sven Schnelle <svens at linux.ibm.com>
Cc: Christian Borntraeger <borntraeger at linux.ibm.com>
Cc: Vasily Gorbik <gor at linux.ibm.com>
Cc: Heiko Carstens <hca at linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer at linux.ibm.com>
Cc: Alexander Gordeev <agordeev at linux.ibm.com>
Cc: Christophe Leroy  <chleroy at kernel.org>
Cc: Nicholas Piggin <npiggin at gmail.com>
Cc: Michael Ellerman <mpe at ellerman.id.au>
Cc: Madhavan Srinivasan <maddy at linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  6 +++---
 arch/s390/include/asm/pgtable.h              |  2 +-
 arch/s390/mm/pgtable.c                       |  2 +-
 arch/sparc/include/asm/pgtable_64.h          |  2 +-
 arch/sparc/mm/tlb.c                          |  2 +-
 include/linux/mm_types.h                     |  4 ++--
 include/linux/pgtable.h                      |  2 +-
 mm/debug_vm_pgtable.c                        |  3 ++-
 mm/huge_memory.c                             | 16 +++++++++-------
 mm/khugepaged.c                              |  2 +-
 mm/memory.c                                  |  3 ++-
 mm/migrate_device.c                          |  2 +-
 mm/pgtable-generic.c                         | 16 ++++++++--------
 13 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index aac8ce30cd3b..f10736af296d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1320,11 +1320,11 @@ pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
-					      pmd_t *pmdp, pgtable_t pgtable)
+					      pmd_t *pmdp, struct ptdesc *pgtable)
 {
 	if (radix_enabled())
-		return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
-	return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+		return radix__pgtable_trans_huge_deposit(mm, pmdp, page_ptdesc(pgtable));
+	return hash__pgtable_trans_huge_deposit(mm, pmdp, page_ptdesc(pgtable));
 }
 
 #define __HAVE_ARCH_PGTABLE_WITHDRAW
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index bca9b29778c3..e45cb52a923a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1751,7 +1751,7 @@ pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable);
+				struct ptdesc *pgtable);
 
 #define __HAVE_ARCH_PGTABLE_WITHDRAW
 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 666adcd681ab..c301af71b3ec 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -520,7 +520,7 @@ EXPORT_SYMBOL(pudp_xchg_direct);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
+				struct ptdesc *pgtable)
 {
 	struct list_head *lh = (struct list_head *) pgtable;
 
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 615f460c50af..4b7f7113a1b3 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -992,7 +992,7 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable);
+				struct ptdesc *pgtable);
 
 #define __HAVE_ARCH_PGTABLE_WITHDRAW
 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index a35ddcca5e76..5dfee57d2440 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -270,7 +270,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
+				struct ptdesc *pgtable)
 {
 	struct list_head *lh = (struct list_head *) pgtable;
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9f6de068295d..674e5fd4cf0d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -577,7 +577,7 @@ struct ptdesc {
 		struct list_head pt_list;
 		struct {
 			unsigned long _pt_pad_1;
-			pgtable_t pmd_huge_pte;
+			struct ptdesc *pmd_huge_pte;
 		};
 	};
 	unsigned long __page_mapping;
@@ -1249,7 +1249,7 @@ struct mm_struct {
 		struct mmu_notifier_subscriptions *notifier_subscriptions;
 #endif
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
-		pgtable_t pmd_huge_pte; /* protected by page_table_lock */
+		struct ptdesc *pmd_huge_pte; /* protected by page_table_lock */
 #endif
 #ifdef CONFIG_NUMA_BALANCING
 		/*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 652f287c1ef6..a5b1e3f7452a 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1017,7 +1017,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
+				       struct ptdesc *pgtable);
 #endif
 
 #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index ae9b9310d96f..26ff92705558 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -240,7 +240,8 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
 	/* Align the address wrt HPAGE_PMD_SIZE */
 	vaddr &= HPAGE_PMD_MASK;
 
-	pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
+	pgtable_trans_huge_deposit(args->mm, args->pmdp,
+					page_ptdesc(args->start_ptep));
 
 	pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
 	set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7c565f11a98..ff74bd70690d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1352,7 +1352,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			VM_BUG_ON(ret & VM_FAULT_FALLBACK);
 			return ret;
 		}
-		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd,
+						page_ptdesc(pgtable));
 		map_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
@@ -1450,7 +1451,7 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm,
 	pmd_t entry;
 	entry = folio_mk_pmd(zero_folio, vma->vm_page_prot);
 	entry = pmd_mkspecial(entry);
-	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
 	set_pmd_at(mm, haddr, pmd, entry);
 	mm_inc_nr_ptes(mm);
 }
@@ -1576,7 +1577,7 @@ static vm_fault_t insert_pmd(struct vm_area_struct *vma, unsigned long addr,
 	}
 
 	if (pgtable) {
-		pgtable_trans_huge_deposit(mm, pmd, pgtable);
+		pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
 		mm_inc_nr_ptes(mm);
 		pgtable = NULL;
 	}
@@ -1837,7 +1838,7 @@ static void copy_huge_non_present_pmd(
 
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 	mm_inc_nr_ptes(dst_mm);
-	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+	pgtable_trans_huge_deposit(dst_mm, dst_pmd, page_ptdesc(pgtable));
 	if (!userfaultfd_wp(dst_vma))
 		pmd = pmd_swp_clear_uffd_wp(pmd);
 	set_pmd_at(dst_mm, addr, dst_pmd, pmd);
@@ -1932,7 +1933,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
 out_zero_page:
 	mm_inc_nr_ptes(dst_mm);
-	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+	pgtable_trans_huge_deposit(dst_mm, dst_pmd, page_ptdesc(pgtable));
 	pmdp_set_wrprotect(src_mm, addr, src_pmd);
 	if (!userfaultfd_wp(dst_vma))
 		pmd = pmd_clear_uffd_wp(pmd);
@@ -2493,7 +2494,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
 			pgtable_t pgtable;
 			pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
-			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+			pgtable_trans_huge_deposit(mm, new_pmd,
+							page_ptdesc(pgtable));
 		}
 		pmd = move_soft_dirty_pmd(pmd);
 		if (vma_has_uffd_without_event_remap(vma))
@@ -2799,7 +2801,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
 	set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd);
 
 	src_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);
-	pgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);
+	pgtable_trans_huge_deposit(mm, dst_pmd, page_ptdesc(src_pgtable));
 unlock_ptls:
 	double_pt_unlock(src_ptl, dst_ptl);
 	/* unblock rmap walks */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 97d1b2824386..f9b1f8e75360 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1228,7 +1228,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
-	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
 	map_anon_folio_pmd_nopf(folio, pmd, vma, address);
 	spin_unlock(pmd_ptl);
 
diff --git a/mm/memory.c b/mm/memory.c
index 2a55edc48a65..f777de39cede 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5351,7 +5351,8 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
-	pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+	pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd,
+					page_ptdesc(vmf->prealloc_pte));
 	/*
 	 * We are going to consume the prealloc table,
 	 * count that as nr_ptes.
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 23379663b1e1..dd83bfff4f44 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -883,7 +883,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
 		flush_cache_page(vma, addr, addr + HPAGE_PMD_SIZE);
 		pmdp_invalidate(vma, addr, pmdp);
 	} else {
-		pgtable_trans_huge_deposit(vma->vm_mm, pmdp, pgtable);
+		pgtable_trans_huge_deposit(vma->vm_mm, pmdp, page_ptdesc(pgtable));
 		mm_inc_nr_ptes(vma->vm_mm);
 	}
 	set_pmd_at(vma->vm_mm, addr, pmdp, entry);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index d3aec7a9926a..220844a81e38 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -164,15 +164,15 @@ pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				pgtable_t pgtable)
+				struct ptdesc *pgtable)
 {
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 
 	/* FIFO */
 	if (!pmd_huge_pte(mm, pmdp))
-		INIT_LIST_HEAD(&pgtable->lru);
+		INIT_LIST_HEAD(&pgtable->pt_list);
 	else
-		list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
+		list_add(&pgtable->pt_list, &pmd_huge_pte(mm, pmdp)->pt_list);
 	pmd_huge_pte(mm, pmdp) = pgtable;
 }
 #endif
@@ -181,17 +181,17 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
 /* no "address" argument so destroys page coloring of some arch */
 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
 {
-	pgtable_t pgtable;
+	struct ptdesc *pgtable;
 
 	assert_spin_locked(pmd_lockptr(mm, pmdp));
 
 	/* FIFO */
 	pgtable = pmd_huge_pte(mm, pmdp);
-	pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru,
-							  struct page, lru);
+	pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->pt_list,
+							  struct ptdesc, pt_list);
 	if (pmd_huge_pte(mm, pmdp))
-		list_del(&pgtable->lru);
-	return pgtable;
+		list_del(&pgtable->pt_list);
+	return ptdesc_page(pgtable);
 }
 #endif
 
-- 
2.43.0



More information about the Linuxppc-dev mailing list