[RFC PATCH 1/2] mm/pgtable: use ptdesc for pmd_huge_pte
alexs at kernel.org
alexs at kernel.org
Sun Dec 14 17:55:45 AEDT 2025
From: Alex Shi <alexs at kernel.org>
The 'pmd_huge_pte' fields are pgtable variables, but the
pgtable_trans_huge_deposit/withdraw functions use 'pgtable->lru'
instead of 'pgtable->pt_list' on them. That's a bit weird.
So let's convert pgtable_t to the more precise 'struct ptdesc *' for
ptdesc->pmd_huge_pte and mm->pmd_huge_pte, and then convert
pgtable_trans_huge_deposit() to use the correct ptdesc.
This conversion works for most architectures, but fails on s390/sparc/powerpc
since they use 'pte_t *' as pgtable_t. Are there any suggestions for these
archs? If we can find a solution, we may be able to remove pgtable_t for the
other archs.
Signed-off-by: Alex Shi <alexs at kernel.org>
Cc: linux-mm at kvack.org
Cc: sparclinux at vger.kernel.org
Cc: linux-s390 at vger.kernel.org
Cc: linuxppc-dev at lists.ozlabs.org
Cc: Magnus Lindholm <linmag7 at gmail.com>
Cc: Matthew Wilcox <willy at infradead.org>
Cc: Will Deacon <will at kernel.org>
Cc: Thomas Huth <thuth at redhat.com>
Cc: Alistair Popple <apopple at nvidia.com>
Cc: Ying Huang <ying.huang at linux.alibaba.com>
Cc: Gregory Price <gourry at gourry.net>
Cc: Byungchul Park <byungchul at sk.com>
Cc: Rakie Kim <rakie.kim at sk.com>
Cc: Joshua Hahn <joshua.hahnjy at gmail.com>
Cc: Matthew Brost <matthew.brost at intel.com>
Cc: Lance Yang <lance.yang at linux.dev>
Cc: Barry Song <baohua at kernel.org>
Cc: Dev Jain <dev.jain at arm.com>
Cc: Ryan Roberts <ryan.roberts at arm.com>
Cc: Nico Pache <npache at redhat.com>
Cc: Baolin Wang <baolin.wang at linux.alibaba.com>
Cc: Zi Yan <ziy at nvidia.com>
Cc: Michal Hocko <mhocko at suse.com>
Cc: Suren Baghdasaryan <surenb at google.com>
Cc: Mike Rapoport <rppt at kernel.org>
Cc: Vlastimil Babka <vbabka at suse.cz>
Cc: Liam R. Howlett <Liam.Howlett at oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes at oracle.com>
Cc: David Hildenbrand <david at kernel.org>
Cc: Andrew Morton <akpm at linux-foundation.org>
Cc: Andreas Larsson <andreas at gaisler.com>
Cc: David S. Miller <davem at davemloft.net>
Cc: Sven Schnelle <svens at linux.ibm.com>
Cc: Christian Borntraeger <borntraeger at linux.ibm.com>
Cc: Vasily Gorbik <gor at linux.ibm.com>
Cc: Heiko Carstens <hca at linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer at linux.ibm.com>
Cc: Alexander Gordeev <agordeev at linux.ibm.com>
Cc: Christophe Leroy <chleroy at kernel.org>
Cc: Nicholas Piggin <npiggin at gmail.com>
Cc: Michael Ellerman <mpe at ellerman.id.au>
Cc: Madhavan Srinivasan <maddy at linux.ibm.com>
---
arch/powerpc/include/asm/book3s/64/pgtable.h | 6 +++---
arch/s390/include/asm/pgtable.h | 2 +-
arch/s390/mm/pgtable.c | 2 +-
arch/sparc/include/asm/pgtable_64.h | 2 +-
arch/sparc/mm/tlb.c | 2 +-
include/linux/mm_types.h | 4 ++--
include/linux/pgtable.h | 2 +-
mm/debug_vm_pgtable.c | 3 ++-
mm/huge_memory.c | 16 +++++++++-------
mm/khugepaged.c | 2 +-
mm/memory.c | 3 ++-
mm/migrate_device.c | 2 +-
mm/pgtable-generic.c | 16 ++++++++--------
13 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index aac8ce30cd3b..f10736af296d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1320,11 +1320,11 @@ pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
#define __HAVE_ARCH_PGTABLE_DEPOSIT
static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
- pmd_t *pmdp, pgtable_t pgtable)
+ pmd_t *pmdp, struct ptdesc *pgtable)
{
if (radix_enabled())
- return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
- return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+ return radix__pgtable_trans_huge_deposit(mm, pmdp, page_ptdesc(pgtable));
+ return hash__pgtable_trans_huge_deposit(mm, pmdp, page_ptdesc(pgtable));
}
#define __HAVE_ARCH_PGTABLE_WITHDRAW
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index bca9b29778c3..e45cb52a923a 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1751,7 +1751,7 @@ pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t);
#define __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+ struct ptdesc *pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 666adcd681ab..c301af71b3ec 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -520,7 +520,7 @@ EXPORT_SYMBOL(pudp_xchg_direct);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
+ struct ptdesc *pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 615f460c50af..4b7f7113a1b3 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -992,7 +992,7 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
#define __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+ struct ptdesc *pgtable);
#define __HAVE_ARCH_PGTABLE_WITHDRAW
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index a35ddcca5e76..5dfee57d2440 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -270,7 +270,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
}
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
+ struct ptdesc *pgtable)
{
struct list_head *lh = (struct list_head *) pgtable;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9f6de068295d..674e5fd4cf0d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -577,7 +577,7 @@ struct ptdesc {
struct list_head pt_list;
struct {
unsigned long _pt_pad_1;
- pgtable_t pmd_huge_pte;
+ struct ptdesc *pmd_huge_pte;
};
};
unsigned long __page_mapping;
@@ -1249,7 +1249,7 @@ struct mm_struct {
struct mmu_notifier_subscriptions *notifier_subscriptions;
#endif
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
- pgtable_t pmd_huge_pte; /* protected by page_table_lock */
+ struct ptdesc *pmd_huge_pte; /* protected by page_table_lock */
#endif
#ifdef CONFIG_NUMA_BALANCING
/*
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 652f287c1ef6..a5b1e3f7452a 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1017,7 +1017,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
+ struct ptdesc *pgtable);
#endif
#ifndef __HAVE_ARCH_PGTABLE_WITHDRAW
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index ae9b9310d96f..26ff92705558 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -240,7 +240,8 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args)
/* Align the address wrt HPAGE_PMD_SIZE */
vaddr &= HPAGE_PMD_MASK;
- pgtable_trans_huge_deposit(args->mm, args->pmdp, args->start_ptep);
+ pgtable_trans_huge_deposit(args->mm, args->pmdp,
+ page_ptdesc(args->start_ptep));
pmd = pfn_pmd(args->pmd_pfn, args->page_prot);
set_pmd_at(args->mm, vaddr, args->pmdp, pmd);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7c565f11a98..ff74bd70690d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1352,7 +1352,8 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf)
VM_BUG_ON(ret & VM_FAULT_FALLBACK);
return ret;
}
- pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd,
+ page_ptdesc(pgtable));
map_anon_folio_pmd_pf(folio, vmf->pmd, vma, haddr);
mm_inc_nr_ptes(vma->vm_mm);
spin_unlock(vmf->ptl);
@@ -1450,7 +1451,7 @@ static void set_huge_zero_folio(pgtable_t pgtable, struct mm_struct *mm,
pmd_t entry;
entry = folio_mk_pmd(zero_folio, vma->vm_page_prot);
entry = pmd_mkspecial(entry);
- pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
set_pmd_at(mm, haddr, pmd, entry);
mm_inc_nr_ptes(mm);
}
@@ -1576,7 +1577,7 @@ static vm_fault_t insert_pmd(struct vm_area_struct *vma, unsigned long addr,
}
if (pgtable) {
- pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
mm_inc_nr_ptes(mm);
pgtable = NULL;
}
@@ -1837,7 +1838,7 @@ static void copy_huge_non_present_pmd(
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
mm_inc_nr_ptes(dst_mm);
- pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+ pgtable_trans_huge_deposit(dst_mm, dst_pmd, page_ptdesc(pgtable));
if (!userfaultfd_wp(dst_vma))
pmd = pmd_swp_clear_uffd_wp(pmd);
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
@@ -1932,7 +1933,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
out_zero_page:
mm_inc_nr_ptes(dst_mm);
- pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
+ pgtable_trans_huge_deposit(dst_mm, dst_pmd, page_ptdesc(pgtable));
pmdp_set_wrprotect(src_mm, addr, src_pmd);
if (!userfaultfd_wp(dst_vma))
pmd = pmd_clear_uffd_wp(pmd);
@@ -2493,7 +2494,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) {
pgtable_t pgtable;
pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
- pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+ pgtable_trans_huge_deposit(mm, new_pmd,
+ page_ptdesc(pgtable));
}
pmd = move_soft_dirty_pmd(pmd);
if (vma_has_uffd_without_event_remap(vma))
@@ -2799,7 +2801,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
set_pmd_at(mm, dst_addr, dst_pmd, _dst_pmd);
src_pgtable = pgtable_trans_huge_withdraw(mm, src_pmd);
- pgtable_trans_huge_deposit(mm, dst_pmd, src_pgtable);
+ pgtable_trans_huge_deposit(mm, dst_pmd, page_ptdesc(src_pgtable));
unlock_ptls:
double_pt_unlock(src_ptl, dst_ptl);
/* unblock rmap walks */
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 97d1b2824386..f9b1f8e75360 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1228,7 +1228,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
- pgtable_trans_huge_deposit(mm, pmd, pgtable);
+ pgtable_trans_huge_deposit(mm, pmd, page_ptdesc(pgtable));
map_anon_folio_pmd_nopf(folio, pmd, vma, address);
spin_unlock(pmd_ptl);
diff --git a/mm/memory.c b/mm/memory.c
index 2a55edc48a65..f777de39cede 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5351,7 +5351,8 @@ static void deposit_prealloc_pte(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
- pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte);
+ pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd,
+ page_ptdesc(vmf->prealloc_pte));
/*
* We are going to consume the prealloc table,
* count that as nr_ptes.
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 23379663b1e1..dd83bfff4f44 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -883,7 +883,7 @@ static int migrate_vma_insert_huge_pmd_page(struct migrate_vma *migrate,
flush_cache_page(vma, addr, addr + HPAGE_PMD_SIZE);
pmdp_invalidate(vma, addr, pmdp);
} else {
- pgtable_trans_huge_deposit(vma->vm_mm, pmdp, pgtable);
+ pgtable_trans_huge_deposit(vma->vm_mm, pmdp, page_ptdesc(pgtable));
mm_inc_nr_ptes(vma->vm_mm);
}
set_pmd_at(vma->vm_mm, addr, pmdp, entry);
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index d3aec7a9926a..220844a81e38 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -164,15 +164,15 @@ pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
#ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
+ struct ptdesc *pgtable)
{
assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(&pgtable->lru);
+ INIT_LIST_HEAD(&pgtable->pt_list);
else
- list_add(&pgtable->lru, &pmd_huge_pte(mm, pmdp)->lru);
+ list_add(&pgtable->pt_list, &pmd_huge_pte(mm, pmdp)->pt_list);
pmd_huge_pte(mm, pmdp) = pgtable;
}
#endif
@@ -181,17 +181,17 @@ void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
/* no "address" argument so destroys page coloring of some arch */
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- pgtable_t pgtable;
+ struct ptdesc *pgtable;
assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */
pgtable = pmd_huge_pte(mm, pmdp);
- pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->lru,
- struct page, lru);
+ pmd_huge_pte(mm, pmdp) = list_first_entry_or_null(&pgtable->pt_list,
+ struct ptdesc, pt_list);
if (pmd_huge_pte(mm, pmdp))
- list_del(&pgtable->lru);
- return pgtable;
+ list_del(&pgtable->pt_list);
+ return ptdesc_page(pgtable);
}
#endif
--
2.43.0
More information about the Linuxppc-dev
mailing list