[PATCH v7 02/11] mm/mremap: Fix race between MOVE_PUD mremap and pageout

Aneesh Kumar K.V aneesh.kumar at linux.ibm.com
Mon Jun 7 15:51:22 AEST 2021


With MOVE_PUD mremap, the following race can leave CPU 3 with a stale
TLB entry that maps an already-freed page:

CPU 1				CPU 2					CPU 3

mremap(old_addr, new_addr)      page_shrinker/try_to_unmap_one

mmap_write_lock_killable()

				addr = old_addr
				lock(pte_ptl)
lock(pud_ptl)
pud = *old_pud
pud_clear(old_pud)
flush_tlb_range(old_addr)

*new_pud = pud
									*new_addr = 10; and fills
									TLB with new addr
									and old pfn

unlock(pud_ptl)
				ptep_clear_flush()
				old pfn is freed.
									Stale TLB entry
CPU 2's ptep_clear_flush() only flushes old_addr, so the TLB entry CPU 3
installed for new_addr survives and keeps pointing at the freed pfn.
Fix this race by holding the PUD lock in pageout: page_vma_mapped_walk()
now takes pud_lock(), which serializes try_to_unmap_one() against the
PUD move and its TLB flush.
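
For reference, the mremap side does the whole move, including the TLB
flush for the old range, under pud_lock(). The following is an abridged
sketch of move_normal_pud() as introduced by c49dd3401802 (the pud_none()
sanity checks and error paths are omitted):

	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *old_ptl, *new_ptl;
	pud_t pud;

	/* Exclusive mmap_lock makes the src/dst lock order safe */
	old_ptl = pud_lock(mm, old_pud);
	new_ptl = pud_lockptr(mm, new_pud);
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the old pud and repoint the entry at the new address */
	pud = *old_pud;
	pud_clear(old_pud);
	set_pud_at(mm, new_addr, new_pud, pud);

	/* The stale translation is flushed before the lock is dropped */
	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);

	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

The rmap walk previously took only the PMD and PTE locks, so nothing
ordered try_to_unmap_one() against this critical section; once
page_vma_mapped_walk() takes pud_lock() as well, it can no longer look
up a PTE through the old PUD and keep using it across the move.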

Fixes: c49dd3401802 ("mm: speedup mremap on 1GB or larger regions")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
---
 include/linux/rmap.h |  3 +++
 mm/page_vma_mapped.c | 15 ++++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 272ab0c2b60b..491c65ce1d46 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -209,6 +209,7 @@ struct page_vma_mapped_walk {
 	pte_t *pte;
 	spinlock_t *pte_ptl;
 	spinlock_t *pmd_ptl;
+	spinlock_t *pud_ptl;
 	unsigned int flags;
 };
 
@@ -221,6 +222,8 @@ static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 		spin_unlock(pvmw->pte_ptl);
 	if (pvmw->pmd_ptl)
 		spin_unlock(pvmw->pmd_ptl);
+	if (pvmw->pud_ptl)
+		spin_unlock(pvmw->pud_ptl);
 }
 
 bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 87a2c94c7e27..c913bc34b1d3 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -180,8 +180,13 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	pud = pud_offset(p4d, pvmw->address);
 	if (!pud_present(*pud))
 		return false;
+
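+	/* Serialize against a racing MOVE_PUD mremap; see move_normal_pud() */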
+	pvmw->pud_ptl = pud_lock(mm, pud);
 	pvmw->pmd = pmd_offset(pud, pvmw->address);
-	pvmw->pmd_ptl = pmd_lock(mm, pvmw->pmd);
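+	/* With !USE_SPLIT_PMD_PTLOCKS, pud_lock() above already holds the PMD lock */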
+	if (USE_SPLIT_PMD_PTLOCKS)
+		pvmw->pmd_ptl = pmd_lock(mm, pvmw->pmd);
 	/*
 	 * Make sure the pmd value isn't cached in a register by the
 	 * compiler and used as a stale value after we've observed a
@@ -235,8 +240,12 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 					spin_unlock(pvmw->pte_ptl);
 					pvmw->pte_ptl = NULL;
 				}
-				spin_unlock(pvmw->pmd_ptl);
-				pvmw->pmd_ptl = NULL;
+				if (pvmw->pmd_ptl) {
+					spin_unlock(pvmw->pmd_ptl);
+					pvmw->pmd_ptl = NULL;
+				}
+				spin_unlock(pvmw->pud_ptl);
+				pvmw->pud_ptl = NULL;
 				goto restart;
 			} else {
 				pvmw->pte++;
-- 
2.31.1


