[RFC PATCH 5/5] powerpc/64s/radix: Use non-atomic PTE updates if the MMU does not modify the PTE

Nicholas Piggin npiggin at gmail.com
Sun Dec 20 10:48:13 AEDT 2020


PTE updates performed by Linux are serialized by the page table locks.
There is no need to use atomic operations for these modifications if the
hardware does not perform updates asynchronously (e.g., via the MMU RC
update mechanism).
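
To illustrate the distinction (a minimal userspace sketch with invented
names, not the kernel helpers in the diff below), the atomic flavour
needs a compare-and-exchange loop, where the lock-protected flavour can
be a plain load/store:

#include <stdatomic.h>
#include <stdint.h>

/*
 * Atomic flavour: correct even if another agent (e.g. the MMU setting
 * R/C bits) modifies the word between the load and the store.
 */
static uint64_t pte_update_atomic(_Atomic uint64_t *pte, uint64_t clr,
				  uint64_t set)
{
	uint64_t old = atomic_load_explicit(pte, memory_order_relaxed);

	while (!atomic_compare_exchange_weak_explicit(pte, &old,
				(old & ~clr) | set,
				memory_order_relaxed, memory_order_relaxed))
		;	/* 'old' is reloaded on failure */

	return old;
}

/*
 * Non-atomic flavour: a plain read-modify-write, correct only while the
 * page table lock excludes every other writer of this PTE.
 */
static uint64_t pte_update_nonatomic(uint64_t *pte, uint64_t clr,
				     uint64_t set)
{
	uint64_t old = *pte;

	*pte = (old & ~clr) | set;
	return old;
}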

Current POWER CPUs that support radix do not perform such updates in the
core MMU, but the nest MMU does, so make this conditional on whether the
nest MMU is active for the mm.
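
Condensed from the diff below, the update path then dispatches per-mm:

	if (radix__mm_has_atomic_pte_update(mm))	/* i.e., mm_has_nmmu(mm) */
		old_pte = __radix_pte_update_atomic(ptep, clr, set);
	else
		old_pte = __radix_pte_update_nonatomic(ptep, clr, set);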

This improves a page fault / mprotect microbenchmark by about 10% on
POWER9.

There is a question of how compatibility would work if a future
processor implements core MMU PTE updates. This is being discussed with
ISA developers at the moment...

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 arch/powerpc/include/asm/book3s/64/radix.h | 54 ++++++---------
 arch/powerpc/mm/book3s64/radix_pgtable.c   | 79 ++++++++++++++++++----
 2 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 2491f3befda0..837ed6fb1c7d 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -137,16 +137,17 @@ extern void radix__mark_rodata_ro(void);
 extern void radix__mark_initmem_nx(void);
 #endif
 
-extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
-					 pte_t entry, unsigned long address,
-					 int psize);
-
-extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
-					   unsigned long addr, pte_t *ptep,
-					   pte_t old_pte, pte_t pte);
-
-static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
-					       unsigned long set)
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+				pte_t entry, unsigned long address, int psize);
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
+				pte_t old_pte, pte_t pte);
+unsigned long radix__pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				unsigned long clr, unsigned long set, int huge);
+pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				int full);
+
+static inline unsigned long __radix_pte_update_atomic(pte_t *ptep, unsigned long clr,
+							unsigned long set)
 {
 	__be64 old_be, tmp_be;
 
@@ -163,38 +164,21 @@ static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
 	return be64_to_cpu(old_be);
 }
 
-static inline unsigned long radix__pte_update(struct mm_struct *mm,
-					unsigned long addr,
-					pte_t *ptep, unsigned long clr,
-					unsigned long set,
-					int huge)
+static inline unsigned long __radix_pte_update_nonatomic(pte_t *ptep, unsigned long clr,
+							unsigned long set)
 {
-	unsigned long old_pte;
+	unsigned long old_pte = pte_val(*ptep);
 
-	old_pte = __radix_pte_update(ptep, clr, set);
-	if (!huge)
-		assert_pte_locked(mm, addr);
+	*ptep = __pte((old_pte & ~clr) | set);
 
 	return old_pte;
 }
 
-static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
-						   unsigned long addr,
-						   pte_t *ptep, int full)
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+						unsigned long set)
 {
-	unsigned long old_pte;
-
-	if (full) {
-		/*
-		 * We know that this is a full mm pte clear and
-		 * hence can be sure there is no parallel set_pte.
-		 */
-		old_pte = pte_val(*ptep);
-		*ptep = __pte(0);
-	} else
-		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
-
-	return __pte(old_pte);
+	/* Must use atomic updates: an attached accelerator (nest MMU) may modify the PTE */
+	return __radix_pte_update_atomic(ptep, clr, set);
 }
 
 static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 9495206b9b91..f7ecb90daf87 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1030,6 +1030,47 @@ pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
 
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline bool radix__mm_has_atomic_pte_update(struct mm_struct *mm)
+{
+	if (mm_has_nmmu(mm))
+		return true;
+	return false;
+}
+
+static inline bool radix__mm_has_core_atomic_pte_update(struct mm_struct *mm)
+{
+	return false;
+}
+
+unsigned long radix__pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				unsigned long clr, unsigned long set, int huge)
+{
+	unsigned long old_pte;
+
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	if (radix__mm_has_atomic_pte_update(mm))
+		old_pte = __radix_pte_update_atomic(ptep, clr, set);
+	else
+		old_pte = __radix_pte_update_nonatomic(ptep, clr, set);
+
+	return old_pte;
+}
+
+pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+				int full)
+{
+	unsigned long old_pte;
+
+	if (full)
+		old_pte = __radix_pte_update_nonatomic(ptep, ~0ul, 0);
+	else
+		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
+
+	return __pte(old_pte);
+}
+
 void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 				  pte_t entry, unsigned long address, int psize)
 {
@@ -1037,23 +1078,31 @@ void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
 	unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED |
 					      _PAGE_RW | _PAGE_EXEC);
 
-	unsigned long change = pte_val(entry) ^ pte_val(*ptep);
-	/*
-	 * To avoid NMMU hang while relaxing access, we need mark
-	 * the pte invalid in between.
-	 */
-	if ((change & _PAGE_RW) && mm_has_nmmu(mm)) {
-		unsigned long old_pte, new_pte;
+	if (mm_has_nmmu(mm)) {
+		unsigned long change = pte_val(entry) ^ pte_val(*ptep);
 
-		old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
-		/*
-		 * new value of pte
-		 */
-		new_pte = old_pte | set;
-		radix__flush_tlb_page_psize(mm, address, psize);
-		__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+		if (change & _PAGE_RW) {
+			unsigned long old_pte, new_pte;
+
+			/*
+			 * To avoid an NMMU hang while relaxing access, we need
+			 * to mark the PTE invalid in between.
+			 */
+			old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
+			/*
+			 * new value of pte
+			 */
+			new_pte = old_pte | set;
+			radix__flush_tlb_page_psize(mm, address, psize);
+			__radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+		} else {
+			__radix_pte_update(ptep, 0, set);
+		}
 	} else {
-		__radix_pte_update(ptep, 0, set);
+		if (radix__mm_has_core_atomic_pte_update(mm))
+			__radix_pte_update_atomic(ptep, 0, set);
+		else
+			__radix_pte_update_nonatomic(ptep, 0, set);
 		/*
 		 * Book3S does not require a TLB flush when relaxing access
 		 * restrictions when the address space is not attached to a
-- 
2.23.0


