[PATCH v2 1/9] Revert "powerpc/64s/hash: remove user SLB data from the paca"

Nicholas Piggin npiggin at gmail.com
Wed Oct 3 00:27:51 AEST 2018


This reverts commit 8fed04d0f6aedf99b3d811ba58d38bb7f938a47a.

There are a number of problems with this patch. There are minor bugs,
like the vmalloc_sllp update no longer being picked up into the pacas,
but more fundamentally the SLB flush cannot be broadcast to other CPUs,
because it must be done in places where interrupts may be disabled, and
issuing the broadcast there would deadlock.

Rather than try to put some bandaids on this, the best fix is to revert
it. I think the approach is worthwhile, but it should be solved by
demoting things eagerly at setup or map time, rather than in low level
hash faults. Most of these cases are rarely used by software or apply
only to very old hardware.
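
To illustrate the deadlock (a sketch only, not part of this patch; the
helpers shown are the ones removed from slb.c further down):

	/*
	 * core_flush_all_slbs() broadcast the SLB flush with
	 * on_each_cpu(..., wait=1), i.e. it sends IPIs and spins until
	 * every CPU has run cpu_flush_slb().
	 */
	void core_flush_all_slbs(struct mm_struct *mm)
	{
		on_each_cpu(cpu_flush_slb, mm, 1);
	}

	/*
	 * But callers such as the segment demotion path in the hash fault
	 * code can run with interrupts disabled. A CPU waiting in
	 * on_each_cpu() with interrupts off cannot service the IPI that
	 * another CPU is concurrently waiting on it to acknowledge, so the
	 * two spin on each other forever.
	 */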

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 -
 arch/powerpc/include/asm/paca.h               | 13 ++++++
 arch/powerpc/kernel/asm-offsets.c             |  9 ++++
 arch/powerpc/kernel/paca.c                    | 22 +++++++++
 arch/powerpc/mm/hash_utils_64.c               | 46 ++++++++++++++-----
 arch/powerpc/mm/mmu_context.c                 |  3 +-
 arch/powerpc/mm/slb.c                         | 20 +-------
 arch/powerpc/mm/slice.c                       | 29 ++++++++----
 8 files changed, 103 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index bbeaf6adf93c..14e552ea5e52 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -503,7 +503,6 @@ struct slb_entry {
 };
 
 extern void slb_initialize(void);
-extern void core_flush_all_slbs(struct mm_struct *mm);
 extern void slb_flush_and_rebolt(void);
 void slb_flush_all_realmode(void);
 void __slb_restore_bolted_realmode(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 6d6b3706232c..4838149ee07b 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -146,6 +146,18 @@ struct paca_struct {
 	struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
+#ifdef CONFIG_PPC_BOOK3S
+	mm_context_id_t mm_ctx_id;
+#ifdef CONFIG_PPC_MM_SLICES
+	unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
+	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long mm_ctx_slb_addr_limit;
+#else
+	u16 mm_ctx_user_psize;
+	u16 mm_ctx_sllp;
+#endif
+#endif
+
 	/*
 	 * then miscellaneous read-write fields
 	 */
@@ -248,6 +260,7 @@ struct paca_struct {
 #endif /* CONFIG_PPC_BOOK3S_64 */
 } ____cacheline_aligned;
 
+extern void copy_mm_to_paca(struct mm_struct *mm);
 extern struct paca_struct **paca_ptrs;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index ba9d0fc98730..27c78b6c9909 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -180,6 +180,15 @@ int main(void)
 	OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
 	OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
 	OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
+#ifdef CONFIG_PPC_BOOK3S
+	OFFSET(PACACONTEXTID, paca_struct, mm_ctx_id);
+#ifdef CONFIG_PPC_MM_SLICES
+	OFFSET(PACALOWSLICESPSIZE, paca_struct, mm_ctx_low_slices_psize);
+	OFFSET(PACAHIGHSLICEPSIZE, paca_struct, mm_ctx_high_slices_psize);
+	OFFSET(PACA_SLB_ADDR_LIMIT, paca_struct, mm_ctx_slb_addr_limit);
+	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
+#endif /* CONFIG_PPC_MM_SLICES */
+#endif
 
 #ifdef CONFIG_PPC_BOOK3E
 	OFFSET(PACAPGD, paca_struct, pgd);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0cf84e30d1cd..0ee3e6d50f28 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -258,3 +258,25 @@ void __init free_unused_pacas(void)
 	printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
 			paca_ptrs_size + paca_struct_size, nr_cpu_ids);
 }
+
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_BOOK3S
+	mm_context_t *context = &mm->context;
+
+	get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+	VM_BUG_ON(!mm->context.slb_addr_limit);
+	get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
+	memcpy(&get_paca()->mm_ctx_low_slices_psize,
+	       &context->low_slices_psize, sizeof(context->low_slices_psize));
+	memcpy(&get_paca()->mm_ctx_high_slices_psize,
+	       &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+#else /* CONFIG_PPC_MM_SLICES */
+	get_paca()->mm_ctx_user_psize = context->user_psize;
+	get_paca()->mm_ctx_sllp = context->sllp;
+#endif
+#else /* !CONFIG_PPC_BOOK3S */
+	return;
+#endif
+}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 88c95dc8b141..f23a89d8e4ce 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1088,16 +1088,16 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
-static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
+static unsigned int get_paca_psize(unsigned long addr)
 {
 	unsigned char *psizes;
 	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		psizes = mm->context.low_slices_psize;
+		psizes = get_paca()->mm_ctx_low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
 	} else {
-		psizes = mm->context.high_slices_psize;
+		psizes = get_paca()->mm_ctx_high_slices_psize;
 		index = GET_HIGH_SLICE_INDEX(addr);
 	}
 	mask_index = index & 0x1;
@@ -1105,9 +1105,9 @@ static unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 }
 
 #else
-unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int get_paca_psize(unsigned long addr)
 {
-	return mm->context.user_psize;
+	return get_paca()->mm_ctx_user_psize;
 }
 #endif
 
@@ -1118,11 +1118,15 @@ unsigned int get_psize(struct mm_struct *mm, unsigned long addr)
 #ifdef CONFIG_PPC_64K_PAGES
 void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 {
-	if (get_psize(mm, addr) == MMU_PAGE_4K)
+	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
 		return;
 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
 	copro_flush_all_slbs(mm);
-	core_flush_all_slbs(mm);
+	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
+
+		copy_mm_to_paca(mm);
+		slb_flush_and_rebolt();
+	}
 }
 #endif /* CONFIG_PPC_64K_PAGES */
 
@@ -1187,6 +1191,22 @@ void hash_failure_debug(unsigned long ea, unsigned long access,
 		trap, vsid, ssize, psize, lpsize, pte);
 }
 
+static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+			     int psize, bool user_region)
+{
+	if (user_region) {
+		if (psize != get_paca_psize(ea)) {
+			copy_mm_to_paca(mm);
+			slb_flush_and_rebolt();
+		}
+	} else if (get_paca()->vmalloc_sllp !=
+		   mmu_psize_defs[mmu_vmalloc_psize].sllp) {
+		get_paca()->vmalloc_sllp =
+			mmu_psize_defs[mmu_vmalloc_psize].sllp;
+		slb_vmalloc_update();
+	}
+}
+
 /* Result code is:
  *  0 - handled
  *  1 - normal page fault
@@ -1219,7 +1239,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 			rc = 1;
 			goto bail;
 		}
-		psize = get_psize(mm, ea);
+		psize = get_slice_psize(mm, ea);
 		ssize = user_segment_size(ea);
 		vsid = get_user_vsid(&mm->context, ea, ssize);
 		break;
@@ -1307,6 +1327,9 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 			WARN_ON(1);
 		}
 #endif
+		if (current->mm == mm)
+			check_paca_psize(ea, mm, psize, user_region);
+
 		goto bail;
 	}
 
@@ -1341,14 +1364,15 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 			       "to 4kB pages because of "
 			       "non-cacheable mapping\n");
 			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-			slb_vmalloc_update();
 			copro_flush_all_slbs(mm);
-			core_flush_all_slbs(mm);
 		}
 	}
 
 #endif /* CONFIG_PPC_64K_PAGES */
 
+	if (current->mm == mm)
+		check_paca_psize(ea, mm, psize, user_region);
+
 #ifdef CONFIG_PPC_64K_PAGES
 	if (psize == MMU_PAGE_64K)
 		rc = __hash_page_64K(ea, access, vsid, ptep, trap,
@@ -1436,7 +1460,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
 #ifdef CONFIG_PPC_MM_SLICES
 static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 {
-	int psize = get_psize(mm, ea);
+	int psize = get_slice_psize(mm, ea);
 
 	/* We only prefault standard pages for now */
 	if (unlikely(psize != mm->context.user_psize))
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 28ae2835db3d..f84e14f23e50 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -54,7 +54,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * MMU context id, which is then moved to SPRN_PID.
 		 *
 		 * For the hash MMU it is either the first load from slb_cache
-		 * in switch_slb(), and/or load of MMU context id.
+		 * in switch_slb(), and/or the store of paca->mm_ctx_id in
+		 * copy_mm_to_paca().
 		 *
 		 * On the other side, the barrier is in mm/tlb-radix.c for
 		 * radix which orders earlier stores to clear the PTEs vs
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index b438220c4336..74c3b6f8e9b7 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -430,6 +430,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 	}
 	get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
 
+	copy_mm_to_paca(mm);
+
 	/*
 	 * We gradually age out SLBs after a number of context switches to
 	 * reduce reload overhead of unused entries (like we do with FP/VEC
@@ -460,24 +462,6 @@ void slb_set_size(u16 size)
 	mmu_slb_size = size;
 }
 
-static void cpu_flush_slb(void *parm)
-{
-	struct mm_struct *mm = parm;
-	unsigned long flags;
-
-	if (mm != current->active_mm)
-		return;
-
-	local_irq_save(flags);
-	slb_flush_and_rebolt();
-	local_irq_restore(flags);
-}
-
-void core_flush_all_slbs(struct mm_struct *mm)
-{
-	on_each_cpu(cpu_flush_slb, mm, 1);
-}
-
 void slb_initialize(void)
 {
 	unsigned long linear_llp, vmalloc_llp, io_llp;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index fc5b3a1ec666..546dd07c8083 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -207,6 +207,23 @@ static bool slice_check_range_fits(struct mm_struct *mm,
 	return true;
 }
 
+static void slice_flush_segments(void *parm)
+{
+#ifdef CONFIG_PPC64
+	struct mm_struct *mm = parm;
+	unsigned long flags;
+
+	if (mm != current->active_mm)
+		return;
+
+	copy_mm_to_paca(current->active_mm);
+
+	local_irq_save(flags);
+	slb_flush_and_rebolt();
+	local_irq_restore(flags);
+#endif
+}
+
 static void slice_convert(struct mm_struct *mm,
 				const struct slice_mask *mask, int psize)
 {
@@ -272,9 +289,6 @@ static void slice_convert(struct mm_struct *mm,
 	spin_unlock_irqrestore(&slice_convert_lock, flags);
 
 	copro_flush_all_slbs(mm);
-#ifdef CONFIG_PPC64
-	core_flush_all_slbs(mm);
-#endif
 }
 
 /*
@@ -488,9 +502,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		 * be already initialised beyond the old address limit.
 		 */
 		mm->context.slb_addr_limit = high_limit;
-#ifdef CONFIG_PPC64
-		core_flush_all_slbs(mm);
-#endif
+
+		on_each_cpu(slice_flush_segments, mm, 1);
 	}
 
 	/* Sanity checks */
@@ -652,10 +665,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 		(SLICE_NUM_HIGH &&
 		 !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
 		slice_convert(mm, &potential_mask, psize);
-#ifdef CONFIG_PPC64
 		if (psize > MMU_PAGE_BASE)
-			core_flush_all_slbs(mm);
-#endif
+			on_each_cpu(slice_flush_segments, mm, 1);
 	}
 	return newaddr;
 
-- 
2.18.0
