[PATCH 2/2] powerpc/64s/hash: add torture_hpt kernel boot option to increase hash faults

Nicholas Piggin npiggin at gmail.com
Sun May 3 18:22:36 AEST 2020


This option increases the rate of hash faults by limiting the number of
kernel HPT entries. This helps stress test difficult-to-hit paths in the
kernel.
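
To use it, boot with "torture_hpt" on the kernel command line. It can be
combined with the torture_slb option added earlier in this series, e.g. by
appending both parameters to the command line:

    torture_slb torture_hpt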

Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
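For review, here is a condensed sketch of the flow hpt_do_torture() implements
(a simplified restatement as a review aid, not part of the patch; the
QEMU_WORKAROUND knob and the check for non-kernel addresses are omitted):

static void hpt_torture_sketch(unsigned long ea, unsigned long access,
			       unsigned long rflags, unsigned long hpte_group)
{
	int cpu = raw_smp_processor_id();
	unsigned long last = torture_hpt_last_group[cpu];

	/* Evict the group whose removal was deferred on the previous fault. */
	if (last != -1UL) {
		while (mmu_hash_ops.hpte_remove(last) != -1)
			;
		torture_hpt_last_group[cpu] = -1UL;
	}

	if ((access & (_PAGE_READ | _PAGE_WRITE)) &&
	    !(rflags & (HPTE_R_I | HPTE_R_G))) {
		/*
		 * Cacheable mapping: touch the address so the TLB holds a
		 * translation, then remove the group that was just inserted,
		 * forcing another hash fault on the next TLB miss.
		 */
		if ((access & _PAGE_WRITE) && (access & _PAGE_READ))
			atomic_add(0, (atomic_t *)(ea & ~0x3));
		else if (access & _PAGE_READ)
			*(volatile char *)ea;
		mb();
		while (mmu_hash_ops.hpte_remove(hpte_group) != -1)
			;
	} else {
		/*
		 * Cache-inhibited / guarded: the address cannot safely be
		 * touched here, so defer removal of this group to the next
		 * fault on this CPU.
		 */
		torture_hpt_last_group[cpu] = hpte_group;
	}
}
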
 .../admin-guide/kernel-parameters.txt         |  9 +++
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 10 +++
 arch/powerpc/mm/book3s64/hash_4k.c            |  3 +
 arch/powerpc/mm/book3s64/hash_64k.c           |  8 +++
 arch/powerpc/mm/book3s64/hash_utils.c         | 66 ++++++++++++++++++-
 5 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5a34b7dd9ebe..1ec6a32a717a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -876,6 +876,15 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	torture_hpt	[PPC]
+			Limits the number of kernel HPT entries in the hash
+			page table to increase the rate of hash page table
+			faults on kernel addresses.
+
+			This may hang when run on processors / emulators which
+			do not have a TLB, or which flush it more often than
+			required; QEMU seems to have problems.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.txt.
 
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 758de1e0f676..539e3d91eac4 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -324,6 +324,16 @@ static inline bool torture_slb(void)
 	return static_branch_unlikely(&torture_slb_key);
 }
 
+extern bool torture_hpt_enabled;
+DECLARE_STATIC_KEY_FALSE(torture_hpt_key);
+static inline bool torture_hpt(void)
+{
+	return static_branch_unlikely(&torture_hpt_key);
+}
+
+void hpt_do_torture(unsigned long ea, unsigned long access,
+		    unsigned long rflags, unsigned long hpte_group);
+
 /*
  * This computes the AVPN and B fields of the first dword of a HPTE,
  * for use when we want to match an existing PTE.  The bottom 7 bits
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 22e787123cdf..54e4ff8c558d 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -118,6 +118,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (torture_hpt())
+			hpt_do_torture(ea, access, rflags, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 7084ce2951e6..19ea0fc145a9 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -216,6 +216,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (torture_hpt())
+		hpt_do_torture(ea, access, rflags, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +330,12 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (torture_hpt())
+			hpt_do_torture(ea, access, rflags, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9c487b5782ef..e9bdf825f897 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -353,8 +353,12 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __read_mostly = false;
 bool torture_slb_enabled __read_mostly = false;
+bool torture_hpt_enabled __read_mostly = false;
+
+/* per-CPU array allocated if we enable torture_hpt. */
+static unsigned long *torture_hpt_last_group;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -370,6 +374,13 @@ static int __init parse_torture_slb(char *p)
 }
 early_param("torture_slb", parse_torture_slb);
 
+static int __init parse_torture_hpt(char *p)
+{
+	torture_hpt_enabled = true;
+	return 0;
+}
+early_param("torture_hpt", parse_torture_hpt);
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -863,6 +874,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 }
 
 DEFINE_STATIC_KEY_FALSE(torture_slb_key);
+DEFINE_STATIC_KEY_FALSE(torture_hpt_key);
 
 static void __init htab_initialize(void)
 {
@@ -882,6 +894,15 @@ static void __init htab_initialize(void)
 
 	if (torture_slb_enabled)
 		static_branch_enable(&torture_slb_key);
+	if (torture_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&torture_hpt_key);
+		tmp = memblock_phys_alloc_range(sizeof(unsigned long) * NR_CPUS,
+						  0,
+						  0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(unsigned long) * NR_CPUS);
+		torture_hpt_last_group = __va(tmp);
+	}
 
 	/*
 	 * Calculate the required size of the htab.  We want the number of
@@ -1901,6 +1922,49 @@ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
 	return slot;
 }
 
+void hpt_do_torture(unsigned long ea, unsigned long access,
+		    unsigned long rflags, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = torture_hpt_last_group[cpu];
+	if (last_group != -1UL) {
+		while (mmu_hash_ops.hpte_remove(last_group) != -1)
+			;
+		torture_hpt_last_group[cpu] = -1UL;
+	}
+
+#define QEMU_WORKAROUND	0
+
+	if (ea >= PAGE_OFFSET) {
+		if (!QEMU_WORKAROUND && (access & (_PAGE_READ|_PAGE_WRITE)) &&
+		    !(rflags & (HPTE_R_I|HPTE_R_G))) {
+			/* prefetch / prefetchw does not seem to set up a TLB
+			 * entry with the powerpc systemsim (mambo) emulator,
+			 * though it works with real hardware. An alternative
+			 * approach that would work more reliably on quirky
+			 * emulators like QEMU may be to remember the last
+			 * insertion and remove that, rather than removing the
+			 * current insertion. Then no prefetch is required.
+			 */
+			if ((access & _PAGE_WRITE) && (access & _PAGE_READ))
+				atomic_add(0, (atomic_t *)(ea & ~0x3));
+			else if (access & _PAGE_READ)
+				*(volatile char *)ea;
+
+			mb();
+
+			while (mmu_hash_ops.hpte_remove(hpte_group) != -1)
+				;
+		} else {
+			/* Can't prefetch cache-inhibited so clear next time. */
+			torture_hpt_last_group[cpu] = hpte_group;
+		}
+	}
+}
+
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
 {
-- 
2.23.0


