[2/4] RFC: SLB Rewrite (unify do_slb_bolted and slb_allocate)

David Gibson david at gibson.dropbear.id.au
Wed Jul 7 16:06:48 EST 2004


Unify do_slb_bolted with the general SLB miss path.  There is now a single
SLB miss handler, written in assembler, which is entered with only the
low-level exception prolog (EXCEPTION_PROLOG_[PI]SERIES rather than
EXCEPTION_PROLOG_COMMON) plus minimal extra save/restore logic.

Index: working-2.6/arch/ppc64/kernel/asm-offsets.c
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/asm-offsets.c
+++ working-2.6/arch/ppc64/kernel/asm-offsets.c
@@ -93,6 +93,7 @@
 	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
 	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+	DEFINE(PACASLBR3, offsetof(struct paca_struct, slb_r3));
 #ifdef CONFIG_HUGETLB_PAGE
 	DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
 #endif /* CONFIG_HUGETLB_PAGE */
Index: working-2.6/arch/ppc64/kernel/head.S
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/head.S
+++ working-2.6/arch/ppc64/kernel/head.S
@@ -200,6 +200,7 @@
 #define EX_R13		32
 #define EX_SRR0		40
 #define EX_DAR		48
+#define EX_LR		48	/* SLB miss saves LR, but not DAR */
 #define EX_DSISR	56
 #define EX_CCR		60

@@ -433,18 +434,16 @@
 	.globl DataAccessSLB_Pseries
 DataAccessSLB_Pseries:
 	mtspr	SPRG1,r13
-	mtspr	SPRG2,r12
-	mfspr	r13,DAR
-	mfcr	r12
-	srdi	r13,r13,60
-	cmpdi	r13,0xc
-	beq	.do_slb_bolted_Pseries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRG2
-	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, DataAccessSLB_common)
+	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, data_slb_Pseries)

 	STD_EXCEPTION_PSERIES(0x400, InstructionAccess)
-	STD_EXCEPTION_PSERIES(0x480, InstructionAccessSLB)
+
+	. = 0x480
+	.globl InstructionAccessSLB_Pseries
+InstructionAccessSLB_Pseries:
+	mtspr	SPRG1,r13
+	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, instr_slb_Pseries)
+
 	STD_EXCEPTION_PSERIES(0x500, HardwareInterrupt)
 	STD_EXCEPTION_PSERIES(0x600, Alignment)
 	STD_EXCEPTION_PSERIES(0x700, ProgramCheck)
@@ -494,10 +493,6 @@
 	mfspr	r12,SPRG2
 	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)

-_GLOBAL(do_slb_bolted_Pseries)
-	mtcrf	0x80,r12
-	mfspr	r12,SPRG2
-	EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_slb_bolted)

 	/* Space for the naca.  Architected to be located at real address
 	 * NACA_PHYS_ADDR.  Various tools rely on this location being fixed.
@@ -586,27 +581,23 @@
 	.globl	DataAccessSLB_Iseries
 DataAccessSLB_Iseries:
 	mtspr	SPRG1,r13		/* save r13 */
-	mtspr	SPRG2,r12
-	mfspr	r13,DAR
-	mfcr	r12
-	srdi	r13,r13,60
-	cmpdi	r13,0xc
-	beq	.do_slb_bolted_Iseries
-	mtcrf	0x80,r12
-	mfspr	r12,SPRG2
-	EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
+	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
 	EXCEPTION_PROLOG_ISERIES_2
-	b	DataAccessSLB_common
+	std	r3,PACASLBR3(r13)
+	mfspr	r3,DAR
+	b	.do_slb_miss

-.do_slb_bolted_Iseries:
-	mtcrf	0x80,r12
-	mfspr	r12,SPRG2
+	STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN)
+
+	.globl	InstructionAccessSLB_Iseries
+InstructionAccessSLB_Iseries:
+	mtspr	SPRG1,r13		/* save r13 */
 	EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
 	EXCEPTION_PROLOG_ISERIES_2
-	b	.do_slb_bolted
+	std	r3,PACASLBR3(r13)
+	mr	r3,r11
+	b	.do_slb_miss

-	STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN)
-	STD_EXCEPTION_ISERIES(0x480, InstructionAccessSLB, PACA_EXGEN)
 	MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt)
 	STD_EXCEPTION_ISERIES(0x600, Alignment, PACA_EXGEN)
 	STD_EXCEPTION_ISERIES(0x700, ProgramCheck, PACA_EXGEN)
@@ -864,17 +855,6 @@
 	b	.do_hash_page	 	/* Try to handle as hpte fault */

 	.align	7
-	.globl DataAccessSLB_common
-DataAccessSLB_common:
-	mfspr	r10,DAR
-	std	r10,PACA_EXGEN+EX_DAR(r13)
-	EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
-	ld	r3,PACA_EXGEN+EX_DAR(r13)
-	std	r3,_DAR(r1)
-	bl	.slb_allocate
-	b	fast_exception_return
-
-	.align	7
 	.globl InstructionAccess_common
 InstructionAccess_common:
 	EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
@@ -884,14 +864,6 @@
 	b	.do_hash_page		/* Try to handle as hpte fault */

 	.align	7
-	.globl InstructionAccessSLB_common
-InstructionAccessSLB_common:
-	EXCEPTION_PROLOG_COMMON(0x480, PACA_EXGEN)
-	ld	r3,_NIP(r1)		/* SRR0 = NIA	*/
-	bl	.slb_allocate
-	b	fast_exception_return
-
-	.align	7
 	.globl HardwareInterrupt_common
 	.globl HardwareInterrupt_entry
 HardwareInterrupt_common:
@@ -1137,130 +1109,52 @@
 	ld	r13,PACA_EXSLB+EX_R13(r13)
 	rfid

+data_slb_Pseries:
+	std	r3,PACASLBR3(r13)	/* save r3 in paca->slb_r3 */
+	mfspr	r3,DAR			/* r3 = faulting address, read from DAR */
+	b	.do_slb_miss
+
+instr_slb_Pseries:
+	std	r3,PACASLBR3(r13)	/* save r3 in paca->slb_r3 */
+	mr	r3,r11		/* prolog stored SRR0 in r11 */
+	b	.do_slb_miss
+
 /*
  * r13 points to the PACA, r9 contains the saved CR,
  * r11 and r12 contain the saved SRR0 and SRR1.
+ * r3 has the faulting address
  * r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
  * We assume we aren't going to take any exceptions during this procedure.
  */
-_GLOBAL(do_slb_bolted)
+_GLOBAL(do_slb_miss)
+	mflr	r10
+
 	stw	r9,PACA_EXSLB+EX_CCR(r13)	/* save CR in exc. frame */
 	std	r11,PACA_EXSLB+EX_SRR0(r13)	/* save SRR0 in exc. frame */
+	std	r10,PACA_EXSLB+EX_LR(r13)	/* save LR */

-	/*
-	 * We take the next entry, round robin. Previously we tried
-	 * to find a free slot first but that took too long. Unfortunately
-	 * we dont have any LRU information to help us choose a slot.
-	 */
-
-	/* r13 = paca */
-	/* use a cpu feature mask if we ever change our slb size */
-1:	ld	r10,PACASTABRR(r13)
-	addi	r10,r10,1
-	cmpdi	r10,SLB_NUM_ENTRIES
-	blt+	2f
-	li	r10,SLB_NUM_BOLTED		/* dont touch bolted slots */
-2:	std	r10,PACASTABRR(r13)
-
-	/* r13 = paca, r10 = entry */
-
-	/*
-	 * Never cast out the segment for our kernel stack. Since we
-	 * dont invalidate the ERAT we could have a valid translation
-	 * for the kernel stack during the first part of exception exit
-	 * which gets invalidated due to a tlbie from another cpu at a
-	 * non recoverable point (after setting srr0/1) - Anton
-	 */
-	slbmfee	r9,r10
-	srdi	r9,r9,27
-	/*
-	 * Use paca->ksave as the value of the kernel stack pointer,
-	 * because this is valid at all times.
-	 * The >> 27 (rather than >> 28) is so that the LSB is the
-	 * valid bit - this way we check valid and ESID in one compare.
-	 * In order to completely close the tiny race in the context
-	 * switch (between updating r1 and updating paca->ksave),
-	 * we check against both r1 and paca->ksave.
-	 */
-	srdi	r11,r1,27
-	ori	r11,r11,1
-	cmpd	r11,r9
-	beq-	1b
-	ld	r11,PACAKSAVE(r13)
-	srdi	r11,r11,27
- 	ori	r11,r11,1
- 	cmpd	r11,r9
- 	beq-	1b
-
-	/* r13 = paca, r10 = entry */
-
-	/* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
-	mfspr	r9,DAR
-	rldicl	r11,r9,36,51
-	sldi	r11,r11,15
-	srdi	r9,r9,60
-	or	r11,r11,r9
-
-	/* VSID_RANDOMIZER */
-	li	r9,9
-	sldi	r9,r9,32
-	oris	r9,r9,58231
-	ori	r9,r9,39831
-
-	/* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
-	mulld	r11,r11,r9
-	clrldi	r11,r11,28
-
-	/* r13 = paca, r10 = entry, r11 = vsid */
-
-	/* Put together slb word1 */
-	sldi	r11,r11,12
-
-BEGIN_FTR_SECTION
-	/* set kp and c bits */
-	ori	r11,r11,0x480
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
-BEGIN_FTR_SECTION
-	/* set kp, l and c bits */
-	ori	r11,r11,0x580
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-
-	/* r13 = paca, r10 = entry, r11 = slb word1 */
-
-	/* Put together slb word0 */
-	mfspr	r9,DAR
-	clrrdi	r9,r9,28	/* get the new esid */
-	oris	r9,r9,0x800	/* set valid bit */
-	rldimi	r9,r10,0,52	/* insert entry */
-
-	/* r13 = paca, r9 = slb word0, r11 = slb word1 */
-
-	/*
-	 * No need for an isync before or after this slbmte. The exception
-	 * we enter with and the rfid we exit with are context synchronizing .
-	 */
-	slbmte	r11,r9
+	bl	.slb_allocate			/* handle it */

 	/* All done -- return from exception. */
+
+	ld	r10,PACA_EXSLB+EX_LR(r13)
+	ld	r3,PACASLBR3(r13)
 	lwz	r9,PACA_EXSLB+EX_CCR(r13)	/* get saved CR */
 	ld	r11,PACA_EXSLB+EX_SRR0(r13)	/* get saved SRR0 */

+	mtlr	r10
+
 	andi.	r10,r12,MSR_RI	/* check for unrecoverable exception */
 	beq-	unrecov_slb

-	/*
-	 * Until everyone updates binutils hardwire the POWER4 optimised
-	 * single field mtcrf
-	 */
-#if 0
-	.machine	push
-	.machine	"power4"
+.machine	push
+.machine	"power4"
 	mtcrf	0x80,r9
-	.machine	pop
-#else
-	.long 0x7d380120
-#endif
+	mtcrf	0x01,r9		/* slb_allocate uses cr0 and cr7 */
+.machine	pop

+	/* Clear RI */
 	mfmsr	r10
 	clrrdi	r10,r10,2
 	mtmsrd	r10,1
Index: working-2.6/include/asm-ppc64/paca.h
===================================================================
--- working-2.6.orig/include/asm-ppc64/paca.h
+++ working-2.6/include/asm-ppc64/paca.h
@@ -78,6 +78,7 @@
 	u64 exmc[8];		/* used for machine checks */
 	u64 exslb[8];		/* used for SLB/segment table misses
 				 * on the linear mapping */
+	u64 slb_r3;		/* spot to save R3 on SLB miss */
 	mm_context_t context;
 	u16 slb_cache[SLB_CACHE_ENTRIES];
 	u16 slb_cache_ptr;

--
David Gibson			| For every complex problem there is a
david AT gibson.dropbear.id.au	| solution which is simple, neat and
				| wrong.
http://www.ozlabs.org/people/dgibson

** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/





More information about the Linuxppc64-dev mailing list