[2/4] RFC: SLB Rewrite (unify do_slb_bolted and slb_allocate)
David Gibson
david at gibson.dropbear.id.au
Wed Jul 7 16:06:48 EST 2004
Unify do_slb_bolted with the general SLB miss path. There is now a
single SLB miss handler, written in assembler, which is entered with
only the low-level exception prolog (EXCEPTION_PROLOG_[PI]SERIES rather
than EXCEPTION_PROLOG_COMMON) and minimal extra save/restore logic.
Index: working-2.6/arch/ppc64/kernel/asm-offsets.c
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/asm-offsets.c
+++ working-2.6/arch/ppc64/kernel/asm-offsets.c
@@ -93,6 +93,7 @@
DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+ DEFINE(PACASLBR3, offsetof(struct paca_struct, slb_r3));
#ifdef CONFIG_HUGETLB_PAGE
DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs));
#endif /* CONFIG_HUGETLB_PAGE */
Index: working-2.6/arch/ppc64/kernel/head.S
===================================================================
--- working-2.6.orig/arch/ppc64/kernel/head.S
+++ working-2.6/arch/ppc64/kernel/head.S
@@ -200,6 +200,7 @@
#define EX_R13 32
#define EX_SRR0 40
#define EX_DAR 48
+#define EX_LR 48 /* SLB miss saves LR, but not DAR */
#define EX_DSISR 56
#define EX_CCR 60
@@ -433,18 +434,16 @@
.globl DataAccessSLB_Pseries
DataAccessSLB_Pseries:
mtspr SPRG1,r13
- mtspr SPRG2,r12
- mfspr r13,DAR
- mfcr r12
- srdi r13,r13,60
- cmpdi r13,0xc
- beq .do_slb_bolted_Pseries
- mtcrf 0x80,r12
- mfspr r12,SPRG2
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, DataAccessSLB_common)
+ EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, data_slb_Pseries)
STD_EXCEPTION_PSERIES(0x400, InstructionAccess)
- STD_EXCEPTION_PSERIES(0x480, InstructionAccessSLB)
+
+ . = 0x480
+ .globl InstructionAccessSLB_Pseries
+InstructionAccessSLB_Pseries:
+ mtspr SPRG1,r13
+ EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, instr_slb_Pseries)
+
STD_EXCEPTION_PSERIES(0x500, HardwareInterrupt)
STD_EXCEPTION_PSERIES(0x600, Alignment)
STD_EXCEPTION_PSERIES(0x700, ProgramCheck)
@@ -494,10 +493,6 @@
mfspr r12,SPRG2
EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted)
-_GLOBAL(do_slb_bolted_Pseries)
- mtcrf 0x80,r12
- mfspr r12,SPRG2
- EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_slb_bolted)
/* Space for the naca. Architected to be located at real address
* NACA_PHYS_ADDR. Various tools rely on this location being fixed.
@@ -586,27 +581,23 @@
.globl DataAccessSLB_Iseries
DataAccessSLB_Iseries:
mtspr SPRG1,r13 /* save r13 */
- mtspr SPRG2,r12
- mfspr r13,DAR
- mfcr r12
- srdi r13,r13,60
- cmpdi r13,0xc
- beq .do_slb_bolted_Iseries
- mtcrf 0x80,r12
- mfspr r12,SPRG2
- EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN)
+ EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
EXCEPTION_PROLOG_ISERIES_2
- b DataAccessSLB_common
+ std r3,PACASLBR3(r13)
+ mfspr r3,DAR
+ b .do_slb_miss
-.do_slb_bolted_Iseries:
- mtcrf 0x80,r12
- mfspr r12,SPRG2
+ STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN)
+
+ .globl InstructionAccessSLB_Iseries
+InstructionAccessSLB_Iseries:
+ mtspr SPRG1,r13 /* save r13 */
EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB)
EXCEPTION_PROLOG_ISERIES_2
- b .do_slb_bolted
+ std r3,PACASLBR3(r13)
+ mr r3,r11
+ b .do_slb_miss
- STD_EXCEPTION_ISERIES(0x400, InstructionAccess, PACA_EXGEN)
- STD_EXCEPTION_ISERIES(0x480, InstructionAccessSLB, PACA_EXGEN)
MASKABLE_EXCEPTION_ISERIES(0x500, HardwareInterrupt)
STD_EXCEPTION_ISERIES(0x600, Alignment, PACA_EXGEN)
STD_EXCEPTION_ISERIES(0x700, ProgramCheck, PACA_EXGEN)
@@ -864,17 +855,6 @@
b .do_hash_page /* Try to handle as hpte fault */
.align 7
- .globl DataAccessSLB_common
-DataAccessSLB_common:
- mfspr r10,DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- std r3,_DAR(r1)
- bl .slb_allocate
- b fast_exception_return
-
- .align 7
.globl InstructionAccess_common
InstructionAccess_common:
EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
@@ -884,14 +864,6 @@
b .do_hash_page /* Try to handle as hpte fault */
.align 7
- .globl InstructionAccessSLB_common
-InstructionAccessSLB_common:
- EXCEPTION_PROLOG_COMMON(0x480, PACA_EXGEN)
- ld r3,_NIP(r1) /* SRR0 = NIA */
- bl .slb_allocate
- b fast_exception_return
-
- .align 7
.globl HardwareInterrupt_common
.globl HardwareInterrupt_entry
HardwareInterrupt_common:
@@ -1137,130 +1109,52 @@
ld r13,PACA_EXSLB+EX_R13(r13)
rfid
+data_slb_Pseries:
+ std r3,PACASLBR3(r13)
+ mfspr r3,DAR
+ b .do_slb_miss
+
+instr_slb_Pseries:
+ std r3,PACASLBR3(r13)
+ mr r3,r11 /* prolog stored SRR0 in r11 */
+ b .do_slb_miss
+
/*
* r13 points to the PACA, r9 contains the saved CR,
* r11 and r12 contain the saved SRR0 and SRR1.
+ * r3 has the faulting address
* r9 - r13 are saved in paca->exslb.
+ * r3 is saved in paca->slb_r3
* We assume we aren't going to take any exceptions during this procedure.
*/
-_GLOBAL(do_slb_bolted)
+_GLOBAL(do_slb_miss)
+ mflr r10
+
stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */
+ std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
- /*
- * We take the next entry, round robin. Previously we tried
- * to find a free slot first but that took too long. Unfortunately
- * we dont have any LRU information to help us choose a slot.
- */
-
- /* r13 = paca */
- /* use a cpu feature mask if we ever change our slb size */
-1: ld r10,PACASTABRR(r13)
- addi r10,r10,1
- cmpdi r10,SLB_NUM_ENTRIES
- blt+ 2f
- li r10,SLB_NUM_BOLTED /* dont touch bolted slots */
-2: std r10,PACASTABRR(r13)
-
- /* r13 = paca, r10 = entry */
-
- /*
- * Never cast out the segment for our kernel stack. Since we
- * dont invalidate the ERAT we could have a valid translation
- * for the kernel stack during the first part of exception exit
- * which gets invalidated due to a tlbie from another cpu at a
- * non recoverable point (after setting srr0/1) - Anton
- */
- slbmfee r9,r10
- srdi r9,r9,27
- /*
- * Use paca->ksave as the value of the kernel stack pointer,
- * because this is valid at all times.
- * The >> 27 (rather than >> 28) is so that the LSB is the
- * valid bit - this way we check valid and ESID in one compare.
- * In order to completely close the tiny race in the context
- * switch (between updating r1 and updating paca->ksave),
- * we check against both r1 and paca->ksave.
- */
- srdi r11,r1,27
- ori r11,r11,1
- cmpd r11,r9
- beq- 1b
- ld r11,PACAKSAVE(r13)
- srdi r11,r11,27
- ori r11,r11,1
- cmpd r11,r9
- beq- 1b
-
- /* r13 = paca, r10 = entry */
-
- /* (((ea >> 28) & 0x1fff) << 15) | (ea >> 60) */
- mfspr r9,DAR
- rldicl r11,r9,36,51
- sldi r11,r11,15
- srdi r9,r9,60
- or r11,r11,r9
-
- /* VSID_RANDOMIZER */
- li r9,9
- sldi r9,r9,32
- oris r9,r9,58231
- ori r9,r9,39831
-
- /* vsid = (ordinal * VSID_RANDOMIZER) & VSID_MASK */
- mulld r11,r11,r9
- clrldi r11,r11,28
-
- /* r13 = paca, r10 = entry, r11 = vsid */
-
- /* Put together slb word1 */
- sldi r11,r11,12
-
-BEGIN_FTR_SECTION
- /* set kp and c bits */
- ori r11,r11,0x480
-END_FTR_SECTION_IFCLR(CPU_FTR_16M_PAGE)
-BEGIN_FTR_SECTION
- /* set kp, l and c bits */
- ori r11,r11,0x580
-END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
-
- /* r13 = paca, r10 = entry, r11 = slb word1 */
-
- /* Put together slb word0 */
- mfspr r9,DAR
- clrrdi r9,r9,28 /* get the new esid */
- oris r9,r9,0x800 /* set valid bit */
- rldimi r9,r10,0,52 /* insert entry */
-
- /* r13 = paca, r9 = slb word0, r11 = slb word1 */
-
- /*
- * No need for an isync before or after this slbmte. The exception
- * we enter with and the rfid we exit with are context synchronizing .
- */
- slbmte r11,r9
+ bl .slb_allocate /* handle it */
/* All done -- return from exception. */
+
+ ld r10,PACA_EXSLB+EX_LR(r13)
+ ld r3,PACASLBR3(r13)
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */
+ mtlr r10
+
andi. r10,r12,MSR_RI /* check for unrecoverable exception */
beq- unrecov_slb
- /*
- * Until everyone updates binutils hardwire the POWER4 optimised
- * single field mtcrf
- */
-#if 0
- .machine push
- .machine "power4"
+.machine push
+.machine "power4"
mtcrf 0x80,r9
- .machine pop
-#else
- .long 0x7d380120
-#endif
+ mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
+.machine pop
+ /* Clear RI */
mfmsr r10
clrrdi r10,r10,2
mtmsrd r10,1
Index: working-2.6/include/asm-ppc64/paca.h
===================================================================
--- working-2.6.orig/include/asm-ppc64/paca.h
+++ working-2.6/include/asm-ppc64/paca.h
@@ -78,6 +78,7 @@
u64 exmc[8]; /* used for machine checks */
u64 exslb[8]; /* used for SLB/segment table misses
* on the linear mapping */
+ u64 slb_r3; /* spot to save R3 on SLB miss */
mm_context_t context;
u16 slb_cache[SLB_CACHE_ENTRIES];
u16 slb_cache_ptr;
--
David Gibson | For every complex problem there is a
david AT gibson.dropbear.id.au | solution which is simple, neat and
| wrong.
http://www.ozlabs.org/people/dgibson
** Sent via the linuxppc64-dev mail list. See http://lists.linuxppc.org/
More information about the Linuxppc64-dev
mailing list