RFC: Use bitmaps to track free/user SLB slots
David Gibson
david at gibson.dropbear.id.au
Thu Dec 22 17:42:35 EST 2005
This needs way more testing and thought before being considered for
merging, but here it is in case people are interested. It implements
a new, possibly superior approach to managing SLB entries.
Currently, when we take an SLB miss, we just use round-robin to find a
slot to put the new entry into - the slot located may or may not
already contain a useful translation. When we take an SLB miss on a
user address we record its address in a cache of up to 16 entries. On
context switch, if the cache hasn't overflowed we use it to just flush
the user entries, rather than flushing the whole SLB.
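
For illustration, here is a rough C sketch of that existing scheme. The
struct and the constant and function names are stand-ins for the real
paca fields and SLB constants; the real code is the assembly this patch
removes below.

#include <stdint.h>

#define NUM_SLOTS	64	/* stand-in for SLB_NUM_ENTRIES */
#define NUM_BOLTED	3	/* stand-in for SLB_NUM_BOLTED */
#define CACHE_SIZE	16	/* stand-in for SLB_CACHE_ENTRIES */

struct rr_paca {
	uint64_t stab_rr;		/* round-robin pointer */
	uint16_t slb_cache[CACHE_SIZE];	/* low 16 bits of cached user ESIDs */
	uint16_t slb_cache_ptr;
};

/* SLB miss: pick the next slot round-robin, skipping the bolted entries */
static unsigned int pick_slot_rr(struct rr_paca *p)
{
	unsigned int slot = p->stab_rr + 1;

	if (slot >= NUM_SLOTS)
		slot = NUM_BOLTED;	/* wrap back past the bolted slots */
	p->stab_rr = slot;
	return slot;
}

/* User miss: remember the ESID so the context switch can flush just it */
static void record_user_esid(struct rr_paca *p, uint16_t esid_low)
{
	if (p->slb_cache_ptr < CACHE_SIZE)
		p->slb_cache[p->slb_cache_ptr++] = esid_low;
	else
		p->slb_cache_ptr = CACHE_SIZE + 1;	/* overflowed: full flush later */
}
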
With this patch, instead of maintaining the cache and round-robin
pointer, we keep a bitmap of free SLB slots, and a bitmap of SLB slots
containing user entries. When we take an SLB miss, we find a free
slot from the bitmap (using cntlzd) rather than using round robin. We
fall back to round robin once all the free slots have been used (we do
this by manipulating the bitmap itself, avoiding the need for a separate
round-robin counter). The SLB miss handler clears the relevant bit in the
free slots bitmap and updates the relevant bit in the user slots
bitmap. On context switch, we use the user slots bitmap to flush just
those slots containing user entries, and those slots are then added to
the free slots bitmap. The idea, obviously, is to try to reduce the
number of SLB misses by making better use of free SLB slots.
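
Here is a similar C sketch of the new scheme, assuming a 64-entry SLB
and using __builtin_clzll() where the assembly uses cntlzd; again the
struct and function names are made up for the example, and the real
code is in the patch below.

#include <stdint.h>

#define NUM_BOLTED	3	/* stand-in for SLB_NUM_BOLTED */

struct bitmap_paca {
	/* bit 0 (the MSB, as counted by cntlzd) corresponds to SLB slot 0 */
	uint64_t slb_free_bitmap;	/* 1 = slot free; bolted slots never set */
	uint64_t slb_user_bitmap;	/* 1 = slot holds a user entry */
};

/* SLB miss: take the first free slot and update both bitmaps */
static unsigned int alloc_slot(struct bitmap_paca *p, int user)
{
	/* the free bitmap is never left empty, so clz is well defined */
	unsigned int slot = __builtin_clzll(p->slb_free_bitmap);
	uint64_t bit = 1ULL << (63 - slot);

	p->slb_free_bitmap &= ~bit;
	if (p->slb_free_bitmap == 0) {
		/*
		 * No free slots left: leave exactly one bit set, for the
		 * slot after the one just used (wrapping past the bolted
		 * entries).  The next miss then evicts that slot, which
		 * degenerates to round robin without a separate counter.
		 */
		uint64_t next = bit >> 1;

		p->slb_free_bitmap = next ? next : 1ULL << (63 - NUM_BOLTED);
	}

	if (user)
		p->slb_user_bitmap |= bit;
	else
		p->slb_user_bitmap &= ~bit;
	return slot;
}

/* Context switch: slbie each user slot, then hand the slots back as free */
static void flush_user_slots(struct bitmap_paca *p)
{
	/* the real slb_flush_user_slots() does an slbmfee/slbie per set bit */
	p->slb_free_bitmap |= p->slb_user_bitmap;
	p->slb_user_bitmap = 0;
}

Because the bolted slots are the leading bits of the bitmap,
initialising the free bitmap to -1UL >> SLB_NUM_BOLTED (as the paca.c
hunk does) keeps them permanently out of the allocation.
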
My preliminary tests (on POWER5 LPAR) seem to indicate that this has
essentially no effect (delta<1ns) on the time for a user SLB miss (the
cost of the bitmap manipulation is the same as that for maintaining
the old slb cache). Time for kernel SLB misses is probably slightly
increased; not measured, but I think it should be delta<~5ns. Context
switch time may be increased slightly; also not measured yet, but I
think it should be <0.5us at most and quite likely negligible in
comparison to the rest of a context switch. I've no idea what the
impact on SLB miss rates for various workloads might be.
Index: working-2.6/arch/powerpc/mm/slb_low.S
===================================================================
--- working-2.6.orig/arch/powerpc/mm/slb_low.S 2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/slb_low.S 2005-12-22 16:55:05.000000000 +1100
@@ -192,17 +192,8 @@ slb_finish_load:
beq 3f
#endif /* CONFIG_PPC_ISERIES */
- ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* use a cpu feature mask if we ever change our slb size */
- cmpldi r10,SLB_NUM_ENTRIES
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-
+ ld r9,PACASLBFREEBITMAP(r13)
+ cntlzd r10,r9
3:
rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
@@ -215,26 +206,46 @@ slb_finish_load:
*/
slbmte r11,r10
- /* we're done for kernel addresses */
- crclr 4*cr0+eq /* set result to "success" */
- bgelr cr7
+ ld r3,PACASLBUSERBITMAP(r13)
+
+ li r11,1
+ sldi r11,r11,63 /* r11 = 0x8000000000000000 */
+ srd r11,r11,r10
+
+ andc. r9,r9,r11
+ andc r3,r3,r11
+ bne 7f
- /* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
- bge 1f
-
- /* still room in the slb cache */
- sldi r11,r3,1 /* r11 = offset * sizeof(u16) */
- rldicl r10,r10,36,28 /* get low 16 bits of the ESID */
- add r11,r11,r13 /* r11 = (u16 *)paca + offset */
- sth r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
- b 2f
-1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
-2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
+ srdi. r9,r11,1
+ bne 7f
+
+ li r9,1
+ rotrdi r9,r9,SLB_NUM_BOLTED+1
+
+7: bge cr7,6f
+ or r3,r3,r11
+
+6: std r9,PACASLBFREEBITMAP(r13)
+ std r3,PACASLBUSERBITMAP(r13)
crclr 4*cr0+eq /* set result to "success" */
blr
+/* void slb_flush_user_slots(u64 slots) */
+_GLOBAL(slb_flush_user_slots)
+ li r6,-1
+ srdi r6,r6,1 /* r6 = 0x7fffffffffffffff */
+
+1:
+ cmpldi r3,0
+ beqlr /* Nothing left, we're done */
+
+ cntlzd r4,r3
+ slbmfee r5,r4
+ /* V bit from slbmfee becomes class bit for slbie, since user
+ * SLBEs have the class bit set */
+ slbie r5
+
+ srd r7,r6,r4 /* r7 = bits we still care about */
+ and r3,r3,r7
+
+ b 1b
Index: working-2.6/include/asm-powerpc/paca.h
===================================================================
--- working-2.6.orig/include/asm-powerpc/paca.h 2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/include/asm-powerpc/paca.h 2005-12-22 16:56:33.000000000 +1100
@@ -83,15 +83,14 @@ struct paca_struct {
#endif /* CONFIG_PPC_64K_PAGES */
mm_context_t context;
- u16 slb_cache[SLB_CACHE_ENTRIES];
- u16 slb_cache_ptr;
+ u64 slb_free_bitmap;
+ u64 slb_user_bitmap;
/*
* then miscellaneous read-write fields
*/
struct task_struct *__current; /* Pointer to current */
u64 kstack; /* Saved Kernel stack addr */
- u64 stab_rr; /* stab/slb round-robin counter */
u64 saved_r1; /* r1 save for RTAS calls */
u64 saved_msr; /* MSR saved here by enter_rtas */
u8 proc_enabled; /* irq soft-enable flag */
Index: working-2.6/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- working-2.6.orig/arch/powerpc/kernel/asm-offsets.c 2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/arch/powerpc/kernel/asm-offsets.c 2005-12-22 16:56:39.000000000 +1100
@@ -117,12 +117,11 @@ int main(void)
DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
- DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
- DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
- DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
+ DEFINE(PACASLBFREEBITMAP, offsetof(struct paca_struct, slb_free_bitmap));
+ DEFINE(PACASLBUSERBITMAP, offsetof(struct paca_struct, slb_user_bitmap));
DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
#ifdef CONFIG_PPC_64K_PAGES
DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
Index: working-2.6/arch/powerpc/mm/slb.c
===================================================================
--- working-2.6.orig/arch/powerpc/mm/slb.c 2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/slb.c 2005-12-22 16:57:10.000000000 +1100
@@ -32,6 +32,7 @@
extern void slb_allocate_realmode(unsigned long ea);
extern void slb_allocate_user(unsigned long ea);
+extern void slb_flush_user_slots(u64 slots);
static void slb_allocate(unsigned long ea)
{
@@ -92,35 +93,27 @@ static void slb_flush_and_rebolt(void)
"r"(mk_vsid_data(ksp_esid_data, lflags)),
"r"(ksp_esid_data)
: "memory");
+
+ get_paca()->slb_user_bitmap = 0;
}
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
- unsigned long offset = get_paca()->slb_cache_ptr;
- unsigned long esid_data = 0;
+ u64 slots = get_paca()->slb_user_bitmap;
unsigned long pc = KSTK_EIP(tsk);
unsigned long stack = KSTK_ESP(tsk);
unsigned long unmapped_base;
- if (offset <= SLB_CACHE_ENTRIES) {
- int i;
- asm volatile("isync" : : : "memory");
- for (i = 0; i < offset; i++) {
- esid_data = ((unsigned long)get_paca()->slb_cache[i]
- << SID_SHIFT) | SLBIE_C;
- asm volatile("slbie %0" : : "r" (esid_data));
- }
- asm volatile("isync" : : : "memory");
- } else {
- slb_flush_and_rebolt();
- }
+ slb_flush_user_slots(slots);
/* Workaround POWER5 < DD2.1 issue */
- if (offset == 1 || offset > SLB_CACHE_ENTRIES)
- asm volatile("slbie %0" : : "r" (esid_data));
+ if (slots) {
+ asm volatile("slbie %0" : : "r" (0xa000000000000000));
+ get_paca()->slb_free_bitmap |= slots;
+ }
- get_paca()->slb_cache_ptr = 0;
+ get_paca()->slb_user_bitmap = 0;
get_paca()->context = mm->context;
#ifdef CONFIG_PPC_64K_PAGES
get_paca()->pgdir = mm->pgd;
@@ -225,6 +218,4 @@ void slb_initialize(void)
asm volatile("isync":::"memory");
}
#endif /* CONFIG_PPC_ISERIES */
-
- get_paca()->stab_rr = SLB_NUM_BOLTED;
}
Index: working-2.6/arch/powerpc/kernel/paca.c
===================================================================
--- working-2.6.orig/arch/powerpc/kernel/paca.c 2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/arch/powerpc/kernel/paca.c 2005-12-22 16:30:32.000000000 +1100
@@ -64,7 +64,8 @@ struct lppaca lppaca[] = {
.stab_real = (asrr), /* Real pointer to segment table */ \
.stab_addr = (asrv), /* Virt pointer to segment table */ \
.cpu_start = (start), /* Processor start */ \
- .hw_cpu_id = 0xffff,
+ .hw_cpu_id = 0xffff, \
+ .slb_free_bitmap = (-1UL >> SLB_NUM_BOLTED),
#ifdef CONFIG_PPC_ISERIES
#define PACA_INIT_ISERIES(number) \
Index: working-2.6/arch/powerpc/mm/stab.c
===================================================================
--- working-2.6.orig/arch/powerpc/mm/stab.c 2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/stab.c 2005-12-22 16:58:39.000000000 +1100
@@ -28,6 +28,7 @@ struct stab_entry {
};
#define NR_STAB_CACHE_ENTRIES 8
+DEFINE_PER_CPU(unsigned long, stab_rr);
DEFINE_PER_CPU(long, stab_cache_ptr);
DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
@@ -70,7 +71,7 @@ static int make_ste(unsigned long stab,
* Could not find empty entry, pick one with a round robin selection.
* Search all entries in the two groups.
*/
- castout_entry = get_paca()->stab_rr;
+ castout_entry = __get_cpu_var(stab_rr);
for (i = 0; i < 16; i++) {
if (castout_entry < 8) {
global_entry = (esid & 0x1f) << 3;
@@ -89,7 +90,7 @@ static int make_ste(unsigned long stab,
castout_entry = (castout_entry + 1) & 0xf;
}
- get_paca()->stab_rr = (castout_entry + 1) & 0xf;
+ __get_cpu_var(stab_rr) = (castout_entry + 1) & 0xf;
/* Modify the old entry to the new value. */
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson