RFC: Use bitmaps to track free/user SLB slots

David Gibson <david@gibson.dropbear.id.au>
Thu Dec 22 17:42:35 EST 2005


This needs way more testing and thought before being considered for
merging, but here it is in case people are interested.  It implements
a new, possibly superior approach to managing SLB entries.

Currently, when we take an SLB miss, we just use round robin to pick a
slot for the new entry; the slot selected may or may not already
contain a useful translation.  When we take an SLB miss on a user
address, we record its ESID in a cache of up to 16 entries.  On
context switch, if the cache hasn't overflowed, we use it to flush
just the user entries, rather than flushing the whole SLB.
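In rough C, the current path looks something like this (a simplified
sketch for illustration only, not the literal kernel code: the real
slot selection lives in assembly in slb_low.S and the flush in
switch_slb(), and the function names here are made up):

	/* SLB miss: pick the next victim slot round-robin, never
	 * touching the bolted slots. */
	static unsigned long pick_slot_round_robin(void)
	{
		unsigned long entry = get_paca()->stab_rr + 1;

		if (entry >= SLB_NUM_ENTRIES)
			entry = SLB_NUM_BOLTED;	/* wrap past the bolted slots */
		get_paca()->stab_rr = entry;
		return entry;
	}

	/* Context switch: if the cache didn't overflow, slbie just the
	 * cached user ESIDs, otherwise flush and rebolt the whole SLB. */
	static void flush_user_entries(void)
	{
		unsigned long offset = get_paca()->slb_cache_ptr;
		int i;

		if (offset <= SLB_CACHE_ENTRIES) {
			for (i = 0; i < offset; i++) {
				unsigned long esid_data =
					((unsigned long)get_paca()->slb_cache[i]
					 << SID_SHIFT) | SLBIE_C;
				asm volatile("slbie %0" : : "r" (esid_data));
			}
		} else {
			slb_flush_and_rebolt();
		}
		get_paca()->slb_cache_ptr = 0;
	}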

With this patch, instead of maintaining the cache and round-robin
pointer, we keep a bitmap of free SLB slots and a bitmap of SLB slots
containing user entries.  When we take an SLB miss, we find a free
slot from the bitmap (using cntlzd) rather than using round robin.
We fall back to round robin once all the free slots have been used
(though we do this by manipulating the bitmap itself, avoiding the
need for a separate round-robin counter).  The SLB miss handler
clears the relevant bit in the free-slots bitmap and updates the
relevant bit in the user-slots bitmap.  On context switch, we use the
user-slots bitmap to flush just those slots containing user entries,
and those slots are then returned to the free-slots bitmap.  The
idea, obviously, is to try to reduce the number of SLB misses by
making better use of free SLB slots.
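In the same rough C, the new path works something like this (again
just an illustrative sketch of what the assembly in slb_low.S and the
new slb_flush_user_slots() do, using the field names the patch adds;
__builtin_clzll stands in for cntlzd, and the C function names are
made up):

	/* SLB miss: take the first free slot; once none are left, keep
	 * exactly one bit set in the free bitmap, pointing at the next
	 * victim, which gives us round robin without a counter. */
	static unsigned long pick_slot_bitmap(int user)
	{
		u64 free = get_paca()->slb_free_bitmap;	/* assumed non-empty */
		u64 user_map = get_paca()->slb_user_bitmap;
		unsigned long slot = __builtin_clzll(free);
		u64 bit = 1ULL << (63 - slot);

		free &= ~bit;
		user_map &= ~bit;
		if (!free) {
			free = bit >> 1;	/* next slot becomes the victim */
			if (!free)		/* wrapped off the end */
				free = 1ULL << (63 - SLB_NUM_BOLTED);
		}
		if (user)
			user_map |= bit;

		get_paca()->slb_free_bitmap = free;
		get_paca()->slb_user_bitmap = user_map;
		return slot;
	}

	/* Context switch: invalidate exactly the slots marked as user
	 * entries, then hand them back to the free bitmap. */
	static void flush_user_slots_c(u64 slots)
	{
		u64 left = slots;

		while (left) {
			unsigned long slot = __builtin_clzll(left);
			unsigned long entry;

			/* slbmfee reads the ESID back from the SLB; its V bit
			 * doubles as the class bit for slbie, since user SLBEs
			 * have the class bit set. */
			asm volatile("slbmfee %0,%1" : "=r" (entry) : "r" (slot));
			asm volatile("slbie %0" : : "r" (entry));
			left &= ~(1ULL << (63 - slot));
		}

		get_paca()->slb_free_bitmap |= slots;	/* slots are free again */
		get_paca()->slb_user_bitmap = 0;
	}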

My preliminary tests (on a POWER5 LPAR) suggest that this has
essentially no effect (delta < 1ns) on the time for a user SLB miss
(the cost of the bitmap manipulation is about the same as that of
maintaining the old slb cache).  Time for kernel SLB misses is
probably slightly increased; not measured, but I expect the delta to
be under ~5ns.  Context switch time may also be increased slightly;
again not measured yet, but I expect it to be under 0.5us at most and
quite likely negligible compared to the rest of a context switch.
I've no idea yet what the impact on SLB miss rates might be for
various workloads.

Index: working-2.6/arch/powerpc/mm/slb_low.S
===================================================================
--- working-2.6.orig/arch/powerpc/mm/slb_low.S	2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/slb_low.S	2005-12-22 16:55:05.000000000 +1100
@@ -192,17 +192,8 @@ slb_finish_load:
 	beq	3f
 #endif /* CONFIG_PPC_ISERIES */
 
-	ld	r10,PACASTABRR(r13)
-	addi	r10,r10,1
-	/* use a cpu feature mask if we ever change our slb size */
-	cmpldi	r10,SLB_NUM_ENTRIES
-
-	blt+	4f
-	li	r10,SLB_NUM_BOLTED
-
-4:
-	std	r10,PACASTABRR(r13)
-
+	ld	r9,PACASLBFREEBITMAP(r13)
+	cntlzd	r10,r9
 3:
 	rldimi	r3,r10,0,36		/* r3= EA[0:35] | entry */
 	oris	r10,r3,SLB_ESID_V@h	/* r3 |= SLB_ESID_V */
@@ -215,26 +206,46 @@ slb_finish_load:
 	 */
 	slbmte	r11,r10
 
-	/* we're done for kernel addresses */
-	crclr	4*cr0+eq		/* set result to "success" */
-	bgelr	cr7
+	ld	r3,PACASLBUSERBITMAP(r13)
+
+	li	r11,1
+	sldi	r11,r11,63		/* r11 = 0x8000000000000000 */
+	srd	r11,r11,r10
+
+	andc.	r9,r9,r11
+	andc	r3,r3,r11
+	bne	7f
 
-	/* Update the slb cache */
-	lhz	r3,PACASLBCACHEPTR(r13)	/* offset = paca->slb_cache_ptr */
-	cmpldi	r3,SLB_CACHE_ENTRIES
-	bge	1f
-
-	/* still room in the slb cache */
-	sldi	r11,r3,1		/* r11 = offset * sizeof(u16) */
-	rldicl	r10,r10,36,28		/* get low 16 bits of the ESID */
-	add	r11,r11,r13		/* r11 = (u16 *)paca + offset */
-	sth	r10,PACASLBCACHE(r11)	/* paca->slb_cache[offset] = esid */
-	addi	r3,r3,1			/* offset++ */
-	b	2f
-1:					/* offset >= SLB_CACHE_ENTRIES */
-	li	r3,SLB_CACHE_ENTRIES+1
-2:
-	sth	r3,PACASLBCACHEPTR(r13)	/* paca->slb_cache_ptr = offset */
+	srdi.	r9,r11,1
+	bne	7f
+
+	li	r9,1
+	rotrdi	r9,r9,SLB_NUM_BOLTED+1
+
+7:	bge	cr7,6f
+	or	r3,r3,r11
+
+6:	std	r9,PACASLBFREEBITMAP(r13)
+	std	r3,PACASLBUSERBITMAP(r13)
 	crclr	4*cr0+eq		/* set result to "success" */
 	blr
 
+/* void slb_flush_user_slots(u64 slots) */
+_GLOBAL(slb_flush_user_slots)
+	li	r6,-1
+	srdi	r6,r6,1			/* r6 = 0x7fffffffffffffff */
+
+1:
+	cmpldi	r3,0
+	beqlr				/* Nothing left, we're done */
+
+	cntlzd	r4,r3
+	slbmfee	r5,r4
+	/* V bit from slbmfee becomes class bit for slbie, since user
+	 * SLBEs have the class bit set */
+	slbie	r5
+
+	srd	r7,r6,r4		/* r7 = bits we still care about */
+	and	r3,r3,r7
+
+	b	1b
Index: working-2.6/include/asm-powerpc/paca.h
===================================================================
--- working-2.6.orig/include/asm-powerpc/paca.h	2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/include/asm-powerpc/paca.h	2005-12-22 16:56:33.000000000 +1100
@@ -83,15 +83,14 @@ struct paca_struct {
 #endif /* CONFIG_PPC_64K_PAGES */
 
 	mm_context_t context;
-	u16 slb_cache[SLB_CACHE_ENTRIES];
-	u16 slb_cache_ptr;
+	u64 slb_free_bitmap;
+	u64 slb_user_bitmap;
 
 	/*
 	 * then miscellaneous read-write fields
 	 */
 	struct task_struct *__current;	/* Pointer to current */
 	u64 kstack;			/* Saved Kernel stack addr */
-	u64 stab_rr;			/* stab/slb round-robin counter */
 	u64 saved_r1;			/* r1 save for RTAS calls */
 	u64 saved_msr;			/* MSR saved here by enter_rtas */
 	u8 proc_enabled;		/* irq soft-enable flag */
Index: working-2.6/arch/powerpc/kernel/asm-offsets.c
===================================================================
--- working-2.6.orig/arch/powerpc/kernel/asm-offsets.c	2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/arch/powerpc/kernel/asm-offsets.c	2005-12-22 16:56:39.000000000 +1100
@@ -117,12 +117,11 @@ int main(void)
 	DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
 	DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real));
 	DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr));
-	DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
 	DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
 	DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
 	DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled));
-	DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
-	DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
+	DEFINE(PACASLBFREEBITMAP, offsetof(struct paca_struct, slb_free_bitmap));
+	DEFINE(PACASLBUSERBITMAP, offsetof(struct paca_struct, slb_user_bitmap));
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_64K_PAGES
 	DEFINE(PACAPGDIR, offsetof(struct paca_struct, pgdir));
Index: working-2.6/arch/powerpc/mm/slb.c
===================================================================
--- working-2.6.orig/arch/powerpc/mm/slb.c	2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/slb.c	2005-12-22 16:57:10.000000000 +1100
@@ -32,6 +32,7 @@
 
 extern void slb_allocate_realmode(unsigned long ea);
 extern void slb_allocate_user(unsigned long ea);
+extern void slb_flush_user_slots(u64 slots);
 
 static void slb_allocate(unsigned long ea)
 {
@@ -92,35 +93,27 @@ static void slb_flush_and_rebolt(void)
 		        "r"(mk_vsid_data(ksp_esid_data, lflags)),
 		        "r"(ksp_esid_data)
 		     : "memory");
+
+	get_paca()->slb_user_bitmap = 0;
 }
 
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-	unsigned long offset = get_paca()->slb_cache_ptr;
-	unsigned long esid_data = 0;
+	u64 slots = get_paca()->slb_user_bitmap;
 	unsigned long pc = KSTK_EIP(tsk);
 	unsigned long stack = KSTK_ESP(tsk);
 	unsigned long unmapped_base;
 
-	if (offset <= SLB_CACHE_ENTRIES) {
-		int i;
-		asm volatile("isync" : : : "memory");
-		for (i = 0; i < offset; i++) {
-			esid_data = ((unsigned long)get_paca()->slb_cache[i]
-				<< SID_SHIFT) | SLBIE_C;
-			asm volatile("slbie %0" : : "r" (esid_data));
-		}
-		asm volatile("isync" : : : "memory");
-	} else {
-		slb_flush_and_rebolt();
-	}
+	slb_flush_user_slots(slots);
 
 	/* Workaround POWER5 < DD2.1 issue */
-	if (offset == 1 || offset > SLB_CACHE_ENTRIES)
-		asm volatile("slbie %0" : : "r" (esid_data));
+	if (slots) {
+		asm volatile("slbie %0" : : "r" (0xa000000000000000));
+		get_paca()->slb_free_bitmap |= slots;
+	}
 
-	get_paca()->slb_cache_ptr = 0;
+	get_paca()->slb_user_bitmap = 0;
 	get_paca()->context = mm->context;
 #ifdef CONFIG_PPC_64K_PAGES
 	get_paca()->pgdir = mm->pgd;
@@ -225,6 +218,4 @@ void slb_initialize(void)
 	asm volatile("isync":::"memory");
  }
 #endif /* CONFIG_PPC_ISERIES */
-
-	get_paca()->stab_rr = SLB_NUM_BOLTED;
 }
Index: working-2.6/arch/powerpc/kernel/paca.c
===================================================================
--- working-2.6.orig/arch/powerpc/kernel/paca.c	2005-12-22 16:30:32.000000000 +1100
+++ working-2.6/arch/powerpc/kernel/paca.c	2005-12-22 16:30:32.000000000 +1100
@@ -64,7 +64,8 @@ struct lppaca lppaca[] = {
 	.stab_real = (asrr), 		/* Real pointer to segment table */ \
 	.stab_addr = (asrv),		/* Virt pointer to segment table */ \
 	.cpu_start = (start),		/* Processor start */		    \
-	.hw_cpu_id = 0xffff,
+	.hw_cpu_id = 0xffff,						    \
+	.slb_free_bitmap = (-1UL >> SLB_NUM_BOLTED),
 
 #ifdef CONFIG_PPC_ISERIES
 #define PACA_INIT_ISERIES(number)					    \
Index: working-2.6/arch/powerpc/mm/stab.c
===================================================================
--- working-2.6.orig/arch/powerpc/mm/stab.c	2005-12-19 14:18:24.000000000 +1100
+++ working-2.6/arch/powerpc/mm/stab.c	2005-12-22 16:58:39.000000000 +1100
@@ -28,6 +28,7 @@ struct stab_entry {
 };
 
 #define NR_STAB_CACHE_ENTRIES 8
+DEFINE_PER_CPU(unsigned long, stab_rr);
 DEFINE_PER_CPU(long, stab_cache_ptr);
 DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]);
 
@@ -70,7 +71,7 @@ static int make_ste(unsigned long stab, 
 	 * Could not find empty entry, pick one with a round robin selection.
 	 * Search all entries in the two groups.
 	 */
-	castout_entry = get_paca()->stab_rr;
+	castout_entry = __get_cpu_var(stab_rr);
 	for (i = 0; i < 16; i++) {
 		if (castout_entry < 8) {
 			global_entry = (esid & 0x1f) << 3;
@@ -89,7 +90,7 @@ static int make_ste(unsigned long stab, 
 		castout_entry = (castout_entry + 1) & 0xf;
 	}
 
-	get_paca()->stab_rr = (castout_entry + 1) & 0xf;
+	__get_cpu_var(stab_rr) = (castout_entry + 1) & 0xf;
 
 	/* Modify the old entry to the new value. */
 


-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson