[PATCH] powerpc/64: implement a slice mask cache
Nicholas Piggin
npiggin at gmail.com
Fri Jul 22 22:57:28 AEST 2016
Calculating the slice mask can become a significant overhead for
get_unmapped_area. The mask is relatively small and does not change
frequently, so we can cache it in the mm context.
This saves about 30% kernel time on a 4K user address allocation
in a microbenchmark.
Comments on the approach taken? I think there is also the option for
fixed allocations to avoid some of the slice calculation entirely, but
first it would be good to have a general speedup that covers all
mmaps.
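
For reference, the type being cached is the existing struct slice_mask
from arch/powerpc/mm/slice.c, roughly the following (assuming the
current layout of 16 x 256MB low slices and 64 x 1TB high slices):

	/* Sketch of the existing mask layout (not part of this patch). */
	struct slice_mask {
		u16 low_slices;		/* one bit per 256MB slice below 4GB */
		u64 high_slices;	/* one bit per 1TB slice above 4GB */
	};

So each cached mask is only a few bytes, and slice_mask_for_size()
becomes a simple structure copy out of the mm context rather than a
walk over the low_slices_psize/high_slices_psize arrays.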
Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Cc: Anton Blanchard <anton at samba.org>
---
arch/powerpc/include/asm/book3s/64/mmu.h | 8 +++++++
arch/powerpc/mm/slice.c | 39 ++++++++++++++++++++++++++++++--
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5854263..0d15af4 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -71,6 +71,14 @@ typedef struct {
#ifdef CONFIG_PPC_MM_SLICES
u64 low_slices_psize; /* SLB page size encodings */
unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ struct slice_mask mask_4k;
+# ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+# endif
+# ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+# endif
#else
u16 sllp; /* SLB page size encoding */
#endif
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 2b27458..559ea5f 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -147,7 +147,7 @@ static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
return ret;
}
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+static struct slice_mask calc_slice_mask_for_size(struct mm_struct *mm, int psize)
{
unsigned char *hpsizes;
int index, mask_index;
@@ -171,6 +171,36 @@ static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
return ret;
}
+static void recalc_slice_mask_cache(struct mm_struct *mm)
+{
+ mm->context.mask_4k = calc_slice_mask_for_size(mm, MMU_PAGE_4K);
+#ifdef CONFIG_PPC_64K_PAGES
+ mm->context.mask_64k = calc_slice_mask_for_size(mm, MMU_PAGE_64K);
+#endif
+# ifdef CONFIG_HUGETLB_PAGE
+ /* Radix does not come here */
+ mm->context.mask_16m = calc_slice_mask_for_size(mm, MMU_PAGE_16M);
+ mm->context.mask_16g = calc_slice_mask_for_size(mm, MMU_PAGE_16G);
+# endif
+}
+
+static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
+{
+ if (psize == MMU_PAGE_4K)
+ return mm->context.mask_4k;
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return mm->context.mask_64k;
+#endif
+# ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return mm->context.mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return mm->context.mask_16g;
+# endif
+ BUG();
+}
+
static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
{
return (mask.low_slices & available.low_slices) == mask.low_slices &&
@@ -233,6 +263,8 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
spin_unlock_irqrestore(&slice_convert_lock, flags);
+ recalc_slice_mask_cache(mm);
+
copro_flush_all_slbs(mm);
}
@@ -625,7 +657,7 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
goto bail;
mm->context.user_psize = psize;
- wmb();
+ wmb(); /* Why? */
lpsizes = mm->context.low_slices_psize;
for (i = 0; i < SLICE_NUM_LOW; i++)
@@ -652,6 +684,9 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
mm->context.low_slices_psize,
mm->context.high_slices_psize);
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+ recalc_slice_mask_cache(mm);
+ return;
bail:
spin_unlock_irqrestore(&slice_convert_lock, flags);
}
--
2.8.1