[PATCH v3 5/6] powerpc/mm: Reduce memory usage for mm_context_t for radix

Aneesh Kumar K.V aneesh.kumar at linux.ibm.com
Wed Apr 17 23:03:50 AEST 2019


Currently, our mm_context_t on book3s64 includes all hash-specific
context details, such as the slice masks and subpage protection details.
With radix translation we can skip allocating these, which saves about
8K per mm_context.

With the patch applied we have

sizeof(mm_context_t)  = 136
sizeof(struct hash_mm_context)  = 8288

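The pattern is the usual "rarely used state behind a pointer" split. As a
rough standalone sketch of what that buys us (illustrative userspace code
only, not the kernel definitions; the demo_* names and the 8K array are
made-up stand-ins for the real hash-specific state):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* Stand-in for the ~8K of hash-only state (slice masks, subpage prot, ...) */
	struct demo_hash_context {
		char hash_only_state[8192];
	};

	/* Stand-in for mm_context_t: always embedded in the mm */
	struct demo_mm_context {
		unsigned long id;
		struct demo_hash_context *hash_context;	/* stays NULL with radix */
	};

	static int demo_init_context(struct demo_mm_context *ctx, int use_hash)
	{
		ctx->hash_context = NULL;
		if (use_hash) {
			ctx->hash_context = malloc(sizeof(*ctx->hash_context));
			if (!ctx->hash_context)
				return -1;
			memset(ctx->hash_context, 0, sizeof(*ctx->hash_context));
		}
		return 0;
	}

	int main(void)
	{
		/* Every mm pays only for the small embedded part ... */
		printf("embedded context: %zu bytes\n", sizeof(struct demo_mm_context));
		/* ... and only hash translation pays for the big part. */
		printf("hash-only part:   %zu bytes\n", sizeof(struct demo_hash_context));
		return 0;
	}

With radix the pointer simply stays NULL, so each mm carries only the
small always-present context.
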
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 33 ++++++++++++-
 arch/powerpc/include/asm/book3s/64/mmu.h      | 49 +++++--------------
 arch/powerpc/kernel/setup-common.c            |  6 +++
 arch/powerpc/mm/hash_utils_64.c               |  4 +-
 arch/powerpc/mm/mmu_context_book3s64.c        | 16 +++++-
 5 files changed, 68 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index eb36fbfe4ef5..4481bedbb5be 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -658,7 +658,7 @@ extern void slb_set_size(u16 size);
 /* 4 bits per slice and we have one slice per 1TB */
 #define SLICE_ARRAY_SIZE	(H_PGTABLE_RANGE >> 41)
 #define LOW_SLICE_ARRAY_SZ	(BITS_PER_LONG / BITS_PER_BYTE)
-#define TASK_SLICE_ARRAY_SZ(x)	((x)->slb_addr_limit >> 41)
+#define TASK_SLICE_ARRAY_SZ(x)	((x)->hash_context->slb_addr_limit >> 41)
 #ifndef __ASSEMBLY__
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -693,6 +693,37 @@ static inline void subpage_prot_free(struct mm_struct *mm) {}
 static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
 #endif /* CONFIG_PPC_SUBPAGE_PROT */
 
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+	u64 low_slices;
+	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+	u16 user_psize; /* page size index */
+
+	/* SLB page size encodings*/
+	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+	unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+	struct slice_mask mask_64k;
+#endif
+	struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+	struct slice_mask mask_16m;
+	struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+	struct subpage_prot_table spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
 #if 0
 /*
  * The code below is equivalent to this function for arguments
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 28213a36fef7..3bc94baf5ed5 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -89,16 +89,6 @@ struct spinlock;
 /* Maximum possible number of NPUs in a system. */
 #define NV_MAX_NPUS 8
 
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
-	u64 low_slices;
-	DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
 typedef struct {
 	union {
 		/*
@@ -112,7 +102,6 @@ typedef struct {
 		mm_context_id_t id;
 		mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
 	};
-	u16 user_psize;		/* page size index */
 
 	/* Number of bits in the mm_cpumask */
 	atomic_t active_cpus;
@@ -122,23 +111,9 @@ typedef struct {
 
 	/* NPU NMMU context */
 	struct npu_context *npu_context;
+	struct hash_mm_context *hash_context;
 
-	 /* SLB page size encodings*/
-	unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
-	unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-	unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
-	struct slice_mask mask_64k;
-# endif
-	struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
-	struct slice_mask mask_16m;
-	struct slice_mask mask_16g;
-# endif
 	unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-	struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
 	/*
 	 * pagetable fragment support
 	 */
@@ -161,62 +136,62 @@ typedef struct {
 
 static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
 {
-	return ctx->user_psize;
+	return ctx->hash_context->user_psize;
 }
 
 static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
 {
-	ctx->user_psize = user_psize;
+	ctx->hash_context->user_psize = user_psize;
 }
 
 static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
 {
-	return ctx->low_slices_psize;
+	return ctx->hash_context->low_slices_psize;
 }
 
 static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
 {
-	return ctx->high_slices_psize;
+	return ctx->hash_context->high_slices_psize;
 }
 
 static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
 {
-	return ctx->slb_addr_limit;
+	return ctx->hash_context->slb_addr_limit;
 }
 
 static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
 {
-	ctx->slb_addr_limit = limit;
+	ctx->hash_context->slb_addr_limit = limit;
 }
 
 #ifdef CONFIG_PPC_64K_PAGES
 static inline struct slice_mask *mm_ctx_slice_mask_64k(mm_context_t *ctx)
 {
-	return &ctx->mask_64k;
+	return &ctx->hash_context->mask_64k;
 }
 #endif
 
 static inline struct slice_mask *mm_ctx_slice_mask_4k(mm_context_t *ctx)
 {
-	return &ctx->mask_4k;
+	return &ctx->hash_context->mask_4k;
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
 static inline struct slice_mask *mm_ctx_slice_mask_16m(mm_context_t *ctx)
 {
-	return &ctx->mask_16m;
+	return &ctx->hash_context->mask_16m;
 }
 
 static inline struct slice_mask *mm_ctx_slice_mask_16g(mm_context_t *ctx)
 {
-	return &ctx->mask_16g;
+	return &ctx->hash_context->mask_16g;
 }
 #endif
 
 #ifdef CONFIG_PPC_SUBPAGE_PROT
 static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
 {
-	return &ctx->spt;
+	return &ctx->hash_context->spt;
 }
 #endif
 
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index a07de8608484..21b1ce200b22 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -947,6 +947,12 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.end_data = (unsigned long) _edata;
 	init_mm.brk = klimit;
 
+#ifdef CONFIG_PPC_MM_SLICES
+#if defined(CONFIG_PPC_8xx)
+	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+#endif
+#endif
+
 #ifdef CONFIG_SPAPR_TCE_IOMMU
 	mm_iommu_init(&init_mm);
 #endif
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2cb3a456f5b5..04ac7c36d380 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -968,6 +968,7 @@ void __init hash__early_init_devtree(void)
 	htab_scan_page_sizes();
 }
 
+struct hash_mm_context init_hash_mm_context;
 void __init hash__early_init_mmu(void)
 {
 #ifndef CONFIG_PPC_64K_PAGES
@@ -1041,7 +1042,8 @@ void __init hash__early_init_mmu(void)
 	 */
 	htab_initialize();
 
-	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
+	init_mm.context.hash_context = &init_hash_mm_context;
+	init_mm.context.hash_context->slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
 
 	pr_info("Initializing hash mmu with SLB\n");
 	/* Initialize SLB management */
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index f720c5cc0b5e..6eef5a36b2e9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -63,6 +63,12 @@ static int hash__init_new_context(struct mm_struct *mm)
 	if (index < 0)
 		return index;
 
+	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context), GFP_KERNEL);
+	if (!mm->context.hash_context) {
+		ida_free(&mmu_context_ida, index);
+		return -ENOMEM;
+	}
+
 	/*
 	 * The old code would re-promote on fork, we don't do that when using
 	 * slices as it could cause problem promoting slices that have been
@@ -77,8 +83,14 @@ static int hash__init_new_context(struct mm_struct *mm)
 	 * We should not be calling init_new_context() on init_mm. Hence a
 	 * check against 0 is OK.
 	 */
-	if (mm->context.id == 0)
+	if (mm->context.id == 0) {
+		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
 		slice_init_new_context_exec(mm);
+	} else {
+		/* This is fork. Copy hash_context details from current->mm */
+		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+
+	}
 
 	subpage_prot_init_new_context(mm);
 
@@ -118,6 +130,7 @@ static int radix__init_new_context(struct mm_struct *mm)
 	asm volatile("ptesync;isync" : : : "memory");
 
 	mm->context.npu_context = NULL;
+	mm->context.hash_context = NULL;
 
 	return index;
 }
@@ -162,6 +175,7 @@ static void destroy_contexts(mm_context_t *ctx)
 		if (context_id)
 			ida_free(&mmu_context_ida, context_id);
 	}
+	kfree(ctx->hash_context);
 }
 
 static void pmd_frag_destroy(void *pmd_frag)
-- 
2.20.1


