[RFC PATCH V1 03/33] powerpc/mm: Switch book3s 64 with 64K page size to 4 level page table

Balbir Singh bsingharora at gmail.com
Wed Jan 13 19:52:10 AEDT 2016


On Tue, 12 Jan 2016 12:45:38 +0530
"Aneesh Kumar K.V" <aneesh.kumar at linux.vnet.ibm.com> wrote:

> This is needed so that we can support both hash and radix page table
> using single kernel. Radix kernel uses a 4 level table.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
> ---
>  arch/powerpc/Kconfig                          |  1 +
>  arch/powerpc/include/asm/book3s/64/hash-4k.h  | 33 +--------------------------
>  arch/powerpc/include/asm/book3s/64/hash-64k.h | 20 +++++++++-------
>  arch/powerpc/include/asm/book3s/64/hash.h     |  8 +++++++
>  arch/powerpc/include/asm/book3s/64/pgtable.h  | 25 +++++++++++++++++++-
>  arch/powerpc/include/asm/pgalloc-64.h         | 24 ++++++++++++++++---
>  arch/powerpc/include/asm/pgtable-types.h      | 13 +++++++----
>  arch/powerpc/mm/init_64.c                     | 21 ++++++++++++-----
>  8 files changed, 90 insertions(+), 55 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 378f1127ca98..618afea4c9fc 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -303,6 +303,7 @@ config ZONE_DMA32

snip
> -
>  #define PTE_INDEX_SIZE  8
> -#define PMD_INDEX_SIZE  10
> -#define PUD_INDEX_SIZE	0
> +#define PMD_INDEX_SIZE  5
> +#define PUD_INDEX_SIZE	5
>  #define PGD_INDEX_SIZE  12
>  

OK, so the 10-bit PMD index is now split into 5 bits each for the PMD
and PUD? What is the plan for huge pages? I saw you mentioned it was a
TODO.

>  #define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
>  #define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
> +#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
>  #define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
>  
>  /* With 4k base page size, hugepage PTEs go at the PMD level */
> @@ -20,8 +19,13 @@
>  #define PMD_SIZE	(1UL << PMD_SHIFT)
>  #define PMD_MASK	(~(PMD_SIZE-1))
>  
> +/* PUD_SHIFT determines what a third-level page table entry can map
> */ +#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
> +#define PUD_SIZE	(1UL << PUD_SHIFT)
> +#define PUD_MASK	(~(PUD_SIZE-1))
> +
>  /* PGDIR_SHIFT determines what a third-level page table entry can
> map */ -#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
> +#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
>  #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
>  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
>  
> @@ -61,6 +65,8 @@
>  #define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
>  /* Bits to mask out from a PGD/PUD to get to the PMD page */

The comment looks like it applies to the PMD and not the PUD.
>  #define PUD_MASKED_BITS		0x1ff

Given that PUD is now 5 bits, this should be 0x1f?

> +/* FIXME!! check this */
> +#define PGD_MASKED_BITS		0
>  

PGD_MASKED_BITS is 0? Shouldn't it be 0xfe?

>  #ifndef __ASSEMBLY__
>  
> @@ -130,11 +136,9 @@ extern bool __rpte_sub_valid(real_pte_t rpte,
> unsigned long index); #else
>  #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
>  #endif
> +#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
>  #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
>  
> -#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
> -#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
> -
>  #ifdef CONFIG_HUGETLB_PAGE
>  /*
>   * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can
> have diff --git a/arch/powerpc/include/asm/book3s/64/hash.h
> b/arch/powerpc/include/asm/book3s/64/hash.h index
> f46974d0134a..9ff1e056acef 100644 ---
> a/arch/powerpc/include/asm/book3s/64/hash.h +++
> b/arch/powerpc/include/asm/book3s/64/hash.h @@ -226,6 +226,7 @@
>  #define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
>  
>  #define pgd_index(address) (((address) >> (PGDIR_SHIFT)) &
> (PTRS_PER_PGD - 1)) +#define pud_index(address) (((address) >>
> (PUD_SHIFT)) & (PTRS_PER_PUD - 1)) #define pmd_index(address)
> (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1)) #define
> pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1)) 
> @@ -354,8 +355,15 @@ static inline void __ptep_set_access_flags(pte_t
> *ptep, pte_t entry) :"cc");
>  }
>  
> +static inline int pgd_bad(pgd_t pgd)
> +{
> +	return (pgd_val(pgd) == 0);
> +}
> +
>  #define __HAVE_ARCH_PTE_SAME
>  #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) &
> ~_PAGE_HPTEFLAGS) == 0) +#define pgd_page_vaddr(pgd)
> (pgd_val(pgd) & ~PGD_MASKED_BITS) +
>  
>  /* Generic accessors to PTE bits */
>  static inline int pte_write(pte_t pte)
> { return !!(pte_val(pte) & _PAGE_RW);} diff --git
> a/arch/powerpc/include/asm/book3s/64/pgtable.h
> b/arch/powerpc/include/asm/book3s/64/pgtable.h index
> e7162dba987e..8f639401c7ba 100644 ---
> a/arch/powerpc/include/asm/book3s/64/pgtable.h +++
> b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -111,6 +111,26 @@
> static inline void pgd_set(pgd_t *pgdp, unsigned long val) *pgdp =
> __pgd(val); } 
> +static inline void pgd_clear(pgd_t *pgdp)
> +{
> +	*pgdp = __pgd(0);
> +}
> +
> +#define pgd_none(pgd)		(!pgd_val(pgd))
> +#define pgd_present(pgd)	(!pgd_none(pgd))
> +
> +static inline pte_t pgd_pte(pgd_t pgd)
> +{
> +	return __pte(pgd_val(pgd));
> +}
> +
> +static inline pgd_t pte_pgd(pte_t pte)
> +{
> +	return __pgd(pte_val(pte));
> +}
> +
> +extern struct page *pgd_page(pgd_t pgd);
> +
>  /*
>   * Find an entry in a page-table-directory.  We combine the address
> region
>   * (the high order N bits) and the pgd portion of the address.
> @@ -118,9 +138,10 @@ static inline void pgd_set(pgd_t *pgdp, unsigned
> long val) 
>  #define pgd_offset(mm, address)	 ((mm)->pgd +
> pgd_index(address)) 
> +#define pud_offset(pgdp, addr)	\
> +	(((pud_t *) pgd_page_vaddr(*(pgdp))) + pud_index(addr))
>  #define pmd_offset(pudp,addr) \
>  	(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
> -
>  #define pte_offset_kernel(dir,addr) \
>  	(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
>  
> @@ -135,6 +156,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned
> long val) pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__,
> pte_val(e)) #define pmd_ERROR(e) \
>  	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__,
> pmd_val(e)) +#define pud_ERROR(e) \
> +	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__,
> pud_val(e)) #define pgd_ERROR(e) \
>  	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__,
> pgd_val(e)) 
> diff --git a/arch/powerpc/include/asm/pgalloc-64.h
> b/arch/powerpc/include/asm/pgalloc-64.h index
> 69ef28a81733..014489a619d0 100644 ---
> a/arch/powerpc/include/asm/pgalloc-64.h +++
> b/arch/powerpc/include/asm/pgalloc-64.h @@ -171,7 +171,25 @@ extern
> void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int
> shift); extern void __tlb_remove_table(void *_table); #endif
>  
> -#define pud_populate(mm, pud, pmd)	pud_set(pud, (unsigned
> long)pmd) +#ifndef __PAGETABLE_PUD_FOLDED
> +/* book3s 64 is 4 level page table */
> +#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, PUD)
> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned
> long addr) +{
> +	return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
> +				GFP_KERNEL|__GFP_REPEAT);
> +}
> +
> +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
> +{
> +	kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
> +}
> +#endif
> +
> +static inline void pud_populate(struct mm_struct *mm, pud_t *pud,
> pmd_t *pmd) +{
> +	pud_set(pud, (unsigned long)pmd);
> +}
>  
>  static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t
> *pmd, pte_t *pte)
> @@ -233,11 +251,11 @@ static inline void pmd_free(struct mm_struct
> *mm, pmd_t *pmd) 
>  #define __pmd_free_tlb(tlb, pmd, addr)		      \
>  	pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
> -#ifndef CONFIG_PPC_64K_PAGES
> +#ifndef __PAGETABLE_PUD_FOLDED
>  #define __pud_free_tlb(tlb, pud, addr)		      \
>  	pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
>  
> -#endif /* CONFIG_PPC_64K_PAGES */
> +#endif /* __PAGETABLE_PUD_FOLDED */
>  
>  #define check_pgt_cache()	do { } while (0)
>  
> diff --git a/arch/powerpc/include/asm/pgtable-types.h
> b/arch/powerpc/include/asm/pgtable-types.h index
> 71487e1ca638..43140f8b0592 100644 ---
> a/arch/powerpc/include/asm/pgtable-types.h +++
> b/arch/powerpc/include/asm/pgtable-types.h @@ -21,15 +21,18 @@ static
> inline unsigned long pmd_val(pmd_t x) return x.pmd;
>  }
>  
> -/* PUD level exusts only on 4k pages */
> -#ifndef CONFIG_PPC_64K_PAGES
> +/*
> + * 64 bit hash always use 4 level table. Everybody else use 4 level
> + * only for 4K page size.
> + */
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
>  typedef struct { unsigned long pud; } pud_t;

>  #define __pud(x)	((pud_t) { (x) })
>  static inline unsigned long pud_val(pud_t x)
>  {
>  	return x.pud;
>  }
> -#endif /* !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
>  #endif /* CONFIG_PPC64 */
>  
>  /* PGD level */
> @@ -66,14 +69,14 @@ static inline unsigned long pmd_val(pmd_t pmd)
>  	return pmd;
>  }
>  
> -#ifndef CONFIG_PPC_64K_PAGES
> +#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
>  typedef unsigned long pud_t;



>  #define __pud(x)	(x)
>  static inline unsigned long pud_val(pud_t pud)
>  {
>  	return pud;
>  }
> -#endif /* !CONFIG_PPC_64K_PAGES */
> +#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
>  #endif /* CONFIG_PPC64 */
>  
>  typedef unsigned long pgd_t;
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index 379a6a90644b..8ce1ec24d573 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -85,6 +85,11 @@ static void pgd_ctor(void *addr)
>  	memset(addr, 0, PGD_TABLE_SIZE);
>  }
>  
> +static void pud_ctor(void *addr)
> +{
> +	memset(addr, 0, PUD_TABLE_SIZE);
> +}
> +
>  static void pmd_ctor(void *addr)
>  {
>  	memset(addr, 0, PMD_TABLE_SIZE);
> @@ -138,14 +143,18 @@ void pgtable_cache_init(void)
>  {
>  	pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
>  	pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
> +	/*
> +	 * In all current configs, when the PUD index exists it's the
> +	 * same size as either the pgd or pmd index except with THP
> enabled
> +	 * on book3s 64
> +	 */
> +	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
> +		pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
> +
>  	if (!PGT_CACHE(PGD_INDEX_SIZE)
> || !PGT_CACHE(PMD_CACHE_INDEX)) panic("Couldn't allocate pgtable
> caches");
> -	/* In all current configs, when the PUD index exists it's the
> -	 * same size as either the pgd or pmd index.  Verify that the
> -	 * initialization above has also created a PUD cache.  This
> -	 * will need re-examiniation if we add new possibilities for
> -	 * the pagetable layout. */
> -	BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
> +	if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
> +		panic("Couldn't allocate pud pgtable caches");
>  }
>  
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP



More information about the Linuxppc-dev mailing list