[PATCH V6 20/35] powerpc/mm: Don't track subpage valid bit in pte_t
Anshuman Khandual
khandual at linux.vnet.ibm.com
Thu Dec 3 18:13:06 AEDT 2015
On 12/01/2015 09:06 AM, Aneesh Kumar K.V wrote:
> This frees up 11 bits in pte_t. In a later patch we also change
> the pte_t format so that we can start supporting migration ptes
> at the pmd level. We now track the 4K subpage valid bits as below.
>
> If we have _PAGE_COMBO set, we override _PAGE_F_GIX
> and _PAGE_F_SECOND. Together we have 4 bits, each of them
> used to indicate whether any of the four 4K subpages in that group
> is valid. i.e.,
>
> [ group 1 bit ]  [ group 2 bit ]  .....  [ group 4 bit ]
> [ subpage 1 - 4] [ subpage 5 - 8]  .....  [ subpage 13 - 16]
>
> We still track each 4K subpage slot number and secondary hash
> information in the second half of pgtable_t. Removing that subpage
> tracking has some significant overhead on the aim9 and ebizzy benchmarks,
> and to support THP with 4K subpages we do need a pgtable_t of 4096 bytes.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
We removed 16 bits which used to track individual subpages and added
back 4 bits to track the newly created subpage groups, so we save 12 bits
there. How is it 11 bits? Have we added back one more bit into pte_t
somewhere for subpage purposes that I am missing?
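
For my own bookkeeping, this is how the software bits seem to line up
before and after the patch, using only the mask values visible in the
hunks below (a back-of-the-envelope sketch, so please correct me if I am
misreading it):

    old 64K layout (hash-64k.h):
        _PAGE_HPTE_SUB   0x0ffff000   (16 bits, one valid bit per 4K subpage)
        _PAGE_COMBO      0x10000000
        _PAGE_4K_PFN     0x20000000   <- highest software bit

    new layout (hash.h + hash-64k.h):
        _PAGE_F_GIX      0x00007000   (reused as 3 of the 4 group valid bits)
        _PAGE_F_SECOND   0x00008000   (reused as the 4th group valid bit)
        _PAGE_COMBO      0x00020000
        _PAGE_4K_PFN     0x00040000   <- highest software bit

If the changelog is counting how far the highest software bit moves down
(0x20000000 -> 0x00040000, i.e. 11 bit positions), that would explain the
11, but that is only a guess on my part.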
> ---
> arch/powerpc/include/asm/book3s/64/hash-4k.h | 10 +-------
> arch/powerpc/include/asm/book3s/64/hash-64k.h | 35 ++++++---------------------
> arch/powerpc/include/asm/book3s/64/hash.h | 10 ++++----
> arch/powerpc/mm/hash64_64k.c | 34 ++++++++++++++++++++++++--
> arch/powerpc/mm/hash_low_64.S | 6 +----
> arch/powerpc/mm/hugetlbpage-hash64.c | 5 +---
> arch/powerpc/mm/pgtable_64.c | 2 +-
> 7 files changed, 48 insertions(+), 54 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
> index 537eacecf6e9..75e8b9326e4b 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
> @@ -47,17 +47,9 @@
> /* Bits to mask out from a PGD to get to the PUD page */
> #define PGD_MASKED_BITS 0
>
> -/* PTE bits */
> -#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
> -#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
> -#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
> -#define _PAGE_F_SECOND _PAGE_SECONDARY
> -#define _PAGE_F_GIX _PAGE_GROUP_IX
> -#define _PAGE_SPECIAL 0x10000 /* software: special page */
> -
> /* PTE flags to conserve for HPTE identification */
> #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
> - _PAGE_SECONDARY | _PAGE_GROUP_IX)
> + _PAGE_F_SECOND | _PAGE_F_GIX)
>
> /* shift to put page number into pte */
> #define PTE_RPN_SHIFT (17)
> diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> index ee073822145d..a268416ca4a4 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
> @@ -31,33 +31,13 @@
> /* Bits to mask out from a PGD/PUD to get to the PMD page */
> #define PUD_MASKED_BITS 0x1ff
>
> -/* Additional PTE bits (don't change without checking asm in hash_low.S) */
> -#define _PAGE_SPECIAL 0x00000400 /* software: special page */
> -#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
> -#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
> -#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
> -#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */
> -
> -/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead,
> - * we set that to be the whole sub-bits mask. The C code will only
> - * test this, so a multi-bit mask will work. For combo pages, this
> - * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of
> - * all the sub bits. For real 64k pages, we now have the assembly set
> - * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap
> - * that mask. This is fine as long as the HIDX bits are never set on
> - * a PTE that isn't hashed, which is the case today.
> - *
> - * A little nit is for the huge page C code, which does the hashing
> - * in C, we need to provide which bit to use.
> - */
> -#define _PAGE_HASHPTE _PAGE_HPTE_SUB
> -
> -/* Note the full page bits must be in the same location as for normal
> - * 4k pages as the same assembly will be used to insert 64K pages
> - * whether the kernel has CONFIG_PPC_64K_PAGES or not
> +#define _PAGE_COMBO 0x00020000 /* this is a combo 4k page */
> +#define _PAGE_4K_PFN 0x00040000 /* PFN is for a single 4k page */
> +/*
> + * Used to track subpage group valid if _PAGE_COMBO is set
> + * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
> */
> -#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
> -#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
> +#define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND)
>
> /* PTE flags to conserve for HPTE identification */
> #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
> @@ -103,8 +83,7 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
> }
>
> #define __rpte_to_pte(r) ((r).pte)
> -#define __rpte_sub_valid(rpte, index) \
> - (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
> +extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
> /*
> * Trick: we set __end to va + 64k, which happens works for
> * a 16M page as well as we want only one iteration
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index 48237e66e823..2f2034621a69 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -81,7 +81,12 @@
> #define _PAGE_DIRTY 0x0080 /* C: page changed */
> #define _PAGE_ACCESSED 0x0100 /* R: page referenced */
> #define _PAGE_RW 0x0200 /* software: user write access allowed */
> +#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
> #define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
> +#define _PAGE_F_GIX 0x7000 /* full page: hidx bits */
> +#define _PAGE_F_GIX_SHIFT 12
> +#define _PAGE_F_SECOND 0x8000 /* Whether to use secondary hash or not */
> +#define _PAGE_SPECIAL 0x10000 /* software: special page */
>
> /* No separate kernel read-only */
> #define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
> @@ -210,11 +215,6 @@
>
> #define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
> #define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
> -/*
> - * We save the slot number & secondary bit in the second half of the
> - * PTE page. We use the 8 bytes per each pte entry.
> - */
> -#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
>
> #ifndef __ASSEMBLY__
> #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index 9ffeae2cbb57..f1b86ba63430 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -15,6 +15,35 @@
> #include <linux/mm.h>
> #include <asm/machdep.h>
> #include <asm/mmu.h>
> +/*
> + * index from 0 - 15
> + */
> +bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
> +{
> + unsigned long g_idx;
> + unsigned long ptev = pte_val(rpte.pte);
> +
> + g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT;
> + index = index >> 2;
> + if (g_idx & (0x1 << index))
> + return true;
> + else
> + return false;
> +}
This function checks the validity of the subpage group, not the individual
subpage index. Because we don't track individual subpage validity any
more, I wonder if that is even possible.
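
To double check that I am reading the mapping right, here is a minimal
standalone sketch of the index-to-group translation as the hunk above
implements it (mask values copied from this patch, everything else is
just illustration):

    #include <stdbool.h>
    #include <stdio.h>

    #define _PAGE_F_GIX_SHIFT  12
    #define _PAGE_F_GIX        0x7000UL
    #define _PAGE_F_SECOND     0x8000UL
    #define _PAGE_COMBO_VALID  (_PAGE_F_GIX | _PAGE_F_SECOND)

    /* Subpage index 0-15 collapses to group 0-3; only group validity is kept. */
    static bool subpage_group_valid(unsigned long ptev, unsigned long index)
    {
            unsigned long g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT;

            return g_idx & (1UL << (index >> 2));
    }

    int main(void)
    {
            /* Mark only group 2 (subpages 8-11) valid. */
            unsigned long ptev = (1UL << 2) << _PAGE_F_GIX_SHIFT;

            printf("subpage 9:  %d\n", subpage_group_valid(ptev, 9));   /* 1 */
            printf("subpage 12: %d\n", subpage_group_valid(ptev, 12));  /* 0 */
            return 0;
    }

So a lookup for subpage 9 reports "valid" whenever any of subpages 8-11
was hashed, which is the loss of precision you describe.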
> +/*
> + * index from 0 - 15
> + */
> +static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
> +{
> + unsigned long g_idx;
> +
> + if (!(ptev & _PAGE_COMBO))
> + return ptev;
> + index = index >> 2;
> + g_idx = 0x1 << index;
> +
> + return ptev | (g_idx << _PAGE_F_GIX_SHIFT);
> +}
>
> int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
> pte_t *ptep, unsigned long trap, unsigned long flags,
> @@ -102,7 +131,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
> */
> if (!(old_pte & _PAGE_COMBO)) {
> flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
> - old_pte &= ~_PAGE_HPTE_SUB;
> + old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
Or use _PAGE_COMBO_VALID directly instead?
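i.e. something along these lines (just a sketch of the suggestion; note
the parentheses -- as written in the hunk, ~ binds tighter than |, so
only _PAGE_HASHPTE actually gets cleared):

    old_pte &= ~(_PAGE_HASHPTE | _PAGE_COMBO_VALID);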
> goto htab_insert_hpte;
> }
> /*
> @@ -192,7 +221,8 @@ repeat:
> /* __real_pte use pte_val() any idea why ? FIXME!! */
> rpte.hidx &= ~(0xfUL << (subpg_index << 2));
> *hidxp = rpte.hidx | (slot << (subpg_index << 2));
> - new_pte |= (_PAGE_HPTE_SUB0 >> subpg_index);
> + new_pte = mark_subptegroup_valid(new_pte, subpg_index);
> + new_pte |= _PAGE_HASHPTE;
> /*
> * check __real_pte for details on matching smp_rmb()
> */
> diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
> index 6b4d4c1d0628..359839a57f26 100644
> --- a/arch/powerpc/mm/hash_low_64.S
> +++ b/arch/powerpc/mm/hash_low_64.S
> @@ -285,7 +285,7 @@ htab_modify_pte:
>
> /* Secondary group ? if yes, get a inverted hash value */
> mr r5,r28
> - andi. r0,r31,_PAGE_SECONDARY
> + andi. r0,r31,_PAGE_F_SECOND
> beq 1f
> not r5,r5
> 1:
> @@ -473,11 +473,7 @@ ht64_insert_pte:
> lis r0,_PAGE_HPTEFLAGS@h
> ori r0,r0,_PAGE_HPTEFLAGS@l
> andc r30,r30,r0
> -#ifdef CONFIG_PPC_64K_PAGES
> - oris r30,r30,_PAGE_HPTE_SUB0 at h
> -#else
> ori r30,r30,_PAGE_HASHPTE
> -#endif
> /* Phyical address in r5 */
> rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
> sldi r5,r5,PAGE_SHIFT
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index d94b1af53a93..7584e8445512 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -91,11 +91,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
> pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
>
> /* clear HPTE slot informations in new PTE */
> -#ifdef CONFIG_PPC_64K_PAGES
> - new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
> -#else
> new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
> -#endif
> +
> /* Add in WIMG bits */
> rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
> _PAGE_COHERENT | _PAGE_GUARDED));
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index d692ae31cfc7..3967e3cce03e 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -625,7 +625,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
> "1: ldarx %0,0,%3\n\
> andi. %1,%0,%6\n\
> bne- 1b \n\
> - ori %1,%0,%4 \n\
> + oris %1,%0,%4@h \n\
Why is this change required?