[PATCH 05/14] powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED
Michael Neuling
mikey at neuling.org
Tue Mar 22 17:05:27 AEDT 2016
On Mon, 2016-03-07 at 19:09 +0530, Aneesh Kumar K.V wrote:
> _PAGE_PRIVILEGED means the page can be accessed only by the kernel. This is
> done to keep the pte bits similar to the PowerISA 3.0 radix PTE format. User
> pages are now marked by clearing the _PAGE_PRIVILEGED bit.
>
> Previously we allowed the kernel to have a privileged page
> in the lower address range (USER_REGION). With this patch such access
> is denied.
>
> We also prevent kernel access to a non-privileged page in
> the higher address range (ie, REGION_ID != 0). Neither of the above access
> scenarios should ever happen.
A few comments below. I didn't find any issues, just some potential
cleanups.
Mikey
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar at linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/book3s/64/hash.h | 34 ++++++++++++++--------------
> arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++-
> arch/powerpc/mm/hash64_4k.c | 2 +-
> arch/powerpc/mm/hash64_64k.c | 4 ++--
> arch/powerpc/mm/hash_utils_64.c | 17 ++++++++------
> arch/powerpc/mm/hugepage-hash64.c | 2 +-
> arch/powerpc/mm/hugetlbpage-hash64.c | 3 ++-
> arch/powerpc/mm/hugetlbpage.c | 2 +-
> arch/powerpc/mm/pgtable.c | 15 ++++++++++--
> arch/powerpc/mm/pgtable_64.c | 15 +++++++++---
> arch/powerpc/platforms/cell/spufs/fault.c | 2 +-
> drivers/misc/cxl/fault.c | 5 ++--
> 12 files changed, 80 insertions(+), 39 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
> index f092d83fa623..fbefbaa92736 100644
> --- a/arch/powerpc/include/asm/book3s/64/hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/hash.h
> @@ -20,7 +20,7 @@
> #define _PAGE_READ 0x00004 /* read access allowed */
> #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
> #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
> -#define _PAGE_USER 0x00008 /* page may be accessed by userspace */
> +#define _PAGE_PRIVILEGED 0x00008 /* page can only be access by kernel */
/* page can only be accessed by kernel */
Or just
/* kernel access only */
> #define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */
> /* M (memory coherence) is always set in the HPTE, so we don't need it here */
> #define _PAGE_COHERENT 0x0
> @@ -114,10 +114,13 @@
> #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
> #endif /* CONFIG_PPC_MM_SLICES */
>
> -/* No separate kernel read-only */
> -#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
> +/*
> + * No separate kernel read-only, user access blocked by key
> + */
> +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
> #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
> -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
> +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
> + _PAGE_RW | _PAGE_EXEC)
>
> /* Strong Access Ordering */
> #define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
> @@ -149,7 +152,7 @@
> */
> #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
> _PAGE_WRITETHRU | _PAGE_4K_PFN | \
> - _PAGE_USER | _PAGE_ACCESSED | _PAGE_READ |\
> + _PAGE_PRIVILEGED | _PAGE_ACCESSED | _PAGE_READ |\
> _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
> _PAGE_SOFT_DIRTY)
> /*
> @@ -171,16 +174,13 @@
> *
> * Note due to the way vm flags are laid out, the bits are XWR
> */
> -#define PAGE_NONE __pgprot(_PAGE_BASE)
> -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
> -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
> - _PAGE_EXEC)
> -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> - _PAGE_EXEC)
> -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ)
> -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \
> - _PAGE_EXEC)
> +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
> +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
> +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC)
> +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
> +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
> +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
Eyeballing these, they seem to have been converted OK.
>
> #define __P000 PAGE_NONE
> #define __P001 PAGE_READONLY
> @@ -421,8 +421,8 @@ static inline pte_t pte_clear_soft_dirty(pte_t pte)
> */
> static inline int pte_protnone(pte_t pte)
> {
> - return (pte_val(pte) &
> - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
> + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) ==
> + (_PAGE_PRESENT | _PAGE_PRIVILEGED);
> }
> #endif /* CONFIG_NUMA_BALANCING */
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 4ac6221802ad..97d06de8dbf6 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd);
>
> static inline bool pte_user(pte_t pte)
> {
> - return (pte_val(pte) & _PAGE_USER);
> + return !(pte_val(pte) & _PAGE_PRIVILEGED);
This function might be usable in some of the places below in this patch; I've sketched a couple inline.
> }
>
> #ifdef CONFIG_MEM_SOFT_DIRTY
> @@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
> }
> #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
>
> +static inline bool check_pte_access(unsigned long access, unsigned long ptev)
> +{
> + /*
> + * This check for _PAGE_RWX and _PAG_PRESENT bits
> + */
> + if (access & ~ptev)
Is this really doing what the comment says?
Also, small typo: _PAG_ => _PAGE_.
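My reading (untested) is that the first check faults whenever any bit
requested in 'access' is clear in the pte, so it covers whatever the
caller passes in (usually _PAGE_PRESENT plus some of _PAGE_RWX), not
those bits specifically. With some made-up values, just to illustrate:

/* hypothetical example, not from the patch */
access = _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE;
ptev   = _PAGE_PRESENT | _PAGE_READ;	/* read-only pte */
/* access & ~ptev == _PAGE_WRITE, so we return false and take the fault */

If that's right, the comment could just say so.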
> + return false;
> + /*
> + * This check for access to privilege space
> + */
> + if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED))
> + return false;
> +
> + return true;
> +}
> +
> void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
> void pgtable_cache_init(void);
>
> diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
> index 7ebac279d38e..42ba12c184e1 100644
> --- a/arch/powerpc/mm/hash64_4k.c
> +++ b/arch/powerpc/mm/hash64_4k.c
> @@ -38,7 +38,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
> if (unlikely(old_pte & _PAGE_BUSY))
> return 0;
> /* If PTE permissions don't match, take page fault */
Is this comment still relevant? Same below.
> - if (unlikely(access & ~old_pte))
> + if (unlikely(!check_pte_access(access, old_pte)))
> return 1;
> /*
> * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
> index 83ac9f658733..f33b410d6c8a 100644
> --- a/arch/powerpc/mm/hash64_64k.c
> +++ b/arch/powerpc/mm/hash64_64k.c
> @@ -70,7 +70,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
> if (unlikely(old_pte & _PAGE_BUSY))
> return 0;
> /* If PTE permissions don't match, take page fault */
> - if (unlikely(access & ~old_pte))
> + if (unlikely(!check_pte_access(access, old_pte)))
> return 1;
> /*
> * Try to lock the PTE, add ACCESSED and DIRTY if it was
> @@ -241,7 +241,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
> if (unlikely(old_pte & _PAGE_BUSY))
> return 0;
> /* If PTE permissions don't match, take page fault */
> - if (unlikely(access & ~old_pte))
> + if (unlikely(!check_pte_access(access, old_pte)))
> return 1;
> /*
> * Check if PTE has the cache-inhibit bit set
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index ec37f4b0a8ff..630603f74056 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
> * User area is mapped with PP=0x2 for read/write
> * or PP=0x3 for read-only (including writeable but clean pages).
> */
> - if (pteflags & _PAGE_USER) {
> + if (!(pteflags & _PAGE_PRIVILEGED)) {
Could use pte_user() here? Maybe other places too.
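Something like this, perhaps (untested, and assuming wrapping the raw
flags in __pte() is acceptable here):

if (pte_user(__pte(pteflags))) {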
> if (pteflags & _PAGE_RWX)
> rflags |= 0x2;
> if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
> @@ -1086,7 +1086,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
> /* Pre-check access permissions (will be re-checked atomically
> * in __hash_page_XX but this pre-check is a fast path
> */
> - if (access & ~pte_val(*ptep)) {
> + if (!check_pte_access(access, pte_val(*ptep))) {
> DBG_LOW(" no access !\n");
> rc = 1;
> goto bail;
> @@ -1224,12 +1224,15 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
> if (dsisr & DSISR_ISSTORE)
> access |= _PAGE_WRITE;
> /*
> - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
> - * accessing a userspace segment (even from the kernel). We assume
> - * kernel addresses always have the high bit set.
> + * We set _PAGE_PRIVILEGED only when
> + * kernel mode access kernel space.
> + *
> + * _PAGE_PRIVILEGED is NOT set
> + * 1) when kernel mode access user space
> + * 2) user space access kernel space.
> */
> - if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
> - access |= _PAGE_USER;
> + if (!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
> + access |= _PAGE_PRIVILEGED;
This is a bit ugly:
(!(msr & MSR_PR) && !(REGION_ID(ea) == USER_REGION_ID))
You could set _PAGE_PRIVILEGED and then clear it using the same if
statement as before. Might be easier to read, i.e.:
access |= _PAGE_PRIVILEGED;
if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
access &= ~(_PAGE_PRIVILEGED);
>
> if (trap == 0x400)
> access |= _PAGE_EXEC;
> diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
> index 39342638a498..182f1d3fe73c 100644
> --- a/arch/powerpc/mm/hugepage-hash64.c
> +++ b/arch/powerpc/mm/hugepage-hash64.c
> @@ -41,7 +41,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
> if (unlikely(old_pmd & _PAGE_BUSY))
> return 0;
> /* If PMD permissions don't match, take page fault */
As above, is this comment still correct? The comment says
"permissions don't match" but the function call is
"check_pte_access()". Seems like a different concept.
> - if (unlikely(access & ~old_pmd))
> + if (unlikely(!check_pte_access(access, old_pmd)))
> return 1;
> /*
> * Try to lock the PTE, add ACCESSED and DIRTY if it was
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index e6e54a04bd32..96765510a49c 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -51,8 +51,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
> if (unlikely(old_pte & _PAGE_BUSY))
> return 0;
> /* If PTE permissions don't match, take page fault */
> - if (unlikely(access & ~old_pte))
> + if (unlikely(!check_pte_access(access, old_pte)))
Same comment again....
> return 1;
> +
> /* Try to lock the PTE, add ACCESSED and DIRTY if it was
> * a write access */
> new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 6e52e722d3f2..7201e9c624d5 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
> end = pte_end;
>
> pte = READ_ONCE(*ptep);
> - mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ;
> + mask = _PAGE_PRESENT | _PAGE_READ;
> if (write)
> mask |= _PAGE_WRITE;
>
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 98b5c03e344d..7b492283d502 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -43,9 +43,20 @@ static inline int is_exec_fault(void)
> */
> static inline int pte_looks_normal(pte_t pte)
> {
> +
> +#if defined(CONFIG_PPC_BOOK3S_64)
> + if ((pte_val(pte) &
> + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
> + _PAGE_PRESENT) {
> + if (!(pte_val(pte) & _PAGE_PRIVILEGED))
> + return 1;
> + }
> + return 0;
> +#else
> return (pte_val(pte) &
> - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
> - (_PAGE_PRESENT | _PAGE_USER);
> + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
> + (_PAGE_PRESENT | _PAGE_USER);
> +#endif
> }
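Following on from my pte_user() comment above, the book3s 64 branch
here could perhaps be shortened to (untested):

if ((pte_val(pte) &
     (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == _PAGE_PRESENT)
	return pte_user(pte);
return 0;

Just a cleanup though, the logic looks fine.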
>
> static struct page *maybe_pte_to_page(pte_t pte)
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index 00d8d985bba3..441905f7bba4 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
> if (flags & _PAGE_WRITE)
> flags |= _PAGE_DIRTY;
>
> - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
> - flags &= ~(_PAGE_USER | _PAGE_EXEC);
> + /* we don't want to let _PAGE_EXEC leak out */
> + flags &= ~_PAGE_EXEC;
> + /*
> + * Force kernel mapping.
> + */
> +#if defined(CONFIG_PPC_BOOK3S_64)
> + flags |= _PAGE_PRIVILEGED;
> +#else
> + flags &= ~_PAGE_USER;
> +#endif
> +
>
> #ifdef _PAGE_BAP_SR
> /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
> @@ -669,7 +678,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma,
> * the translation is still valid, because we will withdraw
> * pgtable_t after this.
> */
> - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0);
> + pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
> }
>
>
> diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
> index c3a3bf1745b7..e29e4d5afa2d 100644
> --- a/arch/powerpc/platforms/cell/spufs/fault.c
> +++ b/arch/powerpc/platforms/cell/spufs/fault.c
You need to CC the spufs maintainer for this one.
> @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx)
> /* we must not hold the lock when entering copro_handle_mm_fault */
> spu_release(ctx);
>
> - access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER);
> + access = (_PAGE_PRESENT | _PAGE_READ);
> access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL;
> local_irq_save(flags);
> ret = hash_page(ea, access, 0x300, dsisr);
> diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c
> index a3d5e1e16c21..23e37cd41e64 100644
> --- a/drivers/misc/cxl/fault.c
> +++ b/drivers/misc/cxl/fault.c
You need to CC both cxl driver maintainers for this one.
> @@ -152,8 +152,9 @@ static void cxl_handle_page_fault(struct cxl_context *ctx,
> access = _PAGE_PRESENT | _PAGE_READ;
> if (dsisr & CXL_PSL_DSISR_An_S)
> access |= _PAGE_WRITE;
> - if ((!ctx->kernel) || ~(dar & (1ULL << 63)))
> - access |= _PAGE_USER;
> +
> + if (ctx->kernel && (dar & (1ULL << 63)))
> + access |= _PAGE_PRIVILEGED;
>
> if (dsisr & DSISR_NOHPTE)
> inv_flags |= HPTE_NOHPTE_UPDATE;