[PATCH v11 1/9] powerpc/mm: Implement set_memory() routines

Christophe Leroy christophe.leroy at csgroup.eu
Thu Apr 29 17:32:33 AEST 2021



Le 29/04/2021 à 05:15, Jordan Niethe a écrit :
> From: Russell Currey <ruscur at russell.cc>
> 
> The set_memory_{ro/rw/nx/x}() functions are required for
> STRICT_MODULE_RWX, and are generally useful primitives to have.  This
> implementation is designed to be generic across powerpc's many MMUs.
> It's possible that this could be optimised to be faster for specific
> MMUs.
> 
> This implementation does not handle cases where the caller is attempting
> to change the mapping of the page it is executing from, or if another
> CPU is concurrently using the page being altered.  These cases likely
> shouldn't happen, but a more complex implementation with MMU-specific code
> could safely handle them.
> 
> On hash, the linear mapping is not kept in the linux pagetable, so this
> will not change the protection if used on that range. Currently these
> functions are not used on the linear map so just WARN for now.
> 
> Reviewed-by: Daniel Axtens <dja at axtens.net>
> Signed-off-by: Russell Currey <ruscur at russell.cc>
> Signed-off-by: Christophe Leroy <christophe.leroy at csgroup.eu>
> [jpn: - Allow set memory functions to be used without Strict RWX
>        - Hash: Disallow certain regions
>        - Have change_page_attr() take function pointers to manipulate ptes

Did you look at the resulting generated code? I find it awful.

pte manipulation helpers are meant to be inlined. Here you force the compiler to outline them. This 
also means that the input and output go through memory.

And now set_memory_xx are not tiny inlined functions anymore.

What is the reason you abandoned the way it was done up to now, through the use of an 'action' 
value? With the previous approach the generated code was a lot lighter.

>        - Radix: Add ptesync after set_pte_at()]
> Signed-off-by: Jordan Niethe <jniethe5 at gmail.com>
> ---
> v10: WARN if trying to change the hash linear map
> v11: - Update copywrite dates
>       - Allow set memory functions to be used without Strict RWX
>       - Hash: Disallow certain regions and add comment explaining why
>       - Have change_page_attr() take function pointers to manipulate ptes
>       - Clarify change_page_attr()'s comment
>       - Radix: Add ptesync after set_pte_at()
> ---
>   arch/powerpc/Kconfig                  |   1 +
>   arch/powerpc/include/asm/set_memory.h |  10 +++
>   arch/powerpc/mm/Makefile              |   2 +-
>   arch/powerpc/mm/pageattr.c            | 105 ++++++++++++++++++++++++++
>   4 files changed, 117 insertions(+), 1 deletion(-)
>   create mode 100644 arch/powerpc/include/asm/set_memory.h
>   create mode 100644 arch/powerpc/mm/pageattr.c
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index cb2d44ee4e38..94c34932a74b 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -138,6 +138,7 @@ config PPC
>   	select ARCH_HAS_MEMBARRIER_CALLBACKS
>   	select ARCH_HAS_MEMBARRIER_SYNC_CORE
>   	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
> +	select ARCH_HAS_SET_MEMORY
>   	select ARCH_HAS_STRICT_KERNEL_RWX	if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
>   	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
>   	select ARCH_HAS_UACCESS_FLUSHCACHE
> diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
> new file mode 100644
> index 000000000000..d1cd69b1a43a
> --- /dev/null
> +++ b/arch/powerpc/include/asm/set_memory.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_POWERPC_SET_MEMORY_H
> +#define _ASM_POWERPC_SET_MEMORY_H
> +
> +int set_memory_ro(unsigned long addr, int numpages);
> +int set_memory_rw(unsigned long addr, int numpages);
> +int set_memory_nx(unsigned long addr, int numpages);
> +int set_memory_x(unsigned long addr, int numpages);
> +
> +#endif
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index c3df3a8501d4..9142cf1fb0d5 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -5,7 +5,7 @@
>   
>   ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
>   
> -obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o \
> +obj-y				:= fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
>   				   init_$(BITS).o pgtable_$(BITS).o \
>   				   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
>   				   init-common.o mmu_context.o drmem.o \
> diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
> new file mode 100644
> index 000000000000..3b4aa72e555e
> --- /dev/null
> +++ b/arch/powerpc/mm/pageattr.c
> @@ -0,0 +1,105 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/*
> + * MMU-generic set_memory implementation for powerpc
> + *
> + * Copyright 2019-2021, IBM Corporation.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/set_memory.h>
> +
> +#include <asm/mmu.h>
> +#include <asm/page.h>
> +#include <asm/pgtable.h>
> +
> +
> +/*
> + * Updates the attributes of a page in three steps:
> + *
> + * 1. invalidate the page table entry
> + * 2. flush the TLB
> + * 3. install the new entry with the updated attributes
> + *
> + * Invalidating the pte means there are situations where this will not work
> + * when in theory it should.
> + * For example:
> + * - removing write from page whilst it is being executed
> + * - setting a page read-only whilst it is being read by another CPU
> + *
> + */
> +static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
> +{
> +	pte_t (*fn)(pte_t) = data;
> +	pte_t pte;
> +
> +	spin_lock(&init_mm.page_table_lock);
> +
> +	/* invalidate the PTE so it's safe to modify */
> +	pte = ptep_get_and_clear(&init_mm, addr, ptep);
> +	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> +
> +	/* modify the PTE bits as desired, then apply */
> +	pte = fn(pte);
> +
> +	set_pte_at(&init_mm, addr, ptep, pte);
> +
> +	/* See ptesync comment in radix__set_pte_at() */
> +	if (radix_enabled())
> +		asm volatile("ptesync": : :"memory");
> +	spin_unlock(&init_mm.page_table_lock);
> +
> +	return 0;
> +}
> +
> +static int change_memory_attr(unsigned long addr, int numpages, pte_t (*fn)(pte_t))
> +{
> +	unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
> +	unsigned long size = numpages * PAGE_SIZE;
> +
> +	if (!numpages)
> +		return 0;
> +
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	/*
> +	 * On hash, the linear mapping is not in the Linux page table so
> +	 * apply_to_existing_page_range() will have no effect. If in the future
> +	 * the set_memory_* functions are used on the linear map this will need
> +	 * to be updated.
> +	 */
> +	if (!radix_enabled()) {
> +		int region = get_region_id(addr);
> +
> +		if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
> +			return -EINVAL;
> +	}
> +#endif
> +
> +	return apply_to_existing_page_range(&init_mm, start, size,
> +					    change_page_attr, fn);
> +}
> +
> +int set_memory_ro(unsigned long addr, int numpages)
> +{
> +	return change_memory_attr(addr, numpages, pte_wrprotect);
> +}
> +
> +static pte_t pte_mkdirtywrite(pte_t pte)
> +{
> +	return pte_mkwrite(pte_mkdirty(pte));
> +}
> +
> +int set_memory_rw(unsigned long addr, int numpages)
> +{
> +	return change_memory_attr(addr, numpages, pte_mkdirtywrite);
> +}
> +
> +int set_memory_nx(unsigned long addr, int numpages)
> +{
> +	return change_memory_attr(addr, numpages, pte_exprotect);
> +}
> +
> +int set_memory_x(unsigned long addr, int numpages)
> +{
> +	return change_memory_attr(addr, numpages, pte_mkexec);
> +}
> 


More information about the Linuxppc-dev mailing list