[PATCH v2] powerpc/64s/radix: Enable huge vmalloc mappings

Christophe Leroy christophe.leroy at csgroup.eu
Mon May 3 03:11:20 AEST 2021



Le 02/05/2021 à 13:00, Nicholas Piggin a écrit :
> This reduces TLB misses by nearly 30x on a `git diff` workload on a
> 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
> to vfs hashes being allocated with 2MB pages.
> 
> Acked-by: Michael Ellerman <mpe at ellerman.id.au>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> Since v1:
> - Don't define MODULES_VADDR which has some other side effect (e.g.,
>    ptdump).
> - Fixed (hopefully) kbuild warning.
> - Keep __vmalloc_node_range call on 3 lines.
> 
>   .../admin-guide/kernel-parameters.txt          |  2 ++
>   arch/powerpc/Kconfig                           |  1 +
>   arch/powerpc/kernel/module.c                   | 18 +++++++++++++-----
>   3 files changed, 16 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 1c0a3cf6fcc9..1be38b25c485 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -3250,6 +3250,8 @@
>   
>   	nohugeiomap	[KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
>   
> +	nohugevmalloc	[PPC] Disable kernel huge vmalloc mappings.
> +
>   	nosmt		[KNL,S390] Disable symmetric multithreading (SMT).
>   			Equivalent to smt=1.
>   
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 1e6230bea09d..c547a9d6a2dd 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -185,6 +185,7 @@ config PPC
>   	select GENERIC_VDSO_TIME_NS
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_HUGE_VMAP		if PPC_BOOK3S_64 && PPC_RADIX_MMU
> +	select HAVE_ARCH_HUGE_VMALLOC		if HAVE_ARCH_HUGE_VMAP
>   	select HAVE_ARCH_JUMP_LABEL
>   	select HAVE_ARCH_JUMP_LABEL_RELATIVE
>   	select HAVE_ARCH_KASAN			if PPC32 && PPC_PAGE_SHIFT <= 14
> diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
> index fab84024650c..ea1fa55a6897 100644
> --- a/arch/powerpc/kernel/module.c
> +++ b/arch/powerpc/kernel/module.c
> @@ -8,6 +8,7 @@
>   #include <linux/moduleloader.h>
>   #include <linux/err.h>
>   #include <linux/vmalloc.h>
> +#include <linux/mm.h>
>   #include <linux/bug.h>
>   #include <asm/module.h>
>   #include <linux/uaccess.h>
> @@ -88,17 +89,22 @@ int module_finalize(const Elf_Ehdr *hdr,
>   	return 0;
>   }
>   
> -#ifdef MODULES_VADDR
>   static __always_inline void *
>   __module_alloc(unsigned long size, unsigned long start, unsigned long end)
>   {
> -	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
> -				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
> -				    __builtin_return_address(0));
> +	/*
> +	 * Don't do huge page allocations for modules yet until more testing
> +	 * is done. STRICT_MODULE_RWX may require extra work to support this
> +	 * too.
> +	 */
> +	return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
> +				    VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
> +				    NUMA_NO_NODE, __builtin_return_address(0));
>   }
>   
>   void *module_alloc(unsigned long size)
>   {
> +#ifdef CONFIG_PPC32

What then happens to PPC32 platforms that don't define MODULES_VADDR, for instance 4xx or BookE?

I think it should be:

#ifdef MODULES_VADDR

>   	unsigned long limit = (unsigned long)_etext - SZ_32M;
>   	void *ptr = NULL;
>   
> @@ -112,5 +118,7 @@ void *module_alloc(unsigned long size)
>   		ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
>   
>   	return ptr;
> -}
> +#else
> +	return __module_alloc(size, VMALLOC_START, VMALLOC_END);
>   #endif
> +}
> 


More information about the Linuxppc-dev mailing list