[PATCH 1/2] radix/kfence: map __kfence_pool at page granularity

Ritesh Harjani (IBM) ritesh.list at gmail.com
Wed May 1 15:45:48 AEST 2024


Hari Bathini <hbathini at linux.ibm.com> writes:

> When KFENCE is enabled, total system memory is mapped at page level
> granularity. But in radix MMU mode, ~3GB additional memory is needed
> to map 100GB of system memory at page level granularity when compared
> to using 2MB direct mapping. This is not desired considering KFENCE is
> designed to be enabled in production kernels [1]. Also, mapping memory
> allocated for KFENCE pool at page granularity seems sufficient enough
> to enable KFENCE support. So, allocate __kfence_pool during bootup and
> map it at page granularity instead of mapping all system memory at
> page granularity.
>
> Without patch:
>     # cat /proc/meminfo
>     MemTotal:       101201920 kB
>
> With patch:
>     # cat /proc/meminfo
>     MemTotal:       104483904 kB
>
> All kfence_test.c testcases passed with this patch.
>
> [1] https://lore.kernel.org/all/20201103175841.3495947-2-elver@google.com/
>
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
>  arch/powerpc/include/asm/kfence.h        |  5 ++++
>  arch/powerpc/mm/book3s64/radix_pgtable.c | 34 ++++++++++++++++++------
>  arch/powerpc/mm/init_64.c                | 14 ++++++++++

I'm new to this code, but the patch looked interesting, hence my review comments.

>  3 files changed, 45 insertions(+), 8 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 424ceef82ae6..18ec2b06ba1e 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -8,6 +8,7 @@
>  #ifndef __ASM_POWERPC_KFENCE_H
>  #define __ASM_POWERPC_KFENCE_H
>  
> +#include <linux/kfence.h>
>  #include <linux/mm.h>
>  #include <asm/pgtable.h>
>  
> @@ -15,6 +16,10 @@
>  #define ARCH_FUNC_PREFIX "."
>  #endif
>  
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +#endif
> +
>  static inline bool arch_kfence_init_pool(void)
>  {
>  	return true;

Shouldn't we return false for !kfence_early_init?
Otherwise, this patch on its own may break the late-init case, which your
next patch fixes, and that could in turn break git bisect.


> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 15e88f1439ec..fccbf92f279b 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -31,6 +31,7 @@
>  #include <asm/uaccess.h>
>  #include <asm/ultravisor.h>
>  #include <asm/set_memory.h>
> +#include <asm/kfence.h>
>  
>  #include <trace/events/thp.h>
>  
> @@ -291,9 +292,8 @@ static unsigned long next_boundary(unsigned long addr, unsigned long end)
>  	return end;
>  }
>  
> -static int __meminit create_physical_mapping(unsigned long start,
> -					     unsigned long end,
> -					     int nid, pgprot_t _prot)
> +static int __meminit create_physical_mapping(unsigned long start, unsigned long end, int nid,
> +					     pgprot_t _prot, unsigned long mapping_sz_limit)

These lines are over 80 characters.

>  {
>  	unsigned long vaddr, addr, mapping_size = 0;
>  	bool prev_exec, exec = false;
> @@ -301,7 +301,10 @@ static int __meminit create_physical_mapping(unsigned long start,
>  	int psize;
>  	unsigned long max_mapping_size = memory_block_size;
>  
> -	if (debug_pagealloc_enabled_or_kfence())
> +	if (mapping_sz_limit < max_mapping_size)
> +		max_mapping_size = mapping_sz_limit;
> +
> +	if (debug_pagealloc_enabled())
>  		max_mapping_size = PAGE_SIZE;
>  
>  	start = ALIGN(start, PAGE_SIZE);
> @@ -358,6 +361,7 @@ static int __meminit create_physical_mapping(unsigned long start,
>  
>  static void __init radix_init_pgtable(void)
>  {
> +	phys_addr_t kfence_pool __maybe_unused;
>  	unsigned long rts_field;
>  	phys_addr_t start, end;
>  	u64 i;
> @@ -365,6 +369,13 @@ static void __init radix_init_pgtable(void)
>  	/* We don't support slb for radix */
>  	slb_set_size(0);
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);

What if memblock_phys_alloc() fails? Shouldn't there be some error handling here?
> +		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	}
> +#endif
> +

Instead of an #ifdef CONFIG_KFENCE inside the function,
maybe we can define a radix_kfence_alloc_pool() helper? Then we won't need
__maybe_unused either.

>  	/*
>  	 * Create the linear mapping
>  	 */
> @@ -380,10 +391,18 @@ static void __init radix_init_pgtable(void)
>  			continue;
>  		}
>  
> -		WARN_ON(create_physical_mapping(start, end,
> -						-1, PAGE_KERNEL));
> +		WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
>  	}
>  
> +#ifdef CONFIG_KFENCE
> +	if (kfence_early_init) {
> +		create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE, -1,
> +					PAGE_KERNEL, PAGE_SIZE);

This call can also return an error. Maybe wrap it in WARN_ON_ONCE(), or
disable kfence on error?

> +		memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +		__kfence_pool = __va(kfence_pool);
> +	}
> +#endif
> +

This #ifdef block could then become a radix_kfence_map_pool() helper?


>  	if (!cpu_has_feature(CPU_FTR_HVMODE) &&
>  			cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
>  		/*
> @@ -874,8 +893,7 @@ int __meminit radix__create_section_mapping(unsigned long start,
>  		return -1;
>  	}
>  
> -	return create_physical_mapping(__pa(start), __pa(end),
> -				       nid, prot);
> +	return create_physical_mapping(__pa(start), __pa(end), nid, prot, ~0UL);
>  }
>  
>  int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index d96bbc001e73..8155bfd6c16b 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -64,6 +64,20 @@
>  
>  #include <mm/mmu_decl.h>
>  
> +#ifdef CONFIG_KFENCE
> +bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> +
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +#endif
> +
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP
>  /*
>   * Given an address within the vmemmap, determine the page that
> -- 
> 2.44.0


More information about the Linuxppc-dev mailing list