[PATCH 2/2] radix/kfence: support late __kfence_pool allocation
Ritesh Harjani (IBM)
ritesh.list at gmail.com
Wed May 1 17:03:21 AEST 2024
Hari Bathini <hbathini at linux.ibm.com> writes:
> With commit b33f778bba5ef ("kfence: alloc kfence_pool after system
> startup"), KFENCE pool can be allocated after system startup via the
> page allocator. This can lead to problems as all memory is not mapped
> at page granularity anymore with CONFIG_KFENCE. Address this by direct
> mapping all memory at PMD level and split the mapping for PMD pages
> that overlap with __kfence_pool to page level granularity if and when
> __kfence_pool is allocated after system startup.
>
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
> arch/powerpc/include/asm/book3s/64/radix.h | 2 +
> arch/powerpc/include/asm/kfence.h | 14 +++++-
> arch/powerpc/mm/book3s64/radix_pgtable.c | 50 +++++++++++++++++++++-
> 3 files changed, 64 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 8f55ff74bb68..0423ddbcf73c 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -340,6 +340,8 @@ extern void radix__vmemmap_remove_mapping(unsigned long start,
> extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
> pgprot_t flags, unsigned int psz);
>
> +extern bool radix_kfence_init_pool(void);
> +
> static inline unsigned long radix__get_tree_size(void)
> {
> unsigned long rts_field;
> diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
> index 18ec2b06ba1e..c5d2fb2f9ecb 100644
> --- a/arch/powerpc/include/asm/kfence.h
> +++ b/arch/powerpc/include/asm/kfence.h
> @@ -18,12 +18,24 @@
>
> #ifdef CONFIG_KFENCE
> extern bool kfence_early_init;
> -#endif
> +
> +static inline bool kfence_alloc_pool_late(void)
> +{
> + return !kfence_early_init;
> +}
Minor nit, but do we need kfence_alloc_pool_late()? The function name
is confusing. Can we not just use !kfence_early_init directly? If not,
then maybe name it kfence_late_init()?
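Something like this is what I had in mind (untested sketch):

static inline bool kfence_late_init(void)
{
	return !kfence_early_init;
}

i.e. the name then describes the state rather than suggesting the
function performs an allocation.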
>
> static inline bool arch_kfence_init_pool(void)
> {
> +#ifdef CONFIG_PPC_BOOK3S_64
> + if (radix_enabled())
> + return radix_kfence_init_pool();
Can we directly check:
	if (radix_enabled() && !kfence_early_init)
... instead of embedding the kfence_alloc_pool_late() check inside
radix_kfence_init_pool()?
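i.e. an untested sketch of what I mean:

static inline bool arch_kfence_init_pool(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
	/* Only a late-allocated pool needs its PMD mappings split. */
	if (radix_enabled() && !kfence_early_init)
		return radix_kfence_init_pool();
#endif
	return true;
}

That keeps the "when" decision in the caller, and
radix_kfence_init_pool() then only has to do the splitting.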
> +#endif
> +
> return true;
> }
> +#else
> +static inline bool kfence_alloc_pool_late(void) { return false; }
> +#endif
>
> #ifdef CONFIG_PPC64
> static inline bool kfence_protect_page(unsigned long addr, bool protect)
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index fccbf92f279b..f4374e3e31e1 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -253,6 +253,53 @@ void radix__mark_initmem_nx(void)
> }
> #endif /* CONFIG_STRICT_KERNEL_RWX */
>
> +#ifdef CONFIG_KFENCE
> +static inline int radix_split_pmd_page(pmd_t *pmd, unsigned long addr)
> +{
> + pte_t *pte = pte_alloc_one_kernel(&init_mm);
> + unsigned long pfn = PFN_DOWN(__pa(addr));
Minor nit: since addr will always be page aligned, maybe PHYS_PFN()
is better suited, although it does not matter much.
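i.e. just:

	unsigned long pfn = PHYS_PFN(__pa(addr));

Both expand to the same shift, but PHYS_PFN() reads as a plain
phys-to-pfn conversion rather than a round-down.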
> + int i;
> +
> + if (!pte)
> + return -ENOMEM;
> +
> + for (i = 0; i < PTRS_PER_PTE; i++) {
> + __set_pte_at(&init_mm, addr, pte + i, pfn_pte(pfn + i, PAGE_KERNEL), 0);
> + asm volatile("ptesync": : :"memory");
> + }
Maybe add a comment above the loop on why __set_pte_at() is OK for
late kfence init, and why pte_update() is not needed? [1]
[1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r.fsf@linux.ibm.com/
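As an untested sketch of the kind of comment I mean (the rationale is
my understanding from the earlier discussion, so please correct it if
wrong):

	/*
	 * The freshly allocated PTE page is not yet linked into the
	 * page tables, so nothing can walk or modify these entries
	 * concurrently until pmd_populate_kernel() below. A plain
	 * __set_pte_at() is therefore enough; there is no existing
	 * valid PTE that would need pte_update()'s atomic RMW.
	 */
	for (i = 0; i < PTRS_PER_PTE; i++) {
		__set_pte_at(&init_mm, addr, pte + i,
			     pfn_pte(pfn + i, PAGE_KERNEL), 0);
		asm volatile("ptesync": : :"memory");
	}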
> + pmd_populate_kernel(&init_mm, pmd, pte);
> +
> + flush_tlb_kernel_range(addr, addr + PMD_SIZE);
> + return 0;
> +}
> +
> +bool radix_kfence_init_pool(void)
> +{
> + unsigned int page_psize, pmd_psize;
> + unsigned long addr;
> + pmd_t *pmd;
> +
> + if (!kfence_alloc_pool_late())
> + return true;
> +
> + page_psize = shift_to_mmu_psize(PAGE_SHIFT);
> + pmd_psize = shift_to_mmu_psize(PMD_SHIFT);
> + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
> + addr += PAGE_SIZE) {
> + pmd = pmd_off_k(addr);
> +
> + if (pmd_leaf(*pmd)) {
> + if (radix_split_pmd_page(pmd, addr & PMD_MASK))
> + return false;
> + update_page_count(pmd_psize, -1);
> + update_page_count(page_psize, PTRS_PER_PTE);
> + }
> + }
> +
> + return true;
> +}
> +#endif
> +
> static inline void __meminit
> print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
> {
> @@ -391,7 +438,8 @@ static void __init radix_init_pgtable(void)
> continue;
> }
>
> - WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL, ~0UL));
> + WARN_ON(create_physical_mapping(start, end, -1, PAGE_KERNEL,
> + kfence_alloc_pool_late() ? PMD_SIZE : ~0UL));
So whenever !kfence_early_init is true, we always use PMD_SIZE.
Does that mean we never get 1G mappings for the direct map?
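To spell out the concern (illustrative only, from my reading of
create_physical_mapping()):

	/*
	 * kfence_early_init == true  => max mapping size ~0UL
	 *                               (1G direct mappings possible)
	 * kfence_early_init == false => max mapping size PMD_SIZE
	 *                               (everything capped at 2M)
	 */

So any configuration where the pool may be allocated late forgoes 1G
mappings for the whole direct map, not just for the 2M region that
ends up backing __kfence_pool.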
> }
>
> #ifdef CONFIG_KFENCE
> --
> 2.44.0