[PATCH 2/2] powerpc/64s/radix: ioremap use huge page mappings
Christophe Leroy
christophe.leroy at c-s.fr
Fri Jun 7 16:56:57 AEST 2019
Le 07/06/2019 à 08:19, Nicholas Piggin a écrit :
> powerpc/64s does not use ioremap_page_range, so it does not get huge
> vmap iomap mappings automatically. The radix kernel mapping function
> already allows larger page mappings that work with huge vmap, so wire
> that up to allow huge pages to be used for ioremap mappings.
Argh ... I was on the way to merge pgtable_64.c and pgtable_32.c. This
will complicate the task ... Anyway this looks a good improvment.
Any reason to limit that to Radix ?
>
> Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
> ---
> arch/powerpc/include/asm/book3s/64/pgtable.h | 8 +++
> arch/powerpc/mm/pgtable_64.c | 58 ++++++++++++++++++--
> include/linux/io.h | 1 +
> lib/ioremap.c | 2 +-
> 4 files changed, 62 insertions(+), 7 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index ccf00a8b98c6..d7a4f2d80598 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
> #define VMALLOC_START __vmalloc_start
> #define VMALLOC_END __vmalloc_end
>
> +static inline unsigned int ioremap_max_order(void)
> +{
> + if (radix_enabled())
> + return PUD_SHIFT;
> + return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
> +}
> +#define IOREMAP_MAX_ORDER ({ ioremap_max_order();})
Following form doesn't work ?
#define IOREMAP_MAX_ORDER ioremap_max_order()
> +
> extern unsigned long __kernel_virt_start;
> extern unsigned long __kernel_virt_size;
> extern unsigned long __kernel_io_start;
> diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
> index d2d976ff8a0e..cf02b67eee55 100644
> --- a/arch/powerpc/mm/pgtable_64.c
> +++ b/arch/powerpc/mm/pgtable_64.c
> @@ -112,7 +112,7 @@ unsigned long ioremap_bot = IOREMAP_BASE;
> * __ioremap_at - Low level function to establish the page tables
> * for an IO mapping
> */
> -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +static void __iomem * hash__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
Is this the correct name ?
As far as I understand, this function will be used by nohash/64, looks
strange to call hash__something() a function used by nohash platforms.
> {
> unsigned long i;
>
> @@ -120,6 +120,54 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
> if (pgprot_val(prot) & H_PAGE_4K_PFN)
> return NULL;
>
> + for (i = 0; i < size; i += PAGE_SIZE)
> + if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
> + return NULL;
> +
> + return (void __iomem *)ea;
> +}
> +
> +static int radix__ioremap_page_range(unsigned long addr, unsigned long end,
> + phys_addr_t phys_addr, pgprot_t prot)
> +{
> + while (addr != end) {
> + if (unlikely(ioremap_huge_disabled))
> + goto use_small_page;
I don't like too much a goto in the middle of an if/else set inside a loop.
Couldn't we have two while() loops, one for the !ioremap_huge_disabled()
and one for the ioremap_huge_disabled() case ? It would duplicate some
code but that's only 3 small lines.
Or, when ioremap_huge_disabled(), couldn't it just fallback to the
hash__ioremap_at() function ?
> +
> + if (!(addr & ~PUD_MASK) && !(phys_addr & ~PUD_MASK) &&
> + end - addr >= PUD_SIZE) {
> + if (radix__map_kernel_page(addr, phys_addr, prot, PUD_SIZE))
> + return -ENOMEM;
> + addr += PUD_SIZE;
> + phys_addr += PUD_SIZE;
> +
> + } else if (!(addr & ~PMD_MASK) && !(phys_addr & ~PMD_MASK) &&
> + end - addr >= PMD_SIZE) {
> + if (radix__map_kernel_page(addr, phys_addr, prot, PMD_SIZE))
> + return -ENOMEM;
> + addr += PMD_SIZE;
> + phys_addr += PMD_SIZE;
> +
> + } else {
> +use_small_page:
> + if (radix__map_kernel_page(addr, phys_addr, prot, PAGE_SIZE))
> + return -ENOMEM;
> + addr += PAGE_SIZE;
> + phys_addr += PAGE_SIZE;
> + }
> + }
> + return 0;
> +}
> +
> +static void __iomem * radix__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +{
> + if (radix__ioremap_page_range((unsigned long)ea, (unsigned long)ea + size, pa, prot))
> + return NULL;
> + return ea;
> +}
> +
> +void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
> +{
> if ((ea + size) >= (void *)IOREMAP_END) {
> pr_warn("Outside the supported range\n");
> return NULL;
> @@ -129,11 +177,9 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
> WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
> WARN_ON(size & ~PAGE_MASK);
>
> - for (i = 0; i < size; i += PAGE_SIZE)
> - if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
> - return NULL;
> -
> - return (void __iomem *)ea;
> + if (radix_enabled())
What about if (radix_enabled() && !ioremap_huge_disabled()) instead ?
> + return radix__ioremap_at(pa, ea, size, prot);
> + return hash__ioremap_at(pa, ea, size, prot);
Can't we just leave the no radix stuff here instead of making that
hash__ioremap_at() function ?
Christophe
> }
>
> /**
> diff --git a/include/linux/io.h b/include/linux/io.h
> index 32e30e8fb9db..423c4294aaa3 100644
> --- a/include/linux/io.h
> +++ b/include/linux/io.h
> @@ -44,6 +44,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
> #endif
>
> #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> +extern int ioremap_huge_disabled;
> void __init ioremap_huge_init(void);
> int arch_ioremap_pud_supported(void);
> int arch_ioremap_pmd_supported(void);
> diff --git a/lib/ioremap.c b/lib/ioremap.c
> index 063213685563..386ff956755f 100644
> --- a/lib/ioremap.c
> +++ b/lib/ioremap.c
> @@ -18,7 +18,7 @@
> static int __read_mostly ioremap_p4d_capable;
> static int __read_mostly ioremap_pud_capable;
> static int __read_mostly ioremap_pmd_capable;
> -static int __read_mostly ioremap_huge_disabled;
> +int __read_mostly ioremap_huge_disabled;
>
> static int __init set_nohugeiomap(char *str)
> {
>
More information about the Linuxppc-dev
mailing list