[RFC PATCH 11/11] powerpc/64s/radix: allocate kernel page tables node-local if possible
Nicholas Piggin
npiggin at gmail.com
Sat Jul 22 11:17:41 AEST 2017
Try to allocate kernel page tables according to the node of
the memory they will map.

Page tables for the radix linear mapping are now allocated with
memblock_alloc_base_nid() on the node being mapped, falling back to an
allocation from any node if no node-local memory is available (see the
sketch below). So that node information is available when the early
page tables are built, NUMA initialisation is split out of
initmem_init() into early_initmem_init(), which early_setup() calls
before early_init_mmu(). create_section_mapping() and its hash/radix
implementations grow a nid argument so memory hotplug can do the same,
and the vmemmap path derives the node with early_pfn_to_nid().
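
A minimal sketch of the allocation fallback implemented by the
early_alloc_pgtable() change below (alloc_pgtable_nid() is a
hypothetical standalone name for illustration; the panic on final
failure is left to memblock_alloc_base()):

    static void *alloc_pgtable_nid(unsigned long size, int nid)
    {
        unsigned long pa = 0;

        /* Prefer memory on the node this table will map. */
        if (nid != -1)
            pa = memblock_alloc_base_nid(size, size,
                                         MEMBLOCK_ALLOC_ANYWHERE,
                                         nid, MEMBLOCK_NONE);

        /* Fall back to any node rather than failing early boot. */
        if (!pa)
            pa = memblock_alloc_base(size, size,
                                     MEMBLOCK_ALLOC_ANYWHERE);

        /* Page tables must start out zeroed. */
        return memset(__va(pa), 0, size);
    }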
---
arch/powerpc/include/asm/book3s/64/hash.h | 2 +-
arch/powerpc/include/asm/book3s/64/radix.h | 2 +-
arch/powerpc/include/asm/sparsemem.h | 2 +-
arch/powerpc/kernel/setup_64.c | 3 +
arch/powerpc/mm/hash_utils_64.c | 2 +-
arch/powerpc/mm/mem.c | 4 +-
arch/powerpc/mm/numa.c | 9 +-
arch/powerpc/mm/pgtable-book3s64.c | 6 +-
arch/powerpc/mm/pgtable-radix.c | 177 +++++++++++++++++++----------
9 files changed, 136 insertions(+), 71 deletions(-)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 0ce513f2926f..99ca49b0a801 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -199,7 +199,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
extern void hash__vmemmap_remove_mapping(unsigned long start,
unsigned long page_size);
-int hash__create_section_mapping(unsigned long start, unsigned long end);
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid);
int hash__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 487709ff6875..d9770ce79ebf 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -315,7 +315,7 @@ static inline unsigned long radix__get_tree_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int radix__create_section_mapping(unsigned long start, unsigned long end);
+int radix__create_section_mapping(unsigned long start, unsigned long end, int nid);
int radix__remove_section_mapping(unsigned long start, unsigned long end);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index c88930c9db7f..5411557d7c1f 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -16,7 +16,7 @@
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
+extern int create_section_mapping(unsigned long start, unsigned long end, int nid);
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 5c89b771ac81..5520fad59cf4 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -269,6 +269,7 @@ static void cpu_ready_for_interrupts(void)
* device-tree is not accessible via normal means at this point.
*/
+void __init early_initmem_init(void);
void __init early_setup(unsigned long dt_ptr)
{
static __initdata struct paca_struct boot_paca;
@@ -313,6 +314,8 @@ void __init early_setup(unsigned long dt_ptr)
apply_feature_fixups();
setup_feature_keys();
+ early_initmem_init();
+
/* Initialize the hash table or TLB handling */
early_init_mmu();
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d3da19cc4867..97bfb356d91d 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -777,7 +777,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
}
}
-int hash__create_section_mapping(unsigned long start, unsigned long end)
+int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
int rc = htab_bolt_mapping(start, end, __pa(start),
pgprot_val(PAGE_KERNEL), mmu_linear_psize,
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 8541f18694a4..0542b5f48123 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -117,7 +117,7 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
-int __weak create_section_mapping(unsigned long start, unsigned long end)
+int __weak create_section_mapping(unsigned long start, unsigned long end, int nid)
{
return -ENODEV;
}
@@ -136,7 +136,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
resize_hpt_for_hotplug(memblock_phys_mem_size());
start = (unsigned long)__va(start);
- rc = create_section_mapping(start, start + size);
+ rc = create_section_mapping(start, start + size, nid);
if (rc) {
pr_warning(
"Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n",
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 55e3fa5fcfb0..4660cf5da6d3 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -892,6 +892,12 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
+void __init early_initmem_init(void)
+{
+ if (parse_numa_properties())
+ setup_nonnuma();
+}
+
void __init initmem_init(void)
{
int nid, cpu;
@@ -899,9 +905,6 @@ void __init initmem_init(void)
max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
max_pfn = max_low_pfn;
- if (parse_numa_properties())
- setup_nonnuma();
-
memblock_dump_all();
/*
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index 31eed8fa8e99..390e378a0c57 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -130,12 +130,12 @@ void mmu_cleanup_all(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
+int create_section_mapping(unsigned long start, unsigned long end, int nid)
{
if (radix_enabled())
- return radix__create_section_mapping(start, end);
+ return radix__create_section_mapping(start, end, nid);
- return hash__create_section_mapping(start, end);
+ return hash__create_section_mapping(start, end, nid);
}
int remove_section_mapping(unsigned long start, unsigned long end)
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 897655ed067e..97a3c9c4ea5b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -34,79 +34,126 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz
return 0;
}
-static __ref void *early_alloc_pgtable(unsigned long size)
+static __ref void *early_alloc_pgtable(unsigned long size, int nid)
{
+ unsigned long pa = 0;
void *pt;
- pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE));
+ if (nid != -1) {
+ pa = memblock_alloc_base_nid(size, size,
+ MEMBLOCK_ALLOC_ANYWHERE,
+ nid, MEMBLOCK_NONE);
+ }
+
+ if (!pa)
+ pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE);
+
+ pt = __va(pa);
memset(pt, 0, size);
return pt;
}
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid);
+
int radix__map_kernel_page(unsigned long ea, unsigned long pa,
pgprot_t flags,
unsigned int map_page_size)
{
+ unsigned long pfn = pa >> PAGE_SHIFT;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
pte_t *ptep;
+
/*
* Make sure task size is correct as per the max adddr
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- } else {
- pgdp = pgd_offset_k(ea);
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
- pudp = pud_offset(pgdp, ea);
- if (map_page_size == PUD_SIZE) {
- ptep = (pte_t *)pudp;
- goto set_the_pte;
- }
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (map_page_size == PMD_SIZE) {
- ptep = pmdp_ptep(pmdp);
- goto set_the_pte;
- }
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
+ if (unlikely(!slab_is_available()))
+ return early_map_kernel_page(ea, pa, flags, map_page_size, -1);
+
+ /*
+ * TODO: make the page table allocation functions able to take a
+ * node, so kernel page tables can be placed on the right nodes
+ * after boot as well.
+ */
+ pgdp = pgd_offset_k(ea);
+ pudp = pud_alloc(&init_mm, pgdp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
}
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
set_the_pte:
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags));
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ smp_wmb();
+ return 0;
+}
+
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ /*
+ * Make sure task size is correct as per the max addr
+ */
+ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+ if (slab_is_available())
+ return radix__map_kernel_page(ea, pa, flags, map_page_size);
+
+ pgdp = pgd_offset_k(ea);
+ if (pgd_none(*pgdp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid);
+ BUG_ON(pudp == NULL);
+ pgd_populate(&init_mm, pgdp, pudp);
+ }
+ pudp = pud_offset(pgdp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid);
+ BUG_ON(pmdp == NULL);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE, nid);
+ BUG_ON(ptep == NULL);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
smp_wmb();
return 0;
}
@@ -165,7 +212,8 @@ static inline void __meminit print_mapping(unsigned long start,
}
static int __meminit create_physical_mapping(unsigned long start,
- unsigned long end)
+ unsigned long end,
+ int nid)
{
unsigned long vaddr, addr, mapping_size = 0;
pgprot_t prot;
@@ -221,7 +269,7 @@ static int __meminit create_physical_mapping(unsigned long start,
else
prot = PAGE_KERNEL;
- rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size);
+ rc = early_map_kernel_page(vaddr, addr, prot, mapping_size, nid);
if (rc)
return rc;
}
@@ -230,7 +278,7 @@ static int __meminit create_physical_mapping(unsigned long start,
return 0;
}
-static void __init radix_init_pgtable(void)
+void __init radix_init_pgtable(void)
{
unsigned long rts_field;
struct memblock_region *reg;
@@ -240,15 +288,17 @@ static void __init radix_init_pgtable(void)
/*
* Create the linear mapping, using standard page size for now
*/
- for_each_memblock(memory, reg)
+ for_each_memblock(memory, reg) {
WARN_ON(create_physical_mapping(reg->base,
- reg->base + reg->size));
+ reg->base + reg->size,
+ reg->nid));
+ }
/*
* Allocate Partition table and process table for the
* host.
*/
BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
- process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1);
/*
* Fill in the process table.
*/
@@ -722,9 +772,9 @@ static void remove_pagetable(unsigned long start, unsigned long end)
radix__flush_tlb_kernel_range(start, end);
}
-int __ref radix__create_section_mapping(unsigned long start, unsigned long end)
+int __ref radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- return create_physical_mapping(start, end);
+ return create_physical_mapping(start, end, nid);
}
int radix__remove_section_mapping(unsigned long start, unsigned long end)
@@ -741,8 +791,17 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
{
/* Create a PTE encoding */
unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW;
+ int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+ int ret;
+
+ if (!slab_is_available())
+ ret = early_map_kernel_page(start, phys, __pgprot(flags),
+ page_size, nid);
+ else
+ ret = radix__map_kernel_page(start, phys, __pgprot(flags),
+ page_size);
+ BUG_ON(ret);
- BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size));
return 0;
}
--
2.11.0