[PATCH v2 6/6] mm/memory_hotplug: Pass nid instead of zone to __remove_pages()

Robin Murphy robin.murphy at arm.com
Tue Aug 27 20:49:13 AEST 2019


On 26/08/2019 11:10, David Hildenbrand wrote:
> The zone parameter is no longer in use. Replace it with the nid, which
> we can now use the nid to limit the number of zones we have to process
> (vie for_each_zone_nid()). The function signature of __remove_pages() now
> looks much more similar to the one of __add_pages().

FWIW I recall this being trivially easy to hit when first playing with 
hotremove development for arm64 - since we only have 3 zones, the page 
flags poison would cause page_zone() to dereference past the end of 
node_zones[] and go all kinds of wrong. This looks like a definite 
improvement in API terms.

For arm64,

Acked-by: Robin Murphy <robin.murphy at arm.com>

Cheers,
Robin.

> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Tony Luck <tony.luck at intel.com>
> Cc: Fenghua Yu <fenghua.yu at intel.com>
> Cc: Benjamin Herrenschmidt <benh at kernel.crashing.org>
> Cc: Paul Mackerras <paulus at samba.org>
> Cc: Michael Ellerman <mpe at ellerman.id.au>
> Cc: Heiko Carstens <heiko.carstens at de.ibm.com>
> Cc: Vasily Gorbik <gor at linux.ibm.com>
> Cc: Christian Borntraeger <borntraeger at de.ibm.com>
> Cc: Yoshinori Sato <ysato at users.sourceforge.jp>
> Cc: Rich Felker <dalias at libc.org>
> Cc: Dave Hansen <dave.hansen at linux.intel.com>
> Cc: Andy Lutomirski <luto at kernel.org>
> Cc: Peter Zijlstra <peterz at infradead.org>
> Cc: Thomas Gleixner <tglx at linutronix.de>
> Cc: Ingo Molnar <mingo at redhat.com>
> Cc: Borislav Petkov <bp at alien8.de>
> Cc: "H. Peter Anvin" <hpa at zytor.com>
> Cc: x86 at kernel.org
> Cc: Andrew Morton <akpm at linux-foundation.org>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: Steve Capper <steve.capper at arm.com>
> Cc: Mike Rapoport <rppt at linux.ibm.com>
> Cc: Anshuman Khandual <anshuman.khandual at arm.com>
> Cc: Yu Zhao <yuzhao at google.com>
> Cc: Jun Yao <yaojun8558363 at gmail.com>
> Cc: Robin Murphy <robin.murphy at arm.com>
> Cc: Michal Hocko <mhocko at suse.com>
> Cc: Oscar Salvador <osalvador at suse.de>
> Cc: "Matthew Wilcox (Oracle)" <willy at infradead.org>
> Cc: Christophe Leroy <christophe.leroy at c-s.fr>
> Cc: "Aneesh Kumar K.V" <aneesh.kumar at linux.ibm.com>
> Cc: Pavel Tatashin <pasha.tatashin at soleen.com>
> Cc: Gerald Schaefer <gerald.schaefer at de.ibm.com>
> Cc: Halil Pasic <pasic at linux.ibm.com>
> Cc: Tom Lendacky <thomas.lendacky at amd.com>
> Cc: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
> Cc: Masahiro Yamada <yamada.masahiro at socionext.com>
> Cc: Dan Williams <dan.j.williams at intel.com>
> Cc: Wei Yang <richard.weiyang at gmail.com>
> Cc: Qian Cai <cai at lca.pw>
> Cc: Jason Gunthorpe <jgg at ziepe.ca>
> Cc: Logan Gunthorpe <logang at deltatee.com>
> Cc: Ira Weiny <ira.weiny at intel.com>
> Cc: linux-arm-kernel at lists.infradead.org
> Cc: linux-ia64 at vger.kernel.org
> Cc: linuxppc-dev at lists.ozlabs.org
> Cc: linux-s390 at vger.kernel.org
> Cc: linux-sh at vger.kernel.org
> Signed-off-by: David Hildenbrand <david at redhat.com>
> ---
>   arch/arm64/mm/mmu.c            |  4 +---
>   arch/ia64/mm/init.c            |  4 +---
>   arch/powerpc/mm/mem.c          |  3 +--
>   arch/s390/mm/init.c            |  4 +---
>   arch/sh/mm/init.c              |  4 +---
>   arch/x86/mm/init_32.c          |  4 +---
>   arch/x86/mm/init_64.c          |  4 +---
>   include/linux/memory_hotplug.h |  2 +-
>   mm/memory_hotplug.c            | 17 +++++++++--------
>   mm/memremap.c                  |  3 +--
>   10 files changed, 18 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index e67bab4d613e..9a2d388314f3 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -1080,7 +1080,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct zone *zone;
>   
>   	/*
>   	 * FIXME: Cleanup page tables (also in arch_add_memory() in case
> @@ -1089,7 +1088,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   	 * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
>   	 * unlocked yet.
>   	 */
> -	zone = page_zone(pfn_to_page(start_pfn));
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   }
>   #endif
> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
> index bf9df2625bc8..ae6a3e718aa0 100644
> --- a/arch/ia64/mm/init.c
> +++ b/arch/ia64/mm/init.c
> @@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct zone *zone;
>   
> -	zone = page_zone(pfn_to_page(start_pfn));
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   }
>   #endif
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index 9191a66b3bc5..af21e13529ce 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -130,10 +130,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
>   	int ret;
>   
> -	__remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   
>   	/* Remove htab bolted mappings for this section of memory */
>   	start = (unsigned long)__va(start);
> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
> index 20340a03ad90..2a7373ed6ded 100644
> --- a/arch/s390/mm/init.c
> +++ b/arch/s390/mm/init.c
> @@ -296,10 +296,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct zone *zone;
>   
> -	zone = page_zone(pfn_to_page(start_pfn));
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   	vmem_remove_mapping(start, size);
>   }
>   #endif /* CONFIG_MEMORY_HOTPLUG */
> diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
> index dfdbaa50946e..32441b59297d 100644
> --- a/arch/sh/mm/init.c
> +++ b/arch/sh/mm/init.c
> @@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = PFN_DOWN(start);
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct zone *zone;
>   
> -	zone = page_zone(pfn_to_page(start_pfn));
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   }
>   #endif /* CONFIG_MEMORY_HOTPLUG */
> diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
> index 4068abb9427f..2760e4bfbc56 100644
> --- a/arch/x86/mm/init_32.c
> +++ b/arch/x86/mm/init_32.c
> @@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct zone *zone;
>   
> -	zone = page_zone(pfn_to_page(start_pfn));
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   }
>   #endif
>   
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index a6b5c653727b..99d92297f1cf 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
>   {
>   	unsigned long start_pfn = start >> PAGE_SHIFT;
>   	unsigned long nr_pages = size >> PAGE_SHIFT;
> -	struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
> -	struct zone *zone = page_zone(page);
>   
> -	__remove_pages(zone, start_pfn, nr_pages, altmap);
> +	__remove_pages(nid, start_pfn, nr_pages, altmap);
>   	kernel_physical_mapping_remove(start, start + size);
>   }
>   #endif /* CONFIG_MEMORY_HOTPLUG */
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index f46ea71b4ffd..c5b38e7dc8aa 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -125,7 +125,7 @@ static inline bool movable_node_is_enabled(void)
>   
>   extern void arch_remove_memory(int nid, u64 start, u64 size,
>   			       struct vmem_altmap *altmap);
> -extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
> +extern void __remove_pages(int nid, unsigned long start_pfn,
>   			   unsigned long nr_pages, struct vmem_altmap *altmap);
>   
>   /* reasonably generic interface to expand the physical pages */
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index e88c96cf9d77..49ca3364eb70 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -514,7 +514,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
>   	pgdat_resize_unlock(zone->zone_pgdat, &flags);
>   }
>   
> -static void __remove_section(unsigned long pfn, unsigned long nr_pages,
> +static void __remove_section(int nid, unsigned long pfn, unsigned long nr_pages,
>   			     unsigned long map_offset,
>   			     struct vmem_altmap *altmap)
>   {
> @@ -525,14 +525,14 @@ static void __remove_section(unsigned long pfn, unsigned long nr_pages,
>   		return;
>   
>   	/* TODO: move zone handling out of memory removal path */
> -	for_each_zone(zone)
> +	for_each_zone_nid(zone, nid)
>   		__remove_zone(zone, pfn, nr_pages);
>   	sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
>   }
>   
>   /**
>    * __remove_pages() - remove sections of pages from a zone
> - * @zone: zone from which pages need to be removed
> + * @nid: the nid all pages were added to
>    * @pfn: starting pageframe (must be aligned to start of a section)
>    * @nr_pages: number of pages to remove (must be multiple of section size)
>    * @altmap: alternative device page map or %NULL if default memmap is used
> @@ -542,12 +542,13 @@ static void __remove_section(unsigned long pfn, unsigned long nr_pages,
>    * sure that pages are marked reserved and zones are adjust properly by
>    * calling offline_pages().
>    */
> -void __remove_pages(struct zone *zone, unsigned long pfn,
> -		    unsigned long nr_pages, struct vmem_altmap *altmap)
> +void __remove_pages(int nid, unsigned long pfn, unsigned long nr_pages,
> +		    struct vmem_altmap *altmap)
>   {
>   	const unsigned long end_pfn = pfn + nr_pages;
>   	unsigned long cur_nr_pages;
>   	unsigned long map_offset = 0;
> +	struct zone *zone;
>   
>   	if (check_pfn_span(pfn, nr_pages, "remove"))
>   		return;
> @@ -555,7 +556,7 @@ void __remove_pages(struct zone *zone, unsigned long pfn,
>   	map_offset = vmem_altmap_offset(altmap);
>   
>   	/* TODO: move zone handling out of memory removal path */
> -	for_each_zone(zone)
> +	for_each_zone_nid(zone, nid)
>   		if (zone_intersects(zone, pfn, nr_pages))
>   			clear_zone_contiguous(zone);
>   
> @@ -563,12 +564,12 @@ void __remove_pages(struct zone *zone, unsigned long pfn,
>   		cond_resched();
>   		/* Select all remaining pages up to the next section boundary */
>   		cur_nr_pages = min(end_pfn - pfn, -(pfn | PAGE_SECTION_MASK));
> -		__remove_section(pfn, cur_nr_pages, map_offset, altmap);
> +		__remove_section(nid, pfn, cur_nr_pages, map_offset, altmap);
>   		map_offset = 0;
>   	}
>   
>   	/* TODO: move zone handling out of memory removal path */
> -	for_each_zone(zone)
> +	for_each_zone_nid(zone, nid)
>   		set_zone_contiguous(zone);
>   }
>   
> diff --git a/mm/memremap.c b/mm/memremap.c
> index 8a394552b5bd..292ef4c6b447 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -138,8 +138,7 @@ static void devm_memremap_pages_release(void *data)
>   	mem_hotplug_begin();
>   	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
>   		pfn = PHYS_PFN(res->start);
> -		__remove_pages(page_zone(pfn_to_page(pfn)), pfn,
> -				 PHYS_PFN(resource_size(res)), NULL);
> +		__remove_pages(nid, pfn, PHYS_PFN(resource_size(res)), NULL);
>   	} else {
>   		arch_remove_memory(nid, res->start, resource_size(res),
>   				pgmap_altmap(pgmap));
> 


More information about the Linuxppc-dev mailing list