[PATCH] powerpc/mm: Refactor the floor/ceiling check in hugetlb range freeing functions

Qian Cai qcai at redhat.com
Sat Dec 12 03:17:41 AEDT 2020


On Fri, 2020-11-06 at 13:20 +0000, Christophe Leroy wrote:
> All hugetlb range freeing functions have a verification like the following,
> which only differs by the mask used, depending on the page table level.
> 
> 	start &= MASK;
> 	if (start < floor)
> 		return;
> 	if (ceiling) {
> 		ceiling &= MASK;
> 		if (! ceiling)
> 			return;
> 		}
> 	if (end - 1 > ceiling - 1)
> 		return;
> 
> Refactor that into a helper function which takes the mask as
> an argument, returning true when [start;end[ is not fully
> contained inside [floor;ceiling[
> 
> Signed-off-by: Christophe Leroy <christophe.leroy at csgroup.eu>
> ---
>  arch/powerpc/mm/hugetlbpage.c | 56 ++++++++++++-----------------------
>  1 file changed, 19 insertions(+), 37 deletions(-)
> 
> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> index 36c3800769fb..f8d8a4988e15 100644
> --- a/arch/powerpc/mm/hugetlbpage.c
> +++ b/arch/powerpc/mm/hugetlbpage.c
> @@ -294,6 +294,21 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
>  static inline void hugepd_free(struct mmu_gather *tlb, void *hugepte) {}
>  #endif
>  
> +/* Return true when the entry to be freed maps more than the area being freed */
> +static bool range_is_outside_limits(unsigned long start, unsigned long end,
> +				    unsigned long floor, unsigned long ceiling,
> +				    unsigned long mask)
> +{
> +	if ((start & mask) < floor)
> +		return true;
> +	if (ceiling) {
> +		ceiling &= mask;
> +		if (!ceiling)
> +			return true;
> +	}
> +	return end - 1 > ceiling - 1;
> +}
> +
>  static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
>  			      unsigned long start, unsigned long end,
>  			      unsigned long floor, unsigned long ceiling)
> @@ -309,15 +324,7 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
>  	if (shift > pdshift)
>  		num_hugepd = 1 << (shift - pdshift);
>  
> -	start &= pdmask;
> -	if (start < floor)
> -		return;
> -	if (ceiling) {
> -		ceiling &= pdmask;
> -		if (! ceiling)
> -			return;
> -	}
> -	if (end - 1 > ceiling - 1)
> +	if (range_is_outside_limits(start, end, floor, ceiling, pdmask))
>  		return;
>  
>  	for (i = 0; i < num_hugepd; i++, hpdp++)
> @@ -334,18 +341,9 @@ static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
>  				   unsigned long addr, unsigned long end,
>  				   unsigned long floor, unsigned long ceiling)
>  {
> -	unsigned long start = addr;
>  	pgtable_t token = pmd_pgtable(*pmd);
>  
> -	start &= PMD_MASK;
> -	if (start < floor)
> -		return;
> -	if (ceiling) {
> -		ceiling &= PMD_MASK;
> -		if (!ceiling)
> -			return;
> -	}
> -	if (end - 1 > ceiling - 1)
> +	if (range_is_outside_limits(addr, end, floor, ceiling, PMD_MASK))
>  		return;
>  
>  	pmd_clear(pmd);
> @@ -395,15 +393,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
>  				  addr, next, floor, ceiling);
>  	} while (addr = next, addr != end);
>  
> -	start &= PUD_MASK;
> -	if (start < floor)
> -		return;
> -	if (ceiling) {
> -		ceiling &= PUD_MASK;
> -		if (!ceiling)
> -			return;
> -	}
> -	if (end - 1 > ceiling - 1)
> +	if (range_is_outside_limits(start, end, floor, ceiling, PUD_MASK))
>  		return;
>  
>  	pmd = pmd_offset(pud, start);
> @@ -446,15 +436,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, p4d_t *p4d,
>  		}
>  	} while (addr = next, addr != end);
>  
> -	start &= PGDIR_MASK;
> -	if (start < floor)
> -		return;
> -	if (ceiling) {
> -		ceiling &= PGDIR_MASK;
> -		if (!ceiling)
> -			return;
> -	}
> -	if (end - 1 > ceiling - 1)
> +	if (range_is_outside_limits(start, end, floor, ceiling, PGDIR_MASK))
>  		return;
>  
>  	pud = pud_offset(p4d, start);

Well, "start" is still in use in hugetlb_free_pmd_range() and
hugetlb_free_pud_range() after range_is_outside_limits(), but after this patch,
"start" is not longer has the bitmask, i.e., "no &=".

Anyway, reverting this commit from today's linux-next fixed a crash on POWE9 NV.

# runltp -f hugetlb
[ 7703.114640][T58070] LTP: starting hugemmap05_1 (hugemmap05 -m)
[ 7703.157792][   C99] ------------[ cut here ]------------
[ 7703.158279][   C99] kernel BUG at arch/powerpc/mm/book3s64/pgtable.c:387!
[ 7703.158306][   C99] Oops: Exception in kernel mode, sig: 5 [#1]
[ 7703.158330][   C99] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=256 NUMA PowerNV
[ 7703.158343][   C99] Modules linked in: vfio_pci vfio_virqfd vfio_iommu_spapr_tce vfio vfio_spapr_eeh loop kvm_hv kvm ip_tables x_tables sd_mod ahci libahci tg3 libata firmware_class libphy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: dummy_del_mod]
[ 7703.158435][   C99] CPU: 99 PID: 308 Comm: ksoftirqd/99 Tainted: G           O      5.10.0-rc7-next-20201211 #1
[ 7703.158464][   C99] NIP:  c00000000005dbec LR: c0000000003352f4 CTR: 0000000000000000
[ 7703.158489][   C99] REGS: c00020000bb6f830 TRAP: 0700   Tainted: G           O       (5.10.0-rc7-next-20201211)
[ 7703.158528][   C99] MSR:  900000000282b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 24002284  XER: 20040000
[ 7703.158570][   C99] GPR00: c0000000003352f4 c00020000bb6fad0 c000000007f70b00 c0002000385b3ff0 
[ 7703.158570][   C99] GPR04: 0000000000000000 0000000000000003 c00020000bb6f8b4 0000000000000001 
[ 7703.158570][   C99] GPR08: 0000000000000001 0000000000000009 0000000000000008 0000000000000002 
[ 7703.158570][   C99] GPR12: 0000000024002488 c000201fff649c00 c000000007f2a20c 0000000000000000 
[ 7703.158570][   C99] GPR16: 0000000000000007 0000000000000000 c000000000194d10 c000000000194d10 
[ 7703.158570][   C99] GPR24: 0000000000000014 0000000000000015 c000201cc6e72398 c000000007fac4b4 
[ 7703.158570][   C99] GPR28: c000000007f2bf80 c000000007fac2f8 0000000000000008 c000200033870000 
[ 7703.158766][   C99] NIP [c00000000005dbec] __tlb_remove_table+0x1dc/0x1e0
pgtable_free at arch/powerpc/mm/book3s64/pgtable.c:387
(inlined by) __tlb_remove_table at arch/powerpc/mm/book3s64/pgtable.c:405
[ 7703.158805][   C99] LR [c0000000003352f4] tlb_remove_table_rcu+0x54/0xa0
[ 7703.158853][   C99] Call Trace:
[ 7703.158872][   C99] [c00020000bb6fad0] [c00000000005db4c] __tlb_remove_table+0x13c/0x1e0 (unreliable)
[ 7703.158890][   C99] [c00020000bb6fb00] [c0000000003352f4] tlb_remove_table_rcu+0x54/0xa0
__tlb_remove_table_free at mm/mmu_gather.c:101
(inlined by) tlb_remove_table_rcu at mm/mmu_gather.c:156
[ 7703.158927][   C99] [c00020000bb6fb30] [c000000000194d7c] rcu_core+0x35c/0xbb0
rcu_do_batch at kernel/rcu/tree.c:2502
(inlined by) rcu_core at kernel/rcu/tree.c:2737
[ 7703.158966][   C99] [c00020000bb6fbf0] [c00000000095a3d0] __do_softirq+0x480/0x704
[ 7703.159006][   C99] [c00020000bb6fd10] [c0000000000cc1f4] run_ksoftirqd+0x74/0xd0
run_ksoftirqd at kernel/softirq.c:651
(inlined by) run_ksoftirqd at kernel/softirq.c:642
[ 7703.159046][   C99] [c00020000bb6fd30] [c0000000001040c8] smpboot_thread_fn+0x278/0x320
[ 7703.159096][   C99] [c00020000bb6fda0] [c0000000000fc8a4] kthread+0x1c4/0x1d0
[ 7703.159145][   C99] [c00020000bb6fe10] [c00000000000d9fc] ret_from_kernel_thread+0x5c/0x80
[ 7703.159183][   C99] Instruction dump:
[ 7703.159204][   C99] 60000000 7c0802a6 3c82f8b4 7fe3fb78 38847470 f8010040 482b4fc5 60000000 
[ 7703.159248][   C99] 0fe00000 7c0802a6 fbe10028 f8010040 <0fe00000> 3c4c07f1 38422f10 7c0802a6 
[ 7703.159293][   C99] ---[ end trace 1d92a5231ba6a0d5 ]---



More information about the Linuxppc-dev mailing list