[PATCH v2 2/5] powerpc/32: Fix hugepage allocation on 8xx at hint address

Christophe LEROY christophe.leroy at c-s.fr
Fri Jan 19 19:49:24 AEDT 2018



Le 19/01/2018 à 09:26, Aneesh Kumar K.V a écrit :
> Christophe Leroy <christophe.leroy at c-s.fr> writes:
> 
>> On the 8xx, the page size is set in the PMD entry and applies to
>> all pages of the page table pointed by the said PMD entry.
>>
>> When an app has some regular pages allocated (e.g. see below) and tries
>> to mmap() a huge page at a hint address covered by the same PMD entry,
>> the kernel accepts the hint allthough the 8xx cannot handle different
>> page sizes in the same PMD entry.
>>
>> 10000000-10001000 r-xp 00000000 00:0f 2597 /root/malloc
>> 10010000-10011000 rwxp 00000000 00:0f 2597 /root/malloc
>>
>> mmap(0x10080000, 524288, PROT_READ|PROT_WRITE,
>>       MAP_PRIVATE|MAP_ANONYMOUS|0x40000, -1, 0) = 0x10080000
>>
>> This results the app remaining forever in do_page_fault()/hugetlb_fault()
>> and when interrupting that app, we get the following warning:
>>
>> [162980.035629] WARNING: CPU: 0 PID: 2777 at arch/powerpc/mm/hugetlbpage.c:354 hugetlb_free_pgd_range+0xc8/0x1e4
>> [162980.035699] CPU: 0 PID: 2777 Comm: malloc Tainted: G W       4.14.6 #85
>> [162980.035744] task: c67e2c00 task.stack: c668e000
>> [162980.035783] NIP:  c000fe18 LR: c00e1eec CTR: c00f90c0
>> [162980.035830] REGS: c668fc20 TRAP: 0700   Tainted: G W        (4.14.6)
>> [162980.035854] MSR:  00029032 <EE,ME,IR,DR,RI>  CR: 24044224 XER: 20000000
>> [162980.036003]
>> [162980.036003] GPR00: c00e1eec c668fcd0 c67e2c00 00000010 c6869410 10080000 00000000 77fb4000
>> [162980.036003] GPR08: ffff0001 0683c001 00000000 ffffff80 44028228 10018a34 00004008 418004fc
>> [162980.036003] GPR16: c668e000 00040100 c668e000 c06c0000 c668fe78 c668e000 c6835ba0 c668fd48
>> [162980.036003] GPR24: 00000000 73ffffff 74000000 00000001 77fb4000 100fffff 10100000 10100000
>> [162980.036743] NIP [c000fe18] hugetlb_free_pgd_range+0xc8/0x1e4
>> [162980.036839] LR [c00e1eec] free_pgtables+0x12c/0x150
>> [162980.036861] Call Trace:
>> [162980.036939] [c668fcd0] [c00f0774] unlink_anon_vmas+0x1c4/0x214 (unreliable)
>> [162980.037040] [c668fd10] [c00e1eec] free_pgtables+0x12c/0x150
>> [162980.037118] [c668fd40] [c00eabac] exit_mmap+0xe8/0x1b4
>> [162980.037210] [c668fda0] [c0019710] mmput.part.9+0x20/0xd8
>> [162980.037301] [c668fdb0] [c001ecb0] do_exit+0x1f0/0x93c
>> [162980.037386] [c668fe00] [c001f478] do_group_exit+0x40/0xcc
>> [162980.037479] [c668fe10] [c002a76c] get_signal+0x47c/0x614
>> [162980.037570] [c668fe70] [c0007840] do_signal+0x54/0x244
>> [162980.037654] [c668ff30] [c0007ae8] do_notify_resume+0x34/0x88
>> [162980.037744] [c668ff40] [c000dae8] do_user_signal+0x74/0xc4
>> [162980.037781] Instruction dump:
>> [162980.037821] 7fdff378 81370000 54a3463a 80890020 7d24182e 7c841a14 712a0004 4082ff94
>> [162980.038014] 2f890000 419e0010 712a0ff0 408200e0 <0fe00000> 54a9000a 7f984840 419d0094
>> [162980.038216] ---[ end trace c0ceeca8e7a5800a ]---
>> [162980.038754] BUG: non-zero nr_ptes on freeing mm: 1
>> [162985.363322] BUG: non-zero nr_ptes on freeing mm: -1
>>
>> In order to fix this, this patch uses the address space "slices"
>> implemented for BOOK3S/64 and enhanced to support PPC32 by the
>> preceding patch.
>>
>> This patch modifies the context.id on the 8xx to be in the range
>> [1:16] instead of [0:15] in order to identify context.id == 0 as
>> not initialised contexts as done on BOOK3S
>>
>> This patch activates CONFIG_PPC_MM_SLICES when CONFIG_HUGETLB_PAGE is
>> selected for the 8xx
>>
>> Alltough we could in theory have as many slices as PMD entries, the
>> current slices implementation limits the number of low slices to 16.
>> This limitation is not preventing us to fix the initial issue allthough
>> it is suboptimal. It will be cured in a subsequent patch.
>>
>> Fixes: 4b91428699477 ("powerpc/8xx: Implement support of hugepages")
>> Signed-off-by: Christophe Leroy <christophe.leroy at c-s.fr>
>> ---
>>   v2: First patch of v1 serie split in two parts
>>
>>   arch/powerpc/include/asm/mmu-8xx.h     | 6 ++++++
>>   arch/powerpc/kernel/setup-common.c     | 2 ++
>>   arch/powerpc/mm/8xx_mmu.c              | 2 +-
>>   arch/powerpc/mm/hugetlbpage.c          | 2 ++
>>   arch/powerpc/mm/mmu_context_nohash.c   | 4 ++--
>>   arch/powerpc/mm/slice.c                | 2 ++
>>   arch/powerpc/platforms/Kconfig.cputype | 1 +
>>   7 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h
>> index 5bb3dbede41a..5f89b6010453 100644
>> --- a/arch/powerpc/include/asm/mmu-8xx.h
>> +++ b/arch/powerpc/include/asm/mmu-8xx.h
>> @@ -169,6 +169,12 @@ typedef struct {
>>   	unsigned int id;
>>   	unsigned int active;
>>   	unsigned long vdso_base;
>> +#ifdef CONFIG_PPC_MM_SLICES
>> +	u16 user_psize;		/* page size index */
>> +	u64 low_slices_psize;	/* page size encodings */
>> +	unsigned char high_slices_psize[0];
>> +	unsigned long slb_addr_limit;
>> +#endif
>>   } mm_context_t;
>>   
>>   #define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
>> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
>> index 9d213542a48b..67075a1cff36 100644
>> --- a/arch/powerpc/kernel/setup-common.c
>> +++ b/arch/powerpc/kernel/setup-common.c
>> @@ -927,6 +927,8 @@ void __init setup_arch(char **cmdline_p)
>>   #ifdef CONFIG_PPC64
>>   	if (!radix_enabled())
>>   		init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
>> +#elif defined(CONFIG_PPC_8xx)
>> +	init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
>>   #else
>>   #error	"context.addr_limit not initialized."
>>   #endif
>> diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
>> index f29212e40f40..0be77709446c 100644
>> --- a/arch/powerpc/mm/8xx_mmu.c
>> +++ b/arch/powerpc/mm/8xx_mmu.c
>> @@ -192,7 +192,7 @@ void set_context(unsigned long id, pgd_t *pgd)
>>   	mtspr(SPRN_M_TW, __pa(pgd) - offset);
>>   
>>   	/* Update context */
>> -	mtspr(SPRN_M_CASID, id);
>> +	mtspr(SPRN_M_CASID, id - 1);
>>   	/* sync */
>>   	mb();
>>   }
>> diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
>> index a9b9083c5e49..79e1378ee303 100644
>> --- a/arch/powerpc/mm/hugetlbpage.c
>> +++ b/arch/powerpc/mm/hugetlbpage.c
>> @@ -553,9 +553,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
>>   	struct hstate *hstate = hstate_file(file);
>>   	int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
>>   
>> +#ifdef CONFIG_PPC_RADIX_MMU
>>   	if (radix_enabled())
>>   		return radix__hugetlb_get_unmapped_area(file, addr, len,
>>   						       pgoff, flags);
>> +#endif
> 
> if (0) didn't remove the following radix__hugetlb_get_unmapped_area for
> you?
> 

No

   CC      arch/powerpc/mm/hugetlbpage.o
arch/powerpc/mm/hugetlbpage.c: In function ‘hugetlb_get_unmapped_area’:
arch/powerpc/mm/hugetlbpage.c:558:10: error: implicit declaration of 
function ‘radix__hugetlb_get_unmapped_area’ 
[-Werror=implicit-function-declaration]
    return radix__hugetlb_get_unmapped_area(file, addr, len,
           ^
cc1: all warnings being treated as errors
make[1]: *** [arch/powerpc/mm/hugetlbpage.o] Error 1


Christophe

> 
>>   	return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
>>   }
>>   #endif
>> diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
>> index 42e02f5b6660..c1e1bf186871 100644
>> --- a/arch/powerpc/mm/mmu_context_nohash.c
>> +++ b/arch/powerpc/mm/mmu_context_nohash.c
>> @@ -435,8 +435,8 @@ void __init mmu_context_init(void)
>>   	 *      -- BenH
>>   	 */
>>   	if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
>> -		first_context = 0;
>> -		last_context = 15;
>> +		first_context = 1;
>> +		last_context = 16;
>>   		no_selective_tlbil = true;
>>   	} else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
>>   		first_context = 1;
>> diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
>> index 3f35a93afe13..b617acf35836 100644
>> --- a/arch/powerpc/mm/slice.c
>> +++ b/arch/powerpc/mm/slice.c
>> @@ -206,6 +206,7 @@ static int slice_check_fit(struct mm_struct *mm,
>>   
>>   static void slice_flush_segments(void *parm)
>>   {
>> +#ifdef CONFIG_PPC_BOOK3S_64
>>   	struct mm_struct *mm = parm;
>>   	unsigned long flags;
>>   
>> @@ -217,6 +218,7 @@ static void slice_flush_segments(void *parm)
>>   	local_irq_save(flags);
>>   	slb_flush_and_rebolt();
>>   	local_irq_restore(flags);
>> +#endif
>>   }
>>   
>>   static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
>> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
>> index ae07470fde3c..73a7ea333e9e 100644
>> --- a/arch/powerpc/platforms/Kconfig.cputype
>> +++ b/arch/powerpc/platforms/Kconfig.cputype
>> @@ -334,6 +334,7 @@ config PPC_BOOK3E_MMU
>>   config PPC_MM_SLICES
>>   	bool
>>   	default y if PPC_BOOK3S_64
>> +	default y if PPC_8xx && HUGETLB_PAGE
>>   	default n
>>   
>>   config PPC_HAVE_PMU_SUPPORT
>> -- 
>> 2.13.3


More information about the Linuxppc-dev mailing list