[PATCH v7 5/9] powerpc/pseries: flush SLB contents on SLB MCE errors.

Mahesh Jagannath Salgaonkar mahesh at linux.vnet.ibm.com
Fri Aug 10 20:30:11 AEST 2018


On 08/07/2018 10:24 PM, Michal Suchánek wrote:
> Hello,
> 
> 
> On Tue, 07 Aug 2018 19:47:14 +0530
> "Mahesh J Salgaonkar" <mahesh at linux.vnet.ibm.com> wrote:
> 
>> From: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
>>
>> On pseries, as of today the system crashes if we get a machine check
>> exception due to SLB errors. These are soft errors and can be fixed
>> by flushing the SLBs, so the kernel can continue to function instead of
>> crashing. We do this in real mode before turning on the MMU; otherwise
>> we would run into nested machine checks. This patch now fetches the
>> RTAS error log in real mode and flushes the SLBs on SLB errors.
>>
>> Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
>> Signed-off-by: Michal Suchanek <msuchanek at suse.com>
>> ---
>>
>> Changes in V7:
>> - Fold Michal's patch into this patch.
>> - Handle MSR_RI=0 and evil context case in MC handler.
>> ---
>>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>>  arch/powerpc/include/asm/machdep.h            |    1 
>>  arch/powerpc/kernel/exceptions-64s.S          |  112
>> +++++++++++++++++++++++++
>> arch/powerpc/kernel/mce.c                     |   15 +++
>> arch/powerpc/mm/slb.c                         |    6 +
>> arch/powerpc/platforms/powernv/setup.c        |   11 ++
>> arch/powerpc/platforms/pseries/pseries.h      |    1
>> arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++
>> arch/powerpc/platforms/pseries/setup.c        |    1 9 files changed,
>> 195 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
>> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index
>> 50ed64fba4ae..cc00a7088cf3 100644 ---
>> a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++
>> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,6 +487,7 @@
>> extern void hpte_init_native(void); 
>>  extern void slb_initialize(void);
>>  extern void slb_flush_and_rebolt(void);
>> +extern void slb_flush_and_rebolt_realmode(void);
>>  
>>  extern void slb_vmalloc_update(void);
>>  extern void slb_set_size(u16 size);
>> diff --git a/arch/powerpc/include/asm/machdep.h
>> b/arch/powerpc/include/asm/machdep.h index a47de82fb8e2..b4831f1338db
>> 100644 --- a/arch/powerpc/include/asm/machdep.h
>> +++ b/arch/powerpc/include/asm/machdep.h
>> @@ -108,6 +108,7 @@ struct machdep_calls {
>>  
>>  	/* Early exception handlers called in realmode */
>>  	int		(*hmi_exception_early)(struct pt_regs
>> *regs);
>> +	long		(*machine_check_early)(struct pt_regs
>> *regs); 
>>  	/* Called during machine check exception to retrive fixup
>> address. */ bool		(*mce_check_early_recovery)(struct
>> pt_regs *regs); diff --git a/arch/powerpc/kernel/exceptions-64s.S
>> b/arch/powerpc/kernel/exceptions-64s.S index
>> 285c6465324a..cb06f219570a 100644 ---
>> a/arch/powerpc/kernel/exceptions-64s.S +++
>> b/arch/powerpc/kernel/exceptions-64s.S @@ -332,6 +332,9 @@
>> TRAMP_REAL_BEGIN(machine_check_pSeries) machine_check_fwnmi:
>>  	SET_SCRATCH0(r13)		/* save r13 */
>>  	EXCEPTION_PROLOG_0(PACA_EXMC)
>> +BEGIN_FTR_SECTION
>> +	b	machine_check_pSeries_early
>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>  machine_check_pSeries_0:
>>  	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>>  	/*
>> @@ -343,6 +346,90 @@ machine_check_pSeries_0:
>>  
>>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>>  
>> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> +BEGIN_FTR_SECTION
>> +	EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> +	mr	r10,r1			/* Save r1 */
>> +	ld	r1,PACAMCEMERGSP(r13)	/* Use MC emergency
>> stack */
>> +	subi	r1,r1,INT_FRAME_SIZE	/* alloc stack
>> frame		*/
>> +	mfspr	r11,SPRN_SRR0		/* Save SRR0 */
>> +	mfspr	r12,SPRN_SRR1		/* Save SRR1 */
>> +	EXCEPTION_PROLOG_COMMON_1()
>> +	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> +	EXCEPTION_PROLOG_COMMON_3(0x200)
>> +	addi	r3,r1,STACK_FRAME_OVERHEAD
>> +	BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI
>> */
>> +	ld	r12,_MSR(r1)
>> +	andi.	r11,r12,MSR_PR		/* See if coming
>> from user. */
>> +	bne	2f			/* continue in V mode
>> if we are. */ +
>> +	/*
>> +	 * At this point we are not sure about what context we come
>> from.
>> +	 * We may be in the middle of switching stack. r1 may not be
>> valid.
>> +	 * Hence stay on emergency stack, call
>> machine_check_exception and
>> +	 * return from the interrupt.
>> +	 * But before that, check if this is an un-recoverable
>> exception.
>> +	 * If yes, then stay on emergency stack and panic.
>> +	 */
>> +	andi.	r11,r12,MSR_RI
>> +	bne	1f
>> +
>> +	/*
>> +	 * Check if we have successfully handled/recovered from
>> error, if not
>> +	 * then stay on emergency stack and panic.
>> +	 */
>> +	cmpdi	r3,0		/* see if we handled MCE
>> successfully */
>> +	bne	1f		/* if handled then return from
>> interrupt */ +
>> +	LOAD_HANDLER(r10,unrecover_mce)
>> +	mtspr	SPRN_SRR0,r10
>> +	ld	r10,PACAKMSR(r13)
>> +	/*
>> +	 * We are going down. But there are chances that we might
>> get hit by
>> +	 * another MCE during panic path and we may run into
>> unstable state
>> +	 * with no way out. Hence, turn ME bit off while going down,
>> so that
>> +	 * when another MCE is hit during panic path, hypervisor will
>> +	 * power cycle the lpar, instead of getting into MCE loop.
>> +	 */
>> +	li	r3,MSR_ME
>> +	andc	r10,r10,r3		/* Turn off MSR_ME */
>> +	mtspr	SPRN_SRR1,r10
>> +	RFI_TO_KERNEL
>> +	b	.
>> +
>> +	/* Stay on emergency stack and return from interrupt. */
>> +1:	LOAD_HANDLER(r10,mce_return)
>> +	mtspr	SPRN_SRR0,r10
>> +	ld	r10,PACAKMSR(r13)
>> +	mtspr	SPRN_SRR1,r10
>> +	RFI_TO_KERNEL
>> +	b	.
> 
> I think that the logic should be inverted here. That is, we should check
> for unrecoverable and unhandled exceptions and jump to unrecover_mce if
> found, and fall through to mce_return otherwise.

Sure, I will make that change in the next revision.

Thanks,
-Mahesh.

> 
> Thanks
> 
> Michal
> 
> 
>> +
>> +	/* Move original SRR0 and SRR1 into the respective regs */
>> +2:	ld	r9,_MSR(r1)
>> +	mtspr	SPRN_SRR1,r9
>> +	ld	r3,_NIP(r1)
>> +	mtspr	SPRN_SRR0,r3
>> +	ld	r9,_CTR(r1)
>> +	mtctr	r9
>> +	ld	r9,_XER(r1)
>> +	mtxer	r9
>> +	ld	r9,_LINK(r1)
>> +	mtlr	r9
>> +	REST_GPR(0, r1)
>> +	REST_8GPRS(2, r1)
>> +	REST_GPR(10, r1)
>> +	ld	r11,_CCR(r1)
>> +	mtcr	r11
>> +	REST_GPR(11, r1)
>> +	REST_2GPRS(12, r1)
>> +	/* restore original r1. */
>> +	ld	r1,GPR1(r1)
>> +	SET_SCRATCH0(r13)		/* save r13 */
>> +	EXCEPTION_PROLOG_0(PACA_EXMC)
>> +	b	machine_check_pSeries_0
>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>> +
>>  EXC_COMMON_BEGIN(machine_check_common)
>>  	/*
>>  	 * Machine check is different because we use a different
>> @@ -536,6 +623,31 @@ EXC_COMMON_BEGIN(unrecover_mce)
>>  	bl	unrecoverable_exception
>>  	b	1b
>>  
>> +EXC_COMMON_BEGIN(mce_return)
>> +	/* Invoke machine_check_exception to print MCE event and
>> return. */
>> +	addi	r3,r1,STACK_FRAME_OVERHEAD
>> +	bl	machine_check_exception
>> +	ld	r9,_MSR(r1)
>> +	mtspr	SPRN_SRR1,r9
>> +	ld	r3,_NIP(r1)
>> +	mtspr	SPRN_SRR0,r3
>> +	ld	r9,_CTR(r1)
>> +	mtctr	r9
>> +	ld	r9,_XER(r1)
>> +	mtxer	r9
>> +	ld	r9,_LINK(r1)
>> +	mtlr	r9
>> +	REST_GPR(0, r1)
>> +	REST_8GPRS(2, r1)
>> +	REST_GPR(10, r1)
>> +	ld	r11,_CCR(r1)
>> +	mtcr	r11
>> +	REST_GPR(11, r1)
>> +	REST_2GPRS(12, r1)
>> +	/* restore original r1. */
>> +	ld	r1,GPR1(r1)
>> +	RFI_TO_KERNEL
>> +	b	.
>>  
>>  EXC_REAL(data_access, 0x300, 0x80)
>>  EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index efdd16a79075..ae17d8aa60c4 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -488,10 +488,19 @@ long machine_check_early(struct pt_regs *regs)
>>  {
>>  	long handled = 0;
>>  
>> -	__this_cpu_inc(irq_stat.mce_exceptions);
>> +	/*
>> +	 * For pSeries we count mce when we go into virtual mode
>> machine
>> +	 * check handler. Hence skip it. Also, We can't access per
>> cpu
>> +	 * variables in real mode for LPAR.
>> +	 */
>> +	if (early_cpu_has_feature(CPU_FTR_HVMODE))
>> +		__this_cpu_inc(irq_stat.mce_exceptions);
>>  
>> -	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>> -		handled = cur_cpu_spec->machine_check_early(regs);
>> +	/*
>> +	 * See if platform is capable of handling machine check.
>> +	 */
>> +	if (ppc_md.machine_check_early)
>> +		handled = ppc_md.machine_check_early(regs);
>>  	return handled;
>>  }
>>  
>> diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
>> index cb796724a6fc..e89f675f1b5e 100644
>> --- a/arch/powerpc/mm/slb.c
>> +++ b/arch/powerpc/mm/slb.c
>> @@ -145,6 +145,12 @@ void slb_flush_and_rebolt(void)
>>  	get_paca()->slb_cache_ptr = 0;
>>  }
>>  
>> +void slb_flush_and_rebolt_realmode(void)
>> +{
>> +	__slb_flush_and_rebolt();
>> +	get_paca()->slb_cache_ptr = 0;
>> +}
>> +
>>  void slb_vmalloc_update(void)
>>  {
>>  	unsigned long vflags;
>> diff --git a/arch/powerpc/platforms/powernv/setup.c
>> b/arch/powerpc/platforms/powernv/setup.c index
>> f96df0a25d05..b74c93bc2e55 100644 ---
>> a/arch/powerpc/platforms/powernv/setup.c +++
>> b/arch/powerpc/platforms/powernv/setup.c @@ -431,6 +431,16 @@ static
>> unsigned long pnv_get_proc_freq(unsigned int cpu) return ret_freq;
>>  }
>>  
>> +static long pnv_machine_check_early(struct pt_regs *regs)
>> +{
>> +	long handled = 0;
>> +
>> +	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
>> +		handled = cur_cpu_spec->machine_check_early(regs);
>> +
>> +	return handled;
>> +}
>> +
>>  define_machine(powernv) {
>>  	.name			= "PowerNV",
>>  	.probe			= pnv_probe,
>> @@ -442,6 +452,7 @@ define_machine(powernv) {
>>  	.machine_shutdown	= pnv_shutdown,
>>  	.power_save             = NULL,
>>  	.calibrate_decr		= generic_calibrate_decr,
>> +	.machine_check_early	= pnv_machine_check_early,
>>  #ifdef CONFIG_KEXEC_CORE
>>  	.kexec_cpu_down		= pnv_kexec_cpu_down,
>>  #endif
>> diff --git a/arch/powerpc/platforms/pseries/pseries.h
>> b/arch/powerpc/platforms/pseries/pseries.h index
>> 60db2ee511fb..ec2a5f61d4a4 100644 ---
>> a/arch/powerpc/platforms/pseries/pseries.h +++
>> b/arch/powerpc/platforms/pseries/pseries.h @@ -24,6 +24,7 @@ struct
>> pt_regs; 
>>  extern int pSeries_system_reset_exception(struct pt_regs *regs);
>>  extern int pSeries_machine_check_exception(struct pt_regs *regs);
>> +extern long pSeries_machine_check_realmode(struct pt_regs *regs);
>>  
>>  #ifdef CONFIG_SMP
>>  extern void smp_init_pseries(void);
>> diff --git a/arch/powerpc/platforms/pseries/ras.c
>> b/arch/powerpc/platforms/pseries/ras.c index
>> 851ce326874a..e4420f7c8fda 100644 ---
>> a/arch/powerpc/platforms/pseries/ras.c +++
>> b/arch/powerpc/platforms/pseries/ras.c @@ -427,6 +427,35 @@ int
>> pSeries_system_reset_exception(struct pt_regs *regs) return 0; /*
>> need to perform reset */ }
>>  
>> +static int mce_handle_error(struct rtas_error_log *errp)
>> +{
>> +	struct pseries_errorlog *pseries_log;
>> +	struct pseries_mc_errorlog *mce_log;
>> +	int disposition = rtas_error_disposition(errp);
>> +	uint8_t error_type;
>> +
>> +	if (!rtas_error_extended(errp))
>> +		goto out;
>> +
>> +	pseries_log = get_pseries_errorlog(errp,
>> PSERIES_ELOG_SECT_ID_MCE);
>> +	if (pseries_log == NULL)
>> +		goto out;
>> +
>> +	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
>> +	error_type = rtas_mc_error_type(mce_log);
>> +
>> +	if ((disposition == RTAS_DISP_NOT_RECOVERED) &&
>> +			(error_type == PSERIES_MC_ERROR_TYPE_SLB)) {
>> +		/* Store the old slb content someplace. */
>> +		slb_flush_and_rebolt_realmode();
>> +		disposition = RTAS_DISP_FULLY_RECOVERED;
>> +		rtas_set_disposition_recovered(errp);
>> +	}
>> +
>> +out:
>> +	return disposition;
>> +}
>> +
>>  /*
>>   * Process MCE rtas errlog event.
>>   */
>> @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct
>> pt_regs *regs) struct rtas_error_log *errp;
>>  
>>  	if (fwnmi_active) {
>> -		errp = fwnmi_get_errinfo(regs);
>>  		fwnmi_release_errinfo();
>> +		errp = fwnmi_get_errlog();
>>  		if (errp && recover_mce(regs, errp))
>>  			return 1;
>>  	}
>>  
>>  	return 0;
>>  }
>> +
>> +long pSeries_machine_check_realmode(struct pt_regs *regs)
>> +{
>> +	struct rtas_error_log *errp;
>> +	int disposition;
>> +
>> +	if (fwnmi_active) {
>> +		errp = fwnmi_get_errinfo(regs);
>> +		/*
>> +		 * Call to fwnmi_release_errinfo() in real mode
>> causes kernel
>> +		 * to panic. Hence we will call it as soon as we go
>> into
>> +		 * virtual mode.
>> +		 */
>> +		disposition = mce_handle_error(errp);
>> +		if (disposition == RTAS_DISP_FULLY_RECOVERED)
>> +			return 1;
>> +	}
>> +
>> +	return 0;
>> +}
>> diff --git a/arch/powerpc/platforms/pseries/setup.c
>> b/arch/powerpc/platforms/pseries/setup.c index
>> b42087cd8c6b..7a9421d089d8 100644 ---
>> a/arch/powerpc/platforms/pseries/setup.c +++
>> b/arch/powerpc/platforms/pseries/setup.c @@ -1000,6 +1000,7 @@
>> define_machine(pseries) { .calibrate_decr		=
>> generic_calibrate_decr, .progress		= rtas_progress,
>>  	.system_reset_exception = pSeries_system_reset_exception,
>> +	.machine_check_early	= pSeries_machine_check_realmode,
>>  	.machine_check_exception = pSeries_machine_check_exception,
>>  #ifdef CONFIG_KEXEC_CORE
>>  	.machine_kexec          = pSeries_machine_kexec,
>>
>>
> 



More information about the Linuxppc-dev mailing list