[PATCH v3] powerpc/pseries: Avoid using addr_to_pfn in realmode

Ganesh ganeshgr at linux.ibm.com
Wed Jul 22 20:37:48 AEST 2020



On 7/21/20 3:38 PM, Nicholas Piggin wrote:
> Excerpts from Ganesh Goudar's message of July 20, 2020 6:03 pm:
>> When an UE or memory error exception is encountered the MCE handler
>> tries to find the pfn using addr_to_pfn() which takes effective
>> address as an argument, later pfn is used to poison the page where
>> memory error occurred, recent rework in this area made addr_to_pfn
>> to run in realmode, which can be fatal as it may try to access
>> memory outside RMO region.
>>
>> To fix this have separate functions for realmode and virtual mode
>> handling and let addr_to_pfn to run in virtual mode.
> You didn't really explain what you moved around. You added some
> helper functions, but what does it actually do differently now? Can you
> explain that in the changelog?

Sure, I'll rephrase the changelog. In this patch I have moved everything that
we can, and must, do in virtual mode into a new helper function that runs in
virtual mode: filling in the MCE error info, calling addr_to_pfn(), and calling
save_mce_event().

>
> Thanks,
> Nick
>
>> Without this fix following kernel crash is seen on hitting UE.
>>
>> [  485.128036] Oops: Kernel access of bad area, sig: 11 [#1]
>> [  485.128040] LE SMP NR_CPUS=2048 NUMA pSeries
>> [  485.128047] Modules linked in:
>> [  485.128067] CPU: 15 PID: 6536 Comm: insmod Kdump: loaded Tainted: G OE 5.7.0 #22
>> [  485.128074] NIP:  c00000000009b24c LR: c0000000000398d8 CTR: c000000000cd57c0
>> [  485.128078] REGS: c000000003f1f970 TRAP: 0300   Tainted: G OE (5.7.0)
>> [  485.128082] MSR:  8000000000001003 <SF,ME,RI,LE>  CR: 28008284  XER: 00000001
>> [  485.128088] CFAR: c00000000009b190 DAR: c0000001fab00000 DSISR: 40000000 IRQMASK: 1
>> [  485.128088] GPR00: 0000000000000001 c000000003f1fbf0 c000000001634300 0000b0fa01000000
>> [  485.128088] GPR04: d000000002220000 0000000000000000 00000000fab00000 0000000000000022
>> [  485.128088] GPR08: c0000001fab00000 0000000000000000 c0000001fab00000 c000000003f1fc14
>> [  485.128088] GPR12: 0000000000000008 c000000003ff5880 d000000002100008 0000000000000000
>> [  485.128088] GPR16: 000000000000ff20 000000000000fff1 000000000000fff2 d0000000021a1100
>> [  485.128088] GPR20: d000000002200000 c00000015c893c50 c000000000d49b28 c00000015c893c50
>> [  485.128088] GPR24: d0000000021a0d08 c0000000014e5da8 d0000000021a0818 000000000000000a
>> [  485.128088] GPR28: 0000000000000008 000000000000000a c0000000017e2970 000000000000000a
>> [  485.128125] NIP [c00000000009b24c] __find_linux_pte+0x11c/0x310
>> [  485.128130] LR [c0000000000398d8] addr_to_pfn+0x138/0x170
>> [  485.128133] Call Trace:
>> [  485.128135] Instruction dump:
>> [  485.128138] 3929ffff 7d4a3378 7c883c36 7d2907b4 794a1564 7d294038 794af082 3900ffff
>> [  485.128144] 79291f24 790af00e 78e70020 7d095214 <7c69502a> 2fa30000 419e011c 70690040
>> [  485.128152] ---[ end trace d34b27e29ae0e340 ]---
>>
>> Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
>> ---
>> V2: Leave bare metal code and save_mce_event as is.
>>
>> V3: Have separate functions for realmode and virtual mode handling.
>> ---
>>   arch/powerpc/platforms/pseries/ras.c | 119 ++++++++++++++++-----------
>>   1 file changed, 70 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
>> index f3736fcd98fc..32fe3fad86b8 100644
>> --- a/arch/powerpc/platforms/pseries/ras.c
>> +++ b/arch/powerpc/platforms/pseries/ras.c
>> @@ -522,18 +522,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
>>   	return 0; /* need to perform reset */
>>   }
>>   
>> +static int mce_handle_err_realmode(int disposition, u8 error_type)
>> +{
>> +#ifdef CONFIG_PPC_BOOK3S_64
>> +	if (disposition == RTAS_DISP_NOT_RECOVERED) {
>> +		switch (error_type) {
>> +		case	MC_ERROR_TYPE_SLB:
>> +		case	MC_ERROR_TYPE_ERAT:
>> +			/*
>> +			 * Store the old slb content in paca before flushing.
>> +			 * Print this when we go to virtual mode.
>> +			 * There are chances that we may hit MCE again if there
>> +			 * is a parity error on the SLB entry we trying to read
>> +			 * for saving. Hence limit the slb saving to single
>> +			 * level of recursion.
>> +			 */
>> +			if (local_paca->in_mce == 1)
>> +				slb_save_contents(local_paca->mce_faulty_slbs);
>> +			flush_and_reload_slb();
>> +			disposition = RTAS_DISP_FULLY_RECOVERED;
>> +			break;
>> +		default:
>> +			break;
>> +		}
>> +	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
>> +		/* Platform corrected itself but could be degraded */
>> +		pr_err("MCE: limited recovery, system may be degraded\n");
>> +		disposition = RTAS_DISP_FULLY_RECOVERED;
>> +	}
>> +#endif
>> +	return disposition;
>> +}
>>   
>> -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>> +static int mce_handle_err_virtmode(struct pt_regs *regs,
>> +				   struct rtas_error_log *errp,
>> +				   struct pseries_mc_errorlog *mce_log,
>> +				   int disposition)
>>   {
>>   	struct mce_error_info mce_err = { 0 };
>> -	unsigned long eaddr = 0, paddr = 0;
>> -	struct pseries_errorlog *pseries_log;
>> -	struct pseries_mc_errorlog *mce_log;
>> -	int disposition = rtas_error_disposition(errp);
>>   	int initiator = rtas_error_initiator(errp);
>>   	int severity = rtas_error_severity(errp);
>> +	unsigned long eaddr = 0, paddr = 0;
>>   	u8 error_type, err_sub_type;
>>   
>> +	if (!mce_log)
>> +		goto out;
>> +
>> +	error_type = mce_log->error_type;
>> +	err_sub_type = rtas_mc_error_sub_type(mce_log);
>> +
>>   	if (initiator == RTAS_INITIATOR_UNKNOWN)
>>   		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
>>   	else if (initiator == RTAS_INITIATOR_CPU)
>> @@ -572,18 +609,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>>   	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
>>   	mce_err.error_class = MCE_ECLASS_UNKNOWN;
>>   
>> -	if (!rtas_error_extended(errp))
>> -		goto out;
>> -
>> -	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
>> -	if (pseries_log == NULL)
>> -		goto out;
>> -
>> -	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
>> -	error_type = mce_log->error_type;
>> -	err_sub_type = rtas_mc_error_sub_type(mce_log);
>> -
>> -	switch (mce_log->error_type) {
>> +	switch (error_type) {
>>   	case MC_ERROR_TYPE_UE:
>>   		mce_err.error_type = MCE_ERROR_TYPE_UE;
>>   		mce_common_process_ue(regs, &mce_err);
>> @@ -683,37 +709,32 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>>   		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
>>   		break;
>>   	}
>> +out:
>> +	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
>> +		       &mce_err, regs->nip, eaddr, paddr);
>> +	return disposition;
>> +}
>>   
>> -#ifdef CONFIG_PPC_BOOK3S_64
>> -	if (disposition == RTAS_DISP_NOT_RECOVERED) {
>> -		switch (error_type) {
>> -		case	MC_ERROR_TYPE_SLB:
>> -		case	MC_ERROR_TYPE_ERAT:
>> -			/*
>> -			 * Store the old slb content in paca before flushing.
>> -			 * Print this when we go to virtual mode.
>> -			 * There are chances that we may hit MCE again if there
>> -			 * is a parity error on the SLB entry we trying to read
>> -			 * for saving. Hence limit the slb saving to single
>> -			 * level of recursion.
>> -			 */
>> -			if (local_paca->in_mce == 1)
>> -				slb_save_contents(local_paca->mce_faulty_slbs);
>> -			flush_and_reload_slb();
>> -			disposition = RTAS_DISP_FULLY_RECOVERED;
>> -			break;
>> -		default:
>> -			break;
>> -		}
>> -	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
>> -		/* Platform corrected itself but could be degraded */
>> -		printk(KERN_ERR "MCE: limited recovery, system may "
>> -		       "be degraded\n");
>> -		disposition = RTAS_DISP_FULLY_RECOVERED;
>> -	}
>> -#endif
>> +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>> +{
>> +	struct pseries_errorlog *pseries_log;
>> +	struct pseries_mc_errorlog *mce_log = NULL;
>> +	int disposition = rtas_error_disposition(errp);
>> +	u8 error_type, err_sub_type;
>> +
>> +	if (!rtas_error_extended(errp))
>> +		goto out;
>> +
>> +	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
>> +	if (!pseries_log)
>> +		goto out;
>> +
>> +	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
>> +	error_type = mce_log->error_type;
>> +	err_sub_type = rtas_mc_error_sub_type(mce_log);
>> +
>> +	disposition = mce_handle_err_realmode(disposition, error_type);
>>   
>> -out:
>>   	/*
>>   	 * Enable translation as we will be accessing per-cpu variables
>>   	 * in save_mce_event() which may fall outside RMO region, also
>> @@ -724,10 +745,10 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>>   	 * Note: All the realmode handling like flushing SLB entries for
>>   	 *       SLB multihit is done by now.
>>   	 */
>> +out:
>>   	mtmsr(mfmsr() | MSR_IR | MSR_DR);
>> -	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
>> -			&mce_err, regs->nip, eaddr, paddr);
>> -
>> +	disposition = mce_handle_err_virtmode(regs, errp, mce_log,
>> +					      disposition);
>>   	return disposition;
>>   }
>>   
>> -- 
>> 2.17.2
>>
>>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ozlabs.org/pipermail/linuxppc-dev/attachments/20200722/7f5b1e96/attachment-0001.htm>


More information about the Linuxppc-dev mailing list