[PATCH v3] powerpc/pseries: Avoid using addr_to_pfn in realmode

Nicholas Piggin npiggin at gmail.com
Tue Jul 21 20:08:19 AEST 2020


Excerpts from Ganesh Goudar's message of July 20, 2020 6:03 pm:
> When a UE or memory error exception is encountered, the MCE handler
> tries to find the pfn using addr_to_pfn(), which takes an effective
> address as an argument. The pfn is later used to poison the page where
> the memory error occurred. Recent rework in this area made
> addr_to_pfn() run in realmode, which can be fatal as it may try to
> access memory outside the RMO region.
> 
> To fix this, have separate functions for realmode and virtual mode
> handling, and let addr_to_pfn() run in virtual mode.

You didn't really explain what you moved around. You added some
helper functions, but what does the code actually do differently now?
Can you explain that in the changelog?

Thanks,
Nick

> 
> Without this fix, the following kernel crash is seen on hitting a UE.
> 
> [  485.128036] Oops: Kernel access of bad area, sig: 11 [#1]
> [  485.128040] LE SMP NR_CPUS=2048 NUMA pSeries
> [  485.128047] Modules linked in:
> [  485.128067] CPU: 15 PID: 6536 Comm: insmod Kdump: loaded Tainted: G OE 5.7.0 #22
> [  485.128074] NIP:  c00000000009b24c LR: c0000000000398d8 CTR: c000000000cd57c0
> [  485.128078] REGS: c000000003f1f970 TRAP: 0300   Tainted: G OE (5.7.0)
> [  485.128082] MSR:  8000000000001003 <SF,ME,RI,LE>  CR: 28008284  XER: 00000001
> [  485.128088] CFAR: c00000000009b190 DAR: c0000001fab00000 DSISR: 40000000 IRQMASK: 1
> [  485.128088] GPR00: 0000000000000001 c000000003f1fbf0 c000000001634300 0000b0fa01000000
> [  485.128088] GPR04: d000000002220000 0000000000000000 00000000fab00000 0000000000000022
> [  485.128088] GPR08: c0000001fab00000 0000000000000000 c0000001fab00000 c000000003f1fc14
> [  485.128088] GPR12: 0000000000000008 c000000003ff5880 d000000002100008 0000000000000000
> [  485.128088] GPR16: 000000000000ff20 000000000000fff1 000000000000fff2 d0000000021a1100
> [  485.128088] GPR20: d000000002200000 c00000015c893c50 c000000000d49b28 c00000015c893c50
> [  485.128088] GPR24: d0000000021a0d08 c0000000014e5da8 d0000000021a0818 000000000000000a
> [  485.128088] GPR28: 0000000000000008 000000000000000a c0000000017e2970 000000000000000a
> [  485.128125] NIP [c00000000009b24c] __find_linux_pte+0x11c/0x310
> [  485.128130] LR [c0000000000398d8] addr_to_pfn+0x138/0x170
> [  485.128133] Call Trace:
> [  485.128135] Instruction dump:
> [  485.128138] 3929ffff 7d4a3378 7c883c36 7d2907b4 794a1564 7d294038 794af082 3900ffff
> [  485.128144] 79291f24 790af00e 78e70020 7d095214 <7c69502a> 2fa30000 419e011c 70690040
> [  485.128152] ---[ end trace d34b27e29ae0e340 ]---
> 
> Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
> ---
> V2: Leave bare metal code and save_mce_event as is.
> 
> V3: Have separate functions for realmode and virtual mode handling.
> ---
>  arch/powerpc/platforms/pseries/ras.c | 119 ++++++++++++++++-----------
>  1 file changed, 70 insertions(+), 49 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index f3736fcd98fc..32fe3fad86b8 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -522,18 +522,55 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
>  	return 0; /* need to perform reset */
>  }
>  
> +static int mce_handle_err_realmode(int disposition, u8 error_type)
> +{
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	if (disposition == RTAS_DISP_NOT_RECOVERED) {
> +		switch (error_type) {
> +		case	MC_ERROR_TYPE_SLB:
> +		case	MC_ERROR_TYPE_ERAT:
> +			/*
> +			 * Store the old slb content in paca before flushing.
> +			 * Print this when we go to virtual mode.
> +			 * There are chances that we may hit MCE again if there
> +			 * is a parity error on the SLB entry we trying to read
> +			 * for saving. Hence limit the slb saving to single
> +			 * level of recursion.
> +			 */
> +			if (local_paca->in_mce == 1)
> +				slb_save_contents(local_paca->mce_faulty_slbs);
> +			flush_and_reload_slb();
> +			disposition = RTAS_DISP_FULLY_RECOVERED;
> +			break;
> +		default:
> +			break;
> +		}
> +	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
> +		/* Platform corrected itself but could be degraded */
> +		pr_err("MCE: limited recovery, system may be degraded\n");
> +		disposition = RTAS_DISP_FULLY_RECOVERED;
> +	}
> +#endif
> +	return disposition;
> +}
>  
> -static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
> +static int mce_handle_err_virtmode(struct pt_regs *regs,
> +				   struct rtas_error_log *errp,
> +				   struct pseries_mc_errorlog *mce_log,
> +				   int disposition)
>  {
>  	struct mce_error_info mce_err = { 0 };
> -	unsigned long eaddr = 0, paddr = 0;
> -	struct pseries_errorlog *pseries_log;
> -	struct pseries_mc_errorlog *mce_log;
> -	int disposition = rtas_error_disposition(errp);
>  	int initiator = rtas_error_initiator(errp);
>  	int severity = rtas_error_severity(errp);
> +	unsigned long eaddr = 0, paddr = 0;
>  	u8 error_type, err_sub_type;
>  
> +	if (!mce_log)
> +		goto out;
> +
> +	error_type = mce_log->error_type;
> +	err_sub_type = rtas_mc_error_sub_type(mce_log);
> +
>  	if (initiator == RTAS_INITIATOR_UNKNOWN)
>  		mce_err.initiator = MCE_INITIATOR_UNKNOWN;
>  	else if (initiator == RTAS_INITIATOR_CPU)
> @@ -572,18 +609,7 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>  	mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
>  	mce_err.error_class = MCE_ECLASS_UNKNOWN;
>  
> -	if (!rtas_error_extended(errp))
> -		goto out;
> -
> -	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
> -	if (pseries_log == NULL)
> -		goto out;
> -
> -	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
> -	error_type = mce_log->error_type;
> -	err_sub_type = rtas_mc_error_sub_type(mce_log);
> -
> -	switch (mce_log->error_type) {
> +	switch (error_type) {
>  	case MC_ERROR_TYPE_UE:
>  		mce_err.error_type = MCE_ERROR_TYPE_UE;
>  		mce_common_process_ue(regs, &mce_err);
> @@ -683,37 +709,32 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>  		mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
>  		break;
>  	}
> +out:
> +	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
> +		       &mce_err, regs->nip, eaddr, paddr);
> +	return disposition;
> +}
>  
> -#ifdef CONFIG_PPC_BOOK3S_64
> -	if (disposition == RTAS_DISP_NOT_RECOVERED) {
> -		switch (error_type) {
> -		case	MC_ERROR_TYPE_SLB:
> -		case	MC_ERROR_TYPE_ERAT:
> -			/*
> -			 * Store the old slb content in paca before flushing.
> -			 * Print this when we go to virtual mode.
> -			 * There are chances that we may hit MCE again if there
> -			 * is a parity error on the SLB entry we trying to read
> -			 * for saving. Hence limit the slb saving to single
> -			 * level of recursion.
> -			 */
> -			if (local_paca->in_mce == 1)
> -				slb_save_contents(local_paca->mce_faulty_slbs);
> -			flush_and_reload_slb();
> -			disposition = RTAS_DISP_FULLY_RECOVERED;
> -			break;
> -		default:
> -			break;
> -		}
> -	} else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
> -		/* Platform corrected itself but could be degraded */
> -		printk(KERN_ERR "MCE: limited recovery, system may "
> -		       "be degraded\n");
> -		disposition = RTAS_DISP_FULLY_RECOVERED;
> -	}
> -#endif
> +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
> +{
> +	struct pseries_errorlog *pseries_log;
> +	struct pseries_mc_errorlog *mce_log = NULL;
> +	int disposition = rtas_error_disposition(errp);
> +	u8 error_type, err_sub_type;
> +
> +	if (!rtas_error_extended(errp))
> +		goto out;
> +
> +	pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
> +	if (!pseries_log)
> +		goto out;
> +
> +	mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
> +	error_type = mce_log->error_type;
> +	err_sub_type = rtas_mc_error_sub_type(mce_log);
> +
> +	disposition = mce_handle_err_realmode(disposition, error_type);
>  
> -out:
>  	/*
>  	 * Enable translation as we will be accessing per-cpu variables
>  	 * in save_mce_event() which may fall outside RMO region, also
> @@ -724,10 +745,10 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
>  	 * Note: All the realmode handling like flushing SLB entries for
>  	 *       SLB multihit is done by now.
>  	 */
> +out:
>  	mtmsr(mfmsr() | MSR_IR | MSR_DR);
> -	save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED,
> -			&mce_err, regs->nip, eaddr, paddr);
> -
> +	disposition = mce_handle_err_virtmode(regs, errp, mce_log,
> +					      disposition);
>  	return disposition;
>  }
>  
> -- 
> 2.17.2
> 
> 


More information about the Linuxppc-dev mailing list