[PATCH v3 2/2] pseries/mce: Refactor the pseries mce handling code
Nicholas Piggin
npiggin at gmail.com
Thu Nov 25 00:10:27 AEDT 2021
Excerpts from Ganesh Goudar's message of November 24, 2021 7:55 pm:
> Now that we are no longer switching on the mmu in realmode
> mce handler, Revert the commit 4ff753feab02("powerpc/pseries:
> Avoid using addr_to_pfn in real mode") partially, which
> introduced functions mce_handle_err_virtmode/realmode() to
> separate mce handler code which needed translation to enabled.
>
> Signed-off-by: Ganesh Goudar <ganeshgr at linux.ibm.com>
> ---
> arch/powerpc/platforms/pseries/ras.c | 122 +++++++++++----------------
> 1 file changed, 49 insertions(+), 73 deletions(-)
>
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
> index 8613f9cc5798..62e1519b8355 100644
> --- a/arch/powerpc/platforms/pseries/ras.c
> +++ b/arch/powerpc/platforms/pseries/ras.c
> @@ -511,58 +511,17 @@ int pSeries_system_reset_exception(struct pt_regs *regs)
> return 0; /* need to perform reset */
> }
>
> -static int mce_handle_err_realmode(int disposition, u8 error_type)
> -{
> -#ifdef CONFIG_PPC_BOOK3S_64
> - if (disposition == RTAS_DISP_NOT_RECOVERED) {
> - switch (error_type) {
> - case MC_ERROR_TYPE_ERAT:
> - flush_erat();
> - disposition = RTAS_DISP_FULLY_RECOVERED;
> - break;
> - case MC_ERROR_TYPE_SLB:
> - /*
> - * Store the old slb content in paca before flushing.
> - * Print this when we go to virtual mode.
> - * There are chances that we may hit MCE again if there
> - * is a parity error on the SLB entry we trying to read
> - * for saving. Hence limit the slb saving to single
> - * level of recursion.
> - */
> - if (local_paca->in_mce == 1)
> - slb_save_contents(local_paca->mce_faulty_slbs);
> - flush_and_reload_slb();
> - disposition = RTAS_DISP_FULLY_RECOVERED;
> - break;
> - default:
> - break;
> - }
> - } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
> - /* Platform corrected itself but could be degraded */
> - pr_err("MCE: limited recovery, system may be degraded\n");
> - disposition = RTAS_DISP_FULLY_RECOVERED;
> - }
> -#endif
> - return disposition;
> -}
> -
> -static int mce_handle_err_virtmode(struct pt_regs *regs,
> - struct rtas_error_log *errp,
> - struct pseries_mc_errorlog *mce_log,
> - int disposition)
> +static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp)
> {
> struct mce_error_info mce_err = { 0 };
> + unsigned long eaddr = 0, paddr = 0;
> + struct pseries_errorlog *pseries_log;
> + struct pseries_mc_errorlog *mce_log;
> + int disposition = rtas_error_disposition(errp);
> int initiator = rtas_error_initiator(errp);
> int severity = rtas_error_severity(errp);
> - unsigned long eaddr = 0, paddr = 0;
> u8 error_type, err_sub_type;
>
> - if (!mce_log)
> - goto out;
> -
> - error_type = mce_log->error_type;
> - err_sub_type = rtas_mc_error_sub_type(mce_log);
> -
> if (initiator == RTAS_INITIATOR_UNKNOWN)
> mce_err.initiator = MCE_INITIATOR_UNKNOWN;
> else if (initiator == RTAS_INITIATOR_CPU)
> @@ -588,6 +547,8 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
> mce_err.severity = MCE_SEV_SEVERE;
> else if (severity == RTAS_SEVERITY_ERROR)
> mce_err.severity = MCE_SEV_SEVERE;
> + else if (severity == RTAS_SEVERITY_FATAL)
> + mce_err.severity = MCE_SEV_FATAL;
> else
> mce_err.severity = MCE_SEV_FATAL;
>
What's this hunk for?
> @@ -599,7 +560,18 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
> mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
> mce_err.error_class = MCE_ECLASS_UNKNOWN;
>
> - switch (error_type) {
> + if (!rtas_error_extended(errp))
> + goto out;
> +
> + pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
> + if (!pseries_log)
> + goto out;
> +
> + mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
> + error_type = mce_log->error_type;
> + err_sub_type = rtas_mc_error_sub_type(mce_log);
> +
> + switch (mce_log->error_type) {
> case MC_ERROR_TYPE_UE:
> mce_err.error_type = MCE_ERROR_TYPE_UE;
> mce_common_process_ue(regs, &mce_err);
> @@ -692,41 +664,45 @@ static int mce_handle_err_virtmode(struct pt_regs *regs,
> mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
> break;
> case MC_ERROR_TYPE_I_CACHE:
> - mce_err.error_type = MCE_ERROR_TYPE_ICACHE;
> + mce_err.error_type = MCE_ERROR_TYPE_DCACHE;
> break;
And this one. Doesn't look right.
> case MC_ERROR_TYPE_UNKNOWN:
> default:
> mce_err.error_type = MCE_ERROR_TYPE_UNKNOWN;
> break;
> }
> +
> +#ifdef CONFIG_PPC_BOOK3S_64
> + if (disposition == RTAS_DISP_NOT_RECOVERED) {
> + switch (error_type) {
> + case MC_ERROR_TYPE_SLB:
> + case MC_ERROR_TYPE_ERAT:
> + /*
> + * Store the old slb content in paca before flushing.
> + * Print this when we go to virtual mode.
> + * There are chances that we may hit MCE again if there
> + * is a parity error on the SLB entry we trying to read
> + * for saving. Hence limit the slb saving to single
> + * level of recursion.
> + */
> + if (local_paca->in_mce == 1)
> + slb_save_contents(local_paca->mce_faulty_slbs);
> + flush_and_reload_slb();
> + disposition = RTAS_DISP_FULLY_RECOVERED;
> + break;
> + default:
> + break;
> + }
> + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) {
> + /* Platform corrected itself but could be degraded */
> + pr_err("MCE: limited recovery, system may be degraded\n");
> + disposition = RTAS_DISP_FULLY_RECOVERED;
> + }
I would prefer if you just keep the mce_handle_err_realmode function
(can rename it if you want). It's actually changed a bit since the
patch being reverted so we don't want to undo that.
Thanks,
Nick
More information about the Linuxppc-dev
mailing list