[Skiboot] [RFC PATCH 1/2] hmi: Don't re-read HMER multiple times
Mahesh Jagannath Salgaonkar
mahesh at linux.vnet.ibm.com
Wed Jan 17 16:27:06 AEDT 2018
On 01/16/2018 10:15 AM, Benjamin Herrenschmidt wrote:
> We want to make sure all reporting and actions are based
> upon the same snapshot of HMER in case bits get added
> by HW while we are in OPAL.
>
> Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Acked-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
> ---
> core/hmi.c | 35 ++++++++++++++---------------------
> 1 file changed, 14 insertions(+), 21 deletions(-)
>
> diff --git a/core/hmi.c b/core/hmi.c
> index eb4faa38..5642bd0b 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -719,16 +719,13 @@ static int get_split_core_mode(void)
> * - SPR_TFMR_TB_RESIDUE_ERR
> * - SPR_TFMR_HDEC_PARITY_ERROR
> */
> -static void pre_recovery_cleanup_p8(void)
> +static void pre_recovery_cleanup_p8(uint64_t hmer)
> {
> - uint64_t hmer;
> uint64_t tfmr;
> uint32_t sibling_thread_mask;
> int split_core_mode, subcore_id, thread_id, threads_per_core;
> int i;
>
> - hmer = mfspr(SPR_HMER);
> -
> /* exit if it is not Time facility error. */
> if (!(hmer & SPR_HMER_TFAC_ERROR))
> return;
> @@ -826,15 +823,12 @@ static void pre_recovery_cleanup_p8(void)
> * - SPR_TFMR_TB_RESIDUE_ERR
> * - SPR_TFMR_HDEC_PARITY_ERROR
> */
> -static void pre_recovery_cleanup_p9(void)
> +static void pre_recovery_cleanup_p9(uint64_t hmer)
> {
> - uint64_t hmer;
> uint64_t tfmr;
> int threads_per_core = cpu_thread_count;
> int i;
>
> - hmer = mfspr(SPR_HMER);
> -
> /* exit if it is not Time facility error. */
> if (!(hmer & SPR_HMER_TFAC_ERROR))
> return;
> @@ -912,12 +906,12 @@ static void pre_recovery_cleanup_p9(void)
> wait_for_cleanup_complete();
> }
>
> -static void pre_recovery_cleanup(void)
> +static void pre_recovery_cleanup(uint64_t hmer)
> {
> if (proc_gen == proc_gen_p9)
> - return pre_recovery_cleanup_p9();
> + return pre_recovery_cleanup_p9(hmer);
> else
> - return pre_recovery_cleanup_p8();
> + return pre_recovery_cleanup_p8(hmer);
> }
>
> static void hmi_exit(void)
> @@ -926,9 +920,8 @@ static void hmi_exit(void)
> *(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
> }
>
> -static void hmi_print_debug(const uint8_t *msg)
> +static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
> {
> - uint64_t hmer = mfspr(SPR_HMER);
> const char *loc;
> uint32_t core_id, thread_index;
>
> @@ -959,7 +952,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> * In case of split core, some of the Timer facility errors need
> * cleanup to be done before we proceed with the error recovery.
> */
> - pre_recovery_cleanup();
> + pre_recovery_cleanup(hmer);
>
> lock(&hmi_lock);
> /*
> @@ -978,7 +971,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
> queue_hmi_event(hmi_evt, recover);
> }
> - hmi_print_debug("Processor recovery Done.");
> + hmi_print_debug("Processor recovery Done.", hmer);
> }
> if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
> hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
> @@ -987,7 +980,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
> queue_hmi_event(hmi_evt, recover);
> }
> - hmi_print_debug("Processor recovery Done (masked).");
> + hmi_print_debug("Processor recovery Done (masked).", hmer);
> }
> if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
> hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
> @@ -997,13 +990,13 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> queue_hmi_event(hmi_evt, recover);
> }
> hmi_print_debug("Processor recovery occurred again before"
> - "bit2 was cleared\n");
> + "bit2 was cleared\n", hmer);
> }
> /* Assert if we see malfunction alert, we can not continue. */
> if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
> hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
>
> - hmi_print_debug("Malfunction Alert");
> + hmi_print_debug("Malfunction Alert", hmer);
> if (hmi_evt)
> decode_malfunction(hmi_evt);
> }
> @@ -1012,7 +1005,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
> hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
>
> - hmi_print_debug("Hypervisor resource error");
> + hmi_print_debug("Hypervisor resource error", hmer);
> recover = 0;
> if (hmi_evt) {
> hmi_evt->severity = OpalHMI_SEV_FATAL;
> @@ -1028,7 +1021,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> if (hmer & SPR_HMER_TFAC_ERROR) {
> tfmr = mfspr(SPR_TFMR); /* save original TFMR */
>
> - hmi_print_debug("Timer Facility Error");
> + hmi_print_debug("Timer Facility Error", hmer);
>
> hmer &= ~SPR_HMER_TFAC_ERROR;
> recover = chiptod_recover_tb_errors();
> @@ -1043,7 +1036,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
> tfmr = mfspr(SPR_TFMR); /* save original TFMR */
> hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
>
> - hmi_print_debug("TFMR parity Error");
> + hmi_print_debug("TFMR parity Error", hmer);
> recover = chiptod_recover_tb_errors();
> if (hmi_evt) {
> hmi_evt->severity = OpalHMI_SEV_FATAL;
>
More information about the Skiboot
mailing list