[Skiboot] [RFC PATCH 1/2] hmi: Don't re-read HMER multiple times

Wed Jan 17 16:27:06 AEDT 2018

On 01/16/2018 10:15 AM, Benjamin Herrenschmidt wrote:
> We want to make sure all reporting and actions are based
> upon the same snapshot of HMER, in case the hardware sets
> additional bits while we are in OPAL.
> 
> Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>

Acked-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>

> ---
>  core/hmi.c | 35 ++++++++++++++---------------------
>  1 file changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/core/hmi.c b/core/hmi.c
> index eb4faa38..5642bd0b 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -719,16 +719,13 @@ static int get_split_core_mode(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p8(void)
> +static void pre_recovery_cleanup_p8(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	uint32_t sibling_thread_mask;
>  	int split_core_mode, subcore_id, thread_id, threads_per_core;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -826,15 +823,12 @@ static void pre_recovery_cleanup_p8(void)
>   *	- SPR_TFMR_TB_RESIDUE_ERR
>   *	- SPR_TFMR_HDEC_PARITY_ERROR
>   */
> -static void pre_recovery_cleanup_p9(void)
> +static void pre_recovery_cleanup_p9(uint64_t hmer)
>  {
> -	uint64_t hmer;
>  	uint64_t tfmr;
>  	int threads_per_core = cpu_thread_count;
>  	int i;
> 
> -	hmer = mfspr(SPR_HMER);
> -
>  	/* exit if it is not Time facility error. */
>  	if (!(hmer & SPR_HMER_TFAC_ERROR))
>  		return;
> @@ -912,12 +906,12 @@ static void pre_recovery_cleanup_p9(void)
>  	wait_for_cleanup_complete();
>  }
> 
> -static void pre_recovery_cleanup(void)
> +static void pre_recovery_cleanup(uint64_t hmer)
>  {
>  	if (proc_gen == proc_gen_p9)
> -		return pre_recovery_cleanup_p9();
> +		return pre_recovery_cleanup_p9(hmer);
>  	else
> -		return pre_recovery_cleanup_p8();
> +		return pre_recovery_cleanup_p8(hmer);
>  }
> 
>  static void hmi_exit(void)
> @@ -926,9 +920,8 @@ static void hmi_exit(void)
>  	*(this_cpu()->core_hmi_state_ptr) &= ~(this_cpu()->thread_mask);
>  }
> 
> -static void hmi_print_debug(const uint8_t *msg)
> +static void hmi_print_debug(const uint8_t *msg, uint64_t hmer)
>  {
> -	uint64_t hmer = mfspr(SPR_HMER);
>  	const char *loc;
>  	uint32_t core_id, thread_index;
> 
> @@ -959,7 +952,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	 * In case of split core, some of the Timer facility errors need
>  	 * cleanup to be done before we proceed with the error recovery.
>  	 */
> -	pre_recovery_cleanup();
> +	pre_recovery_cleanup(hmer);
> 
>  	lock(&hmi_lock);
>  	/*
> @@ -978,7 +971,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done.");
> +		hmi_print_debug("Processor recovery Done.", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) {
>  		hmer &= ~SPR_HMER_PROC_RECV_ERROR_MASKED;
> @@ -987,7 +980,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED;
>  			queue_hmi_event(hmi_evt, recover);
>  		}
> -		hmi_print_debug("Processor recovery Done (masked).");
> +		hmi_print_debug("Processor recovery Done (masked).", hmer);
>  	}
>  	if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
>  		hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
> @@ -997,13 +990,13 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  			queue_hmi_event(hmi_evt, recover);
>  		}
>  		hmi_print_debug("Processor recovery occurred again before"
> -				"bit2 was cleared\n");
> +				"bit2 was cleared\n", hmer);
>  	}
>  	/* Assert if we see malfunction alert, we can not continue. */
>  	if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
>  		hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
> 
> -		hmi_print_debug("Malfunction Alert");
> +		hmi_print_debug("Malfunction Alert", hmer);
>  		if (hmi_evt)
>  			decode_malfunction(hmi_evt);
>  	}
> @@ -1012,7 +1005,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
>  		hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
> 
> -		hmi_print_debug("Hypervisor resource error");
> +		hmi_print_debug("Hypervisor resource error", hmer);
>  		recover = 0;
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
> @@ -1028,7 +1021,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  	if (hmer & SPR_HMER_TFAC_ERROR) {
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
> 
> -		hmi_print_debug("Timer Facility Error");
> +		hmi_print_debug("Timer Facility Error", hmer);
> 
>  		hmer &= ~SPR_HMER_TFAC_ERROR;
>  		recover = chiptod_recover_tb_errors();
> @@ -1043,7 +1036,7 @@ int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt)
>  		tfmr = mfspr(SPR_TFMR);		/* save original TFMR */
>  		hmer &= ~SPR_HMER_TFMR_PARITY_ERROR;
> 
> -		hmi_print_debug("TFMR parity Error");
> +		hmi_print_debug("TFMR parity Error", hmer);
>  		recover = chiptod_recover_tb_errors();
>  		if (hmi_evt) {
>  			hmi_evt->severity = OpalHMI_SEV_FATAL;
>