[Skiboot] [PATCH v4 1/2] opal-api: Add OPAL call to handle abnormal reboots.

Mahesh Jagannath Salgaonkar mahesh at linux.vnet.ibm.com
Thu Jul 30 13:37:58 AEST 2015


On 07/30/2015 06:38 AM, Samuel Mendoza-Jonas wrote:
> On 30/07/15 03:58, Mahesh J Salgaonkar wrote:
>> From: Vipin K Parashar <vipin at linux.vnet.ibm.com>
>>
>> This patch adds a new OPAL call OPAL_CEC_REBOOT2 which will
>> be used to handle abnormal reboot/termination by kernel host.
>>
>> This call will allow host kernel to pass reboot type and additional
>> debug data which needs to be captured/saved somewhere (for later
>> analysis) before going down.
>>
>> Currently it will support two reboot types (0). normal reboot, that
>> will behave similar to that of opal_cec_reboot() call, and
>> (1). platform error reboot, that will trigger a system checkstop
>> using xscom address and FIR bit information obtained via device-tree
>> property 'ibm,sw-checkstop-fir'.
>>
>> For unsupported reboot type, this call will do nothing and return
>> with OPAL_UNSUPPORTED.
> 
> I was going to ask whether this was the kind of behaviour we want but
> I think I answered in my own head - I suppose it would be up to the
> caller to check for OPAL_UNSUPPORTED and then presumably try to call
> the original OPAL_CEC_REBOOT call?

Yes. And in some cases the caller may decide to go down the panic path if
the reboot type is not supported. Also, the caller can print an error
message on the console, which will help us figure out why the caller had
to fall back to the normal reboot. In future, as we support more and more
reboot types, we may end up with different combinations of OPAL and host
kernel levels, in which case it may be better to let the caller decide
what to do. Any other suggestions are welcome.

Thanks,
-Mahesh.

> 
> A very minor comment below about log_add_section() but otherwise
> 
> Reviewed-by: Samuel Mendoza-Jonas <sam.mj at au1.ibm.com>
> 
>>
>> In future, we can overload this call to support additional reboot types.
>>
>> Signed-off-by: Vipin K Parashar <vipin at linux.vnet.ibm.com>
>> Signed-off-by: Mahesh Salgaonkar <mahesh at linux.vnet.ibm.com>
>> ---
>>  core/platform.c    |   39 +++++++++++++++++++++++++++++++++++++++
>>  hw/xscom.c         |   32 ++++++++++++++++++++++++++++++++
>>  include/errorlog.h |    3 +++
>>  include/opal-api.h |    9 ++++++++-
>>  include/xscom.h    |    1 +
>>  5 files changed, 83 insertions(+), 1 deletion(-)
>>
>> diff --git a/core/platform.c b/core/platform.c
>> index dec7e96..12c4ec2 100644
>> --- a/core/platform.c
>> +++ b/core/platform.c
>> @@ -21,9 +21,15 @@
>>  #include <timebase.h>
>>  #include <cpu.h>
>>  #include <chip.h>
>> +#include <xscom.h>
>> +#include <errorlog.h>
>>  
>>  struct platform	platform;
>>  
>> +DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC,
>> +		 OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_FAULT_RECTIFY_REBOOT,
>> +		 OPAL_ABNORMAL_POWER_OFF);
>> +
>>  /*
>>   * Various wrappers for platform functions
>>   */
>> @@ -53,6 +59,39 @@ static int64_t opal_cec_reboot(void)
>>  }
>>  opal_call(OPAL_CEC_REBOOT, opal_cec_reboot, 0);
>>  
>> +static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag)
>> +{
>> +	struct errorlog *buf;
>> +
>> +	switch (reboot_type) {
>> +	case OPAL_REBOOT_NORMAL:
>> +		return opal_cec_reboot();
>> +	case OPAL_REBOOT_PLATFORM_ERROR:
>> +		prlog(PR_EMERG,
>> +			  "OPAL: Reboot requested due to Platform error.");
>> +		buf = opal_elog_create(&e_info(OPAL_RC_ABNORMAL_REBOOT), 0);
>> +		if (buf) {
>> +			log_append_msg(buf,
>> +			  "OPAL: Reboot requested due to Platform error.");
>> +			if (diag) {
>> +				/* Add user section "DESC" */
>> +				log_add_section(buf, 0x44455350);
> 
> log_add_section() will use "DESC" for the tag if it's given just a 0,
> which means you don't need to carry around the magic 0x44455350 - just
> depends how explicit you want to be about how you're tagging it.
> 
>> +				log_append_data(buf, diag, strlen(diag));
>> +				log_commit(buf);
>> +			}
>> +		} else {
>> +			prerror("OPAL: failed to log an error\n");
>> +		}
>> +		return xscom_trigger_xstop();
>> +	default:
>> +		printf("OPAL: Unsupported reboot request %d\n", reboot_type);
>> +		return OPAL_UNSUPPORTED;
>> +		break;
>> +	}
>> +	return OPAL_SUCCESS;
>> +}
>> +opal_call(OPAL_CEC_REBOOT2, opal_cec_reboot2, 2);
>> +
>>  static void generic_platform_init(void)
>>  {
>>  	force_dummy_console();
>> diff --git a/hw/xscom.c b/hw/xscom.c
>> index ef3ff0f..ba9130a 100644
>> --- a/hw/xscom.c
>> +++ b/hw/xscom.c
>> @@ -53,6 +53,12 @@ DEFINE_LOG_ENTRY(OPAL_RC_XSCOM_RESET, OPAL_PLATFORM_ERR_EVT, OPAL_XSCOM,
>>  		OPAL_CEC_HARDWARE, OPAL_PREDICTIVE_ERR_GENERAL,
>>  		OPAL_NA);
>>  
>> +/* xscom details to trigger xstop */
>> +static struct {
>> +	uint64_t addr;
>> +	uint64_t fir_bit;
>> +} xstop_xscom;
>> +
>>  /*
>>   * Locking notes:
>>   *
>> @@ -483,9 +489,25 @@ static void xscom_init_chip_info(struct proc_chip *chip)
>>  	chip->ec_level |= (val >> 8) & 0xf;
>>  }
>>  
>> +/*
>> +* This function triggers xstop by writing to XSCOM.
>> +* Machine would enter xstop state post completion of this.
>> +*/
>> +int64_t xscom_trigger_xstop(void)
>> +{
>> +	int rc = OPAL_UNSUPPORTED;
>> +
>> +	if (xstop_xscom.addr)
>> +		rc = xscom_writeme(xstop_xscom.addr,
>> +				PPC_BIT(xstop_xscom.fir_bit));
>> +
>> +	return rc;
>> +}
>> +
>>  void xscom_init(void)
>>  {
>>  	struct dt_node *xn;
>> +	const struct dt_property *p;
>>  
>>  	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
>>  		uint32_t gcid = dt_get_chip_id(xn);
>> @@ -521,6 +543,16 @@ void xscom_init(void)
>>  		       chip->ec_level >> 4,
>>  		       chip->ec_level & 0xf);
>>  	}
>> +
>> +	/* Collect details to trigger xstop via XSCOM write */
>> +	p = dt_find_property(dt_root, "ibm,sw-checkstop-fir");
>> +	if (p) {
>> +		xstop_xscom.addr = dt_property_get_cell(p, 0);
>> +		xstop_xscom.fir_bit = dt_property_get_cell(p, 1);
>> +		prlog(PR_INFO, "XSTOP: XSCOM addr = 0x%llx, FIR bit = %lld\n",
>> +					xstop_xscom.addr, xstop_xscom.fir_bit);
>> +	} else
>> +		prlog(PR_INFO, "XSTOP: ibm,sw-checkstop-fir prop not found\n");
>>  }
>>  
>>  void xscom_used_by_console(void)
>> diff --git a/include/errorlog.h b/include/errorlog.h
>> index b908fd4..1499587 100644
>> --- a/include/errorlog.h
>> +++ b/include/errorlog.h
>> @@ -322,6 +322,9 @@ enum opal_reasoncode {
>>  /* IPMI */
>>  	OPAL_RC_IPMI_REQ	= OPAL_IP | 0x10,
>>  	OPAL_RC_IPMI_RESP	= OPAL_IP | 0x11,
>> +
>> +/* Platform error */
>> +	OPAL_RC_ABNORMAL_REBOOT	= OPAL_CE | 0x10,
>>  };
>>  
>>  #define DEFINE_LOG_ENTRY(reason, type, id, subsys,			\
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index bfad589..e22370f 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -161,7 +161,8 @@
>>  #define OPAL_PRD_MSG				113
>>  #define OPAL_LEDS_GET_INDICATOR			114
>>  #define OPAL_LEDS_SET_INDICATOR			115
>> -#define OPAL_LAST				115
>> +#define OPAL_CEC_REBOOT2			116
>> +#define OPAL_LAST				116
>>  
>>  /* Device tree flags */
>>  
>> @@ -972,6 +973,12 @@ struct opal_i2c_request {
>>  	__be64 buffer_ra;		/* Buffer real address */
>>  };
>>  
>> +/* Argument to OPAL_CEC_REBOOT2() */
>> +enum {
>> +	OPAL_REBOOT_NORMAL = 0,
>> +	OPAL_REBOOT_PLATFORM_ERROR,
>> +};
>> +
>>  #endif /* __ASSEMBLY__ */
>>  
>>  #endif /* __OPAL_API_H */
>> diff --git a/include/xscom.h b/include/xscom.h
>> index a841261..09db99b 100644
>> --- a/include/xscom.h
>> +++ b/include/xscom.h
>> @@ -179,5 +179,6 @@ extern void xscom_used_by_console(void);
>>  extern bool xscom_ok(void);
>>  
>>  extern int64_t xscom_read_cfam_chipid(uint32_t partid, uint32_t *chip_id);
>> +extern int64_t xscom_trigger_xstop(void);
>>  
>>  #endif /* __XSCOM_H */
>>
>> _______________________________________________
>> Skiboot mailing list
>> Skiboot at lists.ozlabs.org
>> https://lists.ozlabs.org/listinfo/skiboot
>>
> 
> 



More information about the Skiboot mailing list