[Skiboot] [PATCH 2/2] Add SBE driver support

Fri Apr 13 19:17:22 AEST 2018

On Tue, 2018-04-03 at 17:25 +0530, Vasant Hegde wrote:
> SBE (Self Boot Engine) on P9 has two different jobs:
>   - Boot the chip up to the point the core is functional
>   - Provide various services like timer, scom, stash MPIPL, etc., at runtime
> 
> OPAL can communicate to SBE via a set of data and control registers provided
> by the PSU block in P9 chip.
>  - Four 8 byte registers for Host to send command packets to SBE
>  - Four 8 byte registers for SBE to send response packets to Host
>  - Two doorbell registers (1 on each side) to alert either party
>    when data is placed in above mentioned data register
> 
> Protocol constraints:
>   Only one command is accepted in the command buffer until the response for the
>   command is enqueued in the response buffer by SBE.
> 
> Usage:
>   We will use SBE for various purposes like timer, MPIPL, etc.
> 
> This patch implements the SBE MBOX spec for OPAL to communicate with
> SBE.
> 
> Design consideration:
>   - Each chip has SBE. We need to track SBE messages per chip. Hence added
>     per chip sbe structure and list of messages to that chip
>   - SBE accepts only one command at a time. Hence serialized MBOX commands.
>   - OPAL gets interrupted once SBE sets doorbell register
>   - OPAL has to clear doorbell register after reading response
>   - Every command class has timeout option. Timed out messages are discarded
>   - SBE MBOX commands can be classified into four types :
>     - Those that must be sent to the master only (ex: sending MDST/MDDT info)
>     - Those that must be sent to slaves only (ex: continue MPIPL)
>     - Those that must be sent to all chips (ex: close insecure window)
>     - Those that can be sent to any chip (ex: timer)
> 
> Signed-off-by: Vasant Hegde <hegdevasant at linux.vnet.ibm.com>
> ---
>  core/init.c      |   7 +
>  hw/psi.c         |   2 +-
>  hw/sbe-p9.c      | 601 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  include/chip.h   |   4 +
>  include/sbe-p9.h | 205 ++++++++++++++++++-
>  5 files changed, 804 insertions(+), 15 deletions(-)
> 
> diff --git a/core/init.c b/core/init.c
> index 39c446070..f5b3b473e 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -50,6 +50,7 @@
>  #include <libstb/trustedboot.h>
>  #include <phys-map.h>
>  #include <imc.h>
> +#include <sbe-p9.h>
>  
>  enum proc_gen proc_gen;
>  unsigned int pcie_max_link_speed;
> @@ -1009,6 +1010,12 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>  	 */
>  	chiptod_init();
>  
> +	/*
> +	 * SBE uses TB value for scheduling timer. Hence init after
> +	 * chiptod init
> +	 */
> +	p9_sbe_init();
> +
>  	/* Initialize i2c */
>  	p8_i2c_init();
>  
> diff --git a/hw/psi.c b/hw/psi.c
> index 69e03e549..0fd007d60 100644
> --- a/hw/psi.c
> +++ b/hw/psi.c
> @@ -605,7 +605,7 @@ static void psihb_p9_interrupt(struct irq_source *is, uint32_t isn)
>  		printf("PSI: DIO irq received\n");
>  		break;
>  	case P9_PSI_IRQ_PSU:
> -		sbe_interrupt(psi->chip_id);
> +		p9_sbe_interrupt(psi->chip_id);
>  		break;
>  	}
>  }
> diff --git a/hw/sbe-p9.c b/hw/sbe-p9.c
> index c9c3febf8..059bbf757 100644
> --- a/hw/sbe-p9.c
> +++ b/hw/sbe-p9.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2017 IBM Corp.
> +/* Copyright 2017-2018 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -14,6 +14,36 @@
>   * limitations under the License.
>   */
>  
> +/*
> + * P9 OPAL - SBE communication driver
> + *
> + * P9 chip has Self Boot Engine (SBE). OPAL uses SBE for various purpose like
> + * timer, scom, MPIPL, etc,. Every chip has SBE. OPAL can communicate to SBE
> + * on all chips. Based on message type it selects appropriate SBE (ex: schedule
> + * timer on any chip).
> + *
> + * OPAL communicates to SBE via a set of data and control registers provided by
> + * the PSU block in P9 chip.
> + *  - Four 8 byte registers for Host to send command packets to SBE.
> + *  - Four 8 byte registers for SBE to send response packets to Host.
> + *  - Two doorbell registers (1 on each side) to alert either party
> + *    when data is placed in above mentioned data registers. Once Host/SBE reads
> + *    incoming data, it should clear doorbell register. Interrupt is disabled
> + *    as soon as doorbell register is cleared.
> + *
> + * OPAL - SBE message format:
> + *  - OPAL communicates to SBE via set of well defined commands.
> + *  - Reg0 contains message header (command class, subclass, flags etc).
> + *  - Reg1-3 contains actual data. If data is big then it uses indirect method
> + *    (data is passed via memory and memory address/size is passed in Reg1-3).
> + *  - Every message has defined timeout. SBE must respond within specified
> + *    time. Otherwise OPAL discards message and sends error message to caller.
> + *
> + * Constraints:
> + *  - Only one command is accepted in the command buffer until the response for
> + *    the command is enqueued in the response buffer by SBE.
> + */
> +
>  #define pr_fmt(fmt) "SBE: " fmt
>  
>  #include <chip.h>
> @@ -23,39 +53,588 @@
>  #include <sbe-p9.h>
>  #include <skiboot.h>
>  #include <timebase.h>
> -#include <timer.h>
>  #include <trace.h>
>  #include <xscom.h>
>  
> -void sbe_interrupt(uint32_t chip_id)
> +enum p9_sbe_mbox_state {
> +	sbe_mbox_idle = 0,	/* Ready to send message */
> +	sbe_mbox_send,		/* Message sent, waiting for ack/response */
> +	sbe_mbox_rr,		/* SBE in R/R */
> +};
> +
> +struct p9_sbe {
> +	/* Chip ID to send message */
> +	u32			chip_id;
> +
> +	/* List to hold SBE queue messages */
> +	struct list_head	msg_list;
> +
> +	struct lock		lock;
> +
> +	enum p9_sbe_mbox_state	state;
> +
> +	/* SBE MBOX message sequence number */
> +	u16			cur_seq;
> +
> +	/*
> +	 * Sending messages to SBE is serialized. We can use
> +	 * single buffer per SBE to hold response message.
> +	 */
> +	struct p9_sbe_msg	*resp;

Not fan of that. You return the response to the user, you've dropped
the lock, the buffer can be re-used by another user. That's bad.

> +};
> +
> +/* Default SBE chip ID */
> +static int sbe_default_chip_id = -1;
> +
> +#define SBE_STATUS_PRI_SHIFT	0x30
> +#define SBE_STATUS_SEC_SHIFT	0x20
> +#define SBE_FFDC_PRESENT	PPC_BIT(1)
> +
> +static void p9_sbe_timeout_poll(void *user_data __unused);
> +
> +static inline bool p9_sbe_ffdc_present(struct p9_sbe_msg *resp)
> +{
> +	return (resp->reg[0] & SBE_FFDC_PRESENT);
> +}
> +
> +/*
> + * bit 0    : PCBPIB status present in response
> + * bit 1    : FFDC package(s) present in response
> + * bit 2-3  : Reserved
> + * bit 4-15 : Primary status code
> + */
> +static inline u16 p9_sbe_get_primary_rc(struct p9_sbe_msg *resp)
> +{
> +	return ((resp->reg[0] >> SBE_STATUS_PRI_SHIFT) & 0xfff);
> +}
> +
> +static inline void p9_sbe_set_primary_rc(struct p9_sbe_msg *resp, u64 rc)
> +{
> +	resp->reg[0] |= (rc << SBE_STATUS_PRI_SHIFT);
> +}
> +
> +static u64 p9_sbe_rreg(u32 chip_id, u64 reg)
> +{
> +	u64 data = 0;
> +	int rc;
> +
> +	rc = xscom_read(chip_id, reg, &data);
> +	if (rc != OPAL_SUCCESS) {
> +		prlog(PR_DEBUG, "XSCOM error %d reading reg 0x%llx\n", rc, reg);
> +		return 0;
> +	}

Traditionally, on errors, we return all 1's

> +
> +	return be64_to_cpu(data);
> +}
> +
> +static void p9_sbe_reg_dump(u32 chip_id)
> +{
> +#define SBE_DUMP_REG_ONE(chip_id, x) \
> +	prlog(PR_DEBUG, "  %20s: %016llx\n", #x, p9_sbe_rreg(chip_id, x))
> +
> +	prlog(PR_DEBUG, "MBOX register dump for chip : %x\n", chip_id);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_SBE_DOORBELL_REG_RW);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG0);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG1);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG2);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG3);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_DOORBELL_REG_RW);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG4);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG5);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG6);
> +	SBE_DUMP_REG_ONE(chip_id, PSU_HOST_SBE_MBOX_REG7);
> +}
> +
> +void p9_sbe_freemsg(struct p9_sbe_msg *msg)
> +{
> +	free(msg);
> +}
> +
> +static void p9_sbe_fillmsg(struct p9_sbe_msg *msg, u16 cmd,
> +			   u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
> +{
> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
> +	u16 flag;
> +
> +	/*
> +	 * Always set ack required flag. SBE will interrupt OPAL once it read
> +	 * message from mailbox register. If OPAL is expecting response, then
> +	 * it will update message timeout, otherwise it will send next message.
> +	 */
> +	flag = ctrl_flag | SBE_CMD_CTRL_ACK_REQ;
> +
> +	/* Seqence ID is filled by p9_sbe_queue_msg() */
> +	msg->reg[0] = ((u64)flag << 32) | cmd;
> +	msg->reg[1] = reg1;
> +	msg->reg[2] = reg2;
> +	msg->reg[3] = reg3;
> +
> +	msg->state = sbe_msg_unused;
> +	msg->response = response;
> +}
> +
> +/*
> + * Handles "command with direct data" format only.
> + *
> + * Note: All mbox messages of our interest uses direct data format. If we need
> + *       indirect data format then we may have to enhance this function.
> + */
> +struct p9_sbe_msg *p9_sbe_mkmsg(u16 cmd, u16 ctrl_flag,
> +				u64 reg1, u64 reg2, u64 reg3)
> +{
> +	struct p9_sbe_msg *msg;
> +
> +	msg = zalloc(sizeof(struct p9_sbe_msg));
> +	if (!msg) {
> +		prlog(PR_ERR, "Failed to allocate memory for SBE message\n");
> +		return NULL;
> +	}
> +
> +	p9_sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);

So you aren't setting "resp" here, thus it can be NULL. I'd rather you
allocated a response here or left it NULL if none is needed, I don't
like the shared response buffer see below

> +	return msg;
> +}
> +
> +static inline bool p9_sbe_mbox_busy(struct p9_sbe *sbe)
> +{
> +	return sbe->state != sbe_mbox_idle ? true : false;
> +}
> +
> +static inline bool p9_sbe_msg_busy(struct p9_sbe_msg *msg)
> +{
> +	switch (msg->state) {
> +	case sbe_msg_unused:
> +	/* fall through */
> +	case sbe_msg_done:
> +	case sbe_msg_timeout:
> +		return false;
> +	default:	/* sbe_msg_queued, sbe_msg_sent, sbe_msg_wresp */
> +		break;
> +	}
> +	return true;
> +}
> +
> +static int p9_sbe_msg_send(u32 chip_id, struct p9_sbe_msg *msg)
> +{
> +	int rc, i;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG0;
> +	data = &msg->reg[0];
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_write(chip_id, addr, *data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
> +			 HOST_SBE_MSG_WAITING);
> +	return rc;
> +}
> +
> +static int p9_sbe_msg_receive(u32 chip_id, struct p9_sbe_msg *resp)
> +{
> +	int i;
> +	int rc = OPAL_SUCCESS;
> +	u64 addr, *data;
> +
> +	addr = PSU_HOST_SBE_MBOX_REG4;
> +	data = &resp->reg[0];
> +
> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
> +		rc = xscom_read(chip_id, addr, data);
> +		if (rc)
> +			return rc;
> +
> +		addr++;
> +		data++;
> +	}
> +
> +	return rc;
> +}
> +
> +/* WARNING: This will drop sbe->lock */
> +static void p9_sbe_msg_complete(struct p9_sbe *sbe, struct p9_sbe_msg *msg)
> +{
> +	prlog(PR_TRACE, "Completing msg [chip id = %x], reg0 : 0x%llx\n",
> +	      sbe->chip_id, msg->reg[0]);
> +
> +	if (msg->response)
> +		msg->resp = sbe->resp;

Right so you set the above, then you drop the lock below. At that point
any other message can start processing and override the response from
another CPU while the owner of this message tries to read it. Bad.

You really need to allocate a separate response buffer per message.
What you can do is only allocate if if a response is needed for the
given message (typically via an argument to mkmsg) but you need to
null check in various places then.

> +
> +	list_del_from(&sbe->msg_list, &msg->link);

How does that differ from just list_del ? checks the list ?

> +	if (msg->complete) {
> +		unlock(&sbe->lock);
> +		msg->complete(msg);
> +		lock(&sbe->lock);
> +	}
> +}
> +
> +/* WARNING: This will drop sbe->lock */
> +static void p9_sbe_send_complete(struct p9_sbe *sbe)
> +{
> +	struct p9_sbe_msg *msg;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	msg = list_top(&sbe->msg_list, struct p9_sbe_msg, link);
> +	/* Need response */
> +	if (msg->response) {
> +		msg->state = sbe_msg_wresp;
> +	} else {
> +		msg->state = sbe_msg_done;
> +		p9_sbe_msg_complete(sbe, msg);
> +		sbe->state = sbe_mbox_idle;
> +	}
> +}
> +
> +/* WARNING: This will drop sbe->lock */
> +static void p9_sbe_process_queue(struct p9_sbe *sbe)
>  {
>  	int rc;
> -	u64 data;
> +	struct p9_sbe_msg *msg;
> +
> +	if (p9_sbe_mbox_busy(sbe))
> +		return;
> +
> +	if (list_empty(&sbe->msg_list))
> +		return;
> +
> +	msg = list_top(&sbe->msg_list, struct p9_sbe_msg, link);
> +	/* Send message */
> +	rc = p9_sbe_msg_send(sbe->chip_id, msg);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to send message to SBE "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +		memset(sbe->resp, 0, sizeof(struct p9_sbe_msg));

So that should be msg->resp along with a NULL check

> +		p9_sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +		msg->state = sbe_msg_timeout;

This is not a timeout though. Should you have a more generic "error"
state name instead ?

> +		p9_sbe_msg_complete(sbe, msg);
> +
> +		/* Try to send next message */
> +		p9_sbe_process_queue(sbe);

Not fan of the recursion here, I would rather have a loop.

> +		return;
> +	}
> +
> +	prlog(PR_TRACE, "Message queued [chip id = 0x%x]:\n\t Reg0 : %016llx"
> +	      "\n\t Reg1 : %016llx\n\t Reg2 : %016llx\n\t Reg3 : %016llx\n",
> +	      sbe->chip_id, msg->reg[0], msg->reg[1], msg->reg[2], msg->reg[3]);
> +
> +	msg->timeout = mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_MAX);
> +	sbe->state = sbe_mbox_send;
> +	msg->state = sbe_msg_sent;
> +}
> +
> +/*
> + * WARNING:
> + *         Only one command is accepted in the command buffer until response
> + *         to the command is enqueued in the response buffer by SBE.
> + *
> + *         Head of msg_list contains in-flight message. Hence we should always
> + *         add new message to tail of the list.
> + */
> +int p9_sbe_queue_msg(u32 chip_id, struct p9_sbe_msg *msg,
> +		     void (*comp)(struct p9_sbe_msg *msg))
> +{
>  	struct proc_chip *chip;
> +	struct p9_sbe *sbe;
> +
> +	if (!msg)
> +		return OPAL_PARAMETER;
> +
> +	if (sbe_default_chip_id == -1)
> +		return OPAL_HARDWARE;

Why ? I would move the above test inside the if (chip_id == -1)
below...

> +
> +	/* Default to SBE on master chip */
> +	if (chip_id == -1)
> +		chip = get_chip(sbe_default_chip_id);
> +	else
> +		chip = get_chip(chip_id);
> +
> +	if (chip == NULL || chip->sbe == NULL)
> +		return OPAL_INTERNAL_ERROR;
> +
> +	sbe = chip->sbe;
> +	lock(&sbe->lock);
> +	/* Set completion and update sequence number */
> +	msg->complete = comp;
> +	msg->state = sbe_msg_queued;
> +	msg->reg[0] = msg->reg[0] | ((u64)sbe->cur_seq << 16);
> +	sbe->cur_seq++;
> +
> +	/* Reset sequence number */
> +	if (sbe->cur_seq == 0xffff)
> +		sbe->cur_seq = 1;

So never 0 ?

Also how are the sequence numbers used by the SBE ? (Are they at all ?)
because cancelling a message means creating a "hole" in the sequence
number chain, is that ok with the SBE ?

> +
> +	/* Add message to queue */
> +	list_add_tail(&sbe->msg_list, &msg->link);
> +	p9_sbe_process_queue(sbe);
> +	unlock(&sbe->lock);
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +int p9_sbe_sync_msg(u32 chip_id, struct p9_sbe_msg *msg, bool autofree)
> +{
> +	int rc;
> +
> +	rc = p9_sbe_queue_msg(chip_id, msg, NULL);
> +	if (rc)
> +		goto free_msg;
> +
> +	while (p9_sbe_msg_busy(msg)) {
> +		cpu_relax();
> +		p9_sbe_timeout_poll(NULL);

So I'm thinking we should have an p9_sbe_poll_one() that takes the chip
as an argument, unless you really want to poll all sbes in that loop ?

Another option is to run all pollers like we do for the FSP driver, but
that's more heavy handed (though will allow other things to work while
we wait for the SBE).

> +	}
> +
> +	if (msg->state == sbe_msg_done)
> +		rc = 0;
> +	else
> +		rc = -1;

Any better error code here ? Maybe have a function that convert the SBE
errors into nice OPAL errors.

> +
> +	if (msg->response && msg->resp)
> +		rc = p9_sbe_get_primary_rc(msg->resp);

Same deal, should we translate the RC or pass the "raw" SBE RC up ?
Either works but in any case, the "-1" above should be some constant.

> +free_msg:
> +	if (autofree)
> +		p9_sbe_freemsg(msg);
> +
> +	return rc;
> +}
> +
> +/* Remove SBE message from queue. It will not remove inflight message */
> +int p9_sbe_cancelmsg(u32 chip_id, struct p9_sbe_msg *msg)
> +{
> +	struct proc_chip *chip;
> +	struct p9_sbe *sbe;
> +
> +	if (chip_id == -1)
> +		chip = get_chip(sbe_default_chip_id);
> +	else
> +		chip = get_chip(chip_id);
> +	if (chip == NULL || chip->sbe == NULL)
> +		return OPAL_PARAMETER;
> +
> +	sbe = chip->sbe;
> +
> +	lock(&sbe->lock);
> +	if (msg->state != sbe_msg_queued) {
> +		unlock(&sbe->lock);
> +		return OPAL_BUSY;
> +	}
> +
> +	list_del_from(&sbe->msg_list, &msg->link);
> +	msg->state = sbe_msg_done;
> +	unlock(&sbe->lock);
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +static void p9_sbe_handle_response(struct p9_sbe *sbe, struct p9_sbe_msg *msg)
> +{
> +	u16 send_seq, resp_seq;
> +	int rc;
> +
> +	if (msg == NULL)
> +		return;
> +
> +	/* Clear response message */
> +	memset(sbe->resp, 0, sizeof(struct p9_sbe_msg));

Don't use sbe->resp, use msg->resp and null check as described earlier

> +
> +	rc = p9_sbe_msg_receive(sbe->chip_id, sbe->resp);
> +	if (rc != OPAL_SUCCESS) {
> +		prlog(PR_ERR, "Failed to read response message "
> +		      "[chip id = %x]\n", sbe->chip_id);
> +		p9_sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +		return;
> +	}
> +
> +	/* Validate sequence number */
> +	send_seq = (msg->reg[0] >> 16) & 0xffff;
> +	resp_seq = (sbe->resp->reg[0] >> 16) & 0xffff;
> +	if (send_seq != resp_seq) {
> +		/*
> +		 * XXX Handle SBE R/R.
> +		 *     Lets send sequence error to caller until SBE reset works.
> +		 */
> +		prlog(PR_ERR, "Invalid sequence id [chip id = %x]\n",
> +		      sbe->chip_id);
> +		p9_sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_SEQ_ERR);
> +		return;
> +	}
> +}
> +
> +void p9_sbe_interrupt(uint32_t chip_id)
> +{
> +	int rc;
> +	u64 data = 0, val;
> +	struct proc_chip *chip;
> +	struct p9_sbe *sbe;
> +	struct p9_sbe_msg *msg = NULL;
>  
>  	chip = get_chip(chip_id);
> -	if (chip == NULL)
> +	if (chip == NULL || chip->sbe == NULL)
>  		return;
> +	sbe = chip->sbe;
>  
> +	lock(&sbe->lock);
>  	/* Read doorbell register */
>  	rc = xscom_read(chip_id, PSU_HOST_DOORBELL_REG_RW, &data);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to read SBE to Host doorbell register "
>  		      "[chip id = %x]\n", chip_id);
> -		goto clr_interrupt;
> +		p9_sbe_reg_dump(chip_id);
> +		unlock(&sbe->lock);
> +		return;
> +	}
> +
> +	/* Called from poller path (p9_sbe_timeout_poll), nothing to process */
> +	if (!data) {
> +		unlock(&sbe->lock);
> +		return;
>  	}
>  
> -	/* SBE passtrhough command, call prd handler */
> +	/* SBE passthrough command, call prd handler */
>  	if (data & SBE_HOST_PASSTHROUGH) {
>  		prd_sbe_passthrough(chip_id);
>  	}
>  
> +	/* SBE came back from reset */
> +	if (data & SBE_HOST_RESET) {
> +		prlog(PR_NOTICE, "Back from reset [chip id = %x]\n", chip_id);
> +		/* Reset SBE MBOX state */
> +		sbe->state = sbe_mbox_idle;
> +
> +		/* Reset message state */
> +		if (!list_empty(&sbe->msg_list)) {
> +			msg = list_top(&sbe->msg_list, struct p9_sbe_msg, link);
> +			msg->state = sbe_msg_queued;
> +		}
> +		goto clr_interrupt;
> +	}
> +
> +	/* SBE read message from mailbox register */
> +	if (data & SBE_HOST_MSG_READ) {
> +		p9_sbe_send_complete(sbe);
> +	}
> +
> +	/* Read SBE response before clearing doorbell register */
> +	if (data & SBE_HOST_RESPONSE_WAITING) {
> +		if (!list_empty(&sbe->msg_list)) {
> +			msg = list_top(&sbe->msg_list, struct p9_sbe_msg, link);
> +			p9_sbe_handle_response(sbe, msg);
> +			msg->state = sbe_msg_done;
> +			p9_sbe_msg_complete(sbe, msg);
> +		}
> +		sbe->state = sbe_mbox_idle;
> +	}

Nervous here... msg_complete drops the lock, so all sort of things can
have happened. We should clear the interrupt first I think, before we
process things, and go straight to process_queue at the bottom after
having called msg_complete. Also see my comment in the timeout code
about when you set mbox_idle. Makes me nervous here too.

>  clr_interrupt:
> -	/* Clears all the bits */
> -	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND,
> -			 SBE_HOST_RESPONSE_CLEAR);
> +	/* Clear doorbell register */
> +	val = SBE_HOST_RESPONSE_MASK & ~data;
> +	rc = xscom_write(chip_id, PSU_HOST_DOORBELL_REG_AND, val);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to clear SBE to Host doorbell "
> -		      "register [chip id = %x]\n", chip_id);
> +		      "interrupt [chip id = %x]\n", chip_id);
> +		unlock(&sbe->lock);
> +		return;
> +	}
> +
> +	p9_sbe_process_queue(sbe);
> +	unlock(&sbe->lock);
> +}
> +
> +static void p9_sbe_timeout_poll(void *user_data __unused)
> +{
> +	struct p9_sbe *sbe;
> +	struct p9_sbe_msg *msg;
> +	struct proc_chip *chip;
> +
> +	for_each_chip(chip) {
> +		if (chip->sbe == NULL)
> +			continue;
> +		sbe = chip->sbe;
> +
> +		if (list_empty(&sbe->msg_list))
> +			continue;

This is racy when running in debug mode, we've hit that with the timers
too, use list_empty_nocheck (see check_timers)

> +
> +		/*
> +		 * In some cases there will be a delay in calling OPAL interrupt
> +		 * handler routine (opal_handle_interrupt). In such cases its
> +		 * possible that SBE has responded, but OPAL didn't act on that.
> +		 * Hence check for SBE response.
> +		 */
> +		p9_sbe_interrupt(sbe->chip_id);
> +
> +		lock(&sbe->lock);

This is unfortunate as sbe_interrupt takes and releases the lock, maybe
use a __p9_sbe_interrupt() that doesn't (and takes a sbe pointer rather
than a chip as an argument since you have that already) and call *that*
from here, and p9_sbe_interrupt() is just a wrapper. something like:

void p9_sbe_interrupt(uint32_t chip_id)
{
	struct proc_chip *chip;
	struct p9_sbe *sbe;

	chip = get_chip(chip_id);
	f (chip == NULL || chip->sbe == NULL)
		return;
	sbe = chip->sbe;
	lock(&sbe->lock);
	__p9_sbe_interrupt(sbe);
	unlock(&sbe->lock);
}

> +		if (list_empty(&sbe->msg_list)) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}

If you had a per-chip poll routine without an outer wrapper that
iterate all chips, all of these constructs could be simple goto out;
with a single unlock.

> +
> +		msg = list_top(&sbe->msg_list, struct p9_sbe_msg, link);
> +		if (tb_compare(mftb(), msg->timeout) != TB_AAFTERB) {
> +			unlock(&sbe->lock);
> +			continue;
> +		}
> +
> +		/* Message timeout */
> +		prlog(PR_ERR, "Message timeout [chip id = %x], "
> +		      "cmd = %llx, subcmd = %llx\n", sbe->chip_id,
> +		      (msg->reg[0] >> 8) & 0xff, msg->reg[0] & 0xff);
> +		p9_sbe_reg_dump(sbe->chip_id);
> +		memset(sbe->resp, 0, sizeof(struct p9_sbe_msg));

Usual comment about resp...

> +		p9_sbe_set_primary_rc(sbe->resp, SBE_STATUS_PRI_GENERIC_ERR);
> +		msg->state = sbe_msg_timeout;
> +		p9_sbe_msg_complete(sbe, msg);
> +
> +		/* XXX Handle SBE R/R. Reset SBE state until SBE R/R works. */
> +		sbe->state = sbe_mbox_idle;

This makes me nervous. If for any reason the message is top of the
queue but hasn't started sending yet, you could be already in idle
state before you call p9_sbe_msg_complete(). If that's the case, the
unlock inside p9_sbe_msg_complete() means somebody else can restart
the queue with another message, thus making the above state change to
idle a bug.

Now it's possible you don't have such a reason where the state would go
idle while there's something in the queue and drop the lock, but as I
said, it makes me somewhat nervous.

I would much rather have the idle set before we call msg_complete to
make sure that at the point of unlock inside it, we are completely
in sync between not having the message and being idle. Also comment :-)

> +		p9_sbe_process_queue(sbe);
> +		unlock(&sbe->lock);
> +	}
> +}
> +
> +void p9_sbe_init(void)
> +{
> +	struct dt_node *xn;
> +	struct proc_chip *chip;
> +	struct p9_sbe *sbe;
> +
> +	if (proc_gen < proc_gen_p9)
> +		return;
> +
> +	dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
> +		sbe = zalloc(sizeof(struct p9_sbe));
> +		assert(sbe);
> +		sbe->resp = zalloc(sizeof(struct p9_sbe_msg));
> +		assert(sbe->resp);
> +		sbe->chip_id = dt_get_chip_id(xn);
> +		sbe->cur_seq = 1;
> +		sbe->state = sbe_mbox_idle;
> +		list_head_init(&sbe->msg_list);
> +		init_lock(&sbe->lock);
> +
> +		chip = get_chip(sbe->chip_id);
> +		assert(chip);
> +		chip->sbe = sbe;
> +
> +		if (dt_has_node_property(xn, "primary", NULL)) {
> +			sbe_default_chip_id = sbe->chip_id;
> +			prlog(PR_DEBUG, "Master chip id : %x\n", sbe->chip_id);
> +		}
> +	}
> +
> +	if (sbe_default_chip_id == -1) {
> +		prlog(PR_ERR, "Master chip ID not found.\n");
> +		return;
>  	}
> +
> +	/* Initiate SBE timeout poller */
> +	opal_add_poller(p9_sbe_timeout_poll, NULL);
>  }
> diff --git a/include/chip.h b/include/chip.h
> index 43b5ea55b..059033e27 100644
> --- a/include/chip.h
> +++ b/include/chip.h
> @@ -113,6 +113,7 @@ struct mfsi;
>  struct xive;
>  struct lpcm;
>  struct vas;
> +struct p9_sbe;
>  
>  /* Chip type */
>  enum proc_chip_type {
> @@ -218,6 +219,9 @@ struct proc_chip {
>  
>  	/* location code of this chip */
>  	const uint8_t		*loc_code;
> +
> +	/* Used by hw/sbe-p9.c */
> +	struct p9_sbe		*sbe;
>  };
>  
>  extern uint32_t pir_to_chip_id(uint32_t pir);
> diff --git a/include/sbe-p9.h b/include/sbe-p9.h
> index cca210024..892be4b85 100644
> --- a/include/sbe-p9.h
> +++ b/include/sbe-p9.h
> @@ -1,4 +1,4 @@
> -/* Copyright 2017 IBM Corp.
> +/* Copyright 2017-2018 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -21,14 +21,213 @@
>  #include <ccan/list/list.h>
>  #include <ccan/short_types/short_types.h>
>  
> +/* Worst case command timeout value (90 sec) */
> +#define SBE_CMD_TIMEOUT_MAX			(90 * 1000)
> +
> +/* Primary response status code */
> +#define SBE_STATUS_PRI_SUCCESS			0x00
> +#define SBE_STATUS_PRI_INVALID_CMD		0x01
> +#define SBE_STATUS_PRI_INVALID_DATA		0x02
> +#define SBE_STATUS_PRI_SEQ_ERR			0x03
> +#define SBE_STATUS_PRI_INTERNAL_ERR		0x04
> +#define SBE_STATUS_PRI_UNSECURE_ACCESS		0x05
> +#define SBE_STATUS_PRI_GENERIC_ERR		0xFE
> +
> +/* Secondary response status code */
> +#define SBE_STATUS_SEC_SUCCESS			0x00
> +#define SBE_STATUS_SEC_CMD_CLASS_UNSUPPORTED	0x01
> +#define SBE_STATUS_SEC_CMD_UNSUPPORTED		0x02
> +#define SBE_STATUS_SEC_INV_ADDR			0x03
> +#define SBE_STATUS_SEC_INV_TARGET_TYPE		0x04
> +#define SBE_STATUS_SEC_INV_CHIPLET_ID		0x05
> +#define SBE_STATUS_SEC_TARGET_NOT_PRESENT	0x06
> +#define SBE_STATUS_SEC_TARGET_NOT_FUNC		0x07
> +#define SBE_STATUS_SEC_CMD_NOT_ALLOW		0x08
> +#define SBE_STATUS_SEC_FUNC_NOT_SUPPORTED	0x09
> +#define SBE_STATUS_SEC_GENERIC_ERR		0x0A
> +#define SBE_STATUS_SEC_BLACKLIST_REG		0x0B
> +#define SBE_STATUS_SEC_OS_FAILURE		0x0C
> +#define SBE_STATUS_SEC_MBX_REG_FAILURE		0x0D
> +#define SBE_STATUS_SEC_INSUFFICIENT_DATA	0x0E
> +#define SBE_STATUS_SEC_EXCESS_DATA		0x0F
> +#define SBE_STATUS_SEC_HW_TIMEOUT		0x10
> +#define SBE_STATUS_SEC_PCBPIB_ERR		0x11
> +#define SBE_STATUS_SEC_FIFO_PARITY_ERR		0x12
> +#define SBE_STATUS_SEC_TIMER_EXPIRED		0x13
> +#define SBE_STATUS_SEC_BLACKLISTED_MEM		0x14
> +#define SBE_STATUS_SEC_UNSEC_REGION_NOT_FOUND	0x15
> +#define SBE_STATUS_SEC_UNSEC_REGION_EXCEEDED	0x16
> +#define SBE_STATUS_SEC_UNSEC_REGION_AMEND	0x17
> +#define SBE_STATUS_SEC_INPUT_BUF_OVERFLOW	0x18
> +#define SBE_STATUS_SEC_INVALID_PARAMS		0x19
> +#define SBE_STATUS_SEC_BLACKLISTED_CMD		0x20
> +
> +/* Number of MBOX register on each side */
> +#define NR_HOST_SBE_MBOX_REG		0x04
> +
> +/*
> + * SBE MBOX register address
> + *   Reg 0 - 3 : Host to send command packets to SBE
> + *   Reg 4 - 7 : SBE to send response packets to Host
> + */
> +#define PSU_HOST_SBE_MBOX_REG0		0x000D0050
> +#define PSU_HOST_SBE_MBOX_REG1		0x000D0051
> +#define PSU_HOST_SBE_MBOX_REG2		0x000D0052
> +#define PSU_HOST_SBE_MBOX_REG3		0x000D0053
> +#define PSU_HOST_SBE_MBOX_REG4		0x000D0054
> +#define PSU_HOST_SBE_MBOX_REG5		0x000D0055
> +#define PSU_HOST_SBE_MBOX_REG6		0x000D0056
> +#define PSU_HOST_SBE_MBOX_REG7		0x000D0057
> +#define PSU_SBE_DOORBELL_REG_RW		0x000D0060
> +#define PSU_SBE_DOORBELL_REG_AND	0x000D0061
> +#define PSU_SBE_DOORBELL_REG_OR		0x000D0062
>  #define PSU_HOST_DOORBELL_REG_RW	0x000D0063
>  #define PSU_HOST_DOORBELL_REG_AND	0x000D0064
>  #define PSU_HOST_DOORBELL_REG_OR	0x000D0065
>  
> +/*
> + * Doorbell register to trigger SBE interrupt. Set by OPAL to inform
> + * the SBE about a waiting message in the Host/SBE mailbox registers
> + */
> +#define HOST_SBE_MSG_WAITING		PPC_BIT(0)
> +
> +/*
> + * Doorbell register for host bridge interrupt. Set by the SBE to inform
> + * host about a response message in the Host/SBE mailbox registers
> + */
> +#define SBE_HOST_RESPONSE_WAITING	PPC_BIT(0)
> +#define SBE_HOST_MSG_READ		PPC_BIT(1)
> +#define SBE_HOST_STOP15_EXIT		PPC_BIT(2)
> +#define SBE_HOST_RESET			PPC_BIT(3)
>  #define SBE_HOST_PASSTHROUGH		PPC_BIT(4)
> -#define SBE_HOST_RESPONSE_CLEAR		0x00
> +#define SBE_HOST_TIMER_EXPIRY		PPC_BIT(14)
> +#define SBE_HOST_RESPONSE_MASK		(PPC_BITMASK(0, 4) | SBE_HOST_TIMER_EXPIRY)
> +
> +/* SBE Target Type */
> +#define SBE_TARGET_TYPE_PROC		0x00
> +#define SBE_TARGET_TYPE_EX		0x01
> +#define SBE_TARGET_TYPE_PERV		0x02
> +#define SBE_TARGET_TYPE_MCS		0x03
> +#define SBE_TARGET_TYPE_EQ		0x04
> +#define SBE_TARGET_TYPE_CORE		0x05
> +
> +/* SBE MBOX command class */
> +#define SBE_MCLASS_FIRST		0xD1
> +#define SBE_MCLASS_CORE_STATE		0xD1
> +#define SBE_MCLASS_SCOM			0xD2
> +#define SBE_MCLASS_RING			0xD3
> +#define SBE_MCLASS_TIMER		0xD4
> +#define SBE_MCLASS_MPIPL		0xD5
> +#define SBE_MCLASS_SECURITY		0xD6
> +#define SBE_MCLASS_GENERIC		0xD7
> +#define SBE_MCLASS_LAST			0xD7
> +
> +/*
> + * Commands are provided in xxyy form where:
> + *   - xx : command class
> + *   - yy : command
> + *
> + * Both request and response message uses same seq ID,
> + * command class and command.
> + */
> +#define SBE_CMD_CTRL_DEADMAN_LOOP	0xD101
> +#define SBE_CMD_MULTI_SCOM		0xD201
> +#define SBE_CMD_PUT_RING_FORM_IMAGE	0xD301
> +#define SBE_CMD_CONTROL_TIMER		0xD401
> +#define SBE_CMD_GET_ARCHITECTED_REG	0xD501
> +#define SBE_CMD_CLR_ARCHITECTED_REG	0xD502
> +#define SBE_CMD_SET_UNSEC_MEM_WINDOW	0xD601
> +#define SBE_CMD_GET_SBE_FFDC		0xD701
> +#define SBE_CMD_GET_CAPABILITY		0xD702
> +#define SBE_CMD_READ_SBE_SEEPROM	0xD703
> +#define SBE_CMD_SET_FFDC_ADDR		0xD704
> +#define SBE_CMD_QUIESCE_SBE		0xD705
> +#define SBE_CMD_SET_FABRIC_ID_MAP	0xD706
> +#define SBE_CMD_STASH_MPIPL_CONFIG	0xD707
> +
> +/* SBE MBOX control flags */
> +
> +/* Generic flags */
> +#define SBE_CMD_CTRL_RESP_REQ		0x0100
> +#define SBE_CMD_CTRL_ACK_REQ		0x0200
> +
> +/* Deadman loop */
> +#define CTRL_DEADMAN_LOOP_START		0x0001
> +#define CTRL_DEADMAN_LOOP_STOP		0x0002
> +
> +/* Control timer */
> +#define CONTROL_TIMER_START		0x0001
> +#define CONTROL_TIMER_STOP		0x0002
> +
> +/* SBE message state */
> +enum p9_sbe_msg_state {
> +	sbe_msg_unused = 0,	/* Free */
> +	sbe_msg_queued,		/* Queued to SBE list */
> +	sbe_msg_sent,		/* Sent to SBE */
> +	sbe_msg_wresp,		/* Waiting for response */
> +	sbe_msg_done,		/* Complete */
> +	sbe_msg_timeout,	/* Message timeout */
> +};
> +
> +/* SBE message */
> +struct p9_sbe_msg {
> +	/*
> +	 * Reg[0] :
> +	 *   word0 :
> +	 *     direct cmd  : reserved << 16 | ctrl flag
> +	 *     indirect cmd: mem_addr_size_dword
> +	 *     response    : primary status << 16 | secondary status
> +	 *
> +	 *   word1 : seq id << 16 | cmd class << 8 | cmd
> +	 *
> +	 * WARNING:
> +	 *   - Don't populate reg[0].seq (byte 4,5). This will be populated by
> +	 *     p9_sbe_queue_msg().
> +	 */
> +	u64	reg[4];
> +
> +	/* cmd timout : mftb() + msecs_to_tb(SBE_CMD_TIMEOUT_[SHORT/LONG]_MS) */
> +	u64	timeout;
> +
> +	/* Completion function */
> +	void (*complete)(struct p9_sbe_msg *msg);
> +	void *user_data;
> +
> +	/* Current msg state */
> +	enum p9_sbe_msg_state	state;
> +
> +	/* Set if the message expects a response */
> +	bool			response;
> +
> +	/* Points to p9_sbe->resp, which contains response message */
> +	struct p9_sbe_msg	*resp;
> +
> +	/* Internal queuing */
> +	struct list_node	link;
> +} __packed;
> +
> +
> +/* Allocate and populate p9_sbe_msg structure */
> +extern struct p9_sbe_msg *p9_sbe_mkmsg(u16 cmd, u16 ctrl_flag, u64 reg1,
> +				       u64 reg2, u64 reg3) __warn_unused_result;
> +
> +/* Free p9_sbe_msg structure */
> +extern void p9_sbe_freemsg(struct p9_sbe_msg *msg);
> +
> +/* Add new message to sbe queue */
> +extern int p9_sbe_queue_msg(uint32_t chip_id, struct p9_sbe_msg *msg,
> +		void (*comp)(struct p9_sbe_msg *msg)) __warn_unused_result;
> +
> +/* Synchronously send message to SBE */
> +extern int p9_sbe_sync_msg(u32 chip_id, struct p9_sbe_msg *msg, bool autofree);
> +
> +/* Remove message from SBE queue, it will not remove inflight message */
> +extern int p9_sbe_cancelmsg(u32 chip_id, struct p9_sbe_msg *msg);
> +
> +/* Initialize the SBE mailbox driver */
> +extern void p9_sbe_init(void);
>  
>  /* SBE interrupt */
> -extern void sbe_interrupt(uint32_t chip_id);
> +extern void p9_sbe_interrupt(uint32_t chip_id);
>  
>  #endif	/* __SBE_P9_H */