[Skiboot] [RFC PATCH 2/2] Add SBE driver support

Wed Feb 22 21:21:37 AEDT 2017

On 02/16/2017 11:56 AM, Oliver O'Halloran wrote:
> On Wed, 2017-02-15 at 20:44 +0530, Vasant Hegde wrote:
>> SBE (Self Boot Engine) on P9 has two different jobs:
>>    - Boot the chip-up till core is functional
>>    - Provide various services like timer, scom, etc., at runtime

.../...

>> +static inline struct sbe_msg *__sbe_allocmsg(void)
>> +{
>> +	return zalloc(sizeof(struct sbe_msg));
>> +}
>> +
>> +struct sbe_msg *sbe_allocmsg(bool alloc_response)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	msg = __sbe_allocmsg();
>> +	if (!msg)
>> +		return NULL;
>> +
>> +	if (alloc_response) {
>> +		msg->resp = __sbe_allocmsg();
>> +		if (!msg->resp) {
>> +			free(msg);
>> +			return NULL;
>> +		}
> Given that we're limited to one outstanding response per SBE we could
> have a sbe_msg inside the sbe structure itself and skip this extra
> allocation. Up to you.

I think I should have explained this in commit message.  Actually caller can 
queue any number of messages for given SBE. And driver will serialize sending 
message. Hence I've added two structures.

>
>> +	}
>> +
>> +	return msg;
>> +}
>> +
>> +static inline void __sbe_freemsg(struct sbe_msg *msg)
>> +{
>> +	free(msg);
>> +}
>> +
>> +void sbe_freemsg(struct sbe_msg *msg)
>> +{
>> +	if (msg && msg->resp)
>> +		__sbe_freemsg(msg->resp);
>> +	__sbe_freemsg(msg);
>> +}
>
> Is there any reason we need to wrap zalloc() and free() with
> __sbe_allocmsg() and __sbe_freemsg()?

Probably I can kill __sbe_allocmsg() and free().

>
>> +
>> +void sbe_fillmsg(struct sbe_msg *msg, u16 cmd,
>> +		 u16 ctrl_flag, u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	bool response = !!(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
>> +
>> +	/* Seqence ID is filled by sbe_queue_msg() */
>> +	msg->reg0 = ((u64)ctrl_flag << 32) | cmd;
>> +	msg->reg1 = reg1;
>> +	msg->reg2 = reg2;
>> +	msg->reg3 = reg3;
>> +
>> +	prlog(PR_TRACE, "MBOX message :\n\t Reg0 : %llx\n\t "
>> +	      "Reg1 : %llx\n\t Reg2 : %llx\n\t Reg3 : %lld\n",
>> +	      msg->reg0, msg->reg1, msg->reg2, msg->reg3);
>> +
>> +	msg->response = response;
>> +}
>> +
>> +/*
>> + * Handles "command with direct data" format only.
>> + *
>> + * Note: All mbox messages of our interest uses direct data format.
>> + *       If we need indirect data format then we have to enhance
>> + *       this function.
>> + */
>> +struct sbe_msg *sbe_mkmsg(u16 cmd, u16 ctrl_flag,
>> +			  u64 reg1, u64 reg2, u64 reg3)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	msg = sbe_allocmsg(ctrl_flag & SBE_CMD_CTRL_RESP_REQ);
>> +	if (!msg) {
>> +		prlog(PR_ERR, "Failed to allocate memory for SBE
>> message\n");
>> +		return NULL;
>> +	}
>> +
>> +	sbe_fillmsg(msg, cmd, ctrl_flag, reg1, reg2, reg3);
>> +	return msg;
>> +}
>> +
>
> What's the use case for making fillmsg public? Given this is an async
> protocol you will need to allocate the structure anyway. Using static
> storage will require extraneous synchronisation or just be straight up
> broken.

Correct. I can make couple of functions as static including fillmsg()

>
> Having a single entry point (mkmsg) would also allow you to
> transparently implement a freelist (if needed) for messages similar to
> how the opal message API works.

>
>> +static inline bool sbe_busy(struct sbe_msg *msg)
>> +{
>> +	switch (msg->state) {
>> +	case sbe_msg_unused:
>> +	case sbe_msg_queued:
>> +	case sbe_msg_response:
>> +	case sbe_msg_done:
>> +	case sbe_msg_timeout:
>> +	case sbe_msg_cancelled:
>> +		return false;
>> +	default:	/* + sbe_msg_sent, sbe_msg_wresp */
>> +		break;
>> +	}
>> +
>> +	return true;
>> +}
>> +
>> +/*
>> + * Read SBE doorbell to confirm its ready to accept command. Since
>> the device
>> + * driver single threads the requests, we should never see not being
>> ready to
>> + * send a request.
>> + */
>>
>> +static bool sbe_ready(u32 chip_id)
>> +{
>> +	int rc;
>> +	u64 data;
>> +
>> +	rc = xscom_read(chip_id, PSU_SBE_DOORBELL_REG_RW, &data);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to read Host to SBE doorbell
>> register "
>> +		      "[chip id = %x]\n", chip_id);
>> +		return false;
>> +	}
>> +
>> +	if (data & HOST_SBE_MSG_WAITING) {
>> +		prlog(PR_ERR, "Not ready to accept command [chip id
>> = %x]."
>> +		      " Doorbell reg : %llx\n", chip_id, data);
>> +		/* Dump register content */
>> +		sbe_reg_dump(chip_id);
>> +		return false;
>> +	}
>> +
>> +	return true;
>> +}
>
> Maybe rename these to sbe_msg_finished() and sbe_hw_ready(). From the
> names you'd expect them to be implemented similarly, but the when you
> look at them the relationship really isn't obvious.

Done.

>
>> +
>> +static int sbe_send_msg(u32 chip_id, struct sbe_msg *msg)
>> +{
>> +	int rc, i;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG0;
>> +	data = &msg->reg0;
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_write(chip_id, addr, *data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	rc = xscom_write(chip_id, PSU_SBE_DOORBELL_REG_OR,
>> +			 HOST_SBE_MSG_WAITING);
>> +	if (rc == OPAL_SUCCESS)
>> +		msg->state = sbe_msg_sent;
>> +
>> +	return rc;
>> +}
>> +
>> +static int sbe_recv_msg(u32 chip_id, struct sbe_msg *resp)
>> +{
>> +	int i;
>> +	int rc = OPAL_SUCCESS;
>> +	u64 addr, *data;
>> +
>> +	addr = PSU_HOST_SBE_MBOX_REG4;
>> +	data = &resp->reg0;
>> +
>> +	for (i = 0; i < NR_HOST_SBE_MBOX_REG; i++) {
>> +		rc = xscom_read(chip_id, addr, data);
>> +		if (rc)
>> +			return rc;
>> +
>> +		addr++;
>> +		data++;
>> +	}
>> +
>> +	resp->state = sbe_msg_response;
>> +	return rc;
>> +}
>> +
>
>> +/* This is called with sbe->lock */
>> +static void sbe_complete_msg(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>> +	void (*comp)(struct sbe_msg *msg);
>> +
>> +	prlog(PR_INSANE, "Completing msg [chip id = %x], reg0 :
>> 0x%llx\n",
>> +	      sbe->chip_id, msg->reg0);
>> +
>> +	comp = msg->complete;
>> +	msg->state = sbe_msg_done;
>> +	list_del_from(&sbe->msg_list, &msg->link);
>> +
>> +	if (comp) {
>> +		unlock(&sbe->lock);
>> +		(*comp)(msg);
>> +		lock(&sbe->lock);
>> +	} else {
>> +		sbe_freemsg(msg);
>> +	}
>> +}
>
> The extra function pointer variable is kinda weird. Can you just use
> msg->complete directly?

yeah. I can do that.

>
>> +
>> +static void sbe_complete_send(struct sbe *sbe, struct sbe_msg *msg)
>> +{
>> +	u8 cmdclass;
>> +	u64 timeout;
>> +
>> +	prlog(PR_INSANE, "Completing send [chip id = %x], reg0 :
>> 0x%llx\n",
>> +	      sbe->chip_id, msg->reg0);
>> +
>> +	/* Need response */
>> +	if (msg->response) {
>> +		cmdclass = (u8)(msg->reg0 >> 8) & 0xff;
>> +		timeout = sbe_get_cmdclass_timeout(cmdclass);
>> +		msg->timeout = mftb() + msecs_to_tb(timeout);
>> +		msg->state = sbe_msg_wresp;
>> +	} else {
>> +		sbe_complete_msg(sbe, msg);
>> +	}
>> +}
>
> I don't really like how the logic of sending a message is spread across
>
> sbe_complete_send(), sbe_poke_queue() and sbe_send_msg(). It just seems
> to make the flow control convoluted especially when compared to the
> recieve path. Matter of preference I suppose...

So message may or may not expect response from SBE. If its not expecting 
response, then I will send complete message to caller as soon as I send message 
to SBE. That's why I have extra function in sender path. (sbe_complete_send()). 
  I thought having extra function makes it easy to read.

>
>> +
>> +static void sbe_poke_queue(struct sbe *sbe)
>> +{
>> +	int rc;
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	if (sbe_busy(msg))
>> +		return;
>> +
>> +	if (!sbe_ready(sbe->chip_id))
>> +		return;
>> +
>> +	/* Send message */
>> +	rc = sbe_send_msg(sbe->chip_id, msg);
>> +	if (rc) {
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Failed to send message to SBE "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +
>> +		if (msg->resp)
>> +			msg->resp->reg0 |=
>> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
> Should we make these unsigned long literals so they don't need to be
> casted? You could also bake the shift into the constant itself.

Yes. They are embedding more info in return code. I was waiting for more clarity 
on spec.
Will fix it in next iteration.

>
>> +
>> +		sbe_complete_msg(sbe, msg);
>> +	} else {
>> +		sbe_complete_send(sbe, msg);
>> +	}
>> +
>> +	/* Recursive call to send next message */
>> +	sbe_poke_queue(sbe);
>
> Maybe document that this is limited to a single recursion due to how
> the send/recv logic works. IIRC Stewart would prefer we didn't use
> recursion at all in skiboot, but it is safe here.

Yep. Will add documentation.

>
>> +}
>> +
>> +/*
>> + * WARNING:
>> + *         Only one command is accepted in the command buffer until
>> the response
>> + *         for the command is en-queued in the response buffer by
>> SBE.
>> + *
>> + *         Head of msg_list contains inflight message. Hence we
>> should always
>> + *         add new message to tail of the list.
>> + */
>> +int sbe_queue_msg(u32 chip_id, struct sbe_msg *msg,
>> +		  void (*comp)(struct sbe_msg *msg))
>> +{
>> +	struct proc_chip *chip;
>> +	struct sbe *sbe;
>> +
>> +	if (!msg)
>> +		return OPAL_PARAMETER;
>> +
>> +	if (sbe_default_chip_id == -1)
>> +		return OPAL_INTERNAL_ERROR;
>> +
>> +	/* Default to master chip ID */
>> +	if (chip_id == -1)
>> +		chip = get_chip(sbe_default_chip_id);
>> +	else
>> +		chip = get_chip(chip_id);
>> +
>> +	if (chip == NULL || chip->sbe == NULL)
>> +		return OPAL_PARAMETER;
>> +
>> +	sbe = chip->sbe;
>> +
>> +	/* Set completion */
>> +	msg->complete = comp;
>> +	msg->state = sbe_msg_queued;
>> +
>> +	/* Clear response state */
>> +	if (msg->resp)
>> +		msg->resp->state = sbe_msg_unused;
>> +
>> +	lock(&sbe->lock);
>> +	/* Update sequence number */
>> +	msg->reg0 = msg->reg0 | (sbe->cur_seq << 16);
>> +	sbe->cur_seq++;
>> +
>> +	/* Reset sequence number */
>> +	if (sbe->cur_seq == 0xffff)
>> +		sbe->cur_seq = 1;
>> +
>> +	/* Add message to queue */
>> +	list_add_tail(&sbe->msg_list, &msg->link);
>> +	sbe_poke_queue(sbe);
>> +	unlock(&sbe->lock);
>> +
>> +	return OPAL_SUCCESS;
>> +}
>> +
>> +static void sbe_handle_response(struct sbe *sbe)
>> +{
>> +	u16 send_seq, resp_seq;
>> +	int rc;
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	rc = sbe_recv_msg(sbe->chip_id, msg->resp);
>> +	if (rc != OPAL_SUCCESS) {
>> +		msg->resp->reg0 |= ((u64)SBE_STATUS_PRI_GENERIC_ERR
>> << 48);
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Failed to read response message "
>> +		      "[chip id = %x]\n", sbe->chip_id);
>> +	}
>> +
>> +	/* Validate sequence number */
>> +	send_seq = (msg->reg0 >> 16) && 0xffff;
>> +	resp_seq = (msg->resp->reg0 >> 16) && 0xffff;
>
> That should probably be a bitwise AND.

Let me try.

>
>> +	if (send_seq != resp_seq) {
>> +		/* XXX commit error log */
>> +		prlog(PR_ERR, "Invalid sequence id [chip id =
>> %x]\n",
>> +		      sbe->chip_id);
>> +		return;
> If sbe_msg_recv() fails above the sequence numbers might be wrong and
> it'll return without calling sbe_complete_msg(). Should it be doing
> that?

You are right. I've to re arrange this code.

>
>> +	}
>> +
>> +	sbe_complete_msg(sbe, msg);
>> +}
>> +
>> +static void sbe_timeout_poll(void *user_data __unused)
>> +{
>> +	u64 now;
>> +	struct sbe *sbe;
>> +	struct sbe_msg *msg;
>> +	struct proc_chip *chip;
>> +
>> +	for_each_chip(chip) {
>> +		if (chip->sbe == NULL)
>> +			continue;
>> +
>> +		sbe = chip->sbe;
>> +		lock(&sbe->lock);
>> +		if (list_empty(&sbe->msg_list)) {
>> +			unlock(&sbe->lock);
>> +			continue;
>> +		}
>> +
>> +		msg = list_top(&sbe->msg_list, struct sbe_msg,
>> link);
>> +		now = mftb();
>> +		if (tb_compare(now, msg->timeout) != TB_AAFTERB) {
>> +			unlock(&sbe->lock);
>> +			continue;
>> +		}
>> +
>> +		/* Update response message */
>> +		if (msg->resp) {
>> +			msg->resp->state = sbe_msg_timeout;
>> +			msg->resp->reg0 |=
>> ((u64)SBE_STATUS_PRI_GENERIC_ERR << 48);
>> +
>> +			prlog(PR_ERR, "Message timeout [chip id =
>> %x], "
>> +			      "Reg0 = %llx\n", sbe->chip_id, msg-
>>> reg0);
>> +			sbe_reg_dump(sbe->chip_id);
>> +		}
>> +
>> +		sbe_complete_msg(sbe, msg);
>
> What's the time taken for a SBE reset? The logic in sbe_interrupt()
> reposts messages while we complete them here. IIRC the P9 SBE boot time
> is fairly long so resets might be the main source of timeouts. Should
> we also be reposting the message here?

SBE accepts one message at a time and we wait until we get response to that 
message before
sending next one. Now timeout may happen for two reasons:
   - SBE didn't respond without specified timeout period (that's what this 
function handles)
   - SBE reset happens (after reset we get interrupt and I'm trying to repost 
last message).

Right now we don't know whether SBE is alive or not. So it may happen that I 
will continue to post
next message while SBE is dead/reset happening. This is something to fix.

>
>> +		sbe_poke_queue(sbe);
>> +		unlock(&sbe->lock);
>> +	}
>> +}
>> +
>> +static void sbe_repost_msg(struct sbe *sbe)
>> +{
>> +	struct sbe_msg *msg;
>> +
>> +	if (list_empty(&sbe->msg_list))
>> +		return;
>> +
>> +	msg = list_top(&sbe->msg_list, struct sbe_msg, link);
>> +	msg->state = sbe_msg_queued;
>> +	sbe_poke_queue(sbe);
>> +}
> This function can be folded into the SBE reset case in sbe_interrupt().
> below. When you take out the empty list handling (also done in
> sbe_interrupt()) it's only two lines of code.

Somehow I've habit of writing too many funcitons. Yes. I can be folded into 
interrupt() function.

-Vasant