[Skiboot] [PATCH 3/5] libflash/mbox-flash: Update to V2 of the protocol

Suraj Jitindar Singh sjitindarsingh at gmail.com
Tue May 2 14:22:55 AEST 2017


On Mon, 2017-04-24 at 19:14 +1000, Cyril Bur wrote:
> Updated version 2 of the protocol can be found at:
> https://github.com/openbmc/mboxbridge/blob/master/Documentation/mbox_
> protocol.md
> 
> This commit changes mbox-flash such that it will preferentially talk
> version 2 to any capable daemon but still remain capable of talking
> to
> v1 daemons.
> 
> Version two changes some of the command definitions for increased
> consistency and usability.
> Version two includes more attention bits - these are now dealt with
> at a
> simple level.

Comments below...

Suraj

> 
> Signed-off-by: Cyril Bur <cyril.bur at au1.ibm.com>
> ---
>  hw/lpc-mbox.c         |  38 ++-
>  include/lpc-mbox.h    |  11 +
>  libflash/errors.h     |   2 +
>  libflash/mbox-flash.c | 671
> ++++++++++++++++++++++++++++++++++++++++----------
>  4 files changed, 580 insertions(+), 142 deletions(-)
> 
> diff --git a/hw/lpc-mbox.c b/hw/lpc-mbox.c
> index 7e509f54..c473414f 100644
> --- a/hw/lpc-mbox.c
> +++ b/hw/lpc-mbox.c
> @@ -51,9 +51,6 @@
>  
>  #define MBOX_MAX_QUEUE_LEN 5
>  
> -#define BMC_RESET 1
> -#define BMC_COMPLETE 2
> -
>  struct mbox {
>  	uint32_t base;
>  	int queue_len;
> @@ -62,6 +59,8 @@ struct mbox {
>  	struct timer poller;
>  	void (*callback)(struct bmc_mbox_msg *msg, void *priv);
>  	void *drv_data;
> +	void (*attn)(uint8_t bits, void *priv);
> +	void *attn_data;
>  	struct lock lock; /* Protect in_flight */
>  	struct bmc_mbox_msg *in_flight;
>  };
> @@ -184,16 +183,15 @@ out_response:
>  	 * something to tell us.
>  	 */
>  	if (bmc_mbox_inb(MBOX_STATUS_1) & MBOX_STATUS_1_ATTN) {
> -		uint8_t action;
> +		uint8_t action, all;
>  
>  		/* W1C on that reg */
>  		bmc_mbox_outb(MBOX_STATUS_1_ATTN, MBOX_STATUS_1);
>  
> -		action = bmc_mbox_inb(MBOX_FLAG_REG);
> +		all = action = bmc_mbox_inb(MBOX_FLAG_REG);
>  		prlog(PR_TRACE, "Got a status register interrupt
> with action 0x%02x\n",
>  				action);
> -
> -		if (action & BMC_RESET) {
> +		if (action & MBOX_ATTN_BMC_REBOOT) {
>  			/*
>  			 * It's unlikely that something needs to be
> done at the
>  			 * driver level. Let libflash deal with it.
> @@ -201,12 +199,23 @@ out_response:
>  			 * event.
>  			 */
>  			prlog(PR_WARNING, "BMC reset detected\n");
> -			action &= ~BMC_RESET;
> +			action &= ~MBOX_ATTN_BMC_REBOOT;
>  		}
>  
> +		if (action & MBOX_ATTN_BMC_WINDOW_RESET)
> +			action &= ~MBOX_ATTN_BMC_WINDOW_RESET;
> +
> +		if (action & MBOX_ATTN_BMC_FLASH_LOST)
> +			action &= ~MBOX_ATTN_BMC_FLASH_LOST;
> +
> +		if (action & MBOX_ATTN_BMC_DAEMON_READY)
> +			action &= ~MBOX_ATTN_BMC_DAEMON_READY;
> +
>  		if (action)
>  			prlog(PR_ERR, "Got a status bit set that
> don't know about: 0x%02x\n",
>  					action);
> +
> +		mbox.attn(all, mbox.attn_data);
>  	}
>  
>  	schedule_timer(&mbox.poller,
> @@ -248,6 +257,19 @@ int bmc_mbox_register_callback(void
> (*callback)(struct bmc_mbox_msg *msg, void *
>  	return 0;
>  }
>  
> +int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void
> *priv),
> +		void *drv_data)
> +{
> +	mbox.attn = callback;
> +	mbox.attn_data = drv_data;
> +	return 0;
> +}
> +
> +uint8_t bmc_mbox_get_attn_reg(void)
> +{
> +	return bmc_mbox_inb(MBOX_FLAG_REG);
> +}
> +
>  void mbox_init(void)
>  {
>  	const struct dt_property *prop;
> diff --git a/include/lpc-mbox.h b/include/lpc-mbox.h
> index 14e38cf8..ca5e50c0 100644
> --- a/include/lpc-mbox.h
> +++ b/include/lpc-mbox.h
> @@ -33,6 +33,8 @@
>  #define MBOX_C_MARK_WRITE_DIRTY 0x07
>  #define MBOX_C_WRITE_FLUSH 0x08
>  #define MBOX_C_BMC_EVENT_ACK 0x09
> +#define MBOX_C_MARK_WRITE_ERASED 0x0a
> +#define MBOX_COMMAND_COUNT 10
>  
>  #define MBOX_R_SUCCESS 0x01
>  #define MBOX_R_PARAM_ERROR 0x02
> @@ -40,6 +42,12 @@
>  #define MBOX_R_SYSTEM_ERROR 0x04
>  #define MBOX_R_TIMEOUT 0x05
>  
> +#define MBOX_ATTN_ACK_MASK 0x3
> +#define MBOX_ATTN_BMC_REBOOT (1 << 0)
> +#define MBOX_ATTN_BMC_WINDOW_RESET (1 << 1)
> +#define MBOX_ATTN_BMC_FLASH_LOST (1 << 6)
> +#define MBOX_ATTN_BMC_DAEMON_READY (1 << 7)
> +
>  /* Default poll interval before interrupts are working */
>  #define MBOX_DEFAULT_POLL_MS	200
>  
> @@ -55,4 +63,7 @@ struct bmc_mbox_msg {
>  int bmc_mbox_enqueue(struct bmc_mbox_msg *msg);
>  int bmc_mbox_register_callback(void (*callback)(struct bmc_mbox_msg
> *msg, void *priv),
>  		void *drv_data);
> +int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void
> *priv),
> +		void *drv_data);
> +uint8_t bmc_mbox_get_attn_reg(void);
>  #endif /* __LPC_MBOX_H */
> diff --git a/libflash/errors.h b/libflash/errors.h
> index 99dcfc28..2f567211 100644
> --- a/libflash/errors.h
> +++ b/libflash/errors.h
> @@ -31,5 +31,7 @@
>  #define FLASH_ERR_CTRL_TIMEOUT		13
>  #define FLASH_ERR_ECC_INVALID		14
>  #define FLASH_ERR_BAD_READ		15
> +#define FLASH_ERR_DEVICE_GONE	16
> +#define FLASH_ERR_AGAIN	17
>  
>  #endif /* __LIBFLASH_ERRORS_H */
> diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
> index 5244c35c..3dfcebe9 100644
> --- a/libflash/mbox-flash.c
> +++ b/libflash/mbox-flash.c
> @@ -23,6 +23,7 @@
>  #include <string.h>
>  
>  #include <skiboot.h>
> +#include <inttypes.h>
>  #include <timebase.h>
>  #include <timer.h>
>  #include <libflash/libflash.h>
> @@ -46,6 +47,7 @@ struct lpc_window {
>  };
>  
>  struct mbox_flash_data {
> +	int version;
>  	uint32_t shift;
>  	struct lpc_window read;
>  	struct lpc_window write;
> @@ -53,12 +55,93 @@ struct mbox_flash_data {
>  	uint32_t total_size;
>  	uint32_t erase_granule;
>  	int rc;
> +	bool reboot;
> +	bool pause;

I prefer suspended to paused...

>  	bool busy;
> +	bool ack;
>  	uint8_t seq;
> +	/* Plus one, commands start at 1 */
> +	void (*handlers[MBOX_COMMAND_COUNT + 1])(struct
> mbox_flash_data *, struct bmc_mbox_msg*);
>  	struct bmc_mbox_msg msg_mem;
>  };
>  
> -static uint64_t mbox_flash_mask(struct mbox_flash_data *mbox_flash)
> +static void mbox_flash_callback(struct bmc_mbox_msg *msg, void
> *priv);
> +static void mbox_flash_attn(uint8_t attn, void *priv);
> +
> +static int protocol_init(struct mbox_flash_data *mbox_flash);
> +
> +static int lpc_window_read(struct mbox_flash_data *mbox_flash,
> uint32_t pos,
> +			   void *buf, uint32_t len)
> +{
> +	uint32_t off = mbox_flash->read.lpc_addr + (pos -
> mbox_flash->read.cur_pos);
> +	int rc;
> +
> +	prlog(PR_TRACE, "Reading at 0x%08x for 0x%08x offset:
> 0x%08x\n",
> +			pos, len, off);
> +
> +	while(len) {
> +		uint32_t chunk;
> +		uint32_t dat;
> +
> +		/* XXX: make this read until it's aligned */
> +		if (len > 3 && !(off & 3)) {
> +			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> +			if (!rc)
> +				*(uint32_t *)buf = dat;
> +			chunk = 4;
> +		} else {
> +			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> +			if (!rc)
> +				*(uint8_t *)buf = dat;
> +			chunk = 1;
> +		}
> +		if (rc) {
> +			prlog(PR_ERR, "lpc_read failure %d to FW
> 0x%08x\n", rc, off);
> +			return rc;
> +		}
> +		len -= chunk;
> +		off += chunk;
> +		buf += chunk;
> +	}
> +
> +	return 0;
> +}
> +
> +static int lpc_window_write(struct mbox_flash_data *mbox_flash,
> uint32_t pos,
> +			    const void *buf, uint32_t len)
> +{
> +	uint32_t off = mbox_flash->write.lpc_addr + (pos -
> mbox_flash->write.cur_pos);
> +	int rc;
> +
> +
> +	prlog(PR_TRACE, "Writing at 0x%08x for 0x%08x offset:
> 0x%08x\n",
> +			pos, len, off);
> +
> +	while(len) {
> +		uint32_t chunk;
> +
> +		if (len > 3 && !(off & 3)) {
> +			rc = lpc_write(OPAL_LPC_FW, off,
> +				       *(uint32_t *)buf, 4);
> +			chunk = 4;
> +		} else {
> +			rc = lpc_write(OPAL_LPC_FW, off,
> +				       *(uint8_t *)buf, 1);
> +			chunk = 1;
> +		}
> +		if (rc) {
> +			prlog(PR_ERR, "lpc_write failure %d to FW
> 0x%08x\n", rc, off);
> +			return rc;
> +		}
> +		len -= chunk;
> +		off += chunk;
> +		buf += chunk;
> +	}
> +
> +	return 0;
> +}
> +
> +__unused static uint64_t mbox_flash_mask(struct mbox_flash_data
> *mbox_flash)
>  {
>  	return (1 << mbox_flash->shift) - 1;
>  }
> @@ -95,6 +178,11 @@ static void msg_put_u32(struct bmc_mbox_msg *msg,
> int i, uint32_t val)
>  	memcpy(&msg->args[i], &tmp, sizeof(val));
>  }
>  
> +static uint32_t blocks_to_bytes(struct mbox_flash_data *mbox_flash,
> uint16_t blocks)
> +{
> +	return blocks << mbox_flash->shift;
> +}
> +
>  static struct bmc_mbox_msg *msg_alloc(struct mbox_flash_data
> *mbox_flash,
>  		uint8_t command)
>  {
> @@ -115,8 +203,55 @@ static void msg_free_memory(struct bmc_mbox_msg
> *mem __unused)
>  	/* Allocation is so simple this isn't required */
>  }
>  
> +/*
> + * The BMC may send is an out of band message to say that it doesn't
> + * own the flash anymore.
> + * It guarantees we can still access our (open) windows but it does
> + * not guarantee their contents until it clears the bit without
> + * sending us a corresponding bit to say that the windows are bad
> + * first.
> + * Since this is all things that will happen in the future, we
> should
> + * not perform any calls speculatively as its almost impossible to
> + * rewind.
> + */
> +static bool is_paused(struct mbox_flash_data *mbox_flash)
> +{
> +	return mbox_flash->pause;
> +}
> +
> +/*
> + * After a read or a write it is wise to check that the window we
> just
> + * read/write to/from is still valid otherwise it is possible some
> of
> + * the data didn't make it.
> + * This check is an optimisation as we'll close all our windows on
> any
> + * notification from the BMC that the windows are bad. See the above
> + * comment about is_paused().
> + * A foolproof (but much closer) method of validating reads/writes
> + * would be to attempt to close the window, if that fails then we
> can
> + * be sure that the read/write was no good.
> + */
> +static bool is_valid(struct mbox_flash_data *mbox_flash, struct
> lpc_window *win)
> +{
> +	return !is_paused(mbox_flash) && win->open;
> +}
> +
> +/*
> + * Check if we've received a BMC reboot notification.
> + * The strategy is to check on entry to mbox-flash and return a
> + * failure accordingly. Races will be handled by the fact that the
> BMC
> + * won't respond so timeouts will occur. As an added precaution
> + * msg_send() checks right before sending a message (to make the
> race
> + * as small as possible to avoid needless timeouts).
> + */
> +static bool is_reboot(struct mbox_flash_data *mbox_flash)
> +{
> +	return mbox_flash->reboot;
> +}
> +
>  static int msg_send(struct mbox_flash_data *mbox_flash, struct
> bmc_mbox_msg *msg)
>  {
> +	if (is_reboot(mbox_flash))
> +		return FLASH_ERR_AGAIN;
>  	mbox_flash->busy = true;
>  	mbox_flash->rc = 0;
>  	return bmc_mbox_enqueue(msg);
> @@ -154,91 +289,239 @@ static int wait_for_bmc(struct mbox_flash_data
> *mbox_flash, unsigned int timeout
>  	return mbox_flash->rc;
>  }
>  
> -static int lpc_window_read(struct mbox_flash_data *mbox_flash,
> uint32_t pos,
> -			   void *buf, uint32_t len)
> +static int mbox_flash_ack(struct mbox_flash_data *mbox_flash,
> uint8_t reg)
>  {
> -	uint32_t off = mbox_flash->read.lpc_addr + (pos -
> mbox_flash->read.cur_pos);
> +	struct bmc_mbox_msg *msg;
>  	int rc;
>  
> -	prlog(PR_TRACE, "Reading at 0x%08x for 0x%08x offset:
> 0x%08x\n",
> -			pos, len, off);
> +	msg = msg_alloc(mbox_flash, MBOX_C_BMC_EVENT_ACK);
> +	if (!msg)
> +		return FLASH_ERR_MALLOC_FAILED;
>  
> -	while(len) {
> -		uint32_t chunk;
> -		uint32_t dat;
> +	msg_put_u8(msg, 0, reg);
>  
> -		/* Choose access size */
> -		if (len > 3 && !(off & 3)) {
> -			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> -			if (!rc)
> -				*(uint32_t *)buf = dat;
> -			chunk = 4;
> -		} else {
> -			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> -			if (!rc)
> -				*(uint8_t *)buf = dat;
> -			chunk = 1;
> -		}
> -		if (rc) {
> -			prlog(PR_ERR, "lpc_read failure %d to FW
> 0x%08x\n", rc, off);
> -			return rc;
> -		}
> -		len -= chunk;
> -		off += chunk;
> -		buf += chunk;
> +	/* Clear this first so msg_send() doesn't freak out */
> +	mbox_flash->reboot = false;
> +
> +	rc = msg_send(mbox_flash, msg);
> +
> +	/* Still need to deal with it, we've only acked it now. */
> +	mbox_flash->reboot = true;
> +
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> message\n");
> +		goto out;
>  	}
>  
> -	return 0;
> +	/*
> +	 * Use a lower timeout - there is strong evidence to suggest
> the
> +	 * BMC won't respond, don't waste time spinning here just
> have the
> +	 * high levels retry when the BMC might be back
> +	 */
> +	rc = wait_for_bmc(mbox_flash, 3);
> +	if (rc)
> +		prlog(PR_ERR, "Error waiting for BMC\n");
> +
> +out:
> +	msg_free_memory(msg);
> +	return rc;
>  }
>  
> -static int lpc_window_write(struct mbox_flash_data *mbox_flash,
> uint32_t pos,
> -			    const void *buf, uint32_t len)
> +static int do_acks(struct mbox_flash_data *mbox_flash)
>  {
> -	uint32_t off = mbox_flash->write.lpc_addr + (pos -
> mbox_flash->write.cur_pos);
>  	int rc;
>  
> +	if (!mbox_flash->ack)
> +		return 0; /* Nothing to do */
>  
> -	prlog(PR_TRACE, "Writing at 0x%08x for 0x%08x offset:
> 0x%08x\n",
> -			pos, len, off);
> +	rc = mbox_flash_ack(mbox_flash, bmc_mbox_get_attn_reg() &
> MBOX_ATTN_ACK_MASK);
> +	if (!rc)
> +		mbox_flash->ack = false;
>  
> -	while(len) {
> -		uint32_t chunk;
> +	return rc;
> +}
>  
> -		if (len > 3 && !(off & 3)) {
> -			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint32_t *)buf, 4);
> -			chunk = 4;
> -		} else {
> -			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint8_t *)buf, 1);
> -			chunk = 1;
> -		}
> -		if (rc) {
> -			prlog(PR_ERR, "lpc_write failure %d to FW
> 0x%08x\n", rc, off);
> -			return rc;
> -		}
> -		len -= chunk;
> -		off += chunk;
> -		buf += chunk;
> +static void mbox_flash_do_nop(struct mbox_flash_data *mbox_flash
> __unused,
> +		struct bmc_mbox_msg *msg __unused)
> +{
> +}
> +
> +/* Version 1 and Version 2 compatible */
> +static void mbox_flash_do_get_mbox_info(struct mbox_flash_data
> *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +
> +	mbox_flash->version = msg_get_u8(msg, 0);
> +	if (mbox_flash->version == 1) {
> +		/* Not all version 1 daemons set argument 5
> correctly */
> +		mbox_flash->shift = 12; /* Protocol hardcodes to 4K
> anyway */
> +		mbox_flash->read.size = msg_get_u16(msg, 1) <<
> mbox_flash->shift;
> +		mbox_flash->write.size = msg_get_u16(msg, 3) <<
> mbox_flash->shift;
> +	} else { /* V2 compatible */
> +		mbox_flash->shift = msg_get_u8(msg, 5);
>  	}
> +	/* Callers will handle the case where the version is not
> known
> +	 *
> +	 * Here we deliberately ignore the 'default' sizes.
> +	 * All windows opened will not provide a hint and we're
> +	 * happy to let the BMC figure everything out.
> +	 * Future optimisations may use the default size.
> +	 */
> +}
>  
> -	return 0;
> +static void mbox_flash_do_get_flash_info_v2(struct mbox_flash_data
> *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->total_size = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 0));
> +	mbox_flash->erase_granule = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 2));
>  }
>  
> -static int mbox_flash_flush(struct mbox_flash_data *mbox_flash,
> uint64_t pos,
> +static void mbox_flash_do_get_flash_info_v1(struct mbox_flash_data
> *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->total_size = msg_get_u32(msg, 0);
> +	mbox_flash->erase_granule = msg_get_u32(msg, 4);
> +}
> +
> +static void mbox_flash_do_create_read_window_v2(struct
> mbox_flash_data *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->read.lpc_addr = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 0));
> +	mbox_flash->read.size = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 2));
> +	mbox_flash->read.cur_pos = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 4));
> +	mbox_flash->read.open = true;
> +	mbox_flash->write.open = false;
> +}
> +
> +static void mbox_flash_do_create_read_window_v1(struct
> mbox_flash_data *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->read.lpc_addr = msg_get_u16(msg, 0) <<
> mbox_flash->shift;

Can this use blocks_to_bytes() for consistency?

> +	mbox_flash->read.open = true;
> +	mbox_flash->write.open = false;
> +}
> +
> +static void mbox_flash_do_create_write_window_v2(struct
> mbox_flash_data *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->write.lpc_addr = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 0));
> +	mbox_flash->write.size = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 2));
> +	mbox_flash->write.cur_pos = blocks_to_bytes(mbox_flash,
> msg_get_u16(msg, 4));
> +	mbox_flash->write.open = true;
> +	mbox_flash->read.open = false;
> +}

Why not have a generic mbox_flash_do_create_window_v1 and
mbox_flash_do_create_window_v2 which handles parsing the response args
and is called by mbox_flash_do_create_[read/write]_window_v* which
 handles setting the write.open and read.open.

e.g.

mbox_flash_do_create_window_v2() {
	lpc_addr = blah;
	size = blah;
	cur_pos = blah;
	write.open = false;
	read.open = false;
}

mbox_flash_do_create_write_window_v2() {
	mbox_flash_do_create_window_v2()
	write.open = true;
}

mbox_flash_do_create_read_window_v2() {
	mbox_flash_do_create_win
dow_v2()
	read.open = true;
}

and the same for v1... Would save on code duplication and I think it clarifies that the commands are basically the same.

> +
> +static void mbox_flash_do_create_write_window_v1(struct
> mbox_flash_data *mbox_flash,
> +		struct bmc_mbox_msg *msg)
> +{
> +	mbox_flash->write.lpc_addr = msg_get_u16(msg, 0) <<
> mbox_flash->shift;

Can this use blocks_to_bytes() for consistency?

> +	mbox_flash->write.open = true;
> +	mbox_flash->read.open = false;
> +}
> +
> +/* Version 1 and Version 2 compatible */
> +static void mbox_flash_do_close_window(struct mbox_flash_data
> *mbox_flash,
> +		struct bmc_mbox_msg *msg __unused)
> +{
> +	mbox_flash->read.open = false;
> +	mbox_flash->write.open = false;
> +}
> +
> +static int handle_reboot(struct mbox_flash_data *mbox_flash)
> +{
> +	int rc;
> +
> +	/*
> +	 * If the BMC ready bit isn't present then we're basically
> +	 * guaranteed to timeout trying to talk to it so just fail
> +	 * whatever is trying to happen.
> +	 * Importantly, we can't trust that the presence of the bit
> means
> +	 * the daemon is ok - don't assume it is going to respond at
> all
> +	 * from here onwards
> +	 */
> +	if (!(bmc_mbox_get_attn_reg() & MBOX_ATTN_BMC_DAEMON_READY))
> +		return FLASH_ERR_AGAIN;
> +
> +	/* Clear this first so msg_send() doesn't freak out */
> +	mbox_flash->reboot = false;
> +
> +	rc = do_acks(mbox_flash);
> +	if (rc) {
> +		if (rc == MBOX_R_TIMEOUT)
> +			rc = FLASH_ERR_AGAIN;
> +		mbox_flash->reboot = true;
> +		return rc;
> +	}
> +
> +	rc = protocol_init(mbox_flash);
> +	if (rc)
> +		mbox_flash->reboot = true;
> +
> +	return rc;
> +}
> +
> +static bool do_delayed_work(struct mbox_flash_data *mbox_flash)
> +{
> +	return is_paused(mbox_flash) || do_acks(mbox_flash) ||
> +		(is_reboot(mbox_flash) &&
> handle_reboot(mbox_flash));
> +}
> +

Why not make this mbox_flash_mark() which takes a type [dirty/erased]
then you can call this in both the erase and dirty case since they are
essentially idential except for the command. That would save on code
duplication and mean when you fix a bug it gets fixed for both (see my
comment on your bug in the erase code path in the next patch...).

> +static int mbox_flash_dirty(struct mbox_flash_data *mbox_flash,
> uint64_t pos,
>  		uint64_t len)
>  {
>  	struct bmc_mbox_msg *msg;
>  	int rc;
>  
> -	if (!mbox_flash->write.open)
> -		prlog(PR_WARNING, "Attempting to flush without an
> open write window\n");
> +	if (!mbox_flash->write.open) {
> +		prlog(PR_ERR, "Attempting to dirty without an open
> write window\n");
> +		return FLASH_ERR_DEVICE_GONE;
> +	}
> +
> +	msg = msg_alloc(mbox_flash, MBOX_C_MARK_WRITE_DIRTY);
> +	if (!msg)
> +		return FLASH_ERR_MALLOC_FAILED;
> +
> +	if (mbox_flash->version == 1) {
> +		msg_put_u16(msg, 0, pos >> mbox_flash->shift);
> +		msg_put_u32(msg, 2, len);
> +	} else {
> +		uint64_t window_pos = (pos - mbox_flash-
> >write.cur_pos);
> +		uint64_t window_len = len + (window_pos &
> mbox_flash_mask(mbox_flash));
> +
> +		msg_put_u16(msg, 0, window_pos >> mbox_flash-
> >shift);
> +		msg_put_u16(msg, 2, ALIGN_UP(window_len, 1 <<
> mbox_flash->shift) >> mbox_flash->shift);
> +	}
> +
> +	rc = msg_send(mbox_flash, msg);
> +	if (rc) {
> +		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> message\n");
> +		goto out;
> +	}
> +
> +	rc = wait_for_bmc(mbox_flash, MBOX_DEFAULT_TIMEOUT);
> +	if (rc) {
> +		prlog(PR_ERR, "Error waiting for BMC\n");
> +		goto out;
> +	}
> +
> +out:
> +	msg_free_memory(msg);
> +	return rc;
> +}
> +
> +static int mbox_flash_flush(struct mbox_flash_data *mbox_flash)
> +{
> +	struct bmc_mbox_msg *msg;
> +	int rc;
> +
> +	if (!mbox_flash->write.open) {
> +		prlog(PR_ERR, "Attempting to flush without an open
> write window\n");
> +		return FLASH_ERR_DEVICE_GONE;
> +	}
>  
>  	msg = msg_alloc(mbox_flash, MBOX_C_WRITE_FLUSH);
>  	if (!msg)
>  		return FLASH_ERR_MALLOC_FAILED;
> -	msg_put_u16(msg, 0, pos >> mbox_flash->shift);
> -	msg_put_u32(msg, 2, len);
>  
>  	rc = msg_send(mbox_flash, msg);
>  	if (rc) {
> @@ -283,11 +566,17 @@ static int mbox_window_move(struct
> mbox_flash_data *mbox_flash,
>  
>  	prlog(PR_DEBUG, "Adjusting the window\n");
>  
> +	/* V1 needs to remember where it has opened the window, note
> it
> +	 * here.
> +	 * If we're running V2 the response to the CREATE_*_WINDOW
> command
> +	 * will overwrite what we've noted here.
> +	 */
> +	win->cur_pos = pos & ~mbox_flash_mask(mbox_flash);
> +
>  	msg = msg_alloc(mbox_flash, command);
>  	if (!msg)
>  		return FLASH_ERR_MALLOC_FAILED;
>  
> -	win->cur_pos = pos & ~(mbox_flash_mask(mbox_flash));
>  	msg_put_u16(msg, 0, pos >> mbox_flash->shift);
>  	rc = msg_send(mbox_flash, msg);
>  	if (rc) {
> @@ -307,6 +596,20 @@ static int mbox_window_move(struct
> mbox_flash_data *mbox_flash,
>  		/* Adjust size to meet current window */
>  		*size =  (win->cur_pos + win->size) - pos;
>  
> +	/*
> +	 * It doesn't make sense for size to be zero if len isn't
> zero.
> +	 * If this condition happens we're most likely going to spin
> since
> +	 * the caller will likely decerement pos by zero then call
> this
> +	 * again.
> +	 * Debateable as to if this should return non zero. At least
> the
> +	 * bug will be obvious from the barf.
> +	 */
> +	if (len != 0 && *size == 0) {
> +		prlog(PR_ERR, "Move window is indicating size
> zero!\n");
> +		prlog(PR_ERR, "pos: 0x%" PRIx64 ", len: 0x%" PRIx64
> "\n", pos, len);
> +		prlog(PR_ERR, "win pos: 0x%08x win size: 0x%08x\n",
> win->cur_pos, win->size);
> +	}
> +
>  out:
>  	msg_free_memory(msg);
>  	return rc;
> @@ -326,7 +629,10 @@ static int mbox_flash_write(struct
> blocklevel_device *bl, uint64_t pos,
>  
>  	mbox_flash = container_of(bl, struct mbox_flash_data, bl);
>  
> -	prlog(PR_TRACE, "Flash write at 0x%08llx for 0x%08llx\n",
> pos, len);
> +	if (do_delayed_work(mbox_flash))
> +		return FLASH_ERR_AGAIN;
> +
> +	prlog(PR_TRACE, "Flash write at %#" PRIx64 " for %#" PRIx64
> "\n", pos, len);
>  	while (len > 0) {
>  		/* Move window and get a new size to read */
>  		rc = mbox_window_move(mbox_flash, &mbox_flash-
> >write,
> @@ -340,12 +646,18 @@ static int mbox_flash_write(struct
> blocklevel_device *bl, uint64_t pos,
>  		if (rc)
>  			return rc;
>  
> +		rc = mbox_flash_dirty(mbox_flash, pos, size);
> +		if (rc)
> +			return rc;
> +
>  		/*
>  		 * Must flush here as changing the window contents
>  		 * without flushing entitles the BMC to throw away
> the
> -		 * data
> +		 * data. Unlike the read case there isn't a need to
> explicitly
> +		 * validate the window, the flush command will fail
> if the
> +		 * window was compromised.
>  		 */
> -		rc = mbox_flash_flush(mbox_flash, pos, size);
> +		rc = mbox_flash_flush(mbox_flash);

Why are we flushing with every write? Don't we just need to flush at
the end of the while(len > 0) loop?

We will call move window everytime which will either use the existing
window -> no need to flush until the end, or will close the current
window and open a new one -> will have an implicit flush which will set
rc if it fails meaning we'll exit any way. Move window will also return
an error if we received an event during the fact.

>  		if (rc)
>  			return rc;
>  
> @@ -370,7 +682,10 @@ static int mbox_flash_read(struct
> blocklevel_device *bl, uint64_t pos,
>  
>  	mbox_flash = container_of(bl, struct mbox_flash_data, bl);
>  
> -	prlog(PR_TRACE, "Flash read at 0x%08llx for 0x%08llx\n",
> pos, len);
> +	if (do_delayed_work(mbox_flash))
> +		return FLASH_ERR_AGAIN;
> +
> +	prlog(PR_TRACE, "Flash read at %#" PRIx64 " for %#" PRIx64
> "\n", pos, len);
>  	while (len > 0) {
>  		/* Move window and get a new size to read */
>  		rc = mbox_window_move(mbox_flash, &mbox_flash->read,
> @@ -387,6 +702,12 @@ static int mbox_flash_read(struct
> blocklevel_device *bl, uint64_t pos,
>  		len -= size;
>  		pos += size;
>  		buf += size;
> +		/*
> +		 * Ensure my window is still open, if it isn't we
> can't trust
> +		 * what we read
> +		 */
> +		if (!is_valid(mbox_flash, &mbox_flash->read))
> +			return FLASH_ERR_AGAIN;
>  	}
>  	return rc;
>  }
> @@ -399,6 +720,10 @@ static int mbox_flash_get_info(struct
> blocklevel_device *bl, const char **name,
>  	int rc;
>  
>  	mbox_flash = container_of(bl, struct mbox_flash_data, bl);
> +
> +	if (do_delayed_work(mbox_flash))
> +		return FLASH_ERR_AGAIN;
> +
>  	msg = msg_alloc(mbox_flash, MBOX_C_GET_FLASH_INFO);
>  	if (!msg)
>  		return FLASH_ERR_MALLOC_FAILED;
> @@ -437,27 +762,59 @@ out:
>  }
>  
>  static int mbox_flash_erase(struct blocklevel_device *bl __unused,
> -			    uint64_t pos __unused, uint64_t len
> __unused)
> +		uint64_t pos __unused, uint64_t len __unused)
>  {
> -	/*
> -	 * We can probably get away with doing nothing.
> -	 * TODO: Rethink this, causes interesting behaviour in
> pflash.
> -	 * Users do expect pflash -{e,E} to do something. This is
> because
> -	 * on real flash this would have set that region to all 0xFF
> but
> -	 * really the erase at the blocklevel interface was only
> designed
> -	 * to be "please make this region writeable".
> -	 * It may be wise (despite the large performance penalty) to
> -	 * actually write all 0xFF here. I'll leave that as an
> extersise
> -	 * for the future.
> -	 */
> +       /*
> +        * We can probably get away with doing nothing.
> +        * TODO: Rethink this, causes interesting behaviour in
> pflash.
> +        * Users do expect pflash -{e,E} to do something. This is
> because
> +        * on real flash this would have set that region to all 0xFF
> but
> +        * really the erase at the blocklevel interface was only
> designed
> +        * to be "please make this region writeable".
> +        * It may be wise (despite the large performance penalty) to
> +        * actually write all 0xFF here. I'll leave that as an
> extersise
extersise?!
> +        * for the future.
> +        */

Did you increase the indent here by one?

> +
>  	return 0;
>  }
>  
> +/* Called from interrupt handler, don't send any mbox messages */
> +static void mbox_flash_attn(uint8_t attn, void *priv)
> +{
> +	struct mbox_flash_data *mbox_flash = priv;
> +
> +	if (attn & MBOX_ATTN_ACK_MASK)
> +		mbox_flash->ack = true;
> +	if (attn & MBOX_ATTN_BMC_REBOOT) {
> +		mbox_flash->reboot = true;
> +		mbox_flash->read.open = false;
> +		mbox_flash->write.open = false;
> +		attn &= ~MBOX_ATTN_BMC_REBOOT;
> +	}
> +
> +	if (attn & MBOX_ATTN_BMC_WINDOW_RESET) {
> +		mbox_flash->read.open = false;
> +		mbox_flash->write.open = false;
> +		attn &= ~MBOX_ATTN_BMC_WINDOW_RESET;
> +	}
> +
> +	if (attn & MBOX_ATTN_BMC_FLASH_LOST) {
> +		mbox_flash->pause = true;
> +		attn &= ~MBOX_ATTN_BMC_FLASH_LOST;
> +	} else {
> +		mbox_flash->pause = false;
> +	}
> +
> +	if (attn & MBOX_ATTN_BMC_DAEMON_READY)
> +		attn &= ~MBOX_ATTN_BMC_DAEMON_READY;
> +}
> +
>  static void mbox_flash_callback(struct bmc_mbox_msg *msg, void
> *priv)
>  {
>  	struct mbox_flash_data *mbox_flash = priv;
>  
> -	prlog(PR_TRACE, "%s: BMC OK\n", __func__);
> +	prlog(PR_TRACE, "BMC OK command %u\n", msg->command);
>  
>  	if (msg->response != MBOX_R_SUCCESS) {
>  		prlog(PR_ERR, "Bad response code from BMC %d\n",
> msg->response);
> @@ -473,102 +830,148 @@ static void mbox_flash_callback(struct
> bmc_mbox_msg *msg, void *priv)
>  		goto out;
>  	}
>  
> -	mbox_flash->rc = 0;
> +	if (msg->command > MBOX_COMMAND_COUNT) {
> +		prlog(PR_ERR, "Got response to unknown command
> %02x\n", msg->command);
> +		mbox_flash->rc = -1;
> +		goto out;
> +	}
>  
> -	switch (msg->command) {
> -		case MBOX_C_RESET_STATE:
> -			break;
> -		case MBOX_C_GET_MBOX_INFO:
> -			mbox_flash->read.size = msg_get_u16(msg, 1)
> << mbox_flash->shift;
> -			mbox_flash->write.size = msg_get_u16(msg, 3)
> << mbox_flash->shift;
> -			break;
> -		case MBOX_C_GET_FLASH_INFO:
> -			mbox_flash->total_size = msg_get_u32(msg,
> 0);
> -			mbox_flash->erase_granule = msg_get_u32(msg,
> 4);
> -			break;
> -		case MBOX_C_CREATE_READ_WINDOW:
> -			mbox_flash->read.lpc_addr = msg_get_u16(msg,
> 0) << mbox_flash->shift;
> -			mbox_flash->read.open = true;
> -			mbox_flash->write.open = false;
> -			break;
> -		case MBOX_C_CLOSE_WINDOW:
> -			break;
> -		case MBOX_C_CREATE_WRITE_WINDOW:
> -			mbox_flash->write.lpc_addr =
> msg_get_u16(msg, 0) << mbox_flash->shift;
> -			mbox_flash->write.open = true;
> -			mbox_flash->read.open = false;
> -			break;
> -		case MBOX_C_MARK_WRITE_DIRTY:
> -			break;
> -		case MBOX_C_WRITE_FLUSH:
> -			break;
> -		case MBOX_C_BMC_EVENT_ACK:
> -			break;
> -		default:
> -			prlog(PR_ERR, "Got response to unknown
> command %02x\n", msg->command);
> -			mbox_flash->rc = -1;
> +	if (!mbox_flash->handlers[msg->command]) {
> +		prlog(PR_ERR, "Couldn't find handler for message!
> command: %u, seq: %u\n",
> +				msg->command, msg->seq);
> +		mbox_flash->rc = MBOX_R_SYSTEM_ERROR;
> +		goto out;
>  	}
>  
> +	mbox_flash->rc = 0;
> +
> +	mbox_flash->handlers[msg->command](mbox_flash, msg);
> +
>  out:
>  	mbox_flash->busy = false;
>  }
>  
> -int mbox_flash_init(struct blocklevel_device **bl)
> +static int protocol_init(struct mbox_flash_data *mbox_flash)
>  {
> -	struct mbox_flash_data *mbox_flash;
>  	struct bmc_mbox_msg *msg;
>  	int rc;
>  
> -	if (!bl)
> -		return FLASH_ERR_PARM_ERROR;
> +	/* Assume V2 */
> +	mbox_flash->bl.read = &mbox_flash_read;
> +	mbox_flash->bl.write = &mbox_flash_write;
> +	mbox_flash->bl.erase = &mbox_flash_erase;
> +	mbox_flash->bl.get_info = &mbox_flash_get_info;
>  
> -	*bl = NULL;
> +	/* Assume V2 */
> +	mbox_flash->handlers[0] = NULL;
> +	mbox_flash->handlers[MBOX_C_RESET_STATE] =
> &mbox_flash_do_nop;
> +	mbox_flash->handlers[MBOX_C_GET_MBOX_INFO] =
> &mbox_flash_do_get_mbox_info;
> +	mbox_flash->handlers[MBOX_C_GET_FLASH_INFO] =
> &mbox_flash_do_get_flash_info_v2;
> +	mbox_flash->handlers[MBOX_C_CREATE_READ_WINDOW] =
> &mbox_flash_do_create_read_window_v2;
> +	mbox_flash->handlers[MBOX_C_CLOSE_WINDOW] =
> &mbox_flash_do_close_window;
> +	mbox_flash->handlers[MBOX_C_CREATE_WRITE_WINDOW] =
> &mbox_flash_do_create_write_window_v2;
> +	mbox_flash->handlers[MBOX_C_MARK_WRITE_DIRTY] =
> &mbox_flash_do_nop;
> +	mbox_flash->handlers[MBOX_C_WRITE_FLUSH] =
> &mbox_flash_do_nop;
> +	mbox_flash->handlers[MBOX_C_BMC_EVENT_ACK] =
> &mbox_flash_do_nop;
> +	mbox_flash->handlers[MBOX_C_MARK_WRITE_ERASED] =
> &mbox_flash_do_nop;
>  
> -	mbox_flash = zalloc(sizeof(struct mbox_flash_data));
> -	if (!mbox_flash)
> -		return FLASH_ERR_MALLOC_FAILED;
>  
> -	/* For V1 of the protocol this is fixed. This could change
> */
> +	bmc_mbox_register_callback(&mbox_flash_callback,
> mbox_flash);
> +	bmc_mbox_register_attn(&mbox_flash_attn, mbox_flash);
> +
> +	/*
> +	 * For V1 of the protocol this is fixed.
> +	 * V2: The init code will update this
> +	 */
>  	mbox_flash->shift = 12;
>  
> -	bmc_mbox_register_callback(&mbox_flash_callback,
> mbox_flash);
> +	/*
> +	 * Always attempt init with V2.
> +	 * The GET_MBOX_INFO response will confirm that the other
> side can
> +	 * talk V2, we'll update this variable then if V2 is not
> supported
> +	 */
> +	mbox_flash->version = 2;
>  
>  	msg = msg_alloc(mbox_flash, MBOX_C_GET_MBOX_INFO);
> -	if (!msg) {
> -		rc = FLASH_ERR_MALLOC_FAILED;
> -		goto out;
> -	}
> -
> -	msg_put_u8(msg, 0, 1); /* V1, do better */
> +	if (!msg)
> +		return FLASH_ERR_MALLOC_FAILED;
>  
> +	msg_put_u8(msg, 0, mbox_flash->version);
>  	rc = msg_send(mbox_flash, msg);
>  	if (rc) {
>  		prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> message\n");
> -		goto out_msg;
> +		goto out;
>  	}
>  
>  	rc = wait_for_bmc(mbox_flash, MBOX_DEFAULT_TIMEOUT);
>  	if (rc) {
>  		prlog(PR_ERR, "Error waiting for BMC\n");
> -		goto out_msg;
> +		goto out;
>  	}
>  
>  	msg_free_memory(msg);
>  
> -	mbox_flash->bl.keep_alive = 0;
> +	prlog(PR_INFO, "Detected mbox protocol version %d\n",
> mbox_flash->version);
> +	if (mbox_flash->version == 1) {
> +		/* Not all handlers differ, update those which do */
> +		mbox_flash->handlers[MBOX_C_GET_MBOX_INFO] =
> &mbox_flash_do_get_mbox_info;
> +		mbox_flash->handlers[MBOX_C_GET_FLASH_INFO] =
> &mbox_flash_do_get_flash_info_v1;
> +		mbox_flash->handlers[MBOX_C_CREATE_READ_WINDOW] =
> +			&mbox_flash_do_create_read_window_v1;
> +		mbox_flash->handlers[MBOX_C_CREATE_WRITE_WINDOW] =
> +			&mbox_flash_do_create_write_window_v1;
> +		mbox_flash->handlers[MBOX_C_MARK_WRITE_ERASED] =
> NULL; /* Not in V1 */

I don't really like it how you're patching out specific functions, it
doesn't really lead to extensibilty... What about an array of handler
arrays which is defined above as a constant and indexed by version
number. That way you just update the array pointer making it easier to
extend in future.

> +	} else if (mbox_flash->version > 2) {
> +		/*
> +		 * Uh, we requested version 2... The BMC is can only
> lower the
> +		 * requested version not do anything else. FWIW
> there is no
> +		 * verion 0
> +		 */
> +		prlog(PR_CRIT, "Bad version: %u\n", mbox_flash-
> >version);
> +		rc = FLASH_ERR_PARM_ERROR;
> +		goto out;
> +	}
> +
> +
> +	return 0;
> +out:
> +	msg_free_memory(msg);
> +	return rc;
> +}
> +
> +int mbox_flash_init(struct blocklevel_device **bl)
> +{
> +	struct mbox_flash_data *mbox_flash;
> +	int rc;
> +
> +	if (!bl)
> +		return FLASH_ERR_PARM_ERROR;
> +
> +	*bl = NULL;
> +
> +	mbox_flash = zalloc(sizeof(struct mbox_flash_data));
> +	if (!mbox_flash)
> +		return FLASH_ERR_MALLOC_FAILED;
> +
> +	/* Assume V2 */
>  	mbox_flash->bl.read = &mbox_flash_read;
>  	mbox_flash->bl.write = &mbox_flash_write;
>  	mbox_flash->bl.erase = &mbox_flash_erase;
>  	mbox_flash->bl.get_info = &mbox_flash_get_info;
>  
> +	if (bmc_mbox_get_attn_reg() & MBOX_ATTN_BMC_REBOOT)
> +		rc = handle_reboot(mbox_flash);
> +	else
> +		rc = protocol_init(mbox_flash);

Handle reboot eventually calls protocol_init... To make it clearer that
it's called irrespective can we change this to something like:

if (reboot) {
	handle_reboot();

// This always happens
protocol_init();

> +	if (rc) {
> +		free(mbox_flash);
> +		return rc;
> +	}
> +
> +	mbox_flash->bl.keep_alive = 0;
> +
>  	*bl = &(mbox_flash->bl);
>  	return 0;
> -
> -out_msg:
> -	msg_free_memory(msg);
> -out:
> -	free(mbox_flash);
> -	return rc;
>  }
>  
>  void mbox_flash_exit(struct blocklevel_device *bl)


More information about the Skiboot mailing list