[Skiboot] [PATCH 3/5] libflash/mbox-flash: Update to V2 of the protocol
Cyril Bur
cyril.bur at au1.ibm.com
Fri May 12 23:40:52 AEST 2017
On Tue, 2017-05-02 at 14:22 +1000, Suraj Jitindar Singh wrote:
> On Mon, 2017-04-24 at 19:14 +1000, Cyril Bur wrote:
> > Updated version 2 of the protocol can be found at:
> > https://github.com/openbmc/mboxbridge/blob/master/Documentation/mbox_
> > protocol.md
> >
> > This commit changes mbox-flash such that it will preferentially talk
> > version 2 to any capable daemon but still remain capable of talking
> > to
> > v1 daemons.
> >
> > Version two changes some of the command definitions for increased
> > consistency and usability.
> > Version two includes more attention bits - these are now dealt with
> > at a
> > simple level.
>
> Comments below...
>
> Suraj
>
> >
> > Signed-off-by: Cyril Bur <cyril.bur at au1.ibm.com>
> > ---
> > hw/lpc-mbox.c | 38 ++-
> > include/lpc-mbox.h | 11 +
> > libflash/errors.h | 2 +
> > libflash/mbox-flash.c | 671
> > ++++++++++++++++++++++++++++++++++++++++----------
> > 4 files changed, 580 insertions(+), 142 deletions(-)
> >
> > diff --git a/hw/lpc-mbox.c b/hw/lpc-mbox.c
> > index 7e509f54..c473414f 100644
> > --- a/hw/lpc-mbox.c
> > +++ b/hw/lpc-mbox.c
> > @@ -51,9 +51,6 @@
> >
> > #define MBOX_MAX_QUEUE_LEN 5
> >
> > -#define BMC_RESET 1
> > -#define BMC_COMPLETE 2
> > -
> > struct mbox {
> > uint32_t base;
> > int queue_len;
> > @@ -62,6 +59,8 @@ struct mbox {
> > struct timer poller;
> > void (*callback)(struct bmc_mbox_msg *msg, void *priv);
> > void *drv_data;
> > + void (*attn)(uint8_t bits, void *priv);
> > + void *attn_data;
> > struct lock lock; /* Protect in_flight */
> > struct bmc_mbox_msg *in_flight;
> > };
> > @@ -184,16 +183,15 @@ out_response:
> > * something to tell us.
> > */
> > if (bmc_mbox_inb(MBOX_STATUS_1) & MBOX_STATUS_1_ATTN) {
> > - uint8_t action;
> > + uint8_t action, all;
> >
> > /* W1C on that reg */
> > bmc_mbox_outb(MBOX_STATUS_1_ATTN, MBOX_STATUS_1);
> >
> > - action = bmc_mbox_inb(MBOX_FLAG_REG);
> > + all = action = bmc_mbox_inb(MBOX_FLAG_REG);
> > prlog(PR_TRACE, "Got a status register interrupt
> > with action 0x%02x\n",
> > action);
> > -
> > - if (action & BMC_RESET) {
> > + if (action & MBOX_ATTN_BMC_REBOOT) {
> > /*
> > * It's unlikely that something needs to be
> > done at the
> > * driver level. Let libflash deal with it.
> > @@ -201,12 +199,23 @@ out_response:
> > * event.
> > */
> > prlog(PR_WARNING, "BMC reset detected\n");
> > - action &= ~BMC_RESET;
> > + action &= ~MBOX_ATTN_BMC_REBOOT;
> > }
> >
> > + if (action & MBOX_ATTN_BMC_WINDOW_RESET)
> > + action &= ~MBOX_ATTN_BMC_WINDOW_RESET;
> > +
> > + if (action & MBOX_ATTN_BMC_FLASH_LOST)
> > + action &= ~MBOX_ATTN_BMC_FLASH_LOST;
> > +
> > + if (action & MBOX_ATTN_BMC_DAEMON_READY)
> > + action &= ~MBOX_ATTN_BMC_DAEMON_READY;
> > +
> > if (action)
> > prlog(PR_ERR, "Got a status bit set that
> > don't know about: 0x%02x\n",
> > action);
> > +
> > + mbox.attn(all, mbox.attn_data);
> > }
> >
> > schedule_timer(&mbox.poller,
> > @@ -248,6 +257,19 @@ int bmc_mbox_register_callback(void
> > (*callback)(struct bmc_mbox_msg *msg, void *
> > return 0;
> > }
> >
> > +int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void
> > *priv),
> > + void *drv_data)
> > +{
> > + mbox.attn = callback;
> > + mbox.attn_data = drv_data;
> > + return 0;
> > +}
> > +
> > +uint8_t bmc_mbox_get_attn_reg(void)
> > +{
> > + return bmc_mbox_inb(MBOX_FLAG_REG);
> > +}
> > +
> > void mbox_init(void)
> > {
> > const struct dt_property *prop;
> > diff --git a/include/lpc-mbox.h b/include/lpc-mbox.h
> > index 14e38cf8..ca5e50c0 100644
> > --- a/include/lpc-mbox.h
> > +++ b/include/lpc-mbox.h
> > @@ -33,6 +33,8 @@
> > #define MBOX_C_MARK_WRITE_DIRTY 0x07
> > #define MBOX_C_WRITE_FLUSH 0x08
> > #define MBOX_C_BMC_EVENT_ACK 0x09
> > +#define MBOX_C_MARK_WRITE_ERASED 0x0a
> > +#define MBOX_COMMAND_COUNT 10
> >
> > #define MBOX_R_SUCCESS 0x01
> > #define MBOX_R_PARAM_ERROR 0x02
> > @@ -40,6 +42,12 @@
> > #define MBOX_R_SYSTEM_ERROR 0x04
> > #define MBOX_R_TIMEOUT 0x05
> >
> > +#define MBOX_ATTN_ACK_MASK 0x3
> > +#define MBOX_ATTN_BMC_REBOOT (1 << 0)
> > +#define MBOX_ATTN_BMC_WINDOW_RESET (1 << 1)
> > +#define MBOX_ATTN_BMC_FLASH_LOST (1 << 6)
> > +#define MBOX_ATTN_BMC_DAEMON_READY (1 << 7)
> > +
> > /* Default poll interval before interrupts are working */
> > #define MBOX_DEFAULT_POLL_MS 200
> >
> > @@ -55,4 +63,7 @@ struct bmc_mbox_msg {
> > int bmc_mbox_enqueue(struct bmc_mbox_msg *msg);
> > int bmc_mbox_register_callback(void (*callback)(struct bmc_mbox_msg
> > *msg, void *priv),
> > void *drv_data);
> > +int bmc_mbox_register_attn(void (*callback)(uint8_t bits, void
> > *priv),
> > + void *drv_data);
> > +uint8_t bmc_mbox_get_attn_reg(void);
> > #endif /* __LPC_MBOX_H */
> > diff --git a/libflash/errors.h b/libflash/errors.h
> > index 99dcfc28..2f567211 100644
> > --- a/libflash/errors.h
> > +++ b/libflash/errors.h
> > @@ -31,5 +31,7 @@
> > #define FLASH_ERR_CTRL_TIMEOUT 13
> > #define FLASH_ERR_ECC_INVALID 14
> > #define FLASH_ERR_BAD_READ 15
> > +#define FLASH_ERR_DEVICE_GONE 16
> > +#define FLASH_ERR_AGAIN 17
> >
> > #endif /* __LIBFLASH_ERRORS_H */
> > diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
> > index 5244c35c..3dfcebe9 100644
> > --- a/libflash/mbox-flash.c
> > +++ b/libflash/mbox-flash.c
> > @@ -23,6 +23,7 @@
> > #include <string.h>
> >
> > #include <skiboot.h>
> > +#include <inttypes.h>
> > #include <timebase.h>
> > #include <timer.h>
> > #include <libflash/libflash.h>
> > @@ -46,6 +47,7 @@ struct lpc_window {
> > };
> >
> > struct mbox_flash_data {
> > + int version;
> > uint32_t shift;
> > struct lpc_window read;
> > struct lpc_window write;
> > @@ -53,12 +55,93 @@ struct mbox_flash_data {
> > uint32_t total_size;
> > uint32_t erase_granule;
> > int rc;
> > + bool reboot;
> > + bool pause;
>
> I prefer suspended to paused...
>
> > bool busy;
> > + bool ack;
> > uint8_t seq;
> > + /* Plus one, commands start at 1 */
> > + void (*handlers[MBOX_COMMAND_COUNT + 1])(struct
> > mbox_flash_data *, struct bmc_mbox_msg*);
> > struct bmc_mbox_msg msg_mem;
> > };
> >
> > -static uint64_t mbox_flash_mask(struct mbox_flash_data *mbox_flash)
> > +static void mbox_flash_callback(struct bmc_mbox_msg *msg, void
> > *priv);
> > +static void mbox_flash_attn(uint8_t attn, void *priv);
> > +
> > +static int protocol_init(struct mbox_flash_data *mbox_flash);
> > +
> > +static int lpc_window_read(struct mbox_flash_data *mbox_flash,
> > uint32_t pos,
> > + void *buf, uint32_t len)
> > +{
> > + uint32_t off = mbox_flash->read.lpc_addr + (pos -
> > mbox_flash->read.cur_pos);
> > + int rc;
> > +
> > + prlog(PR_TRACE, "Reading at 0x%08x for 0x%08x offset:
> > 0x%08x\n",
> > + pos, len, off);
> > +
> > + while(len) {
> > + uint32_t chunk;
> > + uint32_t dat;
> > +
> > + /* XXX: make this read until it's aligned */
> > + if (len > 3 && !(off & 3)) {
> > + rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> > + if (!rc)
> > + *(uint32_t *)buf = dat;
> > + chunk = 4;
> > + } else {
> > + rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> > + if (!rc)
> > + *(uint8_t *)buf = dat;
> > + chunk = 1;
> > + }
> > + if (rc) {
> > + prlog(PR_ERR, "lpc_read failure %d to FW
> > 0x%08x\n", rc, off);
> > + return rc;
> > + }
> > + len -= chunk;
> > + off += chunk;
> > + buf += chunk;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int lpc_window_write(struct mbox_flash_data *mbox_flash,
> > uint32_t pos,
> > + const void *buf, uint32_t len)
> > +{
> > + uint32_t off = mbox_flash->write.lpc_addr + (pos -
> > mbox_flash->write.cur_pos);
> > + int rc;
> > +
> > +
> > + prlog(PR_TRACE, "Writing at 0x%08x for 0x%08x offset:
> > 0x%08x\n",
> > + pos, len, off);
> > +
> > + while(len) {
> > + uint32_t chunk;
> > +
> > + if (len > 3 && !(off & 3)) {
> > + rc = lpc_write(OPAL_LPC_FW, off,
> > + *(uint32_t *)buf, 4);
> > + chunk = 4;
> > + } else {
> > + rc = lpc_write(OPAL_LPC_FW, off,
> > + *(uint8_t *)buf, 1);
> > + chunk = 1;
> > + }
> > + if (rc) {
> > + prlog(PR_ERR, "lpc_write failure %d to FW
> > 0x%08x\n", rc, off);
> > + return rc;
> > + }
> > + len -= chunk;
> > + off += chunk;
> > + buf += chunk;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +__unused static uint64_t mbox_flash_mask(struct mbox_flash_data
> > *mbox_flash)
> > {
> > return (1 << mbox_flash->shift) - 1;
> > }
> > @@ -95,6 +178,11 @@ static void msg_put_u32(struct bmc_mbox_msg *msg,
> > int i, uint32_t val)
> > memcpy(&msg->args[i], &tmp, sizeof(val));
> > }
> >
> > +static uint32_t blocks_to_bytes(struct mbox_flash_data *mbox_flash,
> > uint16_t blocks)
> > +{
> > + return blocks << mbox_flash->shift;
> > +}
> > +
> > static struct bmc_mbox_msg *msg_alloc(struct mbox_flash_data
> > *mbox_flash,
> > uint8_t command)
> > {
> > @@ -115,8 +203,55 @@ static void msg_free_memory(struct bmc_mbox_msg
> > *mem __unused)
> > /* Allocation is so simple this isn't required */
> > }
> >
> > +/*
> > + * The BMC may send is an out of band message to say that it doesn't
> > + * own the flash anymore.
> > + * It guarantees we can still access our (open) windows but it does
> > + * not guarantee their contents until it clears the bit without
> > + * sending us a corresponding bit to say that the windows are bad
> > + * first.
> > + * Since this is all things that will happen in the future, we
> > should
> > + * not perform any calls speculatively as its almost impossible to
> > + * rewind.
> > + */
> > +static bool is_paused(struct mbox_flash_data *mbox_flash)
> > +{
> > + return mbox_flash->pause;
> > +}
> > +
> > +/*
> > + * After a read or a write it is wise to check that the window we
> > just
> > + * read/write to/from is still valid otherwise it is possible some
> > of
> > + * the data didn't make it.
> > + * This check is an optimisation as we'll close all our windows on
> > any
> > + * notification from the BMC that the windows are bad. See the above
> > + * comment about is_paused().
> > + * A foolproof (but much closer) method of validating reads/writes
> > + * would be to attempt to close the window, if that fails then we
> > can
> > + * be sure that the read/write was no good.
> > + */
> > +static bool is_valid(struct mbox_flash_data *mbox_flash, struct
> > lpc_window *win)
> > +{
> > + return !is_paused(mbox_flash) && win->open;
> > +}
> > +
> > +/*
> > + * Check if we've received a BMC reboot notification.
> > + * The strategy is to check on entry to mbox-flash and return a
> > + * failure accordingly. Races will be handled by the fact that the
> > BMC
> > + * won't respond so timeouts will occur. As an added precaution
> > + * msg_send() checks right before sending a message (to make the
> > race
> > + * as small as possible to avoid needless timeouts).
> > + */
> > +static bool is_reboot(struct mbox_flash_data *mbox_flash)
> > +{
> > + return mbox_flash->reboot;
> > +}
> > +
> > static int msg_send(struct mbox_flash_data *mbox_flash, struct
> > bmc_mbox_msg *msg)
> > {
> > + if (is_reboot(mbox_flash))
> > + return FLASH_ERR_AGAIN;
> > mbox_flash->busy = true;
> > mbox_flash->rc = 0;
> > return bmc_mbox_enqueue(msg);
> > @@ -154,91 +289,239 @@ static int wait_for_bmc(struct mbox_flash_data
> > *mbox_flash, unsigned int timeout
> > return mbox_flash->rc;
> > }
> >
> > -static int lpc_window_read(struct mbox_flash_data *mbox_flash,
> > uint32_t pos,
> > - void *buf, uint32_t len)
> > +static int mbox_flash_ack(struct mbox_flash_data *mbox_flash,
> > uint8_t reg)
> > {
> > - uint32_t off = mbox_flash->read.lpc_addr + (pos -
> > mbox_flash->read.cur_pos);
> > + struct bmc_mbox_msg *msg;
> > int rc;
> >
> > - prlog(PR_TRACE, "Reading at 0x%08x for 0x%08x offset:
> > 0x%08x\n",
> > - pos, len, off);
> > + msg = msg_alloc(mbox_flash, MBOX_C_BMC_EVENT_ACK);
> > + if (!msg)
> > + return FLASH_ERR_MALLOC_FAILED;
> >
> > - while(len) {
> > - uint32_t chunk;
> > - uint32_t dat;
> > + msg_put_u8(msg, 0, reg);
> >
> > - /* Choose access size */
> > - if (len > 3 && !(off & 3)) {
> > - rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> > - if (!rc)
> > - *(uint32_t *)buf = dat;
> > - chunk = 4;
> > - } else {
> > - rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> > - if (!rc)
> > - *(uint8_t *)buf = dat;
> > - chunk = 1;
> > - }
> > - if (rc) {
> > - prlog(PR_ERR, "lpc_read failure %d to FW
> > 0x%08x\n", rc, off);
> > - return rc;
> > - }
> > - len -= chunk;
> > - off += chunk;
> > - buf += chunk;
> > + /* Clear this first so msg_send() doesn't freak out */
> > + mbox_flash->reboot = false;
> > +
> > + rc = msg_send(mbox_flash, msg);
> > +
> > + /* Still need to deal with it, we've only acked it now. */
> > + mbox_flash->reboot = true;
> > +
> > + if (rc) {
> > + prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> > message\n");
> > + goto out;
> > }
> >
> > - return 0;
> > + /*
> > + * Use a lower timeout - there is strong evidence to suggest
> > the
> > + * BMC won't respond, don't waste time spinning here just
> > have the
> > + * high levels retry when the BMC might be back
> > + */
> > + rc = wait_for_bmc(mbox_flash, 3);
> > + if (rc)
> > + prlog(PR_ERR, "Error waiting for BMC\n");
> > +
> > +out:
> > + msg_free_memory(msg);
> > + return rc;
> > }
> >
> > -static int lpc_window_write(struct mbox_flash_data *mbox_flash,
> > uint32_t pos,
> > - const void *buf, uint32_t len)
> > +static int do_acks(struct mbox_flash_data *mbox_flash)
> > {
> > - uint32_t off = mbox_flash->write.lpc_addr + (pos -
> > mbox_flash->write.cur_pos);
> > int rc;
> >
> > + if (!mbox_flash->ack)
> > + return 0; /* Nothing to do */
> >
> > - prlog(PR_TRACE, "Writing at 0x%08x for 0x%08x offset:
> > 0x%08x\n",
> > - pos, len, off);
> > + rc = mbox_flash_ack(mbox_flash, bmc_mbox_get_attn_reg() &
> > MBOX_ATTN_ACK_MASK);
> > + if (!rc)
> > + mbox_flash->ack = false;
> >
> > - while(len) {
> > - uint32_t chunk;
> > + return rc;
> > +}
> >
> > - if (len > 3 && !(off & 3)) {
> > - rc = lpc_write(OPAL_LPC_FW, off,
> > - *(uint32_t *)buf, 4);
> > - chunk = 4;
> > - } else {
> > - rc = lpc_write(OPAL_LPC_FW, off,
> > - *(uint8_t *)buf, 1);
> > - chunk = 1;
> > - }
> > - if (rc) {
> > - prlog(PR_ERR, "lpc_write failure %d to FW
> > 0x%08x\n", rc, off);
> > - return rc;
> > - }
> > - len -= chunk;
> > - off += chunk;
> > - buf += chunk;
> > +static void mbox_flash_do_nop(struct mbox_flash_data *mbox_flash
> > __unused,
> > + struct bmc_mbox_msg *msg __unused)
> > +{
> > +}
> > +
> > +/* Version 1 and Version 2 compatible */
> > +static void mbox_flash_do_get_mbox_info(struct mbox_flash_data
> > *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > +
> > + mbox_flash->version = msg_get_u8(msg, 0);
> > + if (mbox_flash->version == 1) {
> > + /* Not all version 1 daemons set argument 5
> > correctly */
> > + mbox_flash->shift = 12; /* Protocol hardcodes to 4K
> > anyway */
> > + mbox_flash->read.size = msg_get_u16(msg, 1) <<
> > mbox_flash->shift;
> > + mbox_flash->write.size = msg_get_u16(msg, 3) <<
> > mbox_flash->shift;
> > + } else { /* V2 compatible */
> > + mbox_flash->shift = msg_get_u8(msg, 5);
> > }
> > + /* Callers will handle the case where the version is not
> > known
> > + *
> > + * Here we deliberately ignore the 'default' sizes.
> > + * All windows opened will not provide a hint and we're
> > + * happy to let the BMC figure everything out.
> > + * Future optimisations may use the default size.
> > + */
> > +}
> >
> > - return 0;
> > +static void mbox_flash_do_get_flash_info_v2(struct mbox_flash_data
> > *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->total_size = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 0));
> > + mbox_flash->erase_granule = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 2));
> > }
> >
> > -static int mbox_flash_flush(struct mbox_flash_data *mbox_flash,
> > uint64_t pos,
> > +static void mbox_flash_do_get_flash_info_v1(struct mbox_flash_data
> > *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->total_size = msg_get_u32(msg, 0);
> > + mbox_flash->erase_granule = msg_get_u32(msg, 4);
> > +}
> > +
> > +static void mbox_flash_do_create_read_window_v2(struct
> > mbox_flash_data *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->read.lpc_addr = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 0));
> > + mbox_flash->read.size = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 2));
> > + mbox_flash->read.cur_pos = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 4));
> > + mbox_flash->read.open = true;
> > + mbox_flash->write.open = false;
> > +}
> > +
> > +static void mbox_flash_do_create_read_window_v1(struct
> > mbox_flash_data *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->read.lpc_addr = msg_get_u16(msg, 0) <<
> > mbox_flash->shift;
>
> Can this use blocks_to_bytes() for consistency?
>
Yeah absolutely, must have missed it.
> > + mbox_flash->read.open = true;
> > + mbox_flash->write.open = false;
> > +}
> > +
> > +static void mbox_flash_do_create_write_window_v2(struct
> > mbox_flash_data *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->write.lpc_addr = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 0));
> > + mbox_flash->write.size = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 2));
> > + mbox_flash->write.cur_pos = blocks_to_bytes(mbox_flash,
> > msg_get_u16(msg, 4));
> > + mbox_flash->write.open = true;
> > + mbox_flash->read.open = false;
> > +}
>
> Why not have a generic mbox_flash_do_create_window_v1 and
> mbox_flash_do_create_window_v2 which handles parsing the response args
> and is called by mbox_flash_do_create_[read/write]_window_v* which
> handles setting the write.open and read.open.
>
> e.g.
>
> mbox_flash_do_create_window_v2() {
> lpc_addr = blah;
> size = blah;
> cur_pos = blah;
> write.open = false;
> read.open = false;
> }
>
> mbox_flash_do_create_write_window_v2() {
> mbox_flash_do_create_window_v2()
> write.open = true;
> }
>
> mbox_flash_do_create_read_window_v2() {
> mbox_flash_do_create_win
> dow_v2()
> read.open = true;
> }
>
> and the same for v1... Would save on code duplication and I think it clarifies that the commands are basically the same.
>
Yeah, I mean I don't have strong feelings either way - if you feel
strongly about it. I like that each callback is obvious and concise
with respect to the command it handles.
> > +
> > +static void mbox_flash_do_create_write_window_v1(struct
> > mbox_flash_data *mbox_flash,
> > + struct bmc_mbox_msg *msg)
> > +{
> > + mbox_flash->write.lpc_addr = msg_get_u16(msg, 0) <<
> > mbox_flash->shift;
>
> Can this use blocks_to_bytes() for consistency?
>
Wow how did I miss so many...
> > + mbox_flash->write.open = true;
> > + mbox_flash->read.open = false;
> > +}
> > +
> > +/* Version 1 and Version 2 compatible */
> > +static void mbox_flash_do_close_window(struct mbox_flash_data
> > *mbox_flash,
> > + struct bmc_mbox_msg *msg __unused)
> > +{
> > + mbox_flash->read.open = false;
> > + mbox_flash->write.open = false;
> > +}
> > +
> > +static int handle_reboot(struct mbox_flash_data *mbox_flash)
> > +{
> > + int rc;
> > +
> > + /*
> > + * If the BMC ready bit isn't present then we're basically
> > + * guaranteed to timeout trying to talk to it so just fail
> > + * whatever is trying to happen.
> > + * Importantly, we can't trust that the presence of the bit
> > means
> > + * the daemon is ok - don't assume it is going to respond at
> > all
> > + * from here onwards
> > + */
> > + if (!(bmc_mbox_get_attn_reg() & MBOX_ATTN_BMC_DAEMON_READY))
> > + return FLASH_ERR_AGAIN;
> > +
> > + /* Clear this first so msg_send() doesn't freak out */
> > + mbox_flash->reboot = false;
> > +
> > + rc = do_acks(mbox_flash);
> > + if (rc) {
> > + if (rc == MBOX_R_TIMEOUT)
> > + rc = FLASH_ERR_AGAIN;
> > + mbox_flash->reboot = true;
> > + return rc;
> > + }
> > +
> > + rc = protocol_init(mbox_flash);
> > + if (rc)
> > + mbox_flash->reboot = true;
> > +
> > + return rc;
> > +}
> > +
> > +static bool do_delayed_work(struct mbox_flash_data *mbox_flash)
> > +{
> > + return is_paused(mbox_flash) || do_acks(mbox_flash) ||
> > + (is_reboot(mbox_flash) &&
> > handle_reboot(mbox_flash));
> > +}
> > +
>
> Why not make this mbox_flash_mark() which takes a type [dirty/erased]
> then you can call this in both the erase and dirty case since they are
> essentially idential except for the command. That would save on code
> duplication and mean when you fix a bug it gets fixed for both (see my
> comment on your bug in the erase code path in the next patch...).
>
So I looked ahead - while I might agree about having mbox_flash_dirty()
and the mbox_flash_erase_v2() in the next patch share code. You're not
suggesting that mbox_flash_flush() in this patch should could be
absorbed as well? mbox_flash_flush() doesn't take arguments and while
yes there's a fair bit of boilerplate of sending a message, that's
common to all the functions...
> > +static int mbox_flash_dirty(struct mbox_flash_data *mbox_flash,
> > uint64_t pos,
> > uint64_t len)
> > {
> > struct bmc_mbox_msg *msg;
> > int rc;
> >
> > - if (!mbox_flash->write.open)
> > - prlog(PR_WARNING, "Attempting to flush without an
> > open write window\n");
> > + if (!mbox_flash->write.open) {
> > + prlog(PR_ERR, "Attempting to dirty without an open
> > write window\n");
> > + return FLASH_ERR_DEVICE_GONE;
> > + }
> > +
> > + msg = msg_alloc(mbox_flash, MBOX_C_MARK_WRITE_DIRTY);
> > + if (!msg)
> > + return FLASH_ERR_MALLOC_FAILED;
> > +
> > + if (mbox_flash->version == 1) {
> > + msg_put_u16(msg, 0, pos >> mbox_flash->shift);
> > + msg_put_u32(msg, 2, len);
> > + } else {
> > + uint64_t window_pos = (pos - mbox_flash-
> > > write.cur_pos);
> >
> > + uint64_t window_len = len + (window_pos &
> > mbox_flash_mask(mbox_flash));
> > +
> > + msg_put_u16(msg, 0, window_pos >> mbox_flash-
> > > shift);
> >
> > + msg_put_u16(msg, 2, ALIGN_UP(window_len, 1 <<
> > mbox_flash->shift) >> mbox_flash->shift);
> > + }
> > +
> > + rc = msg_send(mbox_flash, msg);
> > + if (rc) {
> > + prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> > message\n");
> > + goto out;
> > + }
> > +
> > + rc = wait_for_bmc(mbox_flash, MBOX_DEFAULT_TIMEOUT);
> > + if (rc) {
> > + prlog(PR_ERR, "Error waiting for BMC\n");
> > + goto out;
> > + }
> > +
> > +out:
> > + msg_free_memory(msg);
> > + return rc;
> > +}
> > +
> > +static int mbox_flash_flush(struct mbox_flash_data *mbox_flash)
> > +{
> > + struct bmc_mbox_msg *msg;
> > + int rc;
> > +
> > + if (!mbox_flash->write.open) {
> > + prlog(PR_ERR, "Attempting to flush without an open
> > write window\n");
> > + return FLASH_ERR_DEVICE_GONE;
> > + }
> >
> > msg = msg_alloc(mbox_flash, MBOX_C_WRITE_FLUSH);
> > if (!msg)
> > return FLASH_ERR_MALLOC_FAILED;
> > - msg_put_u16(msg, 0, pos >> mbox_flash->shift);
> > - msg_put_u32(msg, 2, len);
> >
> > rc = msg_send(mbox_flash, msg);
> > if (rc) {
> > @@ -283,11 +566,17 @@ static int mbox_window_move(struct
> > mbox_flash_data *mbox_flash,
> >
> > prlog(PR_DEBUG, "Adjusting the window\n");
> >
> > + /* V1 needs to remember where it has opened the window, note
> > it
> > + * here.
> > + * If we're running V2 the response to the CREATE_*_WINDOW
> > command
> > + * will overwrite what we've noted here.
> > + */
> > + win->cur_pos = pos & ~mbox_flash_mask(mbox_flash);
> > +
> > msg = msg_alloc(mbox_flash, command);
> > if (!msg)
> > return FLASH_ERR_MALLOC_FAILED;
> >
> > - win->cur_pos = pos & ~(mbox_flash_mask(mbox_flash));
> > msg_put_u16(msg, 0, pos >> mbox_flash->shift);
> > rc = msg_send(mbox_flash, msg);
> > if (rc) {
> > @@ -307,6 +596,20 @@ static int mbox_window_move(struct
> > mbox_flash_data *mbox_flash,
> > /* Adjust size to meet current window */
> > *size = (win->cur_pos + win->size) - pos;
> >
> > + /*
> > + * It doesn't make sense for size to be zero if len isn't
> > zero.
> > + * If this condition happens we're most likely going to spin
> > since
> > + * the caller will likely decerement pos by zero then call
> > this
> > + * again.
> > + * Debateable as to if this should return non zero. At least
> > the
> > + * bug will be obvious from the barf.
> > + */
> > + if (len != 0 && *size == 0) {
> > + prlog(PR_ERR, "Move window is indicating size
> > zero!\n");
> > + prlog(PR_ERR, "pos: 0x%" PRIx64 ", len: 0x%" PRIx64
> > "\n", pos, len);
> > + prlog(PR_ERR, "win pos: 0x%08x win size: 0x%08x\n",
> > win->cur_pos, win->size);
> > + }
> > +
> > out:
> > msg_free_memory(msg);
> > return rc;
> > @@ -326,7 +629,10 @@ static int mbox_flash_write(struct
> > blocklevel_device *bl, uint64_t pos,
> >
> > mbox_flash = container_of(bl, struct mbox_flash_data, bl);
> >
> > - prlog(PR_TRACE, "Flash write at 0x%08llx for 0x%08llx\n",
> > pos, len);
> > + if (do_delayed_work(mbox_flash))
> > + return FLASH_ERR_AGAIN;
> > +
> > + prlog(PR_TRACE, "Flash write at %#" PRIx64 " for %#" PRIx64
> > "\n", pos, len);
> > while (len > 0) {
> > /* Move window and get a new size to read */
> > rc = mbox_window_move(mbox_flash, &mbox_flash-
> > > write,
> >
> > @@ -340,12 +646,18 @@ static int mbox_flash_write(struct
> > blocklevel_device *bl, uint64_t pos,
> > if (rc)
> > return rc;
> >
> > + rc = mbox_flash_dirty(mbox_flash, pos, size);
> > + if (rc)
> > + return rc;
> > +
> > /*
> > * Must flush here as changing the window contents
> > * without flushing entitles the BMC to throw away
> > the
> > - * data
> > + * data. Unlike the read case there isn't a need to
> > explicitly
> > + * validate the window, the flush command will fail
> > if the
> > + * window was compromised.
> > */
> > - rc = mbox_flash_flush(mbox_flash, pos, size);
> > + rc = mbox_flash_flush(mbox_flash);
>
> Why are we flushing with every write? Don't we just need to flush at
> the end of the while(len > 0) loop?
>
It says in the comment, I'm quite sure we said that changing a window
without a flush means that the BMC can discard the contents of the
window...
> We will call move window everytime which will either use the existing
> window -> no need to flush until the end,
mbox_window_move() opens the biggest possible window. So if we go
around the loop a second (or more) times then mbox_window_move() will
make a new window... and...
> or will close the current
> window and open a new one -> will have an implicit flush
My understanding of the spec is that flushes must be explicit.
> which will set
> rc if it fails meaning we'll exit any way. Move window will also return
> an error if we received an event during the fact.
>
Yes, this is by design - otherwise there would probably need to be a
do_delayed_work() inside the loop.
> > if (rc)
> > return rc;
> >
> > @@ -370,7 +682,10 @@ static int mbox_flash_read(struct
> > blocklevel_device *bl, uint64_t pos,
> >
> > mbox_flash = container_of(bl, struct mbox_flash_data, bl);
> >
> > - prlog(PR_TRACE, "Flash read at 0x%08llx for 0x%08llx\n",
> > pos, len);
> > + if (do_delayed_work(mbox_flash))
> > + return FLASH_ERR_AGAIN;
> > +
> > + prlog(PR_TRACE, "Flash read at %#" PRIx64 " for %#" PRIx64
> > "\n", pos, len);
> > while (len > 0) {
> > /* Move window and get a new size to read */
> > rc = mbox_window_move(mbox_flash, &mbox_flash->read,
> > @@ -387,6 +702,12 @@ static int mbox_flash_read(struct
> > blocklevel_device *bl, uint64_t pos,
> > len -= size;
> > pos += size;
> > buf += size;
> > + /*
> > + * Ensure my window is still open, if it isn't we
> > can't trust
> > + * what we read
> > + */
> > + if (!is_valid(mbox_flash, &mbox_flash->read))
> > + return FLASH_ERR_AGAIN;
> > }
> > return rc;
> > }
> > @@ -399,6 +720,10 @@ static int mbox_flash_get_info(struct
> > blocklevel_device *bl, const char **name,
> > int rc;
> >
> > mbox_flash = container_of(bl, struct mbox_flash_data, bl);
> > +
> > + if (do_delayed_work(mbox_flash))
> > + return FLASH_ERR_AGAIN;
> > +
> > msg = msg_alloc(mbox_flash, MBOX_C_GET_FLASH_INFO);
> > if (!msg)
> > return FLASH_ERR_MALLOC_FAILED;
> > @@ -437,27 +762,59 @@ out:
> > }
> >
> > static int mbox_flash_erase(struct blocklevel_device *bl __unused,
> > - uint64_t pos __unused, uint64_t len
> > __unused)
> > + uint64_t pos __unused, uint64_t len __unused)
> > {
> > - /*
> > - * We can probably get away with doing nothing.
> > - * TODO: Rethink this, causes interesting behaviour in
> > pflash.
> > - * Users do expect pflash -{e,E} to do something. This is
> > because
> > - * on real flash this would have set that region to all 0xFF
> > but
> > - * really the erase at the blocklevel interface was only
> > designed
> > - * to be "please make this region writeable".
> > - * It may be wise (despite the large performance penalty) to
> > - * actually write all 0xFF here. I'll leave that as an
> > extersise
> > - * for the future.
> > - */
> > + /*
> > + * We can probably get away with doing nothing.
> > + * TODO: Rethink this, causes interesting behaviour in
> > pflash.
> > + * Users do expect pflash -{e,E} to do something. This is
> > because
> > + * on real flash this would have set that region to all 0xFF
> > but
> > + * really the erase at the blocklevel interface was only
> > designed
> > + * to be "please make this region writeable".
> > + * It may be wise (despite the large performance penalty) to
> > + * actually write all 0xFF here. I'll leave that as an
> > extersise
>
> extersise?!
oops
> > + * for the future.
> > + */
>
> Did you increase the indent here by one?
Yeah, I noticed that right after sending... not really sure how that
happened - I'll correct it in v2.
>
> > +
> > return 0;
> > }
> >
> > +/* Called from interrupt handler, don't send any mbox messages */
> > +static void mbox_flash_attn(uint8_t attn, void *priv)
> > +{
> > + struct mbox_flash_data *mbox_flash = priv;
> > +
> > + if (attn & MBOX_ATTN_ACK_MASK)
> > + mbox_flash->ack = true;
> > + if (attn & MBOX_ATTN_BMC_REBOOT) {
> > + mbox_flash->reboot = true;
> > + mbox_flash->read.open = false;
> > + mbox_flash->write.open = false;
> > + attn &= ~MBOX_ATTN_BMC_REBOOT;
> > + }
> > +
> > + if (attn & MBOX_ATTN_BMC_WINDOW_RESET) {
> > + mbox_flash->read.open = false;
> > + mbox_flash->write.open = false;
> > + attn &= ~MBOX_ATTN_BMC_WINDOW_RESET;
> > + }
> > +
> > + if (attn & MBOX_ATTN_BMC_FLASH_LOST) {
> > + mbox_flash->pause = true;
> > + attn &= ~MBOX_ATTN_BMC_FLASH_LOST;
> > + } else {
> > + mbox_flash->pause = false;
> > + }
> > +
> > + if (attn & MBOX_ATTN_BMC_DAEMON_READY)
> > + attn &= ~MBOX_ATTN_BMC_DAEMON_READY;
> > +}
> > +
> > static void mbox_flash_callback(struct bmc_mbox_msg *msg, void
> > *priv)
> > {
> > struct mbox_flash_data *mbox_flash = priv;
> >
> > - prlog(PR_TRACE, "%s: BMC OK\n", __func__);
> > + prlog(PR_TRACE, "BMC OK command %u\n", msg->command);
> >
> > if (msg->response != MBOX_R_SUCCESS) {
> > prlog(PR_ERR, "Bad response code from BMC %d\n",
> > msg->response);
> > @@ -473,102 +830,148 @@ static void mbox_flash_callback(struct
> > bmc_mbox_msg *msg, void *priv)
> > goto out;
> > }
> >
> > - mbox_flash->rc = 0;
> > + if (msg->command > MBOX_COMMAND_COUNT) {
> > + prlog(PR_ERR, "Got response to unknown command
> > %02x\n", msg->command);
> > + mbox_flash->rc = -1;
> > + goto out;
> > + }
> >
> > - switch (msg->command) {
> > - case MBOX_C_RESET_STATE:
> > - break;
> > - case MBOX_C_GET_MBOX_INFO:
> > - mbox_flash->read.size = msg_get_u16(msg, 1)
> > << mbox_flash->shift;
> > - mbox_flash->write.size = msg_get_u16(msg, 3)
> > << mbox_flash->shift;
> > - break;
> > - case MBOX_C_GET_FLASH_INFO:
> > - mbox_flash->total_size = msg_get_u32(msg,
> > 0);
> > - mbox_flash->erase_granule = msg_get_u32(msg,
> > 4);
> > - break;
> > - case MBOX_C_CREATE_READ_WINDOW:
> > - mbox_flash->read.lpc_addr = msg_get_u16(msg,
> > 0) << mbox_flash->shift;
> > - mbox_flash->read.open = true;
> > - mbox_flash->write.open = false;
> > - break;
> > - case MBOX_C_CLOSE_WINDOW:
> > - break;
> > - case MBOX_C_CREATE_WRITE_WINDOW:
> > - mbox_flash->write.lpc_addr =
> > msg_get_u16(msg, 0) << mbox_flash->shift;
> > - mbox_flash->write.open = true;
> > - mbox_flash->read.open = false;
> > - break;
> > - case MBOX_C_MARK_WRITE_DIRTY:
> > - break;
> > - case MBOX_C_WRITE_FLUSH:
> > - break;
> > - case MBOX_C_BMC_EVENT_ACK:
> > - break;
> > - default:
> > - prlog(PR_ERR, "Got response to unknown
> > command %02x\n", msg->command);
> > - mbox_flash->rc = -1;
> > + if (!mbox_flash->handlers[msg->command]) {
> > + prlog(PR_ERR, "Couldn't find handler for message!
> > command: %u, seq: %u\n",
> > + msg->command, msg->seq);
> > + mbox_flash->rc = MBOX_R_SYSTEM_ERROR;
> > + goto out;
> > }
> >
> > + mbox_flash->rc = 0;
> > +
> > + mbox_flash->handlers[msg->command](mbox_flash, msg);
> > +
> > out:
> > mbox_flash->busy = false;
> > }
> >
> > -int mbox_flash_init(struct blocklevel_device **bl)
> > +static int protocol_init(struct mbox_flash_data *mbox_flash)
> > {
> > - struct mbox_flash_data *mbox_flash;
> > struct bmc_mbox_msg *msg;
> > int rc;
> >
> > - if (!bl)
> > - return FLASH_ERR_PARM_ERROR;
> > + /* Assume V2 */
> > + mbox_flash->bl.read = &mbox_flash_read;
> > + mbox_flash->bl.write = &mbox_flash_write;
> > + mbox_flash->bl.erase = &mbox_flash_erase;
> > + mbox_flash->bl.get_info = &mbox_flash_get_info;
> >
> > - *bl = NULL;
> > + /* Assume V2 */
> > + mbox_flash->handlers[0] = NULL;
> > + mbox_flash->handlers[MBOX_C_RESET_STATE] =
> > &mbox_flash_do_nop;
> > + mbox_flash->handlers[MBOX_C_GET_MBOX_INFO] =
> > &mbox_flash_do_get_mbox_info;
> > + mbox_flash->handlers[MBOX_C_GET_FLASH_INFO] =
> > &mbox_flash_do_get_flash_info_v2;
> > + mbox_flash->handlers[MBOX_C_CREATE_READ_WINDOW] =
> > &mbox_flash_do_create_read_window_v2;
> > + mbox_flash->handlers[MBOX_C_CLOSE_WINDOW] =
> > &mbox_flash_do_close_window;
> > + mbox_flash->handlers[MBOX_C_CREATE_WRITE_WINDOW] =
> > &mbox_flash_do_create_write_window_v2;
> > + mbox_flash->handlers[MBOX_C_MARK_WRITE_DIRTY] =
> > &mbox_flash_do_nop;
> > + mbox_flash->handlers[MBOX_C_WRITE_FLUSH] =
> > &mbox_flash_do_nop;
> > + mbox_flash->handlers[MBOX_C_BMC_EVENT_ACK] =
> > &mbox_flash_do_nop;
> > + mbox_flash->handlers[MBOX_C_MARK_WRITE_ERASED] =
> > &mbox_flash_do_nop;
> >
> > - mbox_flash = zalloc(sizeof(struct mbox_flash_data));
> > - if (!mbox_flash)
> > - return FLASH_ERR_MALLOC_FAILED;
> >
> > - /* For V1 of the protocol this is fixed. This could change
> > */
> > + bmc_mbox_register_callback(&mbox_flash_callback,
> > mbox_flash);
> > + bmc_mbox_register_attn(&mbox_flash_attn, mbox_flash);
> > +
> > + /*
> > + * For V1 of the protocol this is fixed.
> > + * V2: The init code will update this
> > + */
> > mbox_flash->shift = 12;
> >
> > - bmc_mbox_register_callback(&mbox_flash_callback,
> > mbox_flash);
> > + /*
> > + * Always attempt init with V2.
> > + * The GET_MBOX_INFO response will confirm that the other
> > side can
> > + * talk V2, we'll update this variable then if V2 is not
> > supported
> > + */
> > + mbox_flash->version = 2;
> >
> > msg = msg_alloc(mbox_flash, MBOX_C_GET_MBOX_INFO);
> > - if (!msg) {
> > - rc = FLASH_ERR_MALLOC_FAILED;
> > - goto out;
> > - }
> > -
> > - msg_put_u8(msg, 0, 1); /* V1, do better */
> > + if (!msg)
> > + return FLASH_ERR_MALLOC_FAILED;
> >
> > + msg_put_u8(msg, 0, mbox_flash->version);
> > rc = msg_send(mbox_flash, msg);
> > if (rc) {
> > prlog(PR_ERR, "Failed to enqueue/send BMC MBOX
> > message\n");
> > - goto out_msg;
> > + goto out;
> > }
> >
> > rc = wait_for_bmc(mbox_flash, MBOX_DEFAULT_TIMEOUT);
> > if (rc) {
> > prlog(PR_ERR, "Error waiting for BMC\n");
> > - goto out_msg;
> > + goto out;
> > }
> >
> > msg_free_memory(msg);
> >
> > - mbox_flash->bl.keep_alive = 0;
> > + prlog(PR_INFO, "Detected mbox protocol version %d\n",
> > mbox_flash->version);
> > + if (mbox_flash->version == 1) {
> > + /* Not all handlers differ, update those which do */
> > + mbox_flash->handlers[MBOX_C_GET_MBOX_INFO] =
> > &mbox_flash_do_get_mbox_info;
> > + mbox_flash->handlers[MBOX_C_GET_FLASH_INFO] =
> > &mbox_flash_do_get_flash_info_v1;
> > + mbox_flash->handlers[MBOX_C_CREATE_READ_WINDOW] =
> > + &mbox_flash_do_create_read_window_v1;
> > + mbox_flash->handlers[MBOX_C_CREATE_WRITE_WINDOW] =
> > + &mbox_flash_do_create_write_window_v1;
> > + mbox_flash->handlers[MBOX_C_MARK_WRITE_ERASED] =
> > NULL; /* Not in V1 */
>
> I don't really like it how you're patching out specific functions, it
> doesn't really lead to extensibilty... What about an array of handler
> arrays which is defined above as a constant and indexed by version
> number. That way you just update the array pointer making it easier to
> extend in future.
Probably a good idea, I wonder if this starts to get a tad over
engineered for what we need right now. Definitely would be the way to
go if we start cranking out revisions.
>
> > + } else if (mbox_flash->version > 2) {
> > + /*
> > + * Uh, we requested version 2... The BMC is can only
> > lower the
> > + * requested version not do anything else. FWIW
> > there is no
> > + * verion 0
> > + */
> > + prlog(PR_CRIT, "Bad version: %u\n", mbox_flash-
> > > version);
> >
> > + rc = FLASH_ERR_PARM_ERROR;
> > + goto out;
> > + }
> > +
> > +
> > + return 0;
> > +out:
> > + msg_free_memory(msg);
> > + return rc;
> > +}
> > +
> > +int mbox_flash_init(struct blocklevel_device **bl)
> > +{
> > + struct mbox_flash_data *mbox_flash;
> > + int rc;
> > +
> > + if (!bl)
> > + return FLASH_ERR_PARM_ERROR;
> > +
> > + *bl = NULL;
> > +
> > + mbox_flash = zalloc(sizeof(struct mbox_flash_data));
> > + if (!mbox_flash)
> > + return FLASH_ERR_MALLOC_FAILED;
> > +
> > + /* Assume V2 */
> > mbox_flash->bl.read = &mbox_flash_read;
> > mbox_flash->bl.write = &mbox_flash_write;
> > mbox_flash->bl.erase = &mbox_flash_erase;
> > mbox_flash->bl.get_info = &mbox_flash_get_info;
> >
> > + if (bmc_mbox_get_attn_reg() & MBOX_ATTN_BMC_REBOOT)
> > + rc = handle_reboot(mbox_flash);
> > + else
> > + rc = protocol_init(mbox_flash);
>
> Handle reboot eventually calls protocol_init... To make it clearer that
> it's called irrespective can we change this to something like:
>
> if (reboot) {
> handle_reboot();
>
> // This always happens
> protocol_init();
(and remove the call to protocol_init() in handle_reboot())
do_delayed_work() also calls handle_reboot() and relies on the fact
that it will protocol_init() if needed. I'm not opposed to that idea
but it means that do_delayed_work() will grow by the amount this
shrinks - although perhaps it will look nicer...
>
> > + if (rc) {
> > + free(mbox_flash);
> > + return rc;
> > + }
> > +
> > + mbox_flash->bl.keep_alive = 0;
> > +
> > *bl = &(mbox_flash->bl);
> > return 0;
> > -
> > -out_msg:
> > - msg_free_memory(msg);
> > -out:
> > - free(mbox_flash);
> > - return rc;
> > }
> >
> > void mbox_flash_exit(struct blocklevel_device *bl)
>
>
More information about the Skiboot
mailing list