[Skiboot] [PATCH 4/4] libflash/blocklevel: Backend agnostic blocklevel_erase_async()
Cyril Bur
cyril.bur at au1.ibm.com
Thu Jun 29 13:49:28 AEST 2017
On Thu, 2017-06-29 at 13:19 +1000, Alistair Popple wrote:
> On Thu, 29 Jun 2017 09:38:04 AM Cyril Bur wrote:
> > This patch provides a simple (although not particularly efficient)
> > asynchronous erase function. The advantage of this approach is that it
> > doesn't require any of the blocklevel backends to provide an
> > asynchronous implementation. This is also the disadvantage of this
> > implementation as all it actually does is break the work up in chunks
> > that it can performed quickly, but still synchronously. Only a backend
> > could provide a more asynchronous implementation.
> >
> > The motivation behind this patch is two fold:
> > Firstly this starts a blocklevel async interface - which could easily be
> > extended to pass the async calls down to backends which provide async
> > implementation.
>
> To be honest having seen the code this seems to add a reasonable amount of
> complexity to the blocklevel layer for no real gain as we don't actually
> implement async and only skiboot uses it.
>
> Given this I am wondering if it would be simpler to confine the hack to skiboot
> and wait until we have other users of libflash that want async functions, or for
> when we actually implement async at the HW level?
>
*sigh* maybe, thoughts anyone?
> > Secondly this solves a problem we have right now with the
> > opal_flash_erase call where it can block in Skiboot for around three
> > minutes. This causes a variety of problems in Linux due to a processor
> > being gone for a long time. For example:
> >
> > [ 98.610043] INFO: rcu_sched detected stalls on CPUs/tasks:
> > [ 98.610050] 113-...: (1 GPs behind) idle=96f/140000000000000/0 softirq=527/528 fqs=1044
> > [ 98.610051] (detected by 112, t=2102 jiffies, g=223, c=222, q=123)
> > [ 98.610060] Task dump for CPU 113:
> > [ 98.610062] pflash R running task 0 3335 3333 0x00040004
> > [ 98.610066] Call Trace:
> > [ 98.610070] [c000001fdd847730] [0000000000000001] 0x1 (unreliable)
> > [ 98.610076] [c000001fdd847900] [c000000000013854] __switch_to+0x1e8/0x1f4
> > [ 98.610081] [c000001fdd847960] [c0000000006122c4] __schedule+0x32c/0x874
> > [ 98.610083] [c000001fdd847a30] [c000001fdd847b40] 0xc000001fdd847b40
> >
> > It is for this reason that breaking the work up in smaller chunks solves
> > this problem as Skiboot can return the CPU to Linux between chunks to
> > avoid Linux getting upset.
> >
> > Reported-By: Samuel Mendoza-Jonas <sam at mendozajonas.com>
> > Signed-off-by: Cyril Bur <cyril.bur at au1.ibm.com>
> > ---
> > core/flash.c | 46 +++++++++++++++++++++++++--
> > libflash/blocklevel.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++----
> > libflash/blocklevel.h | 19 +++++++++++
> > libflash/errors.h | 14 ++++++++
> > libflash/libflash.h | 13 --------
> > 5 files changed, 159 insertions(+), 21 deletions(-)
> >
> > diff --git a/core/flash.c b/core/flash.c
> > index 177f7ae1..e8b9fc3c 100644
> > --- a/core/flash.c
> > +++ b/core/flash.c
> > @@ -28,6 +28,13 @@
> > #include <libstb/stb.h>
> > #include <libstb/container.h>
> > #include <elf.h>
> > +#include <timer.h>
> > +#include <timebase.h>
> > +
> > +struct flash_async_info {
> > + struct timer poller;
> > + uint64_t token;
> > +};
> >
> > struct flash {
> > struct list_node list;
> > @@ -36,6 +43,7 @@ struct flash {
> > uint64_t size;
> > uint32_t block_size;
> > int id;
> > + struct flash_async_info async_info;
> > };
> >
> > static LIST_HEAD(flashes);
> > @@ -200,6 +208,29 @@ static int flash_nvram_probe(struct flash *flash, struct ffs_handle *ffs)
> >
> > /* core flash support */
> >
> > +/*
> > + * Called with flash lock held, drop it on async completion
> > + */
> > +static void flash_poll(struct timer *t __unused, void *data, uint64_t now __unused)
> > +{
> > + struct flash *flash = data;
> > + int rc;
> > +
> > + rc = blocklevel_async_poll(flash->bl);
> > + if (rc == FLASH_ASYNC_POLL) {
> > + /*
> > + * We want to get called pretty much straight away, just have
> > + * to be sure that we jump back out to Linux so that if this
> > + * very long we don't cause RCU or the scheduler to freak
> > + */
> > + schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
> > + return;
> > + }
> > +
> > + flash_release(flash);
> > + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, flash->async_info.token, rc);
> > +}
> > +
> > static struct dt_node *flash_add_dt_node(struct flash *flash, int id)
> > {
> > struct dt_node *flash_node;
> > @@ -295,6 +326,7 @@ int flash_register(struct blocklevel_device *bl)
> > flash->size = size;
> > flash->block_size = block_size;
> > flash->id = num_flashes();
> > + init_timer(&flash->async_info.poller, flash_poll, flash);
> >
> > list_add(&flashes, &flash->list);
> >
> > @@ -369,8 +401,18 @@ static int64_t opal_flash_op(enum flash_op op, uint64_t id, uint64_t offset,
> > rc = blocklevel_raw_write(flash->bl, offset, (void *)buf, size);
> > break;
> > case FLASH_OP_ERASE:
> > - rc = blocklevel_erase(flash->bl, offset, size);
> > - break;
> > + rc = blocklevel_erase_async(flash->bl, offset, size);
> > + if (rc != FLASH_ASYNC_POLL)
> > + break;
> > + schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
> > + /*
> > + * FLASH_OP_ERASE can now happen asynchronously, don't go out
> > + * the regular path.
> > + *
> > + * Ensure we hold the lock to flash for the entirety of the
> > + * async process
> > + */
> > + return OPAL_ASYNC_COMPLETION;
> > default:
> > assert(0);
> > }
> > diff --git a/libflash/blocklevel.c b/libflash/blocklevel.c
> > index 9802ad0f..acda2066 100644
> > --- a/libflash/blocklevel.c
> > +++ b/libflash/blocklevel.c
> > @@ -29,6 +29,10 @@
> >
> > #define PROT_REALLOC_NUM 25
> >
> > +#ifndef MIN
> > +#define MIN(a, b) ((a) < (b) ? (a) : (b))
> > +#endif
> > +
> > /* This function returns tristate values.
> > * 1 - The region is ECC protected
> > * 0 - The region is not ECC protected
> > @@ -78,6 +82,46 @@ static int release(struct blocklevel_device *bl)
> > return rc;
> > }
> >
> > +int blocklevel_async_poll(struct blocklevel_device *bl)
> > +{
> > + uint64_t len;
> > + int rc;
> > +
> > + if (!bl->async.active)
> > + return FLASH_ERR_PARM_ERROR;
> > +
> > + /* Just chunk things up by erase granule */
> > + len = MIN(bl->erase_mask + 1, bl->async.len);
> > +
> > + switch (bl->async.op) {
> > + case READ:
> > + case WRITE:
> > + /*
> > + * Very large caveat - do not remove this without auditing
> > + * this entire file. smart_write() and smart_erase() will
> > + * not 'just work' async. They will need work.
> > + */
> > + bl->async.buf += len;
>
> How would async.op == READ/WRITE? I'd be inclinded to complain loudly and return
> here as it should never happen.
Perhaps, I wasn't sure so I did nothing... Yes yes inaction is a form
of action... but its less action?
>
> > + return FLASH_ERR_PARM_ERROR;
> > + case ERASE:
> > + rc = bl->erase(bl, bl->async.pos, len);
> > + break;
> > + default:
> > + return FLASH_ERR_PARM_ERROR;
> > + }
> > +
> > + if (rc)
> > + return rc;
> > +
> > + bl->async.pos += len;
> > + bl->async.len -= len;
>
> Wouldn't storing the end position be a little clearer (ie. bl->async.end)?
Would it? Everything is always specified as a pos and len, doesn't it
make sense to keep it in the same layout as the functions accept their
args? Otherwise there will be a lot of bl->async.end - bl->async.pos.
And I'm not sure how nice if (bl->async.pos == bl->async.end) is over
if (bl->async.len == 0)
>
> > + if (bl->async.len == 0)
> > + bl->async.active = false;
> > +
> > + return bl->async.active ? FLASH_ASYNC_POLL : 0;
> > +}
> > +
> > int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len)
> > {
> > int rc;
> > @@ -189,9 +233,9 @@ out:
> > return rc;
> > }
> >
> > -int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> > +static int check_erase_params(struct blocklevel_device *bl, const char *caller,
> > + uint64_t pos, uint64_t len)
> > {
> > - int rc;
> > if (!bl || !bl->erase) {
> > errno = EINVAL;
> > return FLASH_ERR_PARM_ERROR;
> > @@ -199,16 +243,48 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> >
> > /* Programmer may be making a horrible mistake without knowing it */
> > if (pos & bl->erase_mask) {
> > - fprintf(stderr, "blocklevel_erase: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> > - pos, bl->erase_mask + 1);
> > + FL_ERR("%s: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> > + caller, pos, bl->erase_mask + 1);
> > + return FLASH_ERR_ERASE_BOUNDARY;
> > }
> >
> > if (len & bl->erase_mask) {
> > - fprintf(stderr, "blocklevel_erase: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> > - len, bl->erase_mask + 1);
> > + FL_ERR("%s: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> > + caller, len, bl->erase_mask + 1);
> > return FLASH_ERR_ERASE_BOUNDARY;
> > }
> >
> > + return 0;
> > +}
> > +
> > +int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> > +{
> > + int rc;
> > +
> > + rc = check_erase_params(bl, __func__, pos, len);
> > + if (rc)
> > + return rc;
> > +
> > + if (bl->async.active)
> > + return FLASH_ERR_PARM_ERROR;
> > +
> > + bl->async.op = ERASE;
> > + bl->async.pos = pos;
> > + bl->async.len = len;
> > + bl->async.active = true;
> > +
> > + /* Call the poll once to get everything going */
> > + return blocklevel_async_poll(bl);
> > +}
> > +
> > +int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> > +{
> > + int rc;
> > +
> > + rc = check_erase_params(bl, __func__, pos, len);
> > + if (rc)
> > + return rc;
> > +
> > rc = reacquire(bl);
> > if (rc)
> > return rc;
> > diff --git a/libflash/blocklevel.h b/libflash/blocklevel.h
> > index ba42c83d..7079e2fc 100644
> > --- a/libflash/blocklevel.h
> > +++ b/libflash/blocklevel.h
> > @@ -34,6 +34,20 @@ enum blocklevel_flags {
> > WRITE_NEED_ERASE = 1,
> > };
> >
> > +enum blocklevel_async_op {
> > + READ = 1,
> > + WRITE,
> > + ERASE
> > +};
> > +
> > +struct blocklevel_async {
> > + bool active;
> > + enum blocklevel_async_op op;
> > + uint64_t pos;
> > + uint64_t len;
> > + void *buf;
> > +};
> > +
> > /*
> > * libffs may be used with different backends, all should provide these for
> > * libflash to get the information it needs
> > @@ -56,6 +70,8 @@ struct blocklevel_device {
> > enum blocklevel_flags flags;
> >
> > struct blocklevel_range ecc_prot;
> > +
> > + struct blocklevel_async async;
> > };
> > int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
> > int blocklevel_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
> > @@ -65,6 +81,9 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
> > int blocklevel_get_info(struct blocklevel_device *bl, const char **name, uint64_t *total_size,
> > uint32_t *erase_granule);
> >
> > +int blocklevel_async_poll(struct blocklevel_device *bl);
> > +int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
> > +
> > /*
> > * blocklevel_smart_write() performs reads on the data to see if it
> > * can skip erase or write calls. This is likely more convenient for
> > diff --git a/libflash/errors.h b/libflash/errors.h
> > index 2f567211..deda7b87 100644
> > --- a/libflash/errors.h
> > +++ b/libflash/errors.h
>
> Why are we moving this in this patch? Just as clean-up? If so it probably should
> be a seperate patch.
Damn, busted trying to sneak in a cleanup.
Yeah subtly hidden in there is that blocklevel does from using
fprintf(stderr, ...) to FL_ERR() which, for userspace use boils down to
the same but makes it look nicer in the skiboot log.
I'll split it up on resend.
>
> > @@ -33,5 +33,19 @@
> > #define FLASH_ERR_BAD_READ 15
> > #define FLASH_ERR_DEVICE_GONE 16
> > #define FLASH_ERR_AGAIN 17
> > +#define FLASH_ASYNC_POLL 18
> > +
> > +#ifdef __SKIBOOT__
> > +#include <skiboot.h>
> > +#define FL_INF(fmt...) do { prlog(PR_INFO, fmt); } while(0)
> > +#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
> > +#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt); } while(0)
> > +#else
> > +#include <stdio.h>
> > +extern bool libflash_debug;
> > +#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
> > +#define FL_INF(fmt...) do { printf(fmt); } while(0)
> > +#define FL_ERR(fmt...) do { fprintf(stderr, fmt); } while(0)
> > +#endif
> >
> > #endif /* __LIBFLASH_ERRORS_H */
> > diff --git a/libflash/libflash.h b/libflash/libflash.h
> > index 4fecfe75..ff3a9823 100644
> > --- a/libflash/libflash.h
> > +++ b/libflash/libflash.h
> > @@ -28,19 +28,6 @@
> > */
> > #include <libflash/errors.h>
> >
> > -#ifdef __SKIBOOT__
> > -#include <skiboot.h>
> > -#define FL_INF(fmt...) do { prlog(PR_INFO, fmt); } while(0)
> > -#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
> > -#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt); } while(0)
> > -#else
> > -#include <stdio.h>
> > -extern bool libflash_debug;
> > -#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
> > -#define FL_INF(fmt...) do { printf(fmt); } while(0)
> > -#define FL_ERR(fmt...) do { printf(fmt); } while(0)
> > -#endif
> > -
> > /* Flash chip, opaque */
> > struct flash_chip;
> > struct spi_flash_ctrl;
> >
>
>
More information about the Skiboot
mailing list