[Skiboot] [PATCH 4/4] libflash/blocklevel: Backend agnostic blocklevel_erase_async()
Alistair Popple
alistair at popple.id.au
Thu Jun 29 13:19:10 AEST 2017
On Thu, 29 Jun 2017 09:38:04 AM Cyril Bur wrote:
> This patch provides a simple (although not particularly efficient)
> asynchronous erase function. The advantage of this approach is that it
> doesn't require any of the blocklevel backends to provide an
> asynchronous implementation. This is also the disadvantage of this
> implementation as all it actually does is break the work up in chunks
> that it can performed quickly, but still synchronously. Only a backend
> could provide a more asynchronous implementation.
>
> The motivation behind this patch is two fold:
> Firstly this starts a blocklevel async interface - which could easily be
> extended to pass the async calls down to backends which provide async
> implementation.
To be honest having seen the code this seems to add a reasonable amount of
complexity to the blocklevel layer for no real gain as we don't actually
implement async and only skiboot uses it.
Given this I am wondering if it would be simpler to confine the hack to skiboot
and wait until we have other users of libflash that want async functions, or for
when we actually implement async at the HW level?
> Secondly this solves a problem we have right now with the
> opal_flash_erase call where it can block in Skiboot for around three
> minutes. This causes a variety of problems in Linux due to a processor
> being gone for a long time. For example:
>
> [ 98.610043] INFO: rcu_sched detected stalls on CPUs/tasks:
> [ 98.610050] 113-...: (1 GPs behind) idle=96f/140000000000000/0 softirq=527/528 fqs=1044
> [ 98.610051] (detected by 112, t=2102 jiffies, g=223, c=222, q=123)
> [ 98.610060] Task dump for CPU 113:
> [ 98.610062] pflash R running task 0 3335 3333 0x00040004
> [ 98.610066] Call Trace:
> [ 98.610070] [c000001fdd847730] [0000000000000001] 0x1 (unreliable)
> [ 98.610076] [c000001fdd847900] [c000000000013854] __switch_to+0x1e8/0x1f4
> [ 98.610081] [c000001fdd847960] [c0000000006122c4] __schedule+0x32c/0x874
> [ 98.610083] [c000001fdd847a30] [c000001fdd847b40] 0xc000001fdd847b40
>
> It is for this reason that breaking the work up in smaller chunks solves
> this problem as Skiboot can return the CPU to Linux between chunks to
> avoid Linux getting upset.
>
> Reported-By: Samuel Mendoza-Jonas <sam at mendozajonas.com>
> Signed-off-by: Cyril Bur <cyril.bur at au1.ibm.com>
> ---
> core/flash.c | 46 +++++++++++++++++++++++++--
> libflash/blocklevel.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++----
> libflash/blocklevel.h | 19 +++++++++++
> libflash/errors.h | 14 ++++++++
> libflash/libflash.h | 13 --------
> 5 files changed, 159 insertions(+), 21 deletions(-)
>
> diff --git a/core/flash.c b/core/flash.c
> index 177f7ae1..e8b9fc3c 100644
> --- a/core/flash.c
> +++ b/core/flash.c
> @@ -28,6 +28,13 @@
> #include <libstb/stb.h>
> #include <libstb/container.h>
> #include <elf.h>
> +#include <timer.h>
> +#include <timebase.h>
> +
> +struct flash_async_info {
> + struct timer poller;
> + uint64_t token;
> +};
>
> struct flash {
> struct list_node list;
> @@ -36,6 +43,7 @@ struct flash {
> uint64_t size;
> uint32_t block_size;
> int id;
> + struct flash_async_info async_info;
> };
>
> static LIST_HEAD(flashes);
> @@ -200,6 +208,29 @@ static int flash_nvram_probe(struct flash *flash, struct ffs_handle *ffs)
>
> /* core flash support */
>
> +/*
> + * Called with flash lock held, drop it on async completion
> + */
> +static void flash_poll(struct timer *t __unused, void *data, uint64_t now __unused)
> +{
> + struct flash *flash = data;
> + int rc;
> +
> + rc = blocklevel_async_poll(flash->bl);
> + if (rc == FLASH_ASYNC_POLL) {
> + /*
> + * We want to get called pretty much straight away, just have
> + * to be sure that we jump back out to Linux so that if this
> + * very long we don't cause RCU or the scheduler to freak
> + */
> + schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
> + return;
> + }
> +
> + flash_release(flash);
> + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, flash->async_info.token, rc);
> +}
> +
> static struct dt_node *flash_add_dt_node(struct flash *flash, int id)
> {
> struct dt_node *flash_node;
> @@ -295,6 +326,7 @@ int flash_register(struct blocklevel_device *bl)
> flash->size = size;
> flash->block_size = block_size;
> flash->id = num_flashes();
> + init_timer(&flash->async_info.poller, flash_poll, flash);
>
> list_add(&flashes, &flash->list);
>
> @@ -369,8 +401,18 @@ static int64_t opal_flash_op(enum flash_op op, uint64_t id, uint64_t offset,
> rc = blocklevel_raw_write(flash->bl, offset, (void *)buf, size);
> break;
> case FLASH_OP_ERASE:
> - rc = blocklevel_erase(flash->bl, offset, size);
> - break;
> + rc = blocklevel_erase_async(flash->bl, offset, size);
> + if (rc != FLASH_ASYNC_POLL)
> + break;
> + schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
> + /*
> + * FLASH_OP_ERASE can now happen asynchronously, don't go out
> + * the regular path.
> + *
> + * Ensure we hold the lock to flash for the entirety of the
> + * async process
> + */
> + return OPAL_ASYNC_COMPLETION;
> default:
> assert(0);
> }
> diff --git a/libflash/blocklevel.c b/libflash/blocklevel.c
> index 9802ad0f..acda2066 100644
> --- a/libflash/blocklevel.c
> +++ b/libflash/blocklevel.c
> @@ -29,6 +29,10 @@
>
> #define PROT_REALLOC_NUM 25
>
> +#ifndef MIN
> +#define MIN(a, b) ((a) < (b) ? (a) : (b))
> +#endif
> +
> /* This function returns tristate values.
> * 1 - The region is ECC protected
> * 0 - The region is not ECC protected
> @@ -78,6 +82,46 @@ static int release(struct blocklevel_device *bl)
> return rc;
> }
>
> +int blocklevel_async_poll(struct blocklevel_device *bl)
> +{
> + uint64_t len;
> + int rc;
> +
> + if (!bl->async.active)
> + return FLASH_ERR_PARM_ERROR;
> +
> + /* Just chunk things up by erase granule */
> + len = MIN(bl->erase_mask + 1, bl->async.len);
> +
> + switch (bl->async.op) {
> + case READ:
> + case WRITE:
> + /*
> + * Very large caveat - do not remove this without auditing
> + * this entire file. smart_write() and smart_erase() will
> + * not 'just work' async. They will need work.
> + */
> + bl->async.buf += len;
How would async.op == READ/WRITE? I'd be inclinded to complain loudly and return
here as it should never happen.
> + return FLASH_ERR_PARM_ERROR;
> + case ERASE:
> + rc = bl->erase(bl, bl->async.pos, len);
> + break;
> + default:
> + return FLASH_ERR_PARM_ERROR;
> + }
> +
> + if (rc)
> + return rc;
> +
> + bl->async.pos += len;
> + bl->async.len -= len;
Wouldn't storing the end position be a little clearer (ie. bl->async.end)?
> + if (bl->async.len == 0)
> + bl->async.active = false;
> +
> + return bl->async.active ? FLASH_ASYNC_POLL : 0;
> +}
> +
> int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len)
> {
> int rc;
> @@ -189,9 +233,9 @@ out:
> return rc;
> }
>
> -int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> +static int check_erase_params(struct blocklevel_device *bl, const char *caller,
> + uint64_t pos, uint64_t len)
> {
> - int rc;
> if (!bl || !bl->erase) {
> errno = EINVAL;
> return FLASH_ERR_PARM_ERROR;
> @@ -199,16 +243,48 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
>
> /* Programmer may be making a horrible mistake without knowing it */
> if (pos & bl->erase_mask) {
> - fprintf(stderr, "blocklevel_erase: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> - pos, bl->erase_mask + 1);
> + FL_ERR("%s: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> + caller, pos, bl->erase_mask + 1);
> + return FLASH_ERR_ERASE_BOUNDARY;
> }
>
> if (len & bl->erase_mask) {
> - fprintf(stderr, "blocklevel_erase: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> - len, bl->erase_mask + 1);
> + FL_ERR("%s: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
> + caller, len, bl->erase_mask + 1);
> return FLASH_ERR_ERASE_BOUNDARY;
> }
>
> + return 0;
> +}
> +
> +int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> +{
> + int rc;
> +
> + rc = check_erase_params(bl, __func__, pos, len);
> + if (rc)
> + return rc;
> +
> + if (bl->async.active)
> + return FLASH_ERR_PARM_ERROR;
> +
> + bl->async.op = ERASE;
> + bl->async.pos = pos;
> + bl->async.len = len;
> + bl->async.active = true;
> +
> + /* Call the poll once to get everything going */
> + return blocklevel_async_poll(bl);
> +}
> +
> +int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
> +{
> + int rc;
> +
> + rc = check_erase_params(bl, __func__, pos, len);
> + if (rc)
> + return rc;
> +
> rc = reacquire(bl);
> if (rc)
> return rc;
> diff --git a/libflash/blocklevel.h b/libflash/blocklevel.h
> index ba42c83d..7079e2fc 100644
> --- a/libflash/blocklevel.h
> +++ b/libflash/blocklevel.h
> @@ -34,6 +34,20 @@ enum blocklevel_flags {
> WRITE_NEED_ERASE = 1,
> };
>
> +enum blocklevel_async_op {
> + READ = 1,
> + WRITE,
> + ERASE
> +};
> +
> +struct blocklevel_async {
> + bool active;
> + enum blocklevel_async_op op;
> + uint64_t pos;
> + uint64_t len;
> + void *buf;
> +};
> +
> /*
> * libffs may be used with different backends, all should provide these for
> * libflash to get the information it needs
> @@ -56,6 +70,8 @@ struct blocklevel_device {
> enum blocklevel_flags flags;
>
> struct blocklevel_range ecc_prot;
> +
> + struct blocklevel_async async;
> };
> int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
> int blocklevel_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
> @@ -65,6 +81,9 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
> int blocklevel_get_info(struct blocklevel_device *bl, const char **name, uint64_t *total_size,
> uint32_t *erase_granule);
>
> +int blocklevel_async_poll(struct blocklevel_device *bl);
> +int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
> +
> /*
> * blocklevel_smart_write() performs reads on the data to see if it
> * can skip erase or write calls. This is likely more convenient for
> diff --git a/libflash/errors.h b/libflash/errors.h
> index 2f567211..deda7b87 100644
> --- a/libflash/errors.h
> +++ b/libflash/errors.h
Why are we moving this in this patch? Just as clean-up? If so it probably should
be a seperate patch.
> @@ -33,5 +33,19 @@
> #define FLASH_ERR_BAD_READ 15
> #define FLASH_ERR_DEVICE_GONE 16
> #define FLASH_ERR_AGAIN 17
> +#define FLASH_ASYNC_POLL 18
> +
> +#ifdef __SKIBOOT__
> +#include <skiboot.h>
> +#define FL_INF(fmt...) do { prlog(PR_INFO, fmt); } while(0)
> +#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
> +#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt); } while(0)
> +#else
> +#include <stdio.h>
> +extern bool libflash_debug;
> +#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
> +#define FL_INF(fmt...) do { printf(fmt); } while(0)
> +#define FL_ERR(fmt...) do { fprintf(stderr, fmt); } while(0)
> +#endif
>
> #endif /* __LIBFLASH_ERRORS_H */
> diff --git a/libflash/libflash.h b/libflash/libflash.h
> index 4fecfe75..ff3a9823 100644
> --- a/libflash/libflash.h
> +++ b/libflash/libflash.h
> @@ -28,19 +28,6 @@
> */
> #include <libflash/errors.h>
>
> -#ifdef __SKIBOOT__
> -#include <skiboot.h>
> -#define FL_INF(fmt...) do { prlog(PR_INFO, fmt); } while(0)
> -#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
> -#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt); } while(0)
> -#else
> -#include <stdio.h>
> -extern bool libflash_debug;
> -#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
> -#define FL_INF(fmt...) do { printf(fmt); } while(0)
> -#define FL_ERR(fmt...) do { printf(fmt); } while(0)
> -#endif
> -
> /* Flash chip, opaque */
> struct flash_chip;
> struct spi_flash_ctrl;
>
More information about the Skiboot
mailing list