[Skiboot] [PATCH 4/4] libflash/blocklevel: Backend agnostic blocklevel_erase_async()

Cyril Bur cyril.bur at au1.ibm.com
Thu Jun 29 09:38:04 AEST 2017


This patch provides a simple (although not particularly efficient)
asynchronous erase function. The advantage of this approach is that it
doesn't require any of the blocklevel backends to provide an
asynchronous implementation. This is also the disadvantage of this
implementation as all it actually does is break the work up in chunks
that it can performed quickly, but still synchronously. Only a backend
could provide a more asynchronous implementation.

The motivation behind this patch is two fold:
Firstly this starts a blocklevel async interface - which could easily be
extended to pass the async calls down to backends which provide async
implementation.
Secondly this solves a problem we have right now with the
opal_flash_erase call where it can block in Skiboot for around three
minutes. This causes a variety of problems in Linux due to a processor
being gone for a long time. For example:

[   98.610043] INFO: rcu_sched detected stalls on CPUs/tasks:
[   98.610050]         113-...: (1 GPs behind) idle=96f/140000000000000/0 softirq=527/528 fqs=1044
[   98.610051]         (detected by 112, t=2102 jiffies, g=223, c=222, q=123)
[   98.610060] Task dump for CPU 113:
[   98.610062] pflash          R  running task        0  3335   3333 0x00040004
[   98.610066] Call Trace:
[   98.610070] [c000001fdd847730] [0000000000000001] 0x1 (unreliable)
[   98.610076] [c000001fdd847900] [c000000000013854] __switch_to+0x1e8/0x1f4
[   98.610081] [c000001fdd847960] [c0000000006122c4] __schedule+0x32c/0x874
[   98.610083] [c000001fdd847a30] [c000001fdd847b40] 0xc000001fdd847b40

It is for this reason that breaking the work up in smaller chunks solves
this problem as Skiboot can return the CPU to Linux between chunks to
avoid Linux getting upset.

Reported-By: Samuel Mendoza-Jonas <sam at mendozajonas.com>
Signed-off-by: Cyril Bur <cyril.bur at au1.ibm.com>
---
 core/flash.c          | 46 +++++++++++++++++++++++++--
 libflash/blocklevel.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++----
 libflash/blocklevel.h | 19 +++++++++++
 libflash/errors.h     | 14 ++++++++
 libflash/libflash.h   | 13 --------
 5 files changed, 159 insertions(+), 21 deletions(-)

diff --git a/core/flash.c b/core/flash.c
index 177f7ae1..e8b9fc3c 100644
--- a/core/flash.c
+++ b/core/flash.c
@@ -28,6 +28,13 @@
 #include <libstb/stb.h>
 #include <libstb/container.h>
 #include <elf.h>
+#include <timer.h>
+#include <timebase.h>
+
+struct flash_async_info {
+	struct timer poller;
+	uint64_t token;
+};
 
 struct flash {
 	struct list_node	list;
@@ -36,6 +43,7 @@ struct flash {
 	uint64_t		size;
 	uint32_t		block_size;
 	int			id;
+	struct flash_async_info async_info;
 };
 
 static LIST_HEAD(flashes);
@@ -200,6 +208,29 @@ static int flash_nvram_probe(struct flash *flash, struct ffs_handle *ffs)
 
 /* core flash support */
 
+/*
+ * Called with flash lock held, drop it on async completion
+ */
+static void flash_poll(struct timer *t __unused, void *data, uint64_t now __unused)
+{
+	struct flash *flash = data;
+	int rc;
+
+	rc = blocklevel_async_poll(flash->bl);
+	if (rc == FLASH_ASYNC_POLL) {
+		/*
+		 * We want to get called pretty much straight away, just have
+		 * to be sure that we jump back out to Linux so that if this
+		 * very long we don't cause RCU or the scheduler to freak
+		 */
+		schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
+		return;
+	}
+
+	flash_release(flash);
+	opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, flash->async_info.token, rc);
+}
+
 static struct dt_node *flash_add_dt_node(struct flash *flash, int id)
 {
 	struct dt_node *flash_node;
@@ -295,6 +326,7 @@ int flash_register(struct blocklevel_device *bl)
 	flash->size = size;
 	flash->block_size = block_size;
 	flash->id = num_flashes();
+	init_timer(&flash->async_info.poller, flash_poll, flash);
 
 	list_add(&flashes, &flash->list);
 
@@ -369,8 +401,18 @@ static int64_t opal_flash_op(enum flash_op op, uint64_t id, uint64_t offset,
 		rc = blocklevel_raw_write(flash->bl, offset, (void *)buf, size);
 		break;
 	case FLASH_OP_ERASE:
-		rc = blocklevel_erase(flash->bl, offset, size);
-		break;
+		rc = blocklevel_erase_async(flash->bl, offset, size);
+		if (rc != FLASH_ASYNC_POLL)
+			break;
+		schedule_timer(&flash->async_info.poller, msecs_to_tb(1));
+		/*
+		 * FLASH_OP_ERASE can now happen asynchronously, don't go out
+		 * the regular path.
+		 *
+		 * Ensure we hold the lock to flash for the entirety of the
+		 * async process
+		 */
+		return OPAL_ASYNC_COMPLETION;
 	default:
 		assert(0);
 	}
diff --git a/libflash/blocklevel.c b/libflash/blocklevel.c
index 9802ad0f..acda2066 100644
--- a/libflash/blocklevel.c
+++ b/libflash/blocklevel.c
@@ -29,6 +29,10 @@
 
 #define PROT_REALLOC_NUM 25
 
+#ifndef MIN
+#define MIN(a, b)	((a) < (b) ? (a) : (b))
+#endif
+
 /* This function returns tristate values.
  * 1  - The region is ECC protected
  * 0  - The region is not ECC protected
@@ -78,6 +82,46 @@ static int release(struct blocklevel_device *bl)
 	return rc;
 }
 
+int blocklevel_async_poll(struct blocklevel_device *bl)
+{
+	uint64_t len;
+	int rc;
+
+	if (!bl->async.active)
+		return FLASH_ERR_PARM_ERROR;
+
+	/* Just chunk things up by erase granule */
+	len = MIN(bl->erase_mask + 1, bl->async.len);
+
+	switch (bl->async.op) {
+		case READ:
+		case WRITE:
+			/*
+			 * Very large caveat - do not remove this without auditing
+			 * this entire file. smart_write() and smart_erase() will
+			 * not 'just work' async. They will need work.
+			 */
+			bl->async.buf += len;
+			return FLASH_ERR_PARM_ERROR;
+		case ERASE:
+			rc = bl->erase(bl, bl->async.pos, len);
+			break;
+		default:
+			return FLASH_ERR_PARM_ERROR;
+	}
+
+	if (rc)
+		return rc;
+
+	bl->async.pos += len;
+	bl->async.len -= len;
+
+	if (bl->async.len == 0)
+		bl->async.active = false;
+
+	return bl->async.active ? FLASH_ASYNC_POLL : 0;
+}
+
 int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len)
 {
 	int rc;
@@ -189,9 +233,9 @@ out:
 	return rc;
 }
 
-int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
+static int check_erase_params(struct blocklevel_device *bl, const char *caller,
+		uint64_t pos, uint64_t len)
 {
-	int rc;
 	if (!bl || !bl->erase) {
 		errno = EINVAL;
 		return FLASH_ERR_PARM_ERROR;
@@ -199,16 +243,48 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
 
 	/* Programmer may be making a horrible mistake without knowing it */
 	if (pos & bl->erase_mask) {
-		fprintf(stderr, "blocklevel_erase: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
-				pos, bl->erase_mask + 1);
+		FL_ERR("%s: pos (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
+				caller, pos, bl->erase_mask + 1);
+		return FLASH_ERR_ERASE_BOUNDARY;
 	}
 
 	if (len & bl->erase_mask) {
-		fprintf(stderr, "blocklevel_erase: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
-				len, bl->erase_mask + 1);
+		FL_ERR("%s: len (0x%"PRIx64") is not erase block (0x%08x) aligned\n",
+				caller, len, bl->erase_mask + 1);
 		return FLASH_ERR_ERASE_BOUNDARY;
 	}
 
+	return 0;
+}
+
+int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
+{
+	int rc;
+
+	rc = check_erase_params(bl, __func__, pos, len);
+	if (rc)
+		return rc;
+
+	if (bl->async.active)
+		return FLASH_ERR_PARM_ERROR;
+
+	bl->async.op = ERASE;
+	bl->async.pos = pos;
+	bl->async.len = len;
+	bl->async.active = true;
+
+	/* Call the poll once to get everything going */
+	return blocklevel_async_poll(bl);
+}
+
+int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len)
+{
+	int rc;
+
+	rc = check_erase_params(bl, __func__, pos, len);
+	if (rc)
+		return rc;
+
 	rc = reacquire(bl);
 	if (rc)
 		return rc;
diff --git a/libflash/blocklevel.h b/libflash/blocklevel.h
index ba42c83d..7079e2fc 100644
--- a/libflash/blocklevel.h
+++ b/libflash/blocklevel.h
@@ -34,6 +34,20 @@ enum blocklevel_flags {
 	WRITE_NEED_ERASE = 1,
 };
 
+enum blocklevel_async_op {
+	READ = 1,
+	WRITE,
+	ERASE
+};
+
+struct blocklevel_async {
+	bool active;
+	enum blocklevel_async_op op;
+	uint64_t pos;
+	uint64_t len;
+	void *buf;
+};
+
 /*
  * libffs may be used with different backends, all should provide these for
  * libflash to get the information it needs
@@ -56,6 +70,8 @@ struct blocklevel_device {
 	enum blocklevel_flags flags;
 
 	struct blocklevel_range ecc_prot;
+
+	struct blocklevel_async async;
 };
 int blocklevel_raw_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
 int blocklevel_read(struct blocklevel_device *bl, uint64_t pos, void *buf, uint64_t len);
@@ -65,6 +81,9 @@ int blocklevel_erase(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
 int blocklevel_get_info(struct blocklevel_device *bl, const char **name, uint64_t *total_size,
 		uint32_t *erase_granule);
 
+int blocklevel_async_poll(struct blocklevel_device *bl);
+int blocklevel_erase_async(struct blocklevel_device *bl, uint64_t pos, uint64_t len);
+
 /*
  * blocklevel_smart_write() performs reads on the data to see if it
  * can skip erase or write calls. This is likely more convenient for
diff --git a/libflash/errors.h b/libflash/errors.h
index 2f567211..deda7b87 100644
--- a/libflash/errors.h
+++ b/libflash/errors.h
@@ -33,5 +33,19 @@
 #define FLASH_ERR_BAD_READ		15
 #define FLASH_ERR_DEVICE_GONE	16
 #define FLASH_ERR_AGAIN	17
+#define FLASH_ASYNC_POLL 18
+
+#ifdef __SKIBOOT__
+#include <skiboot.h>
+#define FL_INF(fmt...) do { prlog(PR_INFO, fmt);  } while(0)
+#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
+#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt);   } while(0)
+#else
+#include <stdio.h>
+extern bool libflash_debug;
+#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
+#define FL_INF(fmt...) do { printf(fmt); } while(0)
+#define FL_ERR(fmt...) do { fprintf(stderr, fmt); } while(0)
+#endif
 
 #endif /* __LIBFLASH_ERRORS_H */
diff --git a/libflash/libflash.h b/libflash/libflash.h
index 4fecfe75..ff3a9823 100644
--- a/libflash/libflash.h
+++ b/libflash/libflash.h
@@ -28,19 +28,6 @@
  */
 #include <libflash/errors.h>
 
-#ifdef __SKIBOOT__
-#include <skiboot.h>
-#define FL_INF(fmt...) do { prlog(PR_INFO, fmt);  } while(0)
-#define FL_DBG(fmt...) do { prlog(PR_DEBUG, fmt); } while(0)
-#define FL_ERR(fmt...) do { prlog(PR_ERR, fmt);   } while(0)
-#else
-#include <stdio.h>
-extern bool libflash_debug;
-#define FL_DBG(fmt...) do { if (libflash_debug) printf(fmt); } while(0)
-#define FL_INF(fmt...) do { printf(fmt); } while(0)
-#define FL_ERR(fmt...) do { printf(fmt); } while(0)
-#endif
-
 /* Flash chip, opaque */
 struct flash_chip;
 struct spi_flash_ctrl;
-- 
2.13.2



More information about the Skiboot mailing list