[Skiboot] [PATCH 2/2] Asynchronous LID/Resource loading for FSP systems

Mahesh Jagannath Salgaonkar mahesh at linux.vnet.ibm.com
Tue Mar 31 05:32:07 AEDT 2015


On 03/24/2015 09:01 AM, Stewart Smith wrote:
> This moves away from using fsp_sync_msg in fsp_fetch_data and instead
> using the platform hooks for start_preload_resource() to actually queue
> up a load and having the plumbing for checking if a resource is loaded yet.
> 
> This gets rid of the "pollers called with locks held" warning we got
> heaps of previously. You can now boot some FSP systems without getting
> this warning at all.
> 
> This also sets the stage for starting load of LIDs much earlier to when
> they're needed, improving boot time.
> 
> Signed-off-by: Stewart Smith <stewart at linux.vnet.ibm.com>
> ---
>  core/hostservices.c         |    2 +-
>  core/vpd.c                  |    2 +-
>  hdata/test/stubs.c          |    2 +-
>  hw/fsp/fsp.c                |  378 +++++++++++++++++++++++++++++++------------
>  include/fsp.h               |    2 +
>  platforms/ibm-fsp/apollo.c  |    1 +
>  platforms/ibm-fsp/firenze.c |    1 +
>  7 files changed, 281 insertions(+), 107 deletions(-)
> 
> diff --git a/core/hostservices.c b/core/hostservices.c
> index 22d34e4..a2289c9 100644
> --- a/core/hostservices.c
> +++ b/core/hostservices.c
> @@ -415,7 +415,7 @@ static int __hservice_lid_load(uint32_t lid, void **buf, size_t *len)
>  	 */
>  	*buf = malloc(HBRT_LOAD_LID_SIZE);
>  	*len = HBRT_LOAD_LID_SIZE;
> -	rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0, *buf, len);
> +	rc = fsp_load_lid(lid, *buf, len);
>  	if (rc != 0)
>  		/* Take advantage of realloc corner case here. */
>  		*len = 0;
> diff --git a/core/vpd.c b/core/vpd.c
> index deb552c..0e6f83b 100644
> --- a/core/vpd.c
> +++ b/core/vpd.c
> @@ -155,7 +155,7 @@ static void *vpd_lid_load(const uint8_t *lx, uint8_t lxrn, size_t *size)
>  	*size = VPD_LID_MAX_SIZE;
>  
>  	/* Load it from the FSP */
> -	rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid_no, 0, data, size);
> +	rc = fsp_load_lid(lid_no, data, size);
>  	if (rc) {
>  		prerror("VPD: Error %d loading VPD LID\n", rc);
>  		goto fail;
> diff --git a/hdata/test/stubs.c b/hdata/test/stubs.c
> index c7dae5f..d0a8ef1 100644
> --- a/hdata/test/stubs.c
> +++ b/hdata/test/stubs.c
> @@ -54,7 +54,7 @@ STUB(dt_next);
>  STUB(dt_has_node_property);
>  STUB(dt_get_address);
>  STUB(op_display);
> -STUB(fsp_fetch_data);
> +STUB(fsp_load_lid);
>  STUB(get_ics_phandle);
>  STUB(get_psi_interrupt);
>  STUB(fsp_adjust_lid_side);
> diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
> index 17d73b5..e5a3c1d 100644
> --- a/hw/fsp/fsp.c
> +++ b/hw/fsp/fsp.c
> @@ -38,6 +38,7 @@
>  #include <opal.h>
>  #include <opal-api.h>
>  #include <opal-msg.h>
> +#include <ccan/list/list.h>
>  
>  DEFINE_LOG_ENTRY(OPAL_RC_FSP_POLL_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_FSP,
>  		 OPAL_PLATFORM_FIRMWARE, OPAL_ERROR_PANIC, OPAL_NA, NULL);
> @@ -2144,88 +2145,36 @@ uint32_t fsp_adjust_lid_side(uint32_t lid_no)
>  	return lid_no;
>  }
>  
> -int fsp_fetch_data(uint8_t flags, uint16_t id, uint32_t sub_id,
> -		   uint32_t offset, void *buffer, size_t *length)
> -{
> -	uint32_t total, remaining = *length;
> -	uint64_t baddr;
> -	uint64_t balign, boff, bsize;
> -	struct fsp_msg *msg;
> -	static struct lock fsp_fetch_lock = LOCK_UNLOCKED;
> -
> -	*length = total = 0;
> -
> -	if (!fsp_present())
> -		return -ENODEV;
> -
> -	prlog(PR_DEBUG, "FSP: Fetch data id: %02x sid: %08x to %p"
> -	      "(0x%x bytes)\n",
> -	      id, sub_id, buffer, remaining);
> -
> -	/*
> -	 * Use a lock to avoid multiple processors trying to fetch
> -	 * at the same time and colliding on the TCE space
> -	 */
> -	lock(&fsp_fetch_lock);
> -
> -	while(remaining) {
> -		uint32_t chunk, taddr, woffset, wlen;
> -		uint8_t rc;
> -
> -		/* Calculate alignment skew */
> -		baddr = (uint64_t)buffer;
> -		balign = baddr & ~TCE_MASK;
> -		boff = baddr & TCE_MASK;
> -
> -		/* Get a chunk */
> -		chunk = remaining;
> -		if (chunk > (PSI_DMA_FETCH_SIZE - boff))
> -			chunk = PSI_DMA_FETCH_SIZE - boff;
> -		bsize = ((boff + chunk) + TCE_MASK) & ~TCE_MASK;
> -
> -		prlog(PR_DEBUG, "FSP:  0x%08x bytes balign=%llx"
> -		      " boff=%llx bsize=%llx\n",
> -		      chunk, balign, boff, bsize);
> -		fsp_tce_map(PSI_DMA_FETCH, (void *)balign, bsize);
> -		taddr = PSI_DMA_FETCH + boff;
> -		msg = fsp_mkmsg(FSP_CMD_FETCH_SP_DATA, 6,
> -				flags << 16 | id, sub_id, offset,
> -				0, taddr, chunk);
> -		rc = fsp_sync_msg(msg, false);
> -		fsp_tce_unmap(PSI_DMA_FETCH, bsize);
> -
> -		woffset = msg->resp->data.words[1];
> -		wlen = msg->resp->data.words[2];
> -		prlog(PR_DEBUG, "FSP:   -> rc=0x%02x off: %08x"
> -		      " twritten: %08x\n",
> -		      rc, woffset, wlen);
> -		fsp_freemsg(msg);
> -
> -		/* XXX Is flash busy (0x3f) a reason for retry ? */
> -		if (rc != 0 && rc != 2) {
> -			unlock(&fsp_fetch_lock);
> -			return -EIO;
> -		}
> -
> -		remaining -= wlen;
> -		total += wlen;
> -		buffer += wlen;
> -		offset += wlen;
> -
> -		/* The doc seems to indicate that we get rc=2 if there's
> -		 * more data and rc=0 if we reached the end of file, but
> -		 * it looks like I always get rc=0, so let's consider
> -		 * an EOF if we got less than what we asked
> -		 */
> -		if (wlen < chunk)
> -			break;
> -	}
> -	unlock(&fsp_fetch_lock);
> +struct fsp_fetch_lid_item {
> +	enum resource_id id;
> +	uint32_t idx;
>  
> -	*length = total;
> +	uint32_t lid;
> +	uint32_t lid_no;
> +	uint64_t bsize;
> +	uint32_t offset;
> +	void *buffer;
> +	size_t *length;
> +	size_t remaining;
> +	struct list_node link;
> +	int result;
> +};
>  
> -	return 0;
> -}
> +/*
> + * We have a queue of things to fetch
> + * when fetched, it moves to fsp_fetched_lid until we're asked if it
> + * has been fetched, in which case it's free()d.
> + *
> + * Everything is protected with fsp_fetch_lock.
> + *
> + * We use PSI_DMA_FETCH TCE entry for this fetching queue. If something
> + * is in the fsp_fetch_lid_queue, it means we're using this TCE entry!
> + *
> + * If we add the first entry to fsp_fetch_lid_queue, we trigger fetching!
> + */
> +static LIST_HEAD(fsp_fetch_lid_queue);
> +static LIST_HEAD(fsp_fetched_lid);
> +static struct lock fsp_fetch_lock = LOCK_UNLOCKED;
>  
>  /*
>   * Asynchronous fsp fetch data call
> @@ -2275,12 +2224,156 @@ static struct {
>  	{ RESOURCE_ID_CAPP,	CAPP_IDX_VENICE_DD20,	0x80a02004 },
>  };
>  
> +static void fsp_start_fetching_next_lid(void);
> +static void fsp_fetch_lid_next_chunk(struct fsp_fetch_lid_item *last);
> +
> +static void fsp_fetch_lid_complete(struct fsp_msg *msg)
> +{
> +	struct fsp_fetch_lid_item *last;
> +	uint32_t woffset, wlen;
> +	uint8_t rc;
> +
> +	lock(&fsp_fetch_lock);
> +	last = list_top(&fsp_fetch_lid_queue, struct fsp_fetch_lid_item, link);
> +	fsp_tce_unmap(PSI_DMA_FETCH, last->bsize);
> +
> +	woffset = msg->resp->data.words[1];
> +	wlen = msg->resp->data.words[2];
> +	rc = (msg->resp->word1 >> 8) & 0xff;
> +
> +	/* Fall back to a PHYP LID for kernel loads */
> +	if (rc && last->lid_no == KERNEL_LID_OPAL) {
> +		const char *ltype = dt_prop_get_def(dt_root, "lid-type", NULL);
> +		if (!ltype || strcmp(ltype, "opal")) {
> +			prerror("Failed to load in OPAL mode...\n");
> +			last->result = OPAL_PARAMETER;
> +			last = list_pop(&fsp_fetch_lid_queue,
> +					struct fsp_fetch_lid_item, link);
> +			list_add_tail(&fsp_fetched_lid, &last->link);
> +			fsp_start_fetching_next_lid();
> +			unlock(&fsp_fetch_lock);
> +			return;
> +		}
> +		printf("Trying to load as PHYP LID...\n");
> +		last->lid = KERNEL_LID_PHYP;
> +		/* Retry with different LID */
> +		fsp_fetch_lid_next_chunk(last);
> +	}
> +


> +	if (rc !=0 && rc != 2) {
> +		last->result = -EIO;
> +		last = list_pop(&fsp_fetch_lid_queue, struct fsp_fetch_lid_item, link);
> +		list_add_tail(&fsp_fetched_lid, &last->link);
> +		fsp_start_fetching_next_lid();
> +		unlock(&fsp_fetch_lock);
> +		return;
> +	}
> +
> +	if (rc == 0)
> +		last->result = OPAL_SUCCESS;

It looks like the FSP doesn't behave as per the documentation :-(. We always
get rc=0 irrespective of whether end-of-file has been reached or not. This
causes a partial LID load, resulting in an OPAL hang while booting the
petitboot kernel.

I see that this patch is already in the skiboot git tree, so I have just sent
out a follow-up patch that fixes the partial loading of LIDs.

https://lists.ozlabs.org/pipermail/skiboot/2015-March/000887.html

Thanks,
-Mahesh.



More information about the Skiboot mailing list