[Skiboot] [PATCH V3 2/2] core/pldm: Register PLDM as a blocklevel device

Sat Apr 15 01:00:54 AEST 2023

On 29/04/2022 11:46, Christophe Lombard wrote:
> In the same way that ipmi-hiomap implements the PNOR access control
> protocol, this patch allows to "virtualize" the content of a BMC flash
> based on lid files.
> Previously, flash PNOR partitions were viewed this way:
>    partitionXX=NAME, start address, end address, flags
> 
> The content of each partition is now stored in a lid file. In order to
> continue to use the libflash library, we manually fill in the contents of
> a fake flash header when accessing offset 0. This reproduces the behavior
> via ipmi-hiomap of reading the flash header on the BMC.
> 
> For the reading and writing of BMC lids files, we convert the virtual
> addresses of these 'fake' partitions by identifying: lid id.
> 
> Signed-off-by: Christophe Lombard <clombard at linux.vnet.ibm.com>
> ---
>   core/pldm/pldm-lid-files.c | 226 +++++++++++++++++++++++++++++++++++++
>   include/pldm.h             |   5 +
>   2 files changed, 231 insertions(+)
> 
> diff --git a/core/pldm/pldm-lid-files.c b/core/pldm/pldm-lid-files.c
> index 11e2f9e2..e8122268 100644
> --- a/core/pldm/pldm-lid-files.c
> +++ b/core/pldm/pldm-lid-files.c
> @@ -25,6 +25,14 @@ struct pldm_lid {
>   
>   static LIST_HEAD(lid_files);
>   
> +struct pldm_ctx_data {
> +	/* Members protected by the blocklevel lock */
> +	struct blocklevel_device bl;
> +	uint32_t total_size;
> +	uint32_t erase_granule;
> +	struct lock lock;
> +};
> +
>   #define MEGABYTE (1024*1024)
>   
>   /*
> @@ -34,6 +42,17 @@ static LIST_HEAD(lid_files);
>    */
>   #define VMM_SIZE_RESERVED_PER_SECTION (16 * MEGABYTE)
>   
> +/* The version of this partition implementation */
> +#define FFS_VERSION_1 1
> +
> +/* Magic number for the partition header (ASCII 'PART') */
> +#define FFS_MAGIC 0x50415254
> +
> +/* pid of logical partitions/containers */
> +#define FFS_PID_TOPLEVEL 0xFFFFFFFF

Those 3 FFS_* macros are already defined and included from 
libflash/ffs.h and can simply be removed.

> +
> +#define ERASE_GRANULE_DEF 0x1000
> +
>   /*
>    * Print the attributes of lid files.
>    */
> @@ -143,8 +162,195 @@ out:
>   	return rc;
>   }
>   
> +static uint32_t checksum(void *data, size_t size)
> +{
> +	uint32_t i, csum = 0;
> +
> +	for (i = csum = 0; i < (size/4); i++)
> +		csum ^= ((uint32_t *)data)[i];
> +	return csum;
> +}
> +
> +/* Helper functions for typesafety and size safety */
> +static uint32_t hdr_checksum(struct __ffs_hdr *hdr)
> +{
> +	return checksum(hdr, sizeof(struct __ffs_hdr));
> +}
> +
> +static uint32_t entry_checksum(struct __ffs_entry *ent)
> +{
> +	return checksum(ent, sizeof(struct __ffs_entry));
> +}
> +
> +/*
> + * Fill __ffs structures in order to return a 'fake' header flash
> + */
> +static int lid_ids_to_header_flash(void *buf, uint64_t len)
> +{
> +	struct __ffs_entry *entry;
> +	struct __ffs_hdr *raw_hdr;
> +	struct pldm_lid *lid = NULL;
> +	uint32_t count, part_id, i;
> +	uint32_t block_size;
> +
> +	/* number of lid files */
> +	count = get_lids_count();
> +
> +	/* last member of struct __ffs_hdr is a flexible array member */
> +	raw_hdr = zalloc(sizeof(struct __ffs_hdr) + (count * sizeof(struct __ffs_entry)));

We're going to do this each time the flash header is read at offset 0, 
which seems to be done many times. It should only be done once at init, 
which would also avoid leaking memory (we allocate on every invocation 
and never free)

> +
> +	/* complete header flash */
> +	block_size = sizeof(struct __ffs_hdr) + (sizeof(struct __ffs_entry) * count);
> +	block_size = ALIGN_UP(block_size, 1 << 13);

We currently have 44 entries on rainier, and the block size, before 
alignment, is 5680 bytes. So we align up on a 8k boundary and the rest 
of the code ends up working ok. However, it is risky if the number of 
lids grow and we go over 8k (actually, I think 16k would still be ok 
with the code below, but problems would start if we reach 24k)

Why not pick a block size, say 8k, and do the maths based on it?

> +
> +	raw_hdr->magic = cpu_to_be32(FFS_MAGIC);
> +	raw_hdr->version = cpu_to_be32(FFS_VERSION_1);
> +	raw_hdr->size = cpu_to_be32(0x1);

... which would matter here...

> +	raw_hdr->entry_size = cpu_to_be32(sizeof(struct __ffs_entry));
> +	raw_hdr->entry_count = cpu_to_be32(count);
> +	raw_hdr->block_size = cpu_to_be32(block_size);
> +	raw_hdr->block_count = cpu_to_be32(0x4000);

... and here (I don't understand the 0x4000, it seems completely bogus)

> +	raw_hdr->checksum = hdr_checksum(raw_hdr);
> +
> +	lid = list_top(&lid_files, struct pldm_lid, list);
> +	part_id = 1;
> +
> +	for (i = 0; i < count; i++) {
> +		entry = &raw_hdr->entries[i];
> +
> +		strcpy(entry->name, lid->name);
> +		entry->base = cpu_to_be32(lid->start / block_size);
> +		entry->size = cpu_to_be32(lid->length / block_size);

... and for the 2 line above. And that's where it really matters: if the 
header ever go over 8k, we may not have round numbers here. We also need 
to pick VMM_SIZE_RESERVED_PER_SECTION as a multiple of the block size.

Also, for the entry->size, I think we need to round up otherwise we can 
truncate.

> +		entry->pid = cpu_to_be32(FFS_PID_TOPLEVEL);
> +		entry->id = cpu_to_be32(part_id);
> +		entry->type = cpu_to_be32(0x1);
> +		entry->flags = cpu_to_be32(0x0);
> +		entry->actual = cpu_to_be32(lid->length);
> +		entry->checksum = entry_checksum(entry);
> +
> +		lid = list_next(&lid_files, lid, list);
> +		part_id++;
> +	}
> +
> +	memcpy(buf, raw_hdr, len);
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +/*
> + * Search lid member from the virtual address.
> + */
> +static struct pldm_lid *vaddr_to_lid_id(uint64_t pos)
> +{
> +	struct pldm_lid *lid = NULL;
> +
> +	list_for_each(&lid_files, lid, list)
> +		if ((pos >= lid->start) && (pos < lid->start + VMM_SIZE_RESERVED_PER_SECTION))
> +			break;
> +
> +	return lid;
> +}
> +
> +static int lid_files_read(struct blocklevel_device *bl __unused,
> +			  uint64_t pos, void *buf, uint64_t len)
> +{
> +	struct pldm_lid *lid;
> +	uint64_t offset;
> +	int rc = 0;
> +
> +	/* LPC is only 32bit */
> +	if (pos > UINT_MAX || len > UINT_MAX)
> +		return FLASH_ERR_PARM_ERROR;

if (pos > UINT_MAX || (pos + len) > UINT_MAX)

> +
> +	prlog(PR_TRACE, "lid files read at 0x%llx for 0x%llx\n", pos, len);
> +
> +	if (pos == 0) {
> +		/* return a 'fake' header flash */
> +		rc = lid_ids_to_header_flash(buf, len);

A side effect of creating the header only once during init is that we 
can read at any offset in the header and not only at offset 0. Not sure 
it would be useful, but we have no reason to fail if it's a valid range 
within the header.

> +	} else {
> +		/* convert offset to lid id */
> +		lid = vaddr_to_lid_id(pos);
> +		if (!lid)
> +			return OPAL_PARAMETER;
> +
> +		/* read lid file */
> +		offset = pos - lid->start;
> +		rc = pldm_file_io_read_file(lid->handle, lid->length, buf, offset, len);
> +	}
> +
> +	return rc;
> +}
> +
> +static int lid_files_write(struct blocklevel_device *bl __unused,
> +			   uint64_t pos, const void *buf, uint64_t len)
> +{
> +	struct pldm_lid *lid;
> +	uint64_t offset;
> +
> +	/* LPC is only 32bit */
> +	if (pos > UINT_MAX || len > UINT_MAX)

Same as above.

> +		return FLASH_ERR_PARM_ERROR;
> +
> +	prlog(PR_TRACE, "lid files write at 0x%llx for 0x%llx\n", pos, len);
> +
> +	/* convert offset to lid id */
> +	lid = vaddr_to_lid_id(pos);
> +	if (!lid)
> +		return OPAL_PARAMETER;
> +
> +	/* write lid file */
> +	offset = pos - lid->start;
> +	return pldm_file_io_write_file(lid->handle, buf, offset, len);
> +}
> +
> +static int lid_files_erase(struct blocklevel_device *bl __unused,
> +			   uint64_t pos, uint64_t len)
> +{
> +
> +	prlog(PR_TRACE, "lid files erase at 0x%llx for 0x%llx\n", pos, len);

Since we don't support it, PR_ERR instead of PR_TRACE?

> +	return OPAL_UNSUPPORTED;
> +}
> +
> +static int get_lid_files_info(struct blocklevel_device *bl,
> +			      const char **name, uint64_t *total_size,
> +			      uint32_t *erase_granule)
> +{
> +	struct pldm_ctx_data *ctx;
> +
> +	ctx = container_of(bl, struct pldm_ctx_data, bl);
> +	ctx->bl.erase_mask = ctx->erase_granule - 1;
> +
> +	if (name)
> +		*name = NULL;
> +	if (total_size)
> +		*total_size = ctx->total_size;
> +	if (erase_granule)
> +		*erase_granule = ctx->erase_granule;
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +bool pldm_lid_files_exit(struct blocklevel_device *bl)
> +{
> +	struct pldm_ctx_data *ctx;
> +	struct pldm_lid *lid;
> +	bool status = true;

That 'status' variable doesn't seem to be useful and could be dropped.

   Fred