[Skiboot] [PATCH v5 3/8] skiboot: Find the IMC DTB

Hemant Kumar hemant at linux.vnet.ibm.com
Wed Feb 15 22:42:32 AEDT 2017


Hi Oliver,

Apologies for the delay in the reply.

On 02/10/2017 12:28 PM, Oliver O'Halloran wrote:
> On Wed, 2017-02-08 at 02:41 +0530, Hemant Kumar wrote:
>> IMC (In Memory Collection) catalog is a repository of information
>> about the Performance Monitoring Units (PMUs) and their events under
>> the IMC infrastructure. The information include :
>>   - The PMU names
>>   - Event names
>>   - Event description
>>   - Event offsets
>>   - Event scale
>>   - Event unit
>>
>> The catalog is provided as a flattened device tree (dtb). Processors
>> with different PVR values may have different PMU or event names.
>> Hence,
>> for each processor, there can be multiple device tree binaries (dtbs)
>> containing the IMC information. Each of the dtb is compressed and
>> forms
>> a sub-partition inside the PNOR partition "IMA_CATALOG". Here is a
>> link
>> to the commit adding this partition to PNOR :
>> https://github.com/open-power/pnor/commit/c940142c6dc64dd176096dc648f
>> 433c889919e84
>>
>> So, each compressed dtb forms a sub-partition inside the IMA_CATALOG
>> partition and can be accessed/loaded through a sub-partition id which
>> is nothing but the PVR id. Based on the current processor's PVR, the
>> appropriate sub-partion will be loaded.
>>
>>   IMA_CATALOG
>>    partition
>>   -----------   Sub-id (E.g)
>>   |         |
>>   |Catalog 1|    0x100
>>   |---------|
>>   |         |
>>   |Catalog 2|    0x200    <------  Current processor's PVR (0x200)
>>   |---------|
>>   ...
>>
>> In the above example, if the current processor's PVR is 0x200,
>> catalog 2
>> should be loaded.
>>
>> Note however, that the catalog information is in the form of a dtb
>> and
>> the dtb is compressed too. So, the sub-partition loaded must be
>> decompressed first before we can actually use it (which is done in
>> subsequent patches).
>>
>> It is important to mention here that while a PNOR image built for one
>> processor is specific to only that processor and isn't portable, a
>> single system generation (Processor version) may have multiple
>> revisions
>> and these revisions may have some changes in their IMC PMUs and
>> events,
>> and hence, the need for multiple IMC DTBs.
>>
>> The sub-partition that we obtain from the IMA_CATALOG partition is a
>> compressed device tree binary. We uncompress it using the libxz's
>> functions. After uncompressing it, we link the device tree binary to
>> the
>> system's device tree. The kernel can now access the device tree and
>> get
>> the IMC PMUs and their events' information.
>>
>> Not all the IMC PMUs listed in the device tree may be available. This
>> is
>> indicated by imc availability vector (which is a part of the IMC
>> control
>> block structure). We need to check this vector and make sure to
>> remove
>> the IMC device nodes which are unavailable.
>>
>> Signed-off-by: Hemant Kumar <hemant at linux.vnet.ibm.com>
>> ---
>> Chaneglog:
>> v2 -> v3:
>>   - Clubbed patch 2 and patch 4 from v2 into one.
>>   - Changed references from IMA to IMC.
>>   - Uses proc_gen now to check for power9.
>>   - Uses macro MAX_AVL instead of 48.
>>   - Reworked a segment of the code to disable the unavailable nest
>> units
>>     first and then attach the final IMC device tree to the system dt.
>>   - Created a new helper get_imc_cb() to fetch the nest IMC control
>> block
>>     struct.
>>   - Use of be64_to_cpu() to read data from control block.
>> v1 -> v2:
>>   - Moved the macro checks for power8 to a separate function
>> is_power8().
>>   - Added a new function disable_unavailable_units() to check the IMA
>>     availability vector and remove the nest PMU nodes from the device
>>     tree.
>>
>>   core/flash.c       |   1 +
>>   core/init.c        |   4 ++
>>   hw/Makefile.inc    |   2 +-
>>   hw/imc.c           | 188
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/imc.h      |   2 +
>>   include/platform.h |   1 +
>>   6 files changed, 197 insertions(+), 1 deletion(-)
>>   create mode 100644 hw/imc.c
>>
>> diff --git a/core/flash.c b/core/flash.c
>> index 92de421..32b32dc 100644
>> --- a/core/flash.c
>> +++ b/core/flash.c
>> @@ -416,6 +416,7 @@ static struct {
>>   	{ RESOURCE_ID_KERNEL,	RESOURCE_SUBID_NONE,		
>> "BOOTKERNEL" },
>>   	{ RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,		
>> "ROOTFS" },
>>   	{ RESOURCE_ID_CAPP,	RESOURCE_SUBID_SUPPORTED,	
>> "CAPP" },
>> +	{ RESOURCE_ID_CATALOG,  RESOURCE_SUBID_SUPPORTED,	"IM
>> A_CATALOG" },
> I suppose it's too late to rename this to IMC_CATALOG?

Yeah, its too late now.

>>   };
>>   
>>   /* This mimics the hostboot SBE format */
>> diff --git a/core/init.c b/core/init.c
>> index 43ce3a0..07f4568 100644
>> --- a/core/init.c
>> +++ b/core/init.c
>> @@ -47,6 +47,7 @@
>>   #include <nvram.h>
>>   #include <libstb/stb.h>
>>   #include <libstb/container.h>
>> +#include <imc.h>
>>   
>>   enum proc_gen proc_gen;
>>   
>> @@ -903,6 +904,9 @@ void __noreturn __nomcount main_cpu_entry(const
>> void *fdt)
>>   	/* Init SLW related stuff, including fastsleep */
>>   	slw_init();
>>   
>> +	/* Init IMA related stuff (load the IMA dtb into memory) */
>> +	imc_init();
>> +
>>   	op_display(OP_LOG, OP_MOD_INIT, 0x0002);
>>   
>>   	/* Read in NVRAM and set it up */
>> diff --git a/hw/Makefile.inc b/hw/Makefile.inc
>> index a433c2b..02a9c72 100644
>> --- a/hw/Makefile.inc
>> +++ b/hw/Makefile.inc
>> @@ -1,7 +1,7 @@
>>   # -*-Makefile-*-
>>   SUBDIRS += hw
>>   HW_OBJS  = xscom.o chiptod.o gx.o cec.o lpc.o lpc-uart.o psi.o
>> -HW_OBJS += homer.o slw.o occ.o fsi-master.o centaur.o
>> +HW_OBJS += homer.o slw.o occ.o fsi-master.o centaur.o imc.o
>>   HW_OBJS += nx.o nx-rng.o nx-crypto.o nx-842.o
>>   HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o
>>   HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o
>> diff --git a/hw/imc.c b/hw/imc.c
>> new file mode 100644
>> index 0000000..f0eca7b
>> --- /dev/null
>> +++ b/hw/imc.c
>> @@ -0,0 +1,188 @@
>> +/* Copyright 2016 IBM Corp.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at
>> + *
>> + *     http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing,
>> software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> + * implied.
>> + * See the License for the specific language governing permissions
>> and
>> + * limitations under the License.
>> + */
>> +
>> +#include <skiboot.h>
>> +#include <xscom.h>
>> +#include <imc.h>
>> +#include <chip.h>
>> +#include <libxz/xz.h>
>> +#include <nest_imc.h>
>> +
>> +static struct imc_chip_cb *get_imc_cb(void)
>> +{
>> +	uint64_t cb_loc;
>> +	struct proc_chip *chip = get_chip(this_cpu()->chip_id);
>> +
>> +	cb_loc = chip->homer_base + CB_STRUCT_OFFSET;
>> +	return (struct imc_chip_cb *)cb_loc;
>> +}
>> +
>> +/*
>> + * Decompresses the blob obtained from the IMA_CATALOG sub-partition
>> + * in "buf" of size "size", assigns the uncompressed device tree
>> + * binary to "fdt" and returns.
>> + * Returns 0 on success and -1 on error.
>> + */
>> +static int decompress_subpartition(char *buf, size_t size, void
>> **fdt)
>> +{
>> +	struct xz_dec *s;
>> +	struct xz_buf b;
>> +	void *data;
>> +	int ret = 0;
>> +
>> +	/* Initialize the xz library first */
>> +	xz_crc32_init();
>> +	s = xz_dec_init(XZ_SINGLE, 0);
>> +	if (s == NULL)
>> +	{
>> +		prerror("IMC: initialization error for xz\n");
>> +		return -1;
>> +	}
>> +
>> +	/* Allocate memory for the uncompressed data */
>> +	data = malloc(IMC_DTB_SIZE);
>> +	if (!data) {
>> +		prerror("IMC: memory allocation error\n");
>> +		ret = -1;
>> +		goto err;
>> +	}
>> +
>> +	/*
>> +	 * Source address : buf
>> +	 * Source size : size
>> +	 * Destination address : data
>> +	 * Destination size : IMC_DTB_SIZE
>> +	 */
>> +	b.in = buf;
>> +	b.in_pos = 0;
>> +	b.in_size = size;
>> +	b.out = data;
>> +	b.out_pos = 0;
>> +	b.out_size = IMC_DTB_SIZE;
>> +
>> +	/* Start decompressing */
>> +	ret = xz_dec_run(s, &b);
>> +	if (ret != XZ_STREAM_END) {
>> +		prerror("IMC: failed to decompress subpartition\n");
>> +		free(data);
>> +		ret = -1;
>> +	}
>> +	*fdt = data;
>> +
>> +err:
>> +	/* Clean up memory */
>> +	xz_dec_end(s);
>> +	return ret;
>> +}
>> +
>
>> +/*
>> + * Remove the PMU device nodes from the system's dt, if they are not
> I don't think this should be "from the system's dt"

Good catch!
This is actually a bug. Even before attaching the imc-counters
node to the system device tree, we are trying to prune the
unavailable nest PMUs. So, it's basically useless. Will fix this.

>
> + * available in the hardware. The availability is described by the
>> + * control block's imc_chip_avl_vector.
>> + * Each bit represents a device unit. If the device is available,
>> then
>> + * the bit is set else its unset.
>> + */
>> +static int disable_unavailable_units(void)
>> +{
>> +	uint64_t avl_vec;
>> +	struct imc_chip_cb *cb;
>> +	struct dt_node *target;
>> +	int i;
>> +
>> +	/* Fetch the IMC control block structure */
>> +	cb = get_imc_cb();
>> +
>> +	avl_vec = be64_to_cpu(cb->imc_chip_avl_vector);
>> +	for (i = 0; i < MAX_AVL; i++) {
>> +		if (!(PPC_BITMASK(i, i) & avl_vec)) {
>> +			/* Check if the device node exists */
>> +			target = dt_find_by_name(dt_root,
>> nest_pmus[i]);
> I don't think you want to be looking at dt_root here. Isn't this
> function supposed to prune the unflattened IMC DT? If so we should pass
> it its root as a function argument.

Right. See above.

>
>> +			if (!target)
>> +				continue;
>> +			/* Remove the device node */
>> +			dt_free(target);
>> +		}
>> +	}
>> +
>> +	return 0;
>> +}
>
>> +
>> +/*
>> + * Fetch the IMA_CATALOG partition and find the appropriate sub-
>> partition
>> + * based on the platform's PVR.
>> + * Decompress the sub-partition and link the imc device tree to the
>> + * existing device tree.
>> + */
>> +void imc_init(void)
>> +{
>> +	char *buf = NULL;
>> +	size_t size = IMC_DTB_SIZE;
>> +	uint32_t pvr = mfspr(SPR_PVR);
>> +	int ret;
>> +	struct dt_node *dev;
>> +	void *fdt = NULL;
>> +
>> +	/* Enable only for power 9 */
>> +	if (proc_gen != proc_gen_p9)
>> +		return;
>> +
>> +	buf = malloc(IMC_DTB_SIZE);
>> +	if (!buf) {
>> +		prerror("IMC: unable to allocate memory\n");
>> +		return;
>> +	}
>> +
>> +	ret = start_preload_resource(RESOURCE_ID_CATALOG,
>> +					pvr, buf, &size);
>> +	if (ret != OPAL_SUCCESS)
>> +		goto err;
>> +
>> +	ret = wait_for_resource_loaded(RESOURCE_ID_CATALOG,
>> +					  pvr);
>> +	if (ret != OPAL_SUCCESS) {
>> +		prerror("IMC: unable to load the catalog\n");
>> +		goto err;
>> +	}
> We should probably provide a helper for this, but that can be fixed
> another day.
>
>> +
>> +	/* Decompress the subpartition now */
>> +	ret = decompress_subpartition(buf, size, &fdt);
>> +	if (ret < 0) {
>> +		goto err;
>> +	}
>> +
>> +	/* Create a device tree entry for imc counters */
>> +	dev = dt_new_root("imc-counters");
>> +	if (!dev) {
>> +		prerror("IMC: Unable to create device node\n");
>> +		goto err;
>> +	}
>> +
>> +	/* Attach the new fdt to the imc-counters node */
>> +	ret = dt_expand_node(dev, fdt, 0);
>> +	if (ret < 0)
>> +		prerror("IMC: device tree couldn't be linked\n");
>> +
>> +	/* Check availability of the PMU units from the availability
>> vector */
>> +	ret = disable_unavailable_units();
> See my comments above. We should be passing dev into
> disable_unavailable_units().

Agreed. Will fix this.

>> +	if (ret < 0)
>> +		prerror("IMC: error in disabling unavailable
>> units\n");
>> +	if (!dt_attach_root(dt_root, dev)) {
>> +		prerror("IMC: error in attaching IMC dt to system
>> dt\n");
>> +		dt_free(dev);
>> +	}
>> +err:
>> +	free(buf);
>> +}
>> diff --git a/include/imc.h b/include/imc.h
>> index d6f765a..b44c0c2 100644
>> --- a/include/imc.h
>> +++ b/include/imc.h
>> @@ -112,4 +112,6 @@ struct imc_chip_cb
>>   #define NEST_IMC_ENABLE			0x1
>>   #define NEST_IMC_DISABLE		0x2
>>   
>> +void imc_init(void);
>> +
>>   #endif /* __IMC_H */
>> diff --git a/include/platform.h b/include/platform.h
>> index 334c0a4..02adb2f 100644
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -27,6 +27,7 @@ enum resource_id {
>>   	RESOURCE_ID_KERNEL,
>>   	RESOURCE_ID_INITRAMFS,
>>   	RESOURCE_ID_CAPP,
>> +	RESOURCE_ID_CATALOG,
>>   };
>>   #define RESOURCE_SUBID_NONE 0
>>   #define RESOURCE_SUBID_SUPPORTED 1

Thanks a lot for the review.
--
Thanks,
Hemant Kumar



More information about the Skiboot mailing list