[PATCH 2/3] powerpc/pseries: Export hardware trace macro dump via debugfs
Madhavan Srinivasan
maddy at linux.ibm.com
Wed Jun 26 13:51:35 AEST 2024
On 6/22/24 1:10 PM, Ritesh Harjani (IBM) wrote:
> This is a generic review and I haven't looked into the PAPR spec for
> htmdump hcall and its interface.
Sure
> Madhavan Srinivasan <maddy at linux.ibm.com> writes:
>
>> This patch adds debugfs interface to export Hardware Trace Macro (HTM)
>> function data in a LPAR. New hypervisor call "H_HTM" has been
>> defined to setup, configure, control and dump the HTM data.
>> This patch supports only dumping of HTM data in a LPAR.
>> New debugfs folder called "htmdump" has been added under
>> /sys/kernel/debug/arch path which contains the files needed to
>> pass required parameters for the H_HTM dump function. New Kconfig
>> option called "CONFIG_HTMDUMP" has been added in platform/pseries for the same.
>>
>> With patch series applied and booted, list of files in debugfs path
>>
>> # pwd
>> /sys/kernel/debug/powerpc/htmdump
>> # ls
>> coreindexonchip htmtype nodalchipindex nodeindex trace
>>
>> Signed-off-by: Madhavan Srinivasan <maddy at linux.ibm.com>
>> ---
>> arch/powerpc/platforms/pseries/Kconfig | 8 ++
>> arch/powerpc/platforms/pseries/Makefile | 1 +
>> arch/powerpc/platforms/pseries/htmdump.c | 130 +++++++++++++++++++++++
>> 3 files changed, 139 insertions(+)
>> create mode 100644 arch/powerpc/platforms/pseries/htmdump.c
>>
>> diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
>> index afc0f6a61337..46c0ea605e33 100644
>> --- a/arch/powerpc/platforms/pseries/Kconfig
>> +++ b/arch/powerpc/platforms/pseries/Kconfig
>> @@ -128,6 +128,14 @@ config CMM
>> will be reused for other LPARs. The interface allows firmware to
>> balance memory across many LPARs.
>>
>> +config HTMDUMP
>> + tristate "PHYP HTM data dumper"
> Not sure if we can make machine_device_initcall() as a tristate?
> Did we try compiling it as a module?
>
> If we would like to keep this as a module - then why not use module_init
> call and then make it depend upon...
I will make it a bool and add the depends as suggested.
>
> depends on PPC_PSERIES && DEBUG_FS (??)
>
>> + default y
> and then since this is mostly a debug trace facility, then we need not enable
> it by default right?
Yes, we want this to be there; it is up to the hypervisor whether to
permit the hcalls.
>
>> + help
>> + Select this option, if you want to enable the kernel debugfs
>> + interface to dump the Hardware Trace Macro (HTM) function data
>> + in the LPAR.
>> +
>> config HV_PERF_CTRS
>> bool "Hypervisor supplied PMU events (24x7 & GPCI)"
>> default y
>> diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
>> index 7bf506f6b8c8..3f3e3492e436 100644
>> --- a/arch/powerpc/platforms/pseries/Makefile
>> +++ b/arch/powerpc/platforms/pseries/Makefile
>> @@ -19,6 +19,7 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
>> obj-$(CONFIG_HVCS) += hvcserver.o
>> obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
>> obj-$(CONFIG_CMM) += cmm.o
>> +obj-$(CONFIG_HTMDUMP) += htmdump.o
>> obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
>> obj-$(CONFIG_LPARCFG) += lparcfg.o
>> obj-$(CONFIG_IBMVIO) += vio.o
>> diff --git a/arch/powerpc/platforms/pseries/htmdump.c b/arch/powerpc/platforms/pseries/htmdump.c
>> new file mode 100644
>> index 000000000000..540cdb7e069c
>> --- /dev/null
>> +++ b/arch/powerpc/platforms/pseries/htmdump.c
>> @@ -0,0 +1,130 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Copyright (C) IBM Corporation, 2024
>> + */
>> +
>> +#define pr_fmt(fmt) "htmdump: " fmt
>> +
>> +#include <linux/bitops.h>
>> +#include <linux/string.h>
>> +#include <linux/init.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/fs.h>
>> +#include <linux/debugfs.h>
>> +#include <linux/slab.h>
>> +#include <linux/memory.h>
>> +#include <linux/memory_hotplug.h>
>> +#include <linux/numa.h>
>> +#include <linux/memblock.h>
>> +#include <asm/machdep.h>
>> +#include <asm/plpar_wrappers.h>
> Do we need all of the above?
> e.g. slab, memory_hotplug etc are not needed IMO.
>
> Maybe only?
>
> #include <asm/hvcall.h>
> #include <asm/io.h>
> #include <asm/machdep.h>
> #include <asm/plpar_wrappers.h>
>
> #include <linux/debugfs.h>
> #include <linux/module.h>
>
> (module.h depending upon if we make it module_init())
>
Yeah, my bad, I should have handled this. Will fix it in v2.
>> +
>> +/* This enables us to keep track of the memory removed from each node. */
>> +struct htmdump_entry {
>> + void *buf;
>> + struct dentry *dir;
>> + char name[16];
>> +};
>> +
>> +static u32 nodeindex = 0;
>> +static u32 nodalchipindex = 0;
>> +static u32 coreindexonchip = 0;
>> +static u32 htmtype = 0;
>> +
>> +#define BUFFER_SIZE PAGE_SIZE
>> +
>> +static ssize_t htmdump_read(struct file *filp, char __user *ubuf,
>> + size_t count, loff_t *ppos)
>> +{
>> + struct htmdump_entry *ent = filp->private_data;
>> + unsigned long page, read_size, available;
>> + loff_t offset;
>> + long rc;
>> +
>> + page = ALIGN_DOWN(*ppos, BUFFER_SIZE);
>> + offset = (*ppos) % BUFFER_SIZE;
>> +
>> + rc = htm_get_dump_hardware(nodeindex, nodalchipindex, coreindexonchip,
>> + htmtype, virt_to_phys(ent->buf), BUFFER_SIZE, page);
>> +
>> + switch(rc) {
>> + case H_SUCCESS:
>> + case H_PARTIAL:
>> + break;
>> + case H_NOT_AVAILABLE:
>> + return 0;
>> + case H_BUSY:
>> + case H_LONG_BUSY_ORDER_1_MSEC:
>> + case H_LONG_BUSY_ORDER_10_MSEC:
>> + case H_LONG_BUSY_ORDER_100_MSEC:
>> + case H_LONG_BUSY_ORDER_1_SEC:
>> + case H_LONG_BUSY_ORDER_10_SEC:
>> + case H_LONG_BUSY_ORDER_100_SEC:
>> + case H_PARAMETER:
>> + case H_P2:
>> + case H_P3:
>> + case H_P4:
>> + case H_P5:
>> + case H_P6:
>> + case H_STATE:
>> + case H_AUTHORITY:
>> + return -EINVAL;
>> + }
>> +
>> + available = BUFFER_SIZE - offset;
>> + read_size = min(count, available);
>> + *ppos += read_size;
>> + return simple_read_from_buffer(ubuf, count, &offset, ent->buf, available);
>> +}
>> +
>> +static const struct file_operations htmdump_fops = {
>> + .llseek = default_llseek,
>> + .read = htmdump_read,
>> + .open = simple_open,
>> +};
>> +
>> +static struct dentry *htmdump_debugfs_dir;
>> +
>> +static int htmdump_init_debugfs(void)
>> +{
>> + struct htmdump_entry *ent;
>> +
>> + ent = kcalloc(1, sizeof(struct htmdump_entry), GFP_KERNEL);
>> + if (!ent) {
>> + pr_err("Failed to allocate ent\n");
>> + return -EINVAL;
>> + }
>> +
>> + ent->buf = kmalloc(BUFFER_SIZE, GFP_KERNEL);
>> + if (!ent->buf) {
>> + pr_err("Failed to allocate htmdump buf\n");
>> + return -ENOMEM;
>> + }
>> +
>> + pr_debug("%s: ent:%lx buf:%lx\n",
>> + __func__, (long unsigned int)ent, (long unsigned int)ent->buf);
>> +
>> + htmdump_debugfs_dir = debugfs_create_dir("htmdump",
>> + arch_debugfs_dir);
>> +
>> + debugfs_create_u32("nodeindex", 0600,
>> + htmdump_debugfs_dir, &nodeindex);
>> + debugfs_create_u32("nodalchipindex", 0600,
>> + htmdump_debugfs_dir, &nodalchipindex);
>> + debugfs_create_u32("coreindexonchip", 0600,
>> + htmdump_debugfs_dir, &coreindexonchip);
>> + debugfs_create_u32("htmtype", 0600,
>> + htmdump_debugfs_dir, &htmtype);
> minor nit: For all of the above. S_IRUSR | S_IWUSR instead of 0600.
>
>> + debugfs_create_file("trace", 0400, htmdump_debugfs_dir, ent, &htmdump_fops);
> maybe S_IRUSR instead of 0400.
>
> (makes it more readable).
OK, will check and change.
Thanks for the review comments.
Maddy
>
>> +
>> + return 0;
>> +}
>> +
>> +static int htmdump_init(void)
> maybe put it into __init section?
>
>> +{
>> + if (htmdump_init_debugfs())
>> + return -EINVAL;
>> +
>> + return 0;
>> +}
>> +machine_device_initcall(pseries, htmdump_init);
>> --
>> 2.45.2
More information about the Linuxppc-dev
mailing list