[PATCH v5 16/31] powernv/fadump: process the crashdump by exporting it as /proc/vmcore
Hari Bathini
hbathini at linux.ibm.com
Thu Sep 5 07:01:46 AEST 2019
On 04/09/19 5:12 PM, Michael Ellerman wrote:
> Hari Bathini <hbathini at linux.ibm.com> writes:
>> diff --git a/arch/powerpc/platforms/powernv/opal-fadump.c b/arch/powerpc/platforms/powernv/opal-fadump.c
>> index a755705..10f6086 100644
>> --- a/arch/powerpc/platforms/powernv/opal-fadump.c
>> +++ b/arch/powerpc/platforms/powernv/opal-fadump.c
>> @@ -41,6 +43,37 @@ static void opal_fadump_update_config(struct fw_dump *fadump_conf,
>> fadump_conf->fadumphdr_addr = fdm->fadumphdr_addr;
>> }
>>
>> +/*
>> + * This function is called in the capture kernel to get configuration details
>> + * from metadata setup by the first kernel.
>> + */
>> +static void opal_fadump_get_config(struct fw_dump *fadump_conf,
>> + const struct opal_fadump_mem_struct *fdm)
>> +{
>> + int i;
>> +
>> + if (!fadump_conf->dump_active)
>> + return;
>> +
>> + fadump_conf->boot_memory_size = 0;
>> +
>> + pr_debug("Boot memory regions:\n");
>> + for (i = 0; i < fdm->region_cnt; i++) {
>> + pr_debug("\t%d. base: 0x%llx, size: 0x%llx\n",
>> + (i + 1), fdm->rgn[i].src, fdm->rgn[i].size);
>
> Printing the zero-based array off by one (i + 1) seems confusing.
Hmmm... Numbering the regions from `0` in the debug output seemed inappropriate to me.
>
>> +
>> + fadump_conf->boot_memory_size += fdm->rgn[i].size;
>> + }
>> +
>> + /*
>> + * Start address of reserve dump area (permanent reservation) for
>> + * re-registering FADump after dump capture.
>> + */
>> + fadump_conf->reserve_dump_area_start = fdm->rgn[0].dest;
>> +
>> + opal_fadump_update_config(fadump_conf, fdm);
>> +}
>> +
>> /* Initialize kernel metadata */
>> static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
>> {
>> @@ -215,24 +248,114 @@ static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
>> pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
>> }
>>
>> +/*
>> + * Convert CPU state data saved at the time of crash into ELF notes.
>> + */
>> +static int __init opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf)
>> +{
>> + u32 num_cpus, *note_buf;
>> + struct fadump_crash_info_header *fdh = NULL;
>> +
>> + num_cpus = 1;
>> + /* Allocate buffer to hold cpu crash notes. */
>> + fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
>> + fadump_conf->cpu_notes_buf_size =
>> + PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
>> + note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
>> + if (!note_buf) {
>> + pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
>> + fadump_conf->cpu_notes_buf_size);
>> + return -ENOMEM;
>> + }
>> + fadump_conf->cpu_notes_buf = __pa(note_buf);
>> +
>> + pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
>> + (num_cpus * sizeof(note_buf_t)), note_buf);
>> +
>> + if (fadump_conf->fadumphdr_addr)
>> + fdh = __va(fadump_conf->fadumphdr_addr);
>> +
>> + if (fdh && (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)) {
>> + note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
>> + final_note(note_buf);
>> +
>> + pr_debug("Updating elfcore header (%llx) with cpu notes\n",
>> + fdh->elfcorehdr_addr);
>> + fadump_update_elfcore_header(fadump_conf,
>> + __va(fdh->elfcorehdr_addr));
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static int __init opal_fadump_process(struct fw_dump *fadump_conf)
>> {
>> - return -EINVAL;
>> + struct fadump_crash_info_header *fdh;
>> + int rc = 0;
> > No need to initialise rc there.
>
rc = -EINVAL;
and
>> + if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
>> + return -EINVAL;
>> +
>> + /* Validate the fadump crash info header */
>> + fdh = __va(fadump_conf->fadumphdr_addr);
>> + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
>> + pr_err("Crash info header is not valid.\n");
>> + return -EINVAL;
return rc; ??
>> + }
>> +
>> + /*
>> + * TODO: To build cpu notes, find a way to map PIR to logical id.
>> + * Also, we may need different method for pseries and powernv.
>> + * The currently booted kernel could have a different PIR to
>> + * logical id mapping. So, try saving info of previous kernel's
>> + * paca to get the right PIR to logical id mapping.
>> + */
>
> That TODO is removed by the end of the series, so please just omit it entirely.
>
>> + rc = opal_fadump_build_cpu_notes(fadump_conf);
>> + if (rc)
>> + return rc;
>
> I think this all runs early in boot, so we don't need to worry about
> another CPU seeing the partially initialised core due to there being no
> barrier here before we set elfcorehdr_addr?
>
elfcorehdr_addr is processed in fs/proc/vmcore.c during fs_initcall(), and the data within the core
is processed much later (from the initrd). So, I think we are good here...
>> + /*
>> + * We are done validating dump info and elfcore header is now ready
>> + * to be exported. set elfcorehdr_addr so that vmcore module will
>> + * export the elfcore header through '/proc/vmcore'.
>> + */
>> + elfcorehdr_addr = fdh->elfcorehdr_addr;
>
>> @@ -283,5 +407,42 @@ int __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, ulong node)
>> fadump_conf->ops = &opal_fadump_ops;
>> fadump_conf->fadump_supported = 1;
>>
>> + /*
>> + * Check if dump has been initiated on last reboot.
>> + */
>> + prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
>> + if (prop) {
>
> if (!prop)
> return 1;
>
> And then everything below can be unindented.
>
>> + u64 addr = 0;
>> + s64 ret;
>> + const struct opal_fadump_mem_struct *r_opal_fdm_active;
>
> *
> / \
> /_\
> |
>
:) Will take care of such instances...
I think this should be added to checkpatch.pl
>> +
>> + ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
>> + if ((ret != OPAL_SUCCESS) || !addr) {
>> + pr_err("Failed to get Kernel metadata (%lld)\n", ret);
>> + return 1;
>> + }
>> +
>> + addr = be64_to_cpu(addr);
>> + pr_debug("Kernel metadata addr: %llx\n", addr);
>> +
>> + opal_fdm_active = __va(addr);
>> + r_opal_fdm_active = (void *)addr;
>
> Why do we need the r_ version?
>
> We're called early in boot, so we are still in real mode, but that's
> fine the CPU will ignore the top bits of the virtual address for us.
I don't know whether I am missing a trick here or whether there is a bug somewhere,
but accessing `opal_fdm_active->version` is not working for me.
- Hari
More information about the Linuxppc-dev
mailing list