[PATCH v5 3/5] powerpc/crash hp: add crash hotplug support for kexec_file_load

Thu Jun 23 01:11:12 AEST 2022

On 20/06/2022, 09:01:04, Sourabh Jain wrote:
> Two major changes are done to enable the crash CPU hotplug handler.
> Firstly, updated the kexec load path to prepare kimage for hotplug
> changes, and secondly, implemented the arch specific crash hotplug
> handler.
> 
> On the kexec load path, the memsz allocation for the crash FDT segment
> is updated to ensure that it has sufficient buffer space to accommodate
> future hot add CPUs. Additionally, kimage_arch struct member fdt_index
             ^
           added ?
> is initialized with the index of FDT segment in kexec segment array.

Maybe you should mention that the index of the FDT segment is recorded so
that it does not have to be looked up at each hotplug operation.

> The crash hotplug handler updates the cpus node of crash FDT. While we
> update crash FDT the kexec_crash_image is marked invalid and restored
> after FDT update to avoid race.
> 
> Since memory crash hotplug support is not there yet the crash hotplug
> handler simply warns the user and returns.

It might be good to explain here why CPU hot remove operations are ignored.

> 
> Signed-off-by: Sourabh Jain <sourabhjain at linux.ibm.com>
> ---
>  arch/powerpc/kexec/core_64.c      | 49 ++++++++++++++++++++
>  arch/powerpc/kexec/elf_64.c       | 74 +++++++++++++++++++++++++++++++
>  arch/powerpc/kexec/file_load_64.c |  5 +++
>  3 files changed, 128 insertions(+)
> 
> diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
> index 65b3afb2169a..6d448b55dfad 100644
> --- a/arch/powerpc/kexec/core_64.c
> +++ b/arch/powerpc/kexec/core_64.c
> @@ -465,6 +465,55 @@ int update_cpus_node(void *fdt)
>  	return ret;
>  }
>  
> +#if defined(CONFIG_HOTPLUG_CPU)
> +
> +int crash_hotplug_support(void) { return 1; }
> +
> +/**
> + * arch_crash_hotplug_handler() - Handle hotplug kexec segements changes FDT, elfcorehdr
> + * @image: the active struct kimage
> + * @hp_action: the hot un/plug action being handled
> + * @cpu: when KEXEC_CRASH_HP_ADD/REMOVE_CPU, the cpu affected
> + *
> + * To accurately reflect CPU hot un/plug changes, the FDT
> + * must be updated with the new list of CPUs.
> + */
> +void arch_crash_handle_hotplug_event(struct kimage *image,
> +				     unsigned int hp_action, unsigned int cpu)
> +
> +{
> +	void *fdt;
> +
> +	/* No action needed for CPU hot-unplug */

Why?
I think your comment should explain why CPU hot-unplug operations are ignored.

> +	if (hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
> +		return;
> +
> +	/* crash update on memory hotplug is not support yet */
> +	if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY || hp_action == KEXEC_CRASH_HP_ADD_MEMORY) {
> +		pr_info_once("crash hp: crash update is not supported with memory hotplug\n");
> +		return;
> +	}
> +
> +	/* Must have valid FDT index */
> +	if (!image->arch.fdt_index < 0) {

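        if (image->arch.fdt_index < 0) {

I think this is easier to read. It also fixes the check: the original
condition parses as (!image->arch.fdt_index) < 0, which is always false, so
the error path is never taken.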

> +		pr_err("crash hp: unable to locate FDT segment");
> +		return;
> +	}
> +
> +	fdt = __va((void *)image->segment[image->arch.fdt_index].mem);
> +
> +	/* Temporarily invalidate the crash image while it is replaced */
> +	xchg(&kexec_crash_image, NULL);
> +
> +	/* update FDT to refelect changes to CPU resrouces */
> +	if (update_cpus_node(fdt))
> +		pr_err("crash hp: failed to update crash FDT");
> +
> +	/* The crash image is now valid once again */
> +	xchg(&kexec_crash_image, image);
> +}
> +#endif
> +
>  #ifdef CONFIG_PPC_64S_HASH_MMU
>  /* Values we need to export to the second kernel via the device tree. */
>  static unsigned long htab_base;
> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> index eeb258002d1e..8ef18d6c3c32 100644
> --- a/arch/powerpc/kexec/elf_64.c
> +++ b/arch/powerpc/kexec/elf_64.c
> @@ -24,6 +24,68 @@
>  #include <linux/slab.h>
>  #include <linux/types.h>
>  
> +#include <asm/kvm_book3s.h>
> +#include <asm/kvm_ppc.h>

Are these two KVM-specific headers really required?

> +
> +#if defined(CONFIG_HOTPLUG_CPU)
> +/**
> + * get_cpu_node_sz() - Calculate the space needed to store a CPU device
> + *		       type node in FDT. The calculation is done based on
> + *		       the existing CPU node in unflatten device tree. Loop
> + *		       through all the properties of the very first CPU type
> + *		       device node found in unflatten device tree and returns
> + *		       the sum of the property length and property string size
> + *		       of all properties of a CPU node.

This indentation style looks weird to me.

> + */
> +static int get_cpu_node_sz(void)
> +{
> +	struct device_node *dn = NULL;
There is no need to initialize dn here; it is assigned right after.
> +	struct property *pp;
> +	int cpu_node_size = 0;
> +
> +	dn = of_find_node_by_type(NULL, "cpu");
> +
> +	if (!dn) {
> +		pr_warn("Unable to locate cpu device_type node.\n");
> +		goto out;
You could return 0 here, and remove the initialization of cpu_node_size.
> +	}
> +
> +	/* Every node in FDT starts with FDT_BEGIN_NODE and ends with
> +	 * FDT_END_NODE that takes one byte each.
> +	 */
> +	cpu_node_size = 2;
> +
> +	for_each_property_of_node(dn, pp) {
> +		/**
> +		 * For each property add two bytes extra. One for string null
> +		 * character for property name and other for FDT property start
> +		 * tag FDT_PROP.
> +		 */
> +		cpu_node_size = cpu_node_size + pp->length + strlen(pp->name) + 2;
                cpu_node_size += pp->length + strlen(pp->name) + 2;

This shortens that line a bit.

> +	}
> +
> +out:
> +	return cpu_node_size;
> +}
> +
> +/*
> + * get_crash_fdt_mem_sz() - calcuate mem size for crash kernel FDT
> + * @fdt: pointer to crash kernel FDT
> + *
> + * Calculate the buffer space needed to accommodate more CPU nodes in
> + * crash FDT post capture kernel load due to CPU hotplug events.
> + */
> +static unsigned int get_crash_fdt_mem_sz(void *fdt)
> +{
> +	int fdt_cpu_nodes_sz, offline_cpu_cnt;
> +
> +	offline_cpu_cnt = (num_possible_cpus() - num_present_cpus()) / MAX_SMT_THREADS;
> +	fdt_cpu_nodes_sz = get_cpu_node_sz() * offline_cpu_cnt;
> +
> +	return fdt_totalsize(fdt) + fdt_cpu_nodes_sz;
> +}
> +#endif
> +
>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>  			unsigned long kernel_len, char *initrd,
>  			unsigned long initrd_len, char *cmdline,
> @@ -123,6 +185,18 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>  	kbuf.buf_align = PAGE_SIZE;
>  	kbuf.top_down = true;
>  	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
> +
> +#if defined(CONFIG_HOTPLUG_CPU)
> +	if (image->type == KEXEC_TYPE_CRASH) {
> +		kbuf.memsz = get_crash_fdt_mem_sz(fdt);

Why do you keep the previous assignment of kbuf.memsz from a few lines earlier:

	kbuf.buffer = fdt;
	kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);  <<<<<
	kbuf.buf_align = PAGE_SIZE;
	kbuf.top_down = true;
	kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;

#if defined(CONFIG_HOTPLUG_CPU)
	if (image->type == KEXEC_TYPE_CRASH) {
		kbuf.memsz = get_crash_fdt_mem_sz(fdt);
		fdt_set_totalsize(fdt, kbuf.memsz);
		image->arch.fdt_index = image->nr_segments;
	} else
#endif
	{
		kbuf.memsz = fdt_totalsize(fdt);
	}

And what about kbuf.bufsz?

> +		fdt_set_totalsize(fdt, kbuf.memsz);
> +		image->arch.fdt_index = image->nr_segments;
> +	} else
> +#endif
> +	{
> +		kbuf.memsz = fdt_totalsize(fdt);

Why not set image->arch.fdt_index to -1 here?

> +	}
> +
>  	ret = kexec_add_buffer(&kbuf);
>  	if (ret)
>  		goto out_free_fdt;
> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
> index 57f991b0a9da..8345c4f5316d 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -1116,6 +1116,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
>  		return ret;
>  	}
>  
> +#if defined(CONFIG_HOTPLUG_CPU)
> +	/* Mark fdt_index invalid */
> +	image->arch.fdt_index = -1;

Why do this here instead of in elf64_load()?
I think your patch only applies to the ELF image format, doesn't it?

Cheers,
Laurent.

> +#endif
> +
>  	return kexec_image_probe_default(image, buf, buf_len);
>  }
>