[RFC PATCH 3/5] powerpc/fadump: enable fadump support on powernv platform

Hari Bathini hbathini at linux.ibm.com
Tue May 15 14:59:15 AEST 2018


From: Hari Bathini <hbathini at linux.vnet.ibm.com>

Firmware-assisted dump support is enabled for POWERNV platform in P9
firmware. Make the corresponding updates in kernel to enable fadump
support on POWERNV platform.

Signed-off-by: Hari Bathini <hbathini at linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig                            |    2 
 arch/powerpc/include/asm/opal-api.h             |    4 
 arch/powerpc/include/asm/opal.h                 |    1 
 arch/powerpc/kernel/fadump.c                    |  397 ++++++++++++++++++++---
 arch/powerpc/kernel/fadump_internal.h           |   27 ++
 arch/powerpc/platforms/powernv/Makefile         |    1 
 arch/powerpc/platforms/powernv/opal-wrappers.S  |    1 
 arch/powerpc/platforms/powernv/powernv_fadump.c |  337 ++++++++++++++++++++
 arch/powerpc/platforms/powernv/powernv_fadump.h |   63 ++++
 arch/powerpc/platforms/pseries/pseries_fadump.c |    8 
 10 files changed, 783 insertions(+), 58 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.c
 create mode 100644 arch/powerpc/platforms/powernv/powernv_fadump.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c32a181..d749f1f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -599,7 +599,7 @@ config CRASH_DUMP
 
 config FA_DUMP
 	bool "Firmware-assisted dump"
-	depends on PPC64 && PPC_RTAS
+	depends on PPC64 && (PPC_RTAS || PPC_POWERNV)
 	select CRASH_CORE
 	select CRASH_DUMP
 	help
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index d886a5b..75e8925 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -206,7 +206,8 @@
 #define OPAL_NPU_TL_SET				161
 #define OPAL_PCI_GET_PBCQ_TUNNEL_BAR		164
 #define OPAL_PCI_SET_PBCQ_TUNNEL_BAR		165
-#define OPAL_LAST				165
+#define OPAL_CONFIGURE_FADUMP			167
+#define OPAL_LAST				167
 
 /* Device tree flags */
 
@@ -1040,6 +1041,7 @@ enum OpalSysCooling {
 enum {
 	OPAL_REBOOT_NORMAL		= 0,
 	OPAL_REBOOT_PLATFORM_ERROR	= 1,
+	OPAL_REBOOT_MPIPL		= 3,
 };
 
 /* Argument to OPAL_PCI_TCE_KILL */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 03e1a92..4c1f483 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -43,6 +43,7 @@ int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
 				uint64_t PE_handle);
 int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
 			uint64_t rate_phys, uint32_t size);
+int64_t opal_configure_fadump(uint64_t command, void *data, uint64_t data_size);
 int64_t opal_console_write(int64_t term_number, __be64 *length,
 			   const uint8_t *buffer);
 int64_t opal_console_read(int64_t term_number, __be64 *length,
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 88fafe1..a27e4af 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -53,8 +53,12 @@ static struct cma *fadump_cma;
 struct fadump_ops_t *fadump_ops;
 
 static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
+struct fadump_memory_range crash_memory_ranges[INIT_CRASHMEM_RANGES];
 int crash_mem_ranges;
+struct fadump_memory_range reserved_ranges[INIT_MEMBLOCK_REGIONS];
+int reserved_ranges_cnt;
+struct fadump_memory_range memory_ranges[2 * INIT_MEMBLOCK_REGIONS];
+int memory_ranges_cnt;
 
 #ifdef CONFIG_CMA
 /*
@@ -120,6 +124,9 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
 	if (strcmp(uname, "rtas") == 0)
 		return pseries_dt_scan_fadump(&fw_dump, node);
 
+	if (strcmp(uname, "ibm,dump") == 0)
+		return powernv_dt_scan_fadump(&fw_dump, node);
+
 	return 0;
 }
 
@@ -156,6 +163,8 @@ int is_fadump_active(void)
 /* Print firmware assisted dump configurations for debugging purpose. */
 static void fadump_show_config(void)
 {
+	int i;
+
 	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
 			(fw_dump.fadump_supported ? "present" : "no support"));
 
@@ -170,6 +179,13 @@ static void fadump_show_config(void)
 	pr_debug("    CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
 	pr_debug("    HPTE region size   : %lx\n", fw_dump.hpte_region_size);
 	pr_debug("Boot memory size  : %lx\n", fw_dump.boot_memory_size);
+	pr_debug("Real memory region hole size  : %lx\n",
+		 fw_dump.boot_memory_hole_size);
+	pr_debug("Real meory regions count : %lx\n", fw_dump.rmr_regions_cnt);
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		pr_debug("%d. RMR base = %lx, size = %lx\n", (i+1),
+			 fw_dump.rmr_src_addr[i], fw_dump.rmr_src_size[i]);
+	}
 }
 
 /**
@@ -243,6 +259,157 @@ static inline unsigned long fadump_calculate_reserve_size(void)
 	return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
 }
 
+static void __init fadump_get_reserved_ranges(void)
+{
+	unsigned long i, j, dt_root;
+	unsigned long long base, size;
+	struct fadump_memory_range tmp_range;
+	const __be32 *prop;
+	int len, idx;
+
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
+	dt_root = of_get_flat_dt_root();
+
+	prop = of_get_flat_dt_prop(dt_root, "reserved-ranges", &len);
+
+	if (!prop)
+		return;
+
+	/*
+	 * Each reserved range is an (address,size) pair, 2 cells each,
+	 * totalling 4 cells per range.
+	 */
+	for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+		u64 base, size;
+
+		base = of_read_number(prop + (i * 4) + 0, 2);
+		size = of_read_number(prop + (i * 4) + 2, 2);
+		if (!size)
+			continue;
+
+		reserved_ranges[reserved_ranges_cnt].base = base;
+		reserved_ranges[reserved_ranges_cnt].size = size;
+		reserved_ranges_cnt++;
+
+	}
+
+	if (!reserved_ranges_cnt)
+		return;
+
+	/* Sort the reserved ranges */
+	for (i = 0; i < reserved_ranges_cnt; i++) {
+		idx = i;
+		for (j = i + 1; j < reserved_ranges_cnt; j++) {
+			if (reserved_ranges[idx].base > reserved_ranges[j].base)
+				idx = j;
+		}
+		if (idx != i) {
+			tmp_range = reserved_ranges[idx];
+			reserved_ranges[idx] = reserved_ranges[i];
+			reserved_ranges[i] = tmp_range;
+		}
+	}
+
+	/* Merge adjacent reserved ranges */
+	idx = 0;
+	for (i = 1; i < reserved_ranges_cnt; i++) {
+		base = reserved_ranges[i-1].base;
+		size = reserved_ranges[i-1].size;
+		if (reserved_ranges[i].base == (base + size))
+			reserved_ranges[idx].size += reserved_ranges[i].size;
+		else {
+			idx++;
+			if (i == idx)
+				continue;
+
+			reserved_ranges[idx] = reserved_ranges[i];
+		}
+	}
+	reserved_ranges_cnt = idx + 1;
+}
+
+static inline void fadump_add_memory_range(unsigned long long base,
+					   unsigned long long size)
+{
+	if (!size)
+		return;
+
+	pr_debug("memory_range[%d] [%#016llx-%#016llx), %#llx bytes\n",
+		 memory_ranges_cnt, base, base + size, size);
+	memory_ranges[memory_ranges_cnt].base = base;
+	memory_ranges[memory_ranges_cnt].size = size;
+	memory_ranges_cnt++;
+}
+
+static void fadump_setup_memory_ranges(void)
+{
+	unsigned long i, j;
+	unsigned long long base, end, size;
+	struct memblock_region *reg;
+	struct fadump_memory_range tmp_ranges[INIT_MEMBLOCK_REGIONS];
+	int tmp_ranges_cnt;
+
+	/* get memory ranges */
+	tmp_ranges_cnt = 0;
+	for_each_memblock(memory, reg) {
+		tmp_ranges[tmp_ranges_cnt].base = (unsigned long long)reg->base;
+		tmp_ranges[tmp_ranges_cnt].size = (unsigned long long)reg->size;
+		tmp_ranges_cnt++;
+	}
+
+	/* exclude reserved ranges */
+	memory_ranges_cnt = 0;
+	for (i = 0; i < tmp_ranges_cnt; i++) {
+		unsigned long long mem_base, mem_end, rsrv_base, rsrv_end;
+		int add = 1;
+
+		base = mem_base = tmp_ranges[i].base;
+		end = mem_end = base + tmp_ranges[i].size;
+		for (j = 0; j < reserved_ranges_cnt; j++) {
+			rsrv_base = reserved_ranges[j].base;
+			rsrv_end  = rsrv_base + reserved_ranges[j].size;
+
+			if (mem_base > rsrv_end)
+				continue;
+
+			if ((j < (reserved_ranges_cnt - 1)) &&
+			    (reserved_ranges[j + 1].base < mem_end))
+				mem_end = reserved_ranges[j + 1].base;
+
+			if ((rsrv_base < mem_end) && (rsrv_end > mem_base)) {
+				if ((mem_base < rsrv_base) &&
+				    (mem_end > rsrv_end)) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				} else if (mem_base < rsrv_base) {
+					size = rsrv_base - mem_base;
+					fadump_add_memory_range(mem_base, size);
+				} else if (mem_end > rsrv_end) {
+					size = mem_end - rsrv_end;
+					fadump_add_memory_range(rsrv_end, size);
+				}
+
+				add = 0;
+			}
+
+			if (mem_end == end)
+				break;
+
+			mem_base = mem_end;
+			mem_end = end;
+			add = 1;
+		}
+
+		if (add)
+			fadump_add_memory_range(mem_base, mem_end - mem_base);
+	}
+}
+
+
 /*
  * Calculate the total memory size required to be reserved for
  * firmware-assisted dump registration.
@@ -264,24 +431,113 @@ static unsigned long get_fadump_area_size(void)
 	return size;
 }
 
-static void __init fadump_reserve_crash_area(unsigned long base,
-					     unsigned long size)
+/* Reserve the part of every memory range that lies at or above @base. */
+static void __init fadump_reserve_crash_area(unsigned long base)
 {
-	struct memblock_region *reg;
-	unsigned long mstart, mend, msize;
+	int i;
+	unsigned long mstart, msize;
 
-	for_each_memblock(memory, reg) {
-		mstart = max_t(unsigned long, base, reg->base);
-		mend = reg->base + reg->size;
-		mend = min(base + size, mend);
-
-		if (mstart < mend) {
-			msize = mend - mstart;
-			memblock_reserve(mstart, msize);
-			pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
-				(msize >> 20), mstart);
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		mstart = memory_ranges[i].base;
+		msize = memory_ranges[i].size;
+		if ((mstart + msize) <= base)
+			continue;
+		/* Trim a range straddling @base down to its upper part. */
+		if (mstart < base) {
+			msize -= (base - mstart);
+			mstart = base;
 		}
+		pr_info("Reserving %luMB of memory at %#016lx for saving crash dump\n",
+			(msize >> 20), mstart);
+		memblock_reserve(mstart, msize);
+	}
+}
+
+static int __init add_rmr_region(unsigned long rmr_start,
+				 unsigned long rmr_size)
+{
+	int i = fw_dump.rmr_regions_cnt;
+
+	if (i >= MAX_REAL_MEM_REGIONS)	/* table full: count left untouched */
+		return 0;
+	pr_debug("Added real memory range[%d] [%#016lx-%#016lx)\n",
+		 i, rmr_start, (rmr_start + rmr_size));
+	fw_dump.rmr_src_addr[i] = rmr_start;
+	fw_dump.rmr_src_size[i] = rmr_size;
+	fw_dump.rmr_regions_cnt++;	/* count only regions actually stored */
+	return 1;
+}
+
+/*
+ * Platforms like PowerNV have an upper limit on the size.
+ * If 'rmr_size' is bigger than that limit, split this memory range
+ * into multiple entries.
+ */
+static int __init add_rmr_regions(unsigned long rmr_start,
+				  unsigned long rmr_size)
+{
+	unsigned long rstart, rsize, max_size;
+	int ret = 1;
+
+	rstart = rmr_start;
+	max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : rmr_size;
+	while (rmr_size) {
+		if (rmr_size > max_size)
+			rsize = max_size;
+		else
+			rsize = rmr_size;
+
+		ret = add_rmr_region(rstart, rsize);
+		if (!ret)
+			break;
+
+		rmr_size -= rsize;
+		rstart += rsize;
 	}
+
+	return ret;
+}
+
+static int __init fadump_get_rmr_regions(void)
+{
+	int i, ret = 1;
+	unsigned long base, size, last_end;
+	unsigned long mem_size = fw_dump.boot_memory_size;
+
+	fw_dump.rmr_regions_cnt = 0;
+	fw_dump.boot_memory_hole_size = 0;
+
+	/*
+	 * TODO: Extend support for multiple real memory regions on
+	 *       pseries platform too.
+	 */
+	if (fw_dump.fadump_platform == FADUMP_PLATFORM_PSERIES) {
+		ret = add_rmr_regions(RMA_START, fw_dump.boot_memory_size);
+		return ret;
+	}
+
+	last_end = memory_ranges[0].base;
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		base = memory_ranges[i].base;
+		size = memory_ranges[i].size;
+
+		if (base > last_end)
+			fw_dump.boot_memory_hole_size += (base - last_end);
+
+		if (size >= mem_size) {
+			ret = add_rmr_regions(base, mem_size);
+			break;
+		}
+
+		mem_size -= size;
+		ret = add_rmr_regions(base, size);
+		if (!ret)
+			break;
+
+		last_end = base + size;
+	}
+
+	return ret;
 }
 
 int __init fadump_reserve_mem(void)
@@ -297,6 +553,10 @@ int __init fadump_reserve_mem(void)
 		fw_dump.fadump_enabled = 0;
 		return 0;
 	}
+
+	fadump_get_reserved_ranges();
+	fadump_setup_memory_ranges();
+
 	/*
 	 * Initialize boot memory size
 	 * If dump is active then we have already calculated the size during
@@ -311,6 +571,11 @@ int __init fadump_reserve_mem(void)
 						 FADUMP_CMA_ALIGNMENT);
 #endif
 		fw_dump.rmr_source_len = fw_dump.boot_memory_size;
+		if (!fadump_get_rmr_regions()) {
+			fw_dump.fadump_enabled = 0;
+			pr_err("Too many holes in boot memory area to enable fadump\n");
+			return 0;
+		}
 	}
 
 	size = get_fadump_area_size();
@@ -335,6 +600,7 @@ int __init fadump_reserve_mem(void)
 	else
 		memory_boundary = memblock_end_of_DRAM();
 
+	base = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
 	if (fw_dump.dump_active) {
 #ifdef CONFIG_HUGETLB_PAGE
 		/*
@@ -348,11 +614,9 @@ int __init fadump_reserve_mem(void)
 		 * If last boot has crashed then reserve all the memory
 		 * above boot_memory_size so that we don't touch it until
 		 * dump is written to disk by userspace tool. This memory
-		 * will be released for general use once the dump is saved.
+		 * can be released for general use by invalidating fadump.
 		 */
-		base = fw_dump.boot_memory_size;
-		size = memory_boundary - base;
-		fadump_reserve_crash_area(base, size);
+		fadump_reserve_crash_area(base);
 
 		fw_dump.fadumphdr_addr =
 			fadump_ops->get_meta_area_start(&fw_dump);
@@ -366,23 +630,22 @@ int __init fadump_reserve_mem(void)
 		 * use memblock_find_in_range() here since it doesn't allocate
 		 * from bottom to top.
 		 */
-		for (base = fw_dump.boot_memory_size;
-		     base <= (memory_boundary - size);
-		     base += size) {
+		while (base <= (memory_boundary - size)) {
 			if (memblock_is_region_memory(base, size) &&
 			    !memblock_is_region_reserved(base, size))
 				break;
+
+			base += size;
 		}
+
 		if ((base > (memory_boundary - size)) ||
 		    memblock_reserve(base, size)) {
 			pr_err("Failed to reserve memory\n");
 			return 0;
 		}
 
-		pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
-			"assisted dump (System RAM: %ldMB)\n",
-			(unsigned long)(size >> 20),
-			(unsigned long)(base >> 20),
+		pr_info("Reserved %ldMB of memory at %#016lx (System RAM: %ldMB)\n",
+			(unsigned long)(size >> 20), base,
 			(unsigned long)(memblock_phys_mem_size() >> 20));
 
 		fw_dump.reserve_dump_area_start = base;
@@ -543,23 +806,28 @@ static int fadump_init_elfcore_header(char *bufp)
  */
 static void fadump_setup_crash_memory_ranges(void)
 {
-	struct memblock_region *reg;
-	unsigned long long start, end;
+	unsigned long long start, end, offset;
+	int i;
 
 	pr_debug("Setup crash memory ranges.\n");
 	crash_mem_ranges = 0;
+	offset = fw_dump.boot_memory_size + fw_dump.boot_memory_hole_size;
+
 	/*
-	 * add the first memory chunk (RMA_START through boot_memory_size) as
-	 * a separate memory chunk. The reason is, at the time crash firmware
-	 * will move the content of this memory chunk to different location
-	 * specified during fadump registration. We need to create a separate
-	 * program header for this chunk with the correct offset.
+	 * Add real memory region(s) whose content is going to be moved to
+	 * a different location, specified during fadump registration, by
+	 * firmware at the time of crash. We need to create separate program
+	 * header(s) for this memory chunk with the correct offset.
 	 */
-	fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		start = fw_dump.rmr_src_addr[i];
+		end = start + fw_dump.rmr_src_size[i];
+		fadump_add_crash_memory(start, end);
+	}
 
-	for_each_memblock(memory, reg) {
-		start = (unsigned long long)reg->base;
-		end = start + (unsigned long long)reg->size;
+	for (i = 0; i < memory_ranges_cnt; i++) {
+		start = memory_ranges[i].base;
+		end = start + memory_ranges[i].size;
 
 		/*
 		 * skip the first memory chunk that is already added (RMA_START
@@ -567,9 +835,9 @@ static void fadump_setup_crash_memory_ranges(void)
 		 * when RMA_START changes to a non-zero value.
 		 */
 		BUILD_BUG_ON(RMA_START != 0);
-		if (start < fw_dump.boot_memory_size) {
-			if (end > fw_dump.boot_memory_size)
-				start = fw_dump.boot_memory_size;
+		if (start < offset) {
+			if (end > offset)
+				start = offset;
 			else
 				continue;
 		}
@@ -586,17 +854,32 @@ static void fadump_setup_crash_memory_ranges(void)
  */
 static inline unsigned long fadump_relocate(unsigned long paddr)
 {
-	if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
-		return fw_dump.rmr_destination_addr + paddr;
-	else
-		return paddr;
+	unsigned long rstart, rend, offset = 0;
+	int i;
+
+	for (i = 0; i < fw_dump.rmr_regions_cnt; i++) {
+		rstart = fw_dump.rmr_src_addr[i];
+		rend = rstart + fw_dump.rmr_src_size[i];
+		/*
+		 * Regions are packed back to back at the destination, so
+		 * rebase on the region start before adding its offset.
+		 */
+		if (paddr >= rstart && paddr < rend)
+			return fw_dump.rmr_destination_addr + offset +
+				(paddr - rstart);
+
+		offset += fw_dump.rmr_src_size[i];
+	}
+
+	return paddr;
 }
 
 static int fadump_create_elfcore_headers(char *bufp)
 {
 	struct elfhdr *elf;
 	struct elf_phdr *phdr;
-	int i;
+	unsigned long long raddr, offset;
+	int i, j;
 
 	fadump_init_elfcore_header(bufp);
 	elf = (struct elfhdr *)bufp;
@@ -639,9 +922,12 @@ static int fadump_create_elfcore_headers(char *bufp)
 	(elf->e_phnum)++;
 
 	/* setup PT_LOAD sections. */
-
+	j = 0;
+	offset = 0;
+	raddr = fw_dump.rmr_src_addr[0];
 	for (i = 0; i < crash_mem_ranges; i++) {
 		unsigned long long mbase, msize;
+
 		mbase = crash_memory_ranges[i].base;
 		msize = crash_memory_ranges[i].size;
 
@@ -654,13 +940,17 @@ static int fadump_create_elfcore_headers(char *bufp)
 		phdr->p_flags	= PF_R|PF_W|PF_X;
 		phdr->p_offset	= mbase;
 
-		if (mbase == RMA_START) {
+		if (mbase == raddr) {
 			/*
 			 * The entire RMA region will be moved by firmware
 			 * to the specified destination_address. Hence set
 			 * the correct offset.
 			 */
-			phdr->p_offset = fw_dump.rmr_destination_addr;
+			phdr->p_offset = fw_dump.rmr_destination_addr + offset;
+			if (j < (fw_dump.rmr_regions_cnt - 1)) {
+				offset += fw_dump.rmr_src_size[j];
+				raddr = fw_dump.rmr_src_addr[++j];
+			}
 		}
 
 		phdr->p_paddr = mbase;
@@ -707,6 +997,7 @@ static int register_fadump(void)
 	if (!fw_dump.reserve_dump_area_size)
 		return -ENODEV;
 
+	fadump_setup_memory_ranges();
 	fadump_setup_crash_memory_ranges();
 
 	addr = fadump_ops->get_meta_area_start(&fw_dump);
@@ -822,14 +1113,14 @@ static void fadump_invalidate_release_mem(void)
 	 * later for releasing the memory for general use.
 	 */
 	reserved_area_start = fw_dump.reserve_dump_area_start;
-	reserved_area_end = reserved_area_start +
-			fw_dump.reserve_dump_area_size;
+	reserved_area_end =
+		memory_limit ? memory_limit : memblock_end_of_DRAM();
+
 	/*
-	 * Setup reserve_dump_area_start and its size so that we can
-	 * reuse this reserved memory for Re-registration.
+	 * Setup reserve_dump_area_start so that we can reuse this
+	 * reserved memory for Re-registration.
 	 */
 	fw_dump.reserve_dump_area_start = destination_address;
-	fw_dump.reserve_dump_area_size = get_fadump_area_size();
 
 	fadump_release_memory(reserved_area_start, reserved_area_end);
 	if (fw_dump.cpu_notes_buf) {
diff --git a/arch/powerpc/kernel/fadump_internal.h b/arch/powerpc/kernel/fadump_internal.h
index 3791da7..eae4b55 100644
--- a/arch/powerpc/kernel/fadump_internal.h
+++ b/arch/powerpc/kernel/fadump_internal.h
@@ -49,6 +49,7 @@
 
 /* Firmware-Assited Dump platforms */
 #define FADUMP_PLATFORM_PSERIES		1
+#define FADUMP_PLATFORM_POWERNV		2
 
 #define FADUMP_CPU_ID_MASK		((1UL << 32) - 1)
 
@@ -92,11 +93,14 @@ struct fadump_crash_info_header {
 /* Crash memory ranges */
 #define INIT_CRASHMEM_RANGES	(INIT_MEMBLOCK_REGIONS + 2)
 
-struct fad_crash_memory_ranges {
+struct fadump_memory_range {
 	unsigned long long	base;
 	unsigned long long	size;
 };
 
+/* Maximum no. of real memory regions supported by the kernel */
+#define MAX_REAL_MEM_REGIONS		6
+
 /* Firmware-assisted dump configuration details. */
 struct fw_dump {
 	unsigned long	cpu_state_data_size;
@@ -114,6 +118,17 @@ struct fw_dump {
 	unsigned long	rmr_source_len;
 	unsigned long	rmr_destination_addr;
 
+	unsigned long	boot_memory_hole_size;
+	unsigned long	rmr_regions_cnt;
+	unsigned long	rmr_src_addr[MAX_REAL_MEM_REGIONS];
+	unsigned long	rmr_src_size[MAX_REAL_MEM_REGIONS];
+
+	/*
+	 * Maximum size supported by firmware to copy from source to
+	 * destination address per entry.
+	 */
+	unsigned long	max_copy_size;
+
 	int		ibm_configure_kernel_dump;
 
 	unsigned long	fadump_enabled:1;
@@ -157,4 +172,14 @@ pseries_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
 }
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+extern int powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node);
+#else
+static inline int
+powernv_dt_scan_fadump(struct fw_dump *fadump_config, ulong node)
+{
+	return 1;
+}
+#endif
+
 #endif /* __PPC64_FA_DUMP_INTERNAL_H__ */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 703a350..0d106b5 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -6,6 +6,7 @@ obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 obj-y			+= opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
+obj-$(CONFIG_FA_DUMP)	+= powernv_fadump.o
 obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 3da30c2..20bbb9c 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -325,3 +325,4 @@ OPAL_CALL(opal_npu_spa_clear_cache,		OPAL_NPU_SPA_CLEAR_CACHE);
 OPAL_CALL(opal_npu_tl_set,			OPAL_NPU_TL_SET);
 OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,		OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
 OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,		OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_configure_fadump,		OPAL_CONFIGURE_FADUMP);
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.c b/arch/powerpc/platforms/powernv/powernv_fadump.c
new file mode 100644
index 0000000..6d4b515
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.c
@@ -0,0 +1,337 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini at linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#undef DEBUG
+#define pr_fmt(fmt) "powernv fadump: " fmt
+
+#include <linux/string.h>
+#include <linux/memblock.h>
+#include <linux/bug.h>
+#include <linux/seq_file.h>
+#include <linux/crash_dump.h>
+
+#include <asm/page.h>
+#include <asm/prom.h>
+#include <asm/opal.h>
+#include <asm/fadump.h>
+
+#include "../../kernel/fadump_internal.h"
+#include "powernv_fadump.h"
+
+static struct powernv_fadump_mem_struct fdm;
+static const struct powernv_fadump_mem_struct *fdm_active;
+unsigned long fdm_actual_size;
+
+static void update_fadump_config(struct fw_dump *fadump_conf,
+				 const struct powernv_fadump_mem_struct *fdm)
+{
+	unsigned long base, size, last_end;
+	int section_cnt = be16_to_cpu(fdm->section_count);
+	int unused_sections  = (POWERNV_MAX_SECTIONS - section_cnt);
+	int i, j;
+
+	pr_debug("section_cnt: %d\n", section_cnt);
+	WARN_ON(unused_sections < 0);
+	fdm_actual_size = sizeof(*fdm) -
+		(unused_sections * sizeof(struct powernv_fadump_section));
+
+	/*
+	 * The destination address of the first real memory region
+	 * entry is where the real memory regions get copied to.
+	 */
+	fadump_conf->rmr_destination_addr = 0;
+	for (i = 0; i < section_cnt; i++) {
+		if (fdm->section[i].src_type ==
+		    POWERNV_FADUMP_REAL_MODE_REGION) {
+			fadump_conf->rmr_destination_addr =
+				be64_to_cpu(fdm->section[i].dest_addr);
+			break;
+		}
+	}
+	pr_debug("Destination address of real memory regions: %#016lx\n",
+		 fadump_conf->rmr_destination_addr);
+
+	if (fadump_conf->dump_active) {
+		j = 0;
+		last_end = 0;
+		fadump_conf->rmr_source_len = 0;
+		fadump_conf->boot_memory_hole_size = 0;
+		for (i = 0; i < section_cnt; i++) {
+			if (fdm->section[i].src_type ==
+			    POWERNV_FADUMP_REAL_MODE_REGION) {
+				base = be64_to_cpu(fdm->section[i].src_addr);
+				size = be64_to_cpu(fdm->section[i].src_size);
+				pr_debug("%d. RMR base: 0x%lx, size: 0x%lx\n",
+					 (i + 1), base, size);
+
+				fadump_conf->rmr_src_addr[j] = base;
+				fadump_conf->rmr_src_size[j] = size;
+				fadump_conf->rmr_source_len += size;
+
+				if (base > last_end) {
+					fadump_conf->boot_memory_hole_size +=
+						(base - last_end);
+				}
+
+				last_end = base + size;
+				j++;
+			}
+		}
+		fadump_conf->rmr_regions_cnt = j;
+		pr_debug("Real memory regions count: %lu\n",
+			 fadump_conf->rmr_regions_cnt);
+	}
+}
+
+static ulong powernv_init_fadump_mem_struct(struct fw_dump *fadump_conf,
+					    ulong addr)
+{
+	int i, section_cnt = 0;
+
+	fdm.section_size = cpu_to_be16(sizeof(struct powernv_fadump_section));
+
+	/* RMA region sections */
+	for (i = 0; i < fadump_conf->rmr_regions_cnt; i++) {
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_type  =
+			POWERNV_FADUMP_REAL_MODE_REGION;
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_addr  =
+			cpu_to_be64(fadump_conf->rmr_src_addr[i]);
+		fdm.section[RMR_REGION_INPUT_IDX + i].dest_addr =
+			cpu_to_be64(addr);
+		fdm.section[RMR_REGION_INPUT_IDX + i].src_size  =
+			fdm.section[RMR_REGION_INPUT_IDX + i].dest_size =
+			cpu_to_be64(fadump_conf->rmr_src_size[i]);
+
+		section_cnt++;
+		addr += fadump_conf->rmr_src_size[i];
+	}
+
+	fdm.section_count = cpu_to_be16(section_cnt);
+	update_fadump_config(fadump_conf, &fdm);
+
+	return addr;
+}
+
+static int powernv_register_fadump(struct fw_dump *fadump_conf)
+{
+	int rc, err = -EIO;
+
+	rc = opal_configure_fadump(FADUMP_REGISTER, &fdm, fdm_actual_size);
+	switch (rc) {
+	default:
+		pr_err("Failed to register. Unknown Error(%d).\n", rc);
+		break;
+	case OPAL_UNSUPPORTED:	/* clears the supported/enabled flags */
+		pr_err("Support not available.\n");
+		fadump_conf->fadump_supported = 0;
+		fadump_conf->fadump_enabled = 0;
+		break;
+	case OPAL_INTERNAL_ERROR:
+		pr_err("Failed to register. Hardware Error(%d).\n", rc);
+		break;
+	case OPAL_PARAMETER:
+		pr_err("Failed to register. Parameter Error(%d).\n", rc);
+		break;
+	case OPAL_PERMISSION:	/* still marked registered; returns -EEXIST */
+		pr_err("Already registered!\n");
+		fadump_conf->dump_registered = 1;
+		err = -EEXIST;
+		break;
+	case OPAL_SUCCESS:	/* not a failure, so log at info level */
+		pr_info("Registration is successful!\n");
+		fadump_conf->dump_registered = 1;
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static int powernv_unregister_fadump(struct fw_dump *fadump_conf)
+{
+	int rc;
+
+	rc = opal_configure_fadump(FADUMP_UNREGISTER, &fdm, fdm_actual_size);
+	if (rc) {
+		pr_err("Failed to un-register - unexpected Error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_registered = 0;
+	return 0;
+}
+
+static ulong powernv_get_preserv_area_start(struct fw_dump *fadump_conf)
+{
+	return fadump_conf->rmr_destination_addr;
+}
+
+static ulong powernv_get_meta_area_start(struct fw_dump *fadump_conf)
+{
+	return (fadump_conf->rmr_destination_addr +
+		fadump_conf->rmr_source_len);
+}
+
+static int powernv_invalidate_fadump(struct fw_dump *fadump_conf)
+{
+	int rc;
+
+	rc = opal_configure_fadump(FADUMP_INVALIDATE, (void *)fdm_active,
+				   fdm_actual_size);
+	if (rc) {
+		pr_err("Failed to invalidate - unexpected Error(%d).\n", rc);
+		return -EIO;
+	}
+
+	fadump_conf->dump_active = 0;
+	fdm_active = NULL;
+	return 0;
+}
+
+/*
+ * Read CPU state dump data and convert it into ELF notes.
+ * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
+ * used to access the data to allow for additional fields to be added without
+ * affecting compatibility. Each list of registers for a CPU starts with
+ * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
+ * 8 Byte ASCII identifier and 8 Byte register value. The register entry
+ * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
+ * of register value. For more details refer to PAPR document.
+ *
+ * Only for the crashing cpu we ignore the CPU dump data and get exact
+ * state from fadump crash info structure populated by first kernel at the
+ * time of crash.
+ */
+static int __init fadump_build_cpu_notes(struct fw_dump *fadump_conf)
+{
+	u32 num_cpus = 1, *note_buf;
+	struct fadump_crash_info_header *fdh = NULL;
+
+	/* Allocate buffer to hold cpu crash notes. */
+	fadump_conf->cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
+	fadump_conf->cpu_notes_buf_size =
+		PAGE_ALIGN(fadump_conf->cpu_notes_buf_size);
+	note_buf = fadump_cpu_notes_buf_alloc(fadump_conf->cpu_notes_buf_size);
+	if (!note_buf) {
+		pr_err("Failed to allocate 0x%lx bytes for cpu notes buffer\n",
+		       fadump_conf->cpu_notes_buf_size);
+		return -ENOMEM;
+	}
+	fadump_conf->cpu_notes_buf = __pa(note_buf);
+
+	pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
+		 (num_cpus * sizeof(note_buf_t)), note_buf);
+
+	if (fadump_conf->fadumphdr_addr)
+		fdh = __va(fadump_conf->fadumphdr_addr);
+
+	if (fdh && (fdh->crashing_cpu != CPU_UNKNOWN)) {
+		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+		final_note(note_buf);
+
+		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
+			 fdh->elfcorehdr_addr);
+		fadump_update_elfcore_header(fadump_conf,
+					     __va(fdh->elfcorehdr_addr));
+	}
+
+	return 0;
+}
+
+static int __init powernv_process_fadump(struct fw_dump *fadump_conf)
+{
+	struct fadump_crash_info_header *fdh;
+	int rc = 0;
+
+	if (!fdm_active || !fadump_conf->fadumphdr_addr)
+		return -EINVAL;
+
+	/* Validate the fadump crash info header */
+	fdh = __va(fadump_conf->fadumphdr_addr);
+	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+		pr_err("Crash info header is not valid.\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO: To build cpu notes, find a way to map PIR to logical id.
+	 *       Also, we may need different method for pseries and powernv.
+	 *       The currently booted kernel could have a different PIR to
+	 *       logical id mapping. So, try saving info of previous kernel's
+	 *       paca to get the right PIR to logical id mapping.
+	 */
+	rc = fadump_build_cpu_notes(fadump_conf);
+	if (rc)
+		return rc;
+
+	/*
+	 * We are done validating dump info and elfcore header is now ready
+	 * to be exported. set elfcorehdr_addr so that vmcore module will
+	 * export the elfcore header through '/proc/vmcore'.
+	 */
+	elfcorehdr_addr = fdh->elfcorehdr_addr;
+
+	return rc;
+}
+
+static void powernv_fadump_region_show(struct fw_dump *fadump_conf,
+				       struct seq_file *m)
+{
+}
+
+static void powernv_crash_fadump(const char *msg)
+{
+	int rc;
+
+	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
+	if (rc == OPAL_UNSUPPORTED)
+		pr_emerg("Reboot type %d not supported\n", OPAL_REBOOT_MPIPL);
+	else if (rc == OPAL_HARDWARE)
+		pr_emerg("No backend support for MPIPL!\n");
+}
+
+static struct fadump_ops_t powernv_fadump_ops = {
+	.init_fadump_mem_struct	= powernv_init_fadump_mem_struct,
+	.register_fadump	= powernv_register_fadump,
+	.unregister_fadump	= powernv_unregister_fadump,
+	.get_preserv_area_start	= powernv_get_preserv_area_start,
+	.get_meta_area_start	= powernv_get_meta_area_start,
+	.invalidate_fadump	= powernv_invalidate_fadump,
+	.process_fadump		= powernv_process_fadump,
+	.fadump_region_show	= powernv_fadump_region_show,
+	.crash_fadump		= powernv_crash_fadump,
+};
+
+int __init powernv_dt_scan_fadump(struct fw_dump *fadump_conf, ulong node)
+{
+	/*
+	 * Firmware currently supports only 32-bit value for size,
+	 * align it to 1MB size.
+	 */
+	fadump_conf->max_copy_size = _ALIGN_DOWN(0xFFFFFFFF, (1 << 20));
+
+	/*
+	 * Check if dump has been initiated on last reboot.
+	 */
+	fdm_active = of_get_flat_dt_prop(node, "result-table", NULL);
+	if (fdm_active) {
+		pr_info("Firmware-assisted dump is active.\n");
+		fadump_conf->dump_active = 1;
+		update_fadump_config(fadump_conf, (void *)__pa(fdm_active));
+	}
+
+	fadump_ops = &powernv_fadump_ops;
+	fadump_conf->fadump_supported = 1;
+	fadump_conf->fadump_platform = FADUMP_PLATFORM_POWERNV;
+
+	return 1;
+}
diff --git a/arch/powerpc/platforms/powernv/powernv_fadump.h b/arch/powerpc/platforms/powernv/powernv_fadump.h
new file mode 100644
index 0000000..224a142
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/powernv_fadump.h
@@ -0,0 +1,63 @@
+/*
+ * Firmware-Assisted Dump support on POWERNV platform.
+ *
+ * Copyright 2018, IBM Corporation
+ * Author: Hari Bathini <hbathini at linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef __PPC64_POWERNV_FA_DUMP_H__
+#define __PPC64_POWERNV_FA_DUMP_H__
+
+#define POWERNV_FADUMP_CPU_STATE_DATA	0x0000
+/* OPAL : 0x01 - 0x39 */
+#define POWERNV_FADUMP_OPAL_REGION	0x0001
+/* Firmware/SMF : 0x40 - 0x79 */
+#define POWERNV_FADUMP_FW_REGION	0x0040
+/* Kernel memory region : 0x80 - 0xb9 */
+#define POWERNV_FADUMP_REAL_MODE_REGION	0x0080
+/* Reserved for future use : 0xc0 - 0xff */
+#define POWERNV_FADUMP_RESERVED_REGION	0x00c0
+
+enum powernv_fadump_section_types {
+	CPU_STATE_TYPE		= 0,
+	OPAL_REGION_TYPE,
+	FW_REGION_TYPE,
+	RMR_REGION_TYPE,
+	POWERNV_SECTIONS
+};
+
+/* Starting index of RMR region in dump sections while registering */
+#define RMR_REGION_INPUT_IDX		0
+
+#define POWERNV_MAX_SECTIONS		(POWERNV_SECTIONS + \
+					 MAX_REAL_MEM_REGIONS - 1)
+
+/* Kernel Dump section info */
+struct powernv_fadump_section {
+	u8	src_type;
+	u8	reserved[7];
+	__be64	src_addr;
+	__be64	src_size;
+	__be64	dest_addr;
+	__be64	dest_size;
+};
+
+/*
+ * Firmware-assisted dump memory structure. This is the table passed to
+ * firmware through the OPAL call when registering for a future kernel dump.
+ */
+struct powernv_fadump_mem_struct {
+
+	__be16	section_size;		/* size of one section entry */
+	__be16	section_count;		/* number of sections */
+	__be32	reserved;
+
+	struct powernv_fadump_section	section[POWERNV_MAX_SECTIONS];
+};
+
+#endif /* __PPC64_POWERNV_FA_DUMP_H__ */
diff --git a/arch/powerpc/platforms/pseries/pseries_fadump.c b/arch/powerpc/platforms/pseries/pseries_fadump.c
index ac54501..ef7e59a 100644
--- a/arch/powerpc/platforms/pseries/pseries_fadump.c
+++ b/arch/powerpc/platforms/pseries/pseries_fadump.c
@@ -40,8 +40,12 @@ static void update_fadump_config(struct fw_dump *fadump_conf,
 		be64_to_cpu(fdm->rmr_region.destination_address);
 
 	if (fadump_conf->dump_active) {
-		fadump_conf->rmr_source_len =
-			be64_to_cpu(fdm->rmr_region.source_len);
+		fadump_conf->rmr_src_addr[0] =
+			be64_to_cpu(fdm->rmr_region.source_address);
+		fadump_conf->rmr_src_size[0] = be64_to_cpu(fdm->rmr_region.source_len);
+		fadump_conf->rmr_regions_cnt = 1;
+		fadump_conf->rmr_source_len = fadump_conf->rmr_src_size[0];
+		fadump_conf->boot_memory_hole_size = 0;
 	}
 }
 



More information about the Linuxppc-dev mailing list