[Skiboot] [PATCH] core/fast-reboot.c: Add sreset opal call

Stewart Smith stewart at linux.vnet.ibm.com
Fri Dec 23 11:32:02 AEDT 2016


Alistair Popple <alistair at popple.id.au> writes:
> Sending a NMI to other CPUs regardless of their current state requires
> a way to reset them. POWER hardware has a method of directly injecting
> resets via direct thread control, however this only works if the
> thread is not active (eg. in a sleep or nap state).
>
> Resetting an active thread can be performed either via forcing the
> threads to an inactive state (as fast reboot does) or by ramming an
> instruction sequence that simulates an sreset. This patch implements
> the latter as forcing a thread to the inactive state is not ideal for
> debug purposes as the threads lose state.

Do we have an idea about what's going to be involved with this on P9?

> @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
>  	xscom_write(chip_id, xscom_addr, bits);
>  }
>
> +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
> +{
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t xscom_addr;
> +	uint64_t ras_status;
> +
> +	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
> +	xscom_read(chip_id, xscom_addr, &ras_status);

check return val?

> +
> +	return ras_status;
> +}
> +
> +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
> +{
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t xscom_addr;
> +	uint64_t thread_active;
> +
> +	xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
> +	xscom_read(chip_id, xscom_addr, &thread_active);

check result?

> +	if (active)
> +		thread_active |= PPC_BIT(8) >> thread_id;
> +	else
> +		thread_active &= ~(PPC_BIT(8) >> thread_id);
> +	xscom_write(chip_id, xscom_addr, thread_active);

same here, check result?

> +}
> +
>  static bool fast_reset_p8(void)
>  {
>  	struct cpu_thread *cpu;
> @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
>  	/* Put everybody in stop except myself */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);
>
>  		/* Also make sure that saved_r1 is 0 ! That's what will
>  		 * make our reset vector jump to fast_reboot_entry
> @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
>  	/* Put everybody in pre-nap except myself */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
>  	}
>
>  	prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
> @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
>  	/* Reset everybody except my own core threads */
>  	for_each_cpu(cpu) {
>  		if (cpu != this_cpu())
> -			set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> +			set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
>  	}
>
>  	return true;
> @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
>  	lock(&reset_lock);
>  	if (last_man_standing && next_cpu(first_cpu())) {
>  		prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
> -		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
> -		set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
> +		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> +		set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
>  	}
>  	last_man_standing = NULL;
>  	unlock(&reset_lock);
> @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
>  	/* Load and boot payload */
>  	load_and_boot_kernel(true);
>  }
> +
> +/*
> + * RAMs the opcodes in *opcodes and store the results of each opcode
> + * into *results. *results must point to an array the same size as
> + * *opcodes. Each entry from *results is put into SCR0 prior to
> + * executing an opcode so that it may also be used to pass in
> + * data. Note that only register r0 is saved and restored so opcodes
> + * must not touch other registers.
> + */
> +static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
> +			    uint64_t *results, int len, unsigned int lpar)
> +{
> +	int i, rc = OPAL_SUCCESS;
> +	uint64_t ram_mode, val, opcode, r0 = 0;
> +	uint32_t chip_id = pir_to_chip_id(cpu->pir);
> +	uint32_t core_id = pir_to_core_id(cpu->pir);
> +
> +
> +	/* Activate RAM mode */
> +	xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), &ram_mode);

check return value.

> +	ram_mode |= RAM_MODE_ENABLE;
> +
> +	/* Enable HV mode on thread */
> +	ram_mode |= PPC_BIT(2) >> thread_id*2;
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> +
> +	/* Setup SPRC to use SPRD */
> +	val = SPR_MODE_SPRC_WR_EN;
> +	val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> +	val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG),
> SPRC_REG_SCRATCH_SPR);

check return values?

> +
> +	for (i = -1; i <= len; i++) {
> +		if (i < 0)
> +			/* Save r0 (assumes opcodes don't touch other registers) */
> +			opcode = MTSPR(277, 0);
> +		else if (i < len) {
> +			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> +			opcode = opcodes[i];
> +		} else if (i >= len) {
> +			/* Restore r0 */
> +			xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> +			opcode = MFSPR(0, 277);
> +		}
> +
> +		/* ram instruction */
> +		val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
> +		val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> +		xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
> +
> +		/* wait for completion */
> +		do {
> +			xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> +		} while (!val);
> +
> +		if (!(val & RAM_STATUS)) {
> +			prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
> +			rc = OPAL_HARDWARE;
> +		}
> +
> +		/* Save the results */
> +		xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
> +		if (i < 0)
> +			r0 = val;
> +		else if (i < len)
> +			results[i] = val;
> +	}
> +
> +	/* Disable RAM mode */
> +	ram_mode &= ~RAM_MODE_ENABLE;
> +
> +	/* Disable HV mode on thread */
> +	ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> +	xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), ram_mode);

I'm guessing the only real *sensible* way to deal with any of these
xscoms failing is to bail out and return a "good luck with that" error
code back to the OS so it can then... just cry really.


> +/*
> + * Apply an sreset to the given threads in a core. When ramming
> + * instructions the whole core must be quiesced so we can't apply an
> + * sreset to active threads on the same core as we're running
> + */
> +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> +{
> +	uint32_t thread_id;
> +	uint32_t sreset_mask = 0, ram_mask = 0;
> +	uint64_t ras_status;
> +	int timeout;
> +	int64_t rc = 0;
> +	unsigned int max_thread_id = pir_to_thread_id(-1U);
> +	unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> +
> +	assert(cpu == cpu->primary);
> +	thread_mask &= max_thread_mask;
> +	if (this_cpu() == cpu) {
> +		prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> +		return OPAL_PARAMETER;
> +	}
> +
> +	prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> +	      cpu->pir, thread_mask);
> +
> +	/* cpu is the primary thread */
> +	if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> +		return OPAL_BUSY;
> +
> +	/* Stop threads selected for sreset */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +		ras_status = get_ras_status(cpu, thread_id);
> +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +			sreset_mask |= 1 << thread_id;
> +	}
> +
> +	if (thread_mask == sreset_mask)
> +		/* All threads selected for sreset can be sreset directly */
> +		prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> +		for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +			if ((1 << thread_id) & thread_mask) {
> +				set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +				rc = OPAL_SUCCESS;
> +				goto out;
> +			}
> +
> +	/* Need to emulate sreset so stop all other threads */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +		if (!((1 << thread_id) & thread_mask))
> +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +
> +	/* Work out which threads to sreset and which need sreset emulation */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		ras_status = get_ras_status(cpu, thread_id);
> +		if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> +			sreset_mask |= 1 << thread_id;
> +		else {
> +			for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> +				ras_status = get_ras_status(cpu, thread_id);
> +				if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> +				    && (ras_status & RAS_STATUS_LSU_QUIESCED)
> +				    && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> +					ram_mask |= 1 << thread_id;
> +					break;
> +				}
> +			}
> +		}
> +	}
> +
> +	/*
> +	 * To emulate sreset we need to make sure all threads on a core are either:
> +	 *  a) Quiesced
> +	 *  b) Not active (recorded in sreset_mask)
> +	 * So skip ramming threads if we're not in the right state.
> +	 */
> +	if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> +		prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> +		      sreset_mask, ram_mask);
> +		ram_mask = 0;
> +		rc = OPAL_PARTIAL;
> +	}
> +
> +	/* We need to ram threads before doing the direct sresets as
> +	 * that makes the threads active */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		if ((1 << thread_id) & ram_mask)
> +			emulate_sreset(cpu, thread_id);
> +	}
> +
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> +		if (!((1 << thread_id) & thread_mask))
> +			continue;
> +
> +		if ((1 << thread_id) & sreset_mask) {
> +			prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> +			      cpu->pir, thread_id);
> +			set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> +		}
> +	}
> +
> +out:
> +	/* Start all threads */
> +	for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> +		set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> +
> +	clr_special_wakeup(cpu);
> +
> +	return rc;
> +}
> +
> +#define SYS_RESET_ALLBUTSELF -2

should be in opal-api.h ?

> +int64_t signal_system_reset(int cpu_nr)
> +{
> +	int64_t rc = 0;
> +	struct cpu_thread *cpu;
> +	uint32_t thread_id;
> +
> +	if (proc_gen != proc_gen_p8)
> +		return OPAL_UNSUPPORTED;
> +
> +	/* Reset a single CPU */
> +	if (cpu_nr >= 0) {
> +		cpu = find_cpu_by_server(cpu_nr);
> +		if (!cpu)
> +			return OPAL_PARAMETER;
> +
> +		thread_id = pir_to_thread_id(cpu->pir);
> +		cpu = cpu->primary;
> +		return sreset_core(cpu, 1 << thread_id);
> +	}
> +
> +	/* Otherwise reset all CPUs */
> +	for_each_cpu(cpu) {
> +		if (cpu->primary == this_cpu()->primary)
> +			continue;
> +
> +		if (cpu->primary != cpu)
> +			continue;
> +
> +		/* sreset all threads on a core */
> +		rc |= sreset_core(cpu, -1);
> +	}
> +
> +	return rc;
> +}
> diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> new file mode 100644
> index 0000000..bb1b869
> --- /dev/null
> +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> @@ -0,0 +1,30 @@
> +OPAL_SIGNAL_SYSTEM_RESET
> +-------------------
> +
> +#define OPAL_SIGNAL_SYSTEM_RESET			128
> +
> +int64_t signal_system_reset(int cpu_nr)
> +
> +Arguments:
> +
> +  int cpu_nr
> +    Either the cpu server number of the target cpu to reset or
> +    SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> +    should be reset.

I'm thinking we should just copy what PAPR does and also accept
-1 = target all online threads including the caller,

even if we just return OPAL_UNSUPPORTED or OPAL_PARTIAL or something for
the -1 case.

> +This OPAL call causes the specified cpu(s) to be reset to the system
> +reset exception handler (0x100). Sleeping cpus will be woken with
> +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when in
> +power saving mode. Active cpus will also indicate interrupt caused by
> +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> +during a power saving mode.
> +
> +Resetting active threads on the same core as this call is run is
> +currently not supported.
> +
> +Return Values:
> +OPAL_SUCCESS: the power down was updated successfully
> +OPAL_PARAMETER: a parameter was incorrect
> +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> +OPAL_PARTIAL: not all requested cpus could be reset at this time
> +OPAL_UNSUPPORTED: this processor generation is not supported

                     or requested operation is not supported?
                     or should that be OPAL_PARAMETER?
                     (for -1)


> index e1a8a4d..e4761ee 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -356,4 +356,6 @@ void astbmc_early_init(void)
>  	uart_init();
>
>  	prd_init();
> +
> +	opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
>  }

Anything that makes this specific to astbmc systems?

-- 
Stewart Smith
OPAL Architect, IBM.



More information about the Skiboot mailing list