[Skiboot] [PATCH] core/fast-reboot.c: Add sreset opal call
Stewart Smith
stewart at linux.vnet.ibm.com
Fri Dec 23 11:32:02 AEDT 2016
Alistair Popple <alistair at popple.id.au> writes:
> Sending a NMI to other CPUs regardless of their current state requires
> a way to reset them. POWER hardware has a method of directly injecting
> resets via direct thread control, however this only works if the
> thread is not active (eg. in a sleep or nap state).
>
> Resetting an active thread can be performed either via forcing the
> threads to an inactive state (as fast reboot does) or by ramming an
> instruction sequence that simulates an sreset. This patch implements
> the latter as forcing a thread to the inactive state is not ideal for
> debug purposes as the threads lose state.
Do we have an idea of what's going to be involved in doing this on P9?
> @@ -225,6 +255,35 @@ static void set_direct_ctl(struct cpu_thread *cpu, uint64_t bits)
> xscom_write(chip_id, xscom_addr, bits);
> }
>
> +static uint64_t get_ras_status(struct cpu_thread *cpu, uint32_t thread_id)
> +{
> + uint32_t core_id = pir_to_core_id(cpu->pir);
> + uint32_t chip_id = pir_to_chip_id(cpu->pir);
> + uint32_t xscom_addr;
> + uint64_t ras_status;
> +
> + xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_TCTL_RAS_STATUS(thread_id));
> + xscom_read(chip_id, xscom_addr, &ras_status);
Check the return value of xscom_read()? If the read fails, ras_status is returned uninitialised.
> +
> + return ras_status;
> +}
> +
> +static void set_ram_thread_active(struct cpu_thread *cpu, uint32_t thread_id, bool active)
> +{
> + uint32_t core_id = pir_to_core_id(cpu->pir);
> + uint32_t chip_id = pir_to_chip_id(cpu->pir);
> + uint32_t xscom_addr;
> + uint64_t thread_active;
> +
> + xscom_addr = XSCOM_ADDR_P8_EX(core_id, P8_EX_THREAD_ACTIVE);
> + xscom_read(chip_id, xscom_addr, &thread_active);
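Check the result of xscom_read()? If it fails, thread_active is uninitialised and we'd then modify it and write it back.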
> + if (active)
> + thread_active |= PPC_BIT(8) >> thread_id;
> + else
> + thread_active &= ~(PPC_BIT(8) >> thread_id);
> + xscom_write(chip_id, xscom_addr, thread_active);
Same here: check the result of xscom_write()?
> +}
> +
> static bool fast_reset_p8(void)
> {
> struct cpu_thread *cpu;
> @@ -247,7 +306,7 @@ static bool fast_reset_p8(void)
> /* Put everybody in stop except myself */
> for_each_cpu(cpu) {
> if (cpu != this_cpu())
> - set_direct_ctl(cpu, P8_DIRECT_CTL_STOP);
> + set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_STOP);
>
> /* Also make sure that saved_r1 is 0 ! That's what will
> * make our reset vector jump to fast_reboot_entry
> @@ -264,7 +323,7 @@ static bool fast_reset_p8(void)
> /* Put everybody in pre-nap except myself */
> for_each_cpu(cpu) {
> if (cpu != this_cpu())
> - set_direct_ctl(cpu, P8_DIRECT_CTL_PRENAP);
> + set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_PRENAP);
> }
>
> prlog(PR_DEBUG, "RESET: Resetting all threads but one...\n");
> @@ -272,7 +331,7 @@ static bool fast_reset_p8(void)
> /* Reset everybody except my own core threads */
> for_each_cpu(cpu) {
> if (cpu != this_cpu())
> - set_direct_ctl(cpu, P8_DIRECT_CTL_SRESET);
> + set_direct_ctl(cpu, pir_to_thread_id(cpu->pir), P8_DIRECT_CTL_SRESET);
> }
>
> return true;
> @@ -441,8 +500,8 @@ void __noreturn fast_reboot_entry(void)
> lock(&reset_lock);
> if (last_man_standing && next_cpu(first_cpu())) {
> prlog(PR_DEBUG, "RESET: last man standing fixup...\n");
> - set_direct_ctl(last_man_standing, P8_DIRECT_CTL_PRENAP);
> - set_direct_ctl(last_man_standing, P8_DIRECT_CTL_SRESET);
> + set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_PRENAP);
> + set_direct_ctl(last_man_standing, pir_to_thread_id(last_man_standing->pir), P8_DIRECT_CTL_SRESET);
> }
> last_man_standing = NULL;
> unlock(&reset_lock);
> @@ -544,3 +603,275 @@ void __noreturn fast_reboot_entry(void)
> /* Load and boot payload */
> load_and_boot_kernel(true);
> }
> +
> +/*
> + * RAMs the opcodes in *opcodes and store the results of each opcode
> + * into *results. *results must point to an array the same size as
> + * *opcodes. Each entry from *results is put into SCR0 prior to
> + * executing an opcode so that it may also be used to pass in
> + * data. Note that only register r0 is saved and restored so opcodes
> + * must not touch other registers.
> + */
> +static int ram_instructions(struct cpu_thread *cpu, uint32_t thread_id, uint64_t *opcodes,
> + uint64_t *results, int len, unsigned int lpar)
> +{
> + int i, rc = OPAL_SUCCESS;
> + uint64_t ram_mode, val, opcode, r0 = 0;
> + uint32_t chip_id = pir_to_chip_id(cpu->pir);
> + uint32_t core_id = pir_to_core_id(cpu->pir);
> +
> +
> + /* Activate RAM mode */
> + xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), &ram_mode);
Check the return value of xscom_read() here; otherwise ram_mode may be used uninitialised.
> + ram_mode |= RAM_MODE_ENABLE;
> +
> + /* Enable HV mode on thread */
> + ram_mode |= PPC_BIT(2) >> thread_id*2;
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_MODE_REG), ram_mode);
> +
> + /* Setup SPRC to use SPRD */
> + val = SPR_MODE_SPRC_WR_EN;
> + val = SETFIELD(SPR_MODE_SPRC_SEL, val, 1 << (3 - lpar));
> + val = SETFIELD(SPR_MODE_SPRC_T_SEL, val, 1 << (7 - thread_id));
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPR_MODE_REG), val);
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SPRC_REG),
> SPRC_REG_SCRATCH_SPR);
Check the return values of these xscom_write() calls?
> +
> + for (i = -1; i <= len; i++) {
> + if (i < 0)
> + /* Save r0 (assumes opcodes don't touch other registers) */
> + opcode = MTSPR(277, 0);
> + else if (i < len) {
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), results[i]);
> + opcode = opcodes[i];
> + } else if (i >= len) {
> + /* Restore r0 */
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), r0);
> + opcode = MFSPR(0, 277);
> + }
> +
> + /* ram instruction */
> + val = SETFIELD(RAM_CTRL_THREAD_SELECT, 0ULL, thread_id);
> + val = SETFIELD(RAM_CTRL_INSTR, val, opcode);
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_CTRL_REG), val);
> +
> + /* wait for completion */
> + do {
> + xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_RAM_STATUS_REG), &val);
> + } while (!val);
> +
> + if (!(val & RAM_STATUS)) {
> + prlog(PR_ERR, "Instruction ramming failed with status 0x%llx\n", val);
> + rc = OPAL_HARDWARE;
> + }
> +
> + /* Save the results */
> + xscom_read(chip_id, XSCOM_ADDR_P8_EX(core_id, P8_EX_SCR0_REG), &val);
> + if (i < 0)
> + r0 = val;
> + else if (i < len)
> + results[i] = val;
> + }
> +
> + /* Disable RAM mode */
> + ram_mode &= ~RAM_MODE_ENABLE;
> +
> + /* Disable HV mode on thread */
> + ram_mode &= ~(PPC_BIT(2) >> thread_id*2);
> + xscom_write(chip_id, XSCOM_ADDR_P8_EX(core_id,
> P8_EX_RAM_MODE_REG), ram_mode);
I'm guessing the only real *sensible* way to deal with any of these
xscoms failing is to bail out and return a "good luck with that" error
code back to the OS so it can then... just cry really.
> +/*
> + * Apply an sreset to the given threads in a core. When ramming
> + * instructions the whole core must be quiesced so we can't apply an
> + * sreset to active threads on the same core as we're running
> + */
> +static int64_t sreset_core(struct cpu_thread *cpu, unsigned int thread_mask)
> +{
> + uint32_t thread_id;
> + uint32_t sreset_mask = 0, ram_mask = 0;
> + uint64_t ras_status;
> + int timeout;
> + int64_t rc = 0;
> + unsigned int max_thread_id = pir_to_thread_id(-1U);
> + unsigned int max_thread_mask = (1 << (max_thread_id + 1)) - 1;
> +
> + assert(cpu == cpu->primary);
> + thread_mask &= max_thread_mask;
> + if (this_cpu() == cpu) {
> + prlog(PR_WARNING, "SRESET: Unable to reset threads on self\n");
> + return OPAL_PARAMETER;
> + }
> +
> + prlog(PR_INFO, "SRESET: Start reset for cpu 0x%x thread_mask 0x%x\n",
> + cpu->pir, thread_mask);
> +
> + /* cpu is the primary thread */
> + if (set_special_wakeup(cpu) != OPAL_SUCCESS)
> + return OPAL_BUSY;
> +
> + /* Stop threads selected for sreset */
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> + if (!((1 << thread_id) & thread_mask))
> + continue;
> +
> + set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> + ras_status = get_ras_status(cpu, thread_id);
> + if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> + sreset_mask |= 1 << thread_id;
> + }
> +
> + if (thread_mask == sreset_mask)
> + /* All threads selected for sreset can be sreset directly */
> + prlog(PR_INFO, "SRESET: All threads inactive, doing direct sreset\n");
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> + if ((1 << thread_id) & thread_mask) {
> + set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> + rc = OPAL_SUCCESS;
> + goto out;
> + }
> +
> + /* Need to emulate sreset so stop all other threads */
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> + if (!((1 << thread_id) & thread_mask))
> + set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_STOP);
> +
> + /* Work out which threads to sreset and which need sreset emulation */
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> + ras_status = get_ras_status(cpu, thread_id);
> + if (!(ras_status & RAS_STATUS_THREAD_ACTIVE))
> + sreset_mask |= 1 << thread_id;
> + else {
> + for (timeout = 0; timeout < RAS_STATUS_TIMEOUT; timeout++) {
> + ras_status = get_ras_status(cpu, thread_id);
> + if ((ras_status & RAS_STATUS_SRQ_EMPTY)
> + && (ras_status & RAS_STATUS_LSU_QUIESCED)
> + && (ras_status & RAS_STATUS_TS_QUIESCE)) {
> + ram_mask |= 1 << thread_id;
> + break;
> + }
> + }
> + }
> + }
> +
> + /*
> + * To emulate sreset we need to make sure all threads on a core are either:
> + * a) Quiesced
> + * b) Not active (recorded in sreset_mask)
> + * So skip ramming threads if we're not in the right state.
> + */
> + if ((ram_mask & thread_mask) && (ram_mask | sreset_mask) != max_thread_mask) {
> + prlog(PR_ERR, "SRESET: Unable to quiesce all threads for ramming (sreset_mask 0x%08x / ram_mask 0x%08x)\n",
> + sreset_mask, ram_mask);
> + ram_mask = 0;
> + rc = OPAL_PARTIAL;
> + }
> +
> + /* We need to ram threads before doing the direct sresets as
> + * that makes the threads active */
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> + if (!((1 << thread_id) & thread_mask))
> + continue;
> +
> + if ((1 << thread_id) & ram_mask)
> + emulate_sreset(cpu, thread_id);
> + }
> +
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++) {
> + if (!((1 << thread_id) & thread_mask))
> + continue;
> +
> + if ((1 << thread_id) & sreset_mask) {
> + prlog(PR_ERR, "SRESET: cpu 0x%x thread 0x%x not active, applying SRESET directly\n",
> + cpu->pir, thread_id);
> + set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_SRESET);
> + }
> + }
> +
> +out:
> + /* Start all threads */
> + for (thread_id = 0; thread_id <= max_thread_id; thread_id++)
> + set_direct_ctl(cpu, thread_id, P8_DIRECT_CTL_START);
> +
> + clr_special_wakeup(cpu);
> +
> + return rc;
> +}
> +
> +#define SYS_RESET_ALLBUTSELF -2
Should this be in opal-api.h?
> +int64_t signal_system_reset(int cpu_nr)
> +{
> + int64_t rc = 0;
> + struct cpu_thread *cpu;
> + uint32_t thread_id;
> +
> + if (proc_gen != proc_gen_p8)
> + return OPAL_UNSUPPORTED;
> +
> + /* Reset a single CPU */
> + if (cpu_nr >= 0) {
> + cpu = find_cpu_by_server(cpu_nr);
> + if (!cpu)
> + return OPAL_PARAMETER;
> +
> + thread_id = pir_to_thread_id(cpu->pir);
> + cpu = cpu->primary;
> + return sreset_core(cpu, 1 << thread_id);
> + }
> +
> + /* Otherwise reset all CPUs */
> + for_each_cpu(cpu) {
> + if (cpu->primary == this_cpu()->primary)
> + continue;
> +
> + if (cpu->primary != cpu)
> + continue;
> +
> + /* sreset all threads on a core */
> + rc |= sreset_core(cpu, -1);
> + }
> +
> + return rc;
> +}
> diff --git a/doc/opal-api/opal-signal-system-reset-128.txt b/doc/opal-api/opal-signal-system-reset-128.txt
> new file mode 100644
> index 0000000..bb1b869
> --- /dev/null
> +++ b/doc/opal-api/opal-signal-system-reset-128.txt
> @@ -0,0 +1,30 @@
> +OPAL_SIGNAL_SYSTEM_RESET
> +-------------------
> +
> +#define OPAL_SIGNAL_SYSTEM_RESET 128
> +
> +int64_t signal_system_reset(int cpu_nr)
> +
> +Arguments:
> +
> + int cpu_nr
> + Either the cpu server number of the target cpu to reset or
> + SYS_RESET_ALLBUTSELF (-2) to indicate all but the current cpu
> + should be reset.
I'm thinking we should just copy what PAPR does and also accept
-1 = target all online threads, including the caller,
even if we just return OPAL_UNSUPPORTED or OPAL_PARTIAL or something for
the -1 case.
> +This OPAL call causes the specified cpu(s) to be reset to the system
> +reset exception handler (0x100). Sleeping cpus will be woken with
> +SRR1[42:45] = 0x0100 indicating an interrupt caused by SCOM when in
> +power saving mode. Active cpus will also indicate interrupt caused by
> +SCOM but will have SRR1[46:47] = 0 as the interrupt did not occur
> +during a power saving mode.
> +
> +Resetting active threads on the same core as this call is run is
> +currently not supported.
> +
> +Return Values:
> +OPAL_SUCCESS: the system reset was successfully signalled to the target cpu(s)
> +OPAL_PARAMETER: a parameter was incorrect
> +OPAL_HARDWARE: hardware indicated failure during instruction ramming
> +OPAL_PARTIAL: not all requested cpus could be reset at this time
> +OPAL_UNSUPPORTED: this processor generation is not supported
Or "the requested operation is not supported"? Or should that be
OPAL_PARAMETER (for the -1 case)?
> index e1a8a4d..e4761ee 100644
> --- a/platforms/astbmc/common.c
> +++ b/platforms/astbmc/common.c
> @@ -356,4 +356,6 @@ void astbmc_early_init(void)
> uart_init();
>
> prd_init();
> +
> + opal_register(OPAL_SIGNAL_SYSTEM_RESET, signal_system_reset, 1);
> }
Is there anything that makes this specific to astbmc systems?
--
Stewart Smith
OPAL Architect, IBM.
More information about the Skiboot mailing list