[Skiboot] [RFC PATCH] nvram: Flag dangerous NVRAM options

Oliver oohall at gmail.com
Fri May 3 15:04:42 AEST 2019


On Fri, May 3, 2019 at 1:48 PM Michael Neuling <mikey at neuling.org> wrote:
>
> Most nvram options used by skiboot are just for debug or testing for
> regressions. They should never be used long term.
>
> We've hit a number of issues in testing and the field where nvram
> options have been set "temporaily" but haven't been properly cleared
> after, resulting in crashes or real bugs being masked.
>
> This patch marks most nvram options used by skiboot as dangerous and
> prints a chicken to remind users of the problem.
>
> Signed-off-by: Michael Neuling <mikey at neuling.org>
> ---
>  core/hmi.c                   |  2 +-
>  core/init.c                  |  8 ++++----
>  core/nvram-format.c          | 22 ++++++++++++++++++----
>  core/platform.c              |  2 +-
>  core/test/run-nvram-format.c | 10 +++++-----
>  hw/lpc-uart.c                |  2 +-
>  hw/npu2-common.c             |  2 +-
>  hw/npu2-opencapi.c           |  2 +-
>  hw/phb4.c                    | 11 +++++------
>  hw/slw.c                     |  2 +-
>  hw/xscom.c                   |  2 +-
>  include/nvram.h              |  4 ++--
>  libstb/secureboot.c          |  2 +-
>  libstb/trustedboot.c         |  2 +-
>  14 files changed, 43 insertions(+), 30 deletions(-)
>
> diff --git a/core/hmi.c b/core/hmi.c
> index e813286009..c6e0f62126 100644
> --- a/core/hmi.c
> +++ b/core/hmi.c
> @@ -672,7 +672,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
>         if (!total_errors)
>                 return;
>
> -       npu2_hmi_verbose = nvram_query_eq("npu2-hmi-verbose", "true");
> +       npu2_hmi_verbose = nvram_query_eq("npu2-hmi-verbose", "true", false);
>         /* Force this for now until we sort out something better */
>         npu2_hmi_verbose = true;
>
> diff --git a/core/init.c b/core/init.c
> index 0fe6c16820..c3e2a80963 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -582,7 +582,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
>         fsp_console_select_stdout();
>
>         /* Use nvram bootargs over device tree */

> -       cmdline = nvram_query("bootargs");
> +       cmdline = nvram_query("bootargs", false);

The thing that motivated this patch was people disabling STOP states
on CI boxes. Is allowing 'powersave=off' in the petitkernel's bootargs
really any better?

>         if (cmdline) {
>                 dt_check_del_prop(dt_chosen, "bootargs");
>                 dt_add_property_string(dt_chosen, "bootargs", cmdline);
> @@ -740,7 +740,7 @@ static void console_log_level(void)
>
>         /* console log level:
>          *   high 4 bits in memory, low 4 bits driver (e.g. uart). */
> -       s = nvram_query("log-level-driver");
> +       s = nvram_query("log-level-driver", false);
>         if (s) {
>                 level = console_get_level(s);
>                 debug_descriptor.console_log_levels =
> @@ -749,7 +749,7 @@ static void console_log_level(void)
>                 prlog(PR_NOTICE, "console: Setting driver log level to %i\n",
>                       level & 0x0f);
>         }
> -       s = nvram_query("log-level-memory");
> +       s = nvram_query("log-level-memory", false);
>         if (s) {
>                 level = console_get_level(s);
>                 debug_descriptor.console_log_levels =
> @@ -867,7 +867,7 @@ static void pci_nvram_init(void)
>
>         pcie_max_link_speed = 0;
>
> -       nvram_speed = nvram_query("pcie-max-link-speed");
> +       nvram_speed = nvram_query("pcie-max-link-speed", true);
>         if (nvram_speed) {
>                 pcie_max_link_speed = atoi(nvram_speed);
>                 prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
> diff --git a/core/nvram-format.c b/core/nvram-format.c
> index b2fc960b33..6cdadff215 100644
> --- a/core/nvram-format.c
> +++ b/core/nvram-format.c
> @@ -206,13 +206,26 @@ static const char *find_next_key(const char *start, const char *end)
>         return NULL;
>  }

> +static void nvram_dangerous(const char *key)
> +{
> +       prlog(PR_ERR, " ___________________________________________________________\n");
> +       prlog(PR_ERR, "<  Dangerous NVRAM option: %s\n", key);
> +       prlog(PR_ERR, " -----------------------------------------------------------\n");
> +       prlog(PR_ERR, "                  \\                         \n");
> +       prlog(PR_ERR, "                   \\   WW                   \n");
> +       prlog(PR_ERR, "                      <^ \\___/|             \n");
> +       prlog(PR_ERR, "                       \\      /             \n");
> +       prlog(PR_ERR, "                        \\_  _/              \n");
> +       prlog(PR_ERR, "                          }{                 \n");
> +}

We're going to have a whole barnyard soon.

>
> +
>  /*
>   * nvram_query() - Searches skiboot NVRAM partition for a key=value pair.
>   *
>   * Returns a pointer to a NUL terminated string that contains the value
>   * associated with the given key.
>   */
> -const char *nvram_query(const char *key)
> +const char *nvram_query(const char *key, bool dangerous)
>  {
>         const char *part_end, *start;
>         int key_len = strlen(key);
> @@ -269,6 +282,8 @@ const char *nvram_query(const char *key)
>                         prlog(PR_DEBUG, "NVRAM: Searched for '%s' found '%s'\n",
>                                 key, value);
>
> +                       if (dangerous)
> +                               nvram_dangerous(start);

Adding random boolean parameters is bad API design since you
inevitably end up having to look up what the flag does. Make a new
function called nvram_find_hack() or something instead that dumps the
warning if needed.

>                         return value;
>                 }
>
> @@ -280,7 +295,6 @@ const char *nvram_query(const char *key)
>         return NULL;
>  }
>
> -
>  /*
>   * nvram_query_eq() - Check if the given 'key' exists and
>   * is set to 'value'.
> @@ -289,9 +303,9 @@ const char *nvram_query(const char *key)
>   * by passing 'value == NULL' as a key's value can never be
>   * NULL in nvram.
>   */
> -bool nvram_query_eq(const char *key, const char *value)
> +bool nvram_query_eq(const char *key, const char *value, bool dangerous)
>  {
> -       const char *s = nvram_query(key);
> +       const char *s = nvram_query(key, dangerous);
>
>         if (!s)
>                 return false;
> diff --git a/core/platform.c b/core/platform.c
> index 570a4309a2..3c479d5a1c 100644
> --- a/core/platform.c
> +++ b/core/platform.c
> @@ -60,7 +60,7 @@ static int64_t opal_cec_reboot(void)
>         opal_quiesce(QUIESCE_HOLD, -1);
>
>         /* Try fast-reset unless explicitly disabled */

> -       if (!nvram_query_eq("fast-reset","0"))
> +       if (!nvram_query_eq("fast-reset","0", true))

Ah yeah, lets make disabling the half-assed hack that causes all
manner of stupid bugs "dangerous." What a great idea.

>                 fast_reboot();
>
>         console_complete_flush();
> diff --git a/core/test/run-nvram-format.c b/core/test/run-nvram-format.c
> index d86b1dcfa9..0db6d30f7e 100644
> --- a/core/test/run-nvram-format.c
> +++ b/core/test/run-nvram-format.c
> @@ -144,23 +144,23 @@ int main(void)
>
>         /* does an empty partition break us? */
>         data = nvram_reset(nvram_image, 128*1024);
> -       assert(nvram_query("test") == NULL);
> +       assert(nvram_query("test", false) == NULL);
>
>         /* does a zero length key break us? */
>         data = nvram_reset(nvram_image, 128*1024);
>         data[0] = '=';
> -       assert(nvram_query("test") == NULL);
> +       assert(nvram_query("test", false) == NULL);
>
>         /* does a missing = break us? */
>         data = nvram_reset(nvram_image, 128*1024);
>         data[0] = 'a';
> -       assert(nvram_query("test") == NULL);
> +       assert(nvram_query("test", false) == NULL);
>
>         /* does an empty value break us? */
>         data = nvram_reset(nvram_image, 128*1024);
>         data[0] = 'a';
>         data[1] = '=';
> -       result = nvram_query("a");
> +       result = nvram_query("a", false);
>         assert(result);
>         assert(strlen(result) == 0);
>
> @@ -168,7 +168,7 @@ int main(void)
>         data = nvram_reset(nvram_image, 128*1024);
>  #define TEST_1 "a\0a=\0test=test\0"
>         memcpy(data, TEST_1, sizeof(TEST_1));
> -       result = nvram_query("test");
> +       result = nvram_query("test", false);
>         assert(result);
>         assert(strcmp(result, "test") == 0);
>
> diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
> index 365bf3e270..85d02562a7 100644
> --- a/hw/lpc-uart.c
> +++ b/hw/lpc-uart.c
> @@ -506,7 +506,7 @@ static void uart_init_opal_console(void)
>         /* Update the policy if the corresponding nvram variable
>          * is present
>          */
> -       nv_policy = nvram_query("uart-con-policy");
> +       nv_policy = nvram_query("uart-con-policy", true);
>         if (nv_policy) {
>                 if (!strcmp(nv_policy, "opal"))
>                         uart_console_policy = UART_CONSOLE_OPAL;
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index d4c0f851d6..018335e2ab 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -660,7 +660,7 @@ void probe_npu2(void)
>         }
>
>         /* Check for a zcal override */
> -       zcal = nvram_query("nv_zcal_override");
> +       zcal = nvram_query("nv_zcal_override", true);
>         if (zcal) {
>                 nv_zcal_nominal = atoi(zcal);
>                 prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal);
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 9df51b22ed..4ee6235733 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -1727,7 +1727,7 @@ static void read_nvram_training_state(void)
>  {
>         const char *state;
>
> -       state = nvram_query("opencapi-link-training");
> +       state = nvram_query("opencapi-link-training", true);
>         if (state) {
>                 if (!strcmp(state, "prbs31"))
>                         npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> diff --git a/hw/phb4.c b/hw/phb4.c
> index 52aedc890f..f2898263d4 100644
> --- a/hw/phb4.c
> +++ b/hw/phb4.c
> @@ -5919,16 +5919,16 @@ void probe_phb4(void)
>         struct dt_node *np;
>         const char *s;
>

> -       verbose_eeh = nvram_query_eq("pci-eeh-verbose", "true");
> +       verbose_eeh = nvram_query_eq("pci-eeh-verbose", "true", true);
Not dangerous

>         /* REMOVEME: force this for now until we stabalise PCIe */
>         verbose_eeh = 1;
>         if (verbose_eeh)
>                 prlog(PR_INFO, "PHB4: Verbose EEH enabled\n");
>
> -       pci_tracing = nvram_query_eq("pci-tracing", "true");
> -       pci_eeh_mmio = !nvram_query_eq("pci-eeh-mmio", "disabled");
> -       pci_retry_all = nvram_query_eq("pci-retry-all", "true");
> -       s = nvram_query("phb-rx-err-max");

> +       pci_tracing = nvram_query_eq("pci-tracing", "true", true);
Not dangerous

> +       pci_retry_all = nvram_query_eq("pci-retry-all", "true", true);
Questionable

> +       s = nvram_query("phb-rx-err-max", true);
>         if (s) {
>                 rx_err_max = atoi(s);
>
> @@ -5937,7 +5937,6 @@ void probe_phb4(void)
>                 rx_err_max = MIN(rx_err_max, 255);
>         }
>         prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n", rx_err_max);
> -
>         /* Look for PBCQ XSCOM nodes */
>         dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
>                 phb4_probe_pbcq(np);
> diff --git a/hw/slw.c b/hw/slw.c
> index adbfdce950..b0d503cc7f 100644
> --- a/hw/slw.c
> +++ b/hw/slw.c
> @@ -883,7 +883,7 @@ void add_cpu_idle_state_properties(void)
>                 if (wakeup_engine_state == WAKEUP_ENGINE_PRESENT)
>                         supported_states_mask |= OPAL_PM_WINKLE_ENABLED;
>         }
> -       nvram_disable_str = nvram_query("opal-stop-state-disable-mask");
> +       nvram_disable_str = nvram_query("opal-stop-state-disable-mask", true);
>         if (nvram_disable_str)
>                 nvram_disabled_states_mask = strtol(nvram_disable_str, NULL, 0);
>         prlog(PR_DEBUG, "NVRAM stop disable mask: %x\n", nvram_disabled_states_mask);
> diff --git a/hw/xscom.c b/hw/xscom.c
> index b652e61702..7b642bad25 100644
> --- a/hw/xscom.c
> +++ b/hw/xscom.c
> @@ -833,7 +833,7 @@ int64_t xscom_trigger_xstop(void)
>         int rc = OPAL_UNSUPPORTED;
>         bool xstop_disabled = false;
>
> -       if (nvram_query_eq("opal-sw-xstop", "disable"))
> +       if (nvram_query_eq("opal-sw-xstop", "disable", true))
>                 xstop_disabled = true;
>
>         if (xstop_disabled) {
> diff --git a/include/nvram.h b/include/nvram.h
> index 012c107f17..7c88d3bb2e 100644
> --- a/include/nvram.h
> +++ b/include/nvram.h
> @@ -24,7 +24,7 @@ bool nvram_validate(void);
>  bool nvram_has_loaded(void);
>  bool nvram_wait_for_load(void);
>
> -const char *nvram_query(const char *name);
> -bool nvram_query_eq(const char *key, const char *value);
> +const char *nvram_query(const char *name, bool dangerous);
> +bool nvram_query_eq(const char *key, const char *value, bool dangerous);
>
>  #endif /* __NVRAM_H */
> diff --git a/libstb/secureboot.c b/libstb/secureboot.c
> index 4f6a301d5e..5714bec178 100644
> --- a/libstb/secureboot.c
> +++ b/libstb/secureboot.c
> @@ -104,7 +104,7 @@ void secureboot_init(void)
>
>         prlog(PR_DEBUG, "Found %s\n", compat);
>

> -       if (nvram_query_eq("force-secure-mode", "always")) {
> +       if (nvram_query_eq("force-secure-mode", "always", true)) {

I'm not convinced this is a bad thing, but ok.

>                 secure_mode = true;
>                 prlog(PR_NOTICE, "secure mode on (FORCED by nvram)\n");
>         } else {
> diff --git a/libstb/trustedboot.c b/libstb/trustedboot.c
> index ae2cc55646..570972c858 100644
> --- a/libstb/trustedboot.c
> +++ b/libstb/trustedboot.c
> @@ -102,7 +102,7 @@ void trustedboot_init(void)
>                 return;
>         }
>
> -       if (nvram_query_eq("force-trusted-mode", "true")) {
> +       if (nvram_query_eq("force-trusted-mode", "true", true)) {

Same here.

>                 trusted_mode = true;
>                 prlog(PR_NOTICE, "trusted mode on (FORCED by nvram)\n");
>         } else {
> --
> 2.21.0
>


More information about the Skiboot mailing list