[Skiboot] [RFC PATCH] nvram: Flag dangerous NVRAM options

Michael Neuling mikey at neuling.org
Fri May 3 15:24:47 AEST 2019


On Fri, 2019-05-03 at 15:04 +1000, Oliver wrote:
> On Fri, May 3, 2019 at 1:48 PM Michael Neuling <mikey at neuling.org> wrote:
> > Most nvram options used by skiboot are just for debug or testing for
> > regressions. They should never be used long term.
> > 
> > We've hit a number of issues in testing and the field where nvram
> > options have been set "temporaily" but haven't been properly cleared
> > after, resulting in crashes or real bugs being masked.
> > 
> > This patch marks most nvram options used by skiboot as dangerous and
> > prints a chicken to remind users of the problem.
> > 
> > Signed-off-by: Michael Neuling <mikey at neuling.org>
> > ---
> >  core/hmi.c                   |  2 +-
> >  core/init.c                  |  8 ++++----
> >  core/nvram-format.c          | 22 ++++++++++++++++++----
> >  core/platform.c              |  2 +-
> >  core/test/run-nvram-format.c | 10 +++++-----
> >  hw/lpc-uart.c                |  2 +-
> >  hw/npu2-common.c             |  2 +-
> >  hw/npu2-opencapi.c           |  2 +-
> >  hw/phb4.c                    | 11 +++++------
> >  hw/slw.c                     |  2 +-
> >  hw/xscom.c                   |  2 +-
> >  include/nvram.h              |  4 ++--
> >  libstb/secureboot.c          |  2 +-
> >  libstb/trustedboot.c         |  2 +-
> >  14 files changed, 43 insertions(+), 30 deletions(-)
> > 
> > diff --git a/core/hmi.c b/core/hmi.c
> > index e813286009..c6e0f62126 100644
> > --- a/core/hmi.c
> > +++ b/core/hmi.c
> > @@ -672,7 +672,7 @@ static void find_npu2_checkstop_reason(int flat_chip_id,
> >         if (!total_errors)
> >                 return;
> > 
> > -       npu2_hmi_verbose = nvram_query_eq("npu2-hmi-verbose", "true");
> > +       npu2_hmi_verbose = nvram_query_eq("npu2-hmi-verbose", "true",
> > false);
> >         /* Force this for now until we sort out something better */
> >         npu2_hmi_verbose = true;
> > 
> > diff --git a/core/init.c b/core/init.c
> > index 0fe6c16820..c3e2a80963 100644
> > --- a/core/init.c
> > +++ b/core/init.c
> > @@ -582,7 +582,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
> >         fsp_console_select_stdout();
> > 
> >         /* Use nvram bootargs over device tree */
> > -       cmdline = nvram_query("bootargs");
> > +       cmdline = nvram_query("bootargs", false);
> 
> The thing that motivated this patch was people disabling STOP states
> on CI boxes. 

Yep and the new option suggested by Brian and Alexey.

> Is allowing 'powersave=off' in the petitkernel's bootargs
> really any better?

I think this make life better but it's not perfect for sure. 

> >         if (cmdline) {
> >                 dt_check_del_prop(dt_chosen, "bootargs");
> >                 dt_add_property_string(dt_chosen, "bootargs", cmdline);
> > @@ -740,7 +740,7 @@ static void console_log_level(void)
> > 
> >         /* console log level:
> >          *   high 4 bits in memory, low 4 bits driver (e.g. uart). */
> > -       s = nvram_query("log-level-driver");
> > +       s = nvram_query("log-level-driver", false);
> >         if (s) {
> >                 level = console_get_level(s);
> >                 debug_descriptor.console_log_levels =
> > @@ -749,7 +749,7 @@ static void console_log_level(void)
> >                 prlog(PR_NOTICE, "console: Setting driver log level to
> > %i\n",
> >                       level & 0x0f);
> >         }
> > -       s = nvram_query("log-level-memory");
> > +       s = nvram_query("log-level-memory", false);
> >         if (s) {
> >                 level = console_get_level(s);
> >                 debug_descriptor.console_log_levels =
> > @@ -867,7 +867,7 @@ static void pci_nvram_init(void)
> > 
> >         pcie_max_link_speed = 0;
> > 
> > -       nvram_speed = nvram_query("pcie-max-link-speed");
> > +       nvram_speed = nvram_query("pcie-max-link-speed", true);
> >         if (nvram_speed) {
> >                 pcie_max_link_speed = atoi(nvram_speed);
> >                 prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
> > diff --git a/core/nvram-format.c b/core/nvram-format.c
> > index b2fc960b33..6cdadff215 100644
> > --- a/core/nvram-format.c
> > +++ b/core/nvram-format.c
> > @@ -206,13 +206,26 @@ static const char *find_next_key(const char *start,
> > const char *end)
> >         return NULL;
> >  }
> > +static void nvram_dangerous(const char *key)
> > +{
> > +       prlog(PR_ERR, "
> > ___________________________________________________________\n");
> > +       prlog(PR_ERR, "<  Dangerous NVRAM option: %s\n", key);
> > +       prlog(PR_ERR, " ----------------------------------------------------
> > -------\n");
> > +       prlog(PR_ERR, "                  \\                         \n");
> > +       prlog(PR_ERR, "                   \\   WW                   \n");
> > +       prlog(PR_ERR, "                      <^ \\___/|             \n");
> > +       prlog(PR_ERR, "                       \\      /             \n");
> > +       prlog(PR_ERR, "                        \\_  _/              \n");
> > +       prlog(PR_ERR, "                          }{                 \n");
> > +}
> 
> We're going to have a whole barnyard soon.

Stewart "Old MacDonald" Smith ?

> > +
> >  /*
> >   * nvram_query() - Searches skiboot NVRAM partition for a key=value pair.
> >   *
> >   * Returns a pointer to a NUL terminated string that contains the value
> >   * associated with the given key.
> >   */
> > -const char *nvram_query(const char *key)
> > +const char *nvram_query(const char *key, bool dangerous)
> >  {
> >         const char *part_end, *start;
> >         int key_len = strlen(key);
> > @@ -269,6 +282,8 @@ const char *nvram_query(const char *key)
> >                         prlog(PR_DEBUG, "NVRAM: Searched for '%s' found
> > '%s'\n",
> >                                 key, value);
> > 
> > +                       if (dangerous)
> > +                               nvram_dangerous(start);
> 
> Adding random boolean parameters is bad API design since you
> inevitably end up having to look up what the flag does. Make a new
> function called nvram_find_hack() or something instead that dumps the
> warning if needed.

Yeah, good point.

> 
> >                         return value;
> >                 }
> > 
> > @@ -280,7 +295,6 @@ const char *nvram_query(const char *key)
> >         return NULL;
> >  }
> > 
> > -
> >  /*
> >   * nvram_query_eq() - Check if the given 'key' exists and
> >   * is set to 'value'.
> > @@ -289,9 +303,9 @@ const char *nvram_query(const char *key)
> >   * by passing 'value == NULL' as a key's value can never be
> >   * NULL in nvram.
> >   */
> > -bool nvram_query_eq(const char *key, const char *value)
> > +bool nvram_query_eq(const char *key, const char *value, bool dangerous)
> >  {
> > -       const char *s = nvram_query(key);
> > +       const char *s = nvram_query(key, dangerous);
> > 
> >         if (!s)
> >                 return false;
> > diff --git a/core/platform.c b/core/platform.c
> > index 570a4309a2..3c479d5a1c 100644
> > --- a/core/platform.c
> > +++ b/core/platform.c
> > @@ -60,7 +60,7 @@ static int64_t opal_cec_reboot(void)
> >         opal_quiesce(QUIESCE_HOLD, -1);
> > 
> >         /* Try fast-reset unless explicitly disabled */
> > -       if (!nvram_query_eq("fast-reset","0"))
> > +       if (!nvram_query_eq("fast-reset","0", true))
> 
> Ah yeah, lets make disabling the half-assed hack that causes all
> manner of stupid bugs "dangerous." What a great idea.

Haha, I knew you'd love that... I'll mark as not dangerious

> >                 fast_reboot();
> > 
> >         console_complete_flush();
> > diff --git a/core/test/run-nvram-format.c b/core/test/run-nvram-format.c
> > index d86b1dcfa9..0db6d30f7e 100644
> > --- a/core/test/run-nvram-format.c
> > +++ b/core/test/run-nvram-format.c
> > @@ -144,23 +144,23 @@ int main(void)
> > 
> >         /* does an empty partition break us? */
> >         data = nvram_reset(nvram_image, 128*1024);
> > -       assert(nvram_query("test") == NULL);
> > +       assert(nvram_query("test", false) == NULL);
> > 
> >         /* does a zero length key break us? */
> >         data = nvram_reset(nvram_image, 128*1024);
> >         data[0] = '=';
> > -       assert(nvram_query("test") == NULL);
> > +       assert(nvram_query("test", false) == NULL);
> > 
> >         /* does a missing = break us? */
> >         data = nvram_reset(nvram_image, 128*1024);
> >         data[0] = 'a';
> > -       assert(nvram_query("test") == NULL);
> > +       assert(nvram_query("test", false) == NULL);
> > 
> >         /* does an empty value break us? */
> >         data = nvram_reset(nvram_image, 128*1024);
> >         data[0] = 'a';
> >         data[1] = '=';
> > -       result = nvram_query("a");
> > +       result = nvram_query("a", false);
> >         assert(result);
> >         assert(strlen(result) == 0);
> > 
> > @@ -168,7 +168,7 @@ int main(void)
> >         data = nvram_reset(nvram_image, 128*1024);
> >  #define TEST_1 "a\0a=\0test=test\0"
> >         memcpy(data, TEST_1, sizeof(TEST_1));
> > -       result = nvram_query("test");
> > +       result = nvram_query("test", false);
> >         assert(result);
> >         assert(strcmp(result, "test") == 0);
> > 
> > diff --git a/hw/lpc-uart.c b/hw/lpc-uart.c
> > index 365bf3e270..85d02562a7 100644
> > --- a/hw/lpc-uart.c
> > +++ b/hw/lpc-uart.c
> > @@ -506,7 +506,7 @@ static void uart_init_opal_console(void)
> >         /* Update the policy if the corresponding nvram variable
> >          * is present
> >          */
> > -       nv_policy = nvram_query("uart-con-policy");
> > +       nv_policy = nvram_query("uart-con-policy", true);
> >         if (nv_policy) {
> >                 if (!strcmp(nv_policy, "opal"))
> >                         uart_console_policy = UART_CONSOLE_OPAL;
> > diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> > index d4c0f851d6..018335e2ab 100644
> > --- a/hw/npu2-common.c
> > +++ b/hw/npu2-common.c
> > @@ -660,7 +660,7 @@ void probe_npu2(void)
> >         }
> > 
> >         /* Check for a zcal override */
> > -       zcal = nvram_query("nv_zcal_override");
> > +       zcal = nvram_query("nv_zcal_override", true);
> >         if (zcal) {
> >                 nv_zcal_nominal = atoi(zcal);
> >                 prlog(PR_WARNING, "NPU2: Using ZCAL impedance override =
> > %d\n", nv_zcal_nominal);
> > diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> > index 9df51b22ed..4ee6235733 100644
> > --- a/hw/npu2-opencapi.c
> > +++ b/hw/npu2-opencapi.c
> > @@ -1727,7 +1727,7 @@ static void read_nvram_training_state(void)
> >  {
> >         const char *state;
> > 
> > -       state = nvram_query("opencapi-link-training");
> > +       state = nvram_query("opencapi-link-training", true);
> >         if (state) {
> >                 if (!strcmp(state, "prbs31"))
> >                         npu2_ocapi_training_state = NPU2_TRAIN_PRBS31;
> > diff --git a/hw/phb4.c b/hw/phb4.c
> > index 52aedc890f..f2898263d4 100644
> > --- a/hw/phb4.c
> > +++ b/hw/phb4.c
> > @@ -5919,16 +5919,16 @@ void probe_phb4(void)
> >         struct dt_node *np;
> >         const char *s;
> > 
> > -       verbose_eeh = nvram_query_eq("pci-eeh-verbose", "true");
> > +       verbose_eeh = nvram_query_eq("pci-eeh-verbose", "true", true);
> Not dangerous

Agreed

> >         /* REMOVEME: force this for now until we stabalise PCIe */
> >         verbose_eeh = 1;
> >         if (verbose_eeh)
> >                 prlog(PR_INFO, "PHB4: Verbose EEH enabled\n");
> > 
> > -       pci_tracing = nvram_query_eq("pci-tracing", "true");
> > -       pci_eeh_mmio = !nvram_query_eq("pci-eeh-mmio", "disabled");
> > -       pci_retry_all = nvram_query_eq("pci-retry-all", "true");
> > -       s = nvram_query("phb-rx-err-max");
> > +       pci_tracing = nvram_query_eq("pci-tracing", "true", true);
> Not dangerous

Agreed.

> > +       pci_retry_all = nvram_query_eq("pci-retry-all", "true", true);
> Questionable

If we have to set this, then we should fix the long term problem, not work
around it with this, hence I think it's dangerous.

> 
> > +       s = nvram_query("phb-rx-err-max", true);
> >         if (s) {
> >                 rx_err_max = atoi(s);
> > 
> > @@ -5937,7 +5937,6 @@ void probe_phb4(void)
> >                 rx_err_max = MIN(rx_err_max, 255);
> >         }
> >         prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n",
> > rx_err_max);
> > -
> >         /* Look for PBCQ XSCOM nodes */
> >         dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
> >                 phb4_probe_pbcq(np);
> > diff --git a/hw/slw.c b/hw/slw.c
> > index adbfdce950..b0d503cc7f 100644
> > --- a/hw/slw.c
> > +++ b/hw/slw.c
> > @@ -883,7 +883,7 @@ void add_cpu_idle_state_properties(void)
> >                 if (wakeup_engine_state == WAKEUP_ENGINE_PRESENT)
> >                         supported_states_mask |= OPAL_PM_WINKLE_ENABLED;
> >         }
> > -       nvram_disable_str = nvram_query("opal-stop-state-disable-mask");
> > +       nvram_disable_str = nvram_query("opal-stop-state-disable-mask",
> > true);
> >         if (nvram_disable_str)
> >                 nvram_disabled_states_mask = strtol(nvram_disable_str, NULL,
> > 0);
> >         prlog(PR_DEBUG, "NVRAM stop disable mask: %x\n",
> > nvram_disabled_states_mask);
> > diff --git a/hw/xscom.c b/hw/xscom.c
> > index b652e61702..7b642bad25 100644
> > --- a/hw/xscom.c
> > +++ b/hw/xscom.c
> > @@ -833,7 +833,7 @@ int64_t xscom_trigger_xstop(void)
> >         int rc = OPAL_UNSUPPORTED;
> >         bool xstop_disabled = false;
> > 
> > -       if (nvram_query_eq("opal-sw-xstop", "disable"))
> > +       if (nvram_query_eq("opal-sw-xstop", "disable", true))
> >                 xstop_disabled = true;
> > 
> >         if (xstop_disabled) {
> > diff --git a/include/nvram.h b/include/nvram.h
> > index 012c107f17..7c88d3bb2e 100644
> > --- a/include/nvram.h
> > +++ b/include/nvram.h
> > @@ -24,7 +24,7 @@ bool nvram_validate(void);
> >  bool nvram_has_loaded(void);
> >  bool nvram_wait_for_load(void);
> > 
> > -const char *nvram_query(const char *name);
> > -bool nvram_query_eq(const char *key, const char *value);
> > +const char *nvram_query(const char *name, bool dangerous);
> > +bool nvram_query_eq(const char *key, const char *value, bool dangerous);
> > 
> >  #endif /* __NVRAM_H */
> > diff --git a/libstb/secureboot.c b/libstb/secureboot.c
> > index 4f6a301d5e..5714bec178 100644
> > --- a/libstb/secureboot.c
> > +++ b/libstb/secureboot.c
> > @@ -104,7 +104,7 @@ void secureboot_init(void)
> > 
> >         prlog(PR_DEBUG, "Found %s\n", compat);
> > 
> > -       if (nvram_query_eq("force-secure-mode", "always")) {
> > +       if (nvram_query_eq("force-secure-mode", "always", true)) {
> 
> I'm not convinced this is a bad thing, but ok.

The documentation suggests to me that we can make a machine look secure that
isn't. This seems like a dangerous option.

I may be misinterpreting the doc though. Claudio/Stewart? 

> >                 secure_mode = true;
> >                 prlog(PR_NOTICE, "secure mode on (FORCED by nvram)\n");
> >         } else {
> > diff --git a/libstb/trustedboot.c b/libstb/trustedboot.c
> > index ae2cc55646..570972c858 100644
> > --- a/libstb/trustedboot.c
> > +++ b/libstb/trustedboot.c
> > @@ -102,7 +102,7 @@ void trustedboot_init(void)
> >                 return;
> >         }
> > 
> > -       if (nvram_query_eq("force-trusted-mode", "true")) {
> > +       if (nvram_query_eq("force-trusted-mode", "true", true)) {
> 
> Same here.

Same for me too.

Mikey



More information about the Skiboot mailing list