[PATCH 2/3] of_pmem: Add memory ranges which took a mce to bad range

Oliver O'Halloran oohall at gmail.com
Tue Aug 20 18:41:08 AEST 2019


On Tue, Aug 20, 2019 at 12:30 PM Santosh Sivaraj <santosh at fossix.org> wrote:
>
> Subscribe to the MCE notification and add the physical address which
> generated a memory error to nvdimm bad range.

Uh... I should have looked a bit closer at this on v1.

a) of_pmem.c isn't part of the powerpc tree. You should have CCed this
to the nvdimm list since you'll need an Ack from Dan Williams.
b) of_pmem isn't powerpc specific. Adding a pile of powerpc specific
machine check parsing means it's not going to compile on other DT
platforms.
c) all this appears to be copied and pasted from the papr_scm version of this.

Considering this is pretty similar in spirit to what's done on x86
today (drivers/acpi/nfit/mce.c) I think you would get more milage out
of moving the address matching into libnvdimm itself. Machine check
handling is always going to be arch specific, but memory errors could
be re-emitted by the arch code into a more generic notifier chain that
libnvdimm use. There's probably other uses in mm/ for such a chain
too.

Oliver


> Signed-off-by: Santosh Sivaraj <santosh at fossix.org>
> ---
>  drivers/nvdimm/of_pmem.c | 151 +++++++++++++++++++++++++++++++++------
>  1 file changed, 131 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
> index a0c8dcfa0bf9..155e56862fdf 100644
> --- a/drivers/nvdimm/of_pmem.c
> +++ b/drivers/nvdimm/of_pmem.c
> @@ -8,6 +8,9 @@
>  #include <linux/module.h>
>  #include <linux/ioport.h>
>  #include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/nd.h>
> +#include <asm/mce.h>
>
>  static const struct attribute_group *region_attr_groups[] = {
>         &nd_region_attribute_group,
> @@ -25,11 +28,77 @@ struct of_pmem_private {
>         struct nvdimm_bus *bus;
>  };
>
> +struct of_pmem_region {
> +       struct of_pmem_private *priv;
> +       struct nd_region_desc *region_desc;
> +       struct nd_region *region;
> +       struct list_head region_list;
> +};
> +
> +LIST_HEAD(pmem_regions);
> +DEFINE_MUTEX(pmem_region_lock);
> +
> +static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
> +                        void *data)
> +{
> +       struct machine_check_event *evt = data;
> +       struct of_pmem_region *pmem_region;
> +       u64 aligned_addr, phys_addr;
> +       bool found = false;
> +
> +       if (evt->error_type != MCE_ERROR_TYPE_UE)
> +               return NOTIFY_DONE;
> +
> +       if (list_empty(&pmem_regions))
> +               return NOTIFY_DONE;
> +
> +       phys_addr = evt->u.ue_error.physical_address +
> +               (evt->u.ue_error.effective_address & ~PAGE_MASK);
> +
> +       if (!evt->u.ue_error.physical_address_provided ||
> +           !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
> +               return NOTIFY_DONE;
> +
> +       mutex_lock(&pmem_region_lock);
> +       list_for_each_entry(pmem_region, &pmem_regions, region_list) {
> +               struct resource *res = pmem_region->region_desc->res;
> +
> +               if (phys_addr >= res->start && phys_addr <= res->end) {
> +                       found = true;
> +                       break;
> +               }
> +       }
> +       mutex_unlock(&pmem_region_lock);
> +
> +       if (!found)
> +               return NOTIFY_DONE;
> +
> +       aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
> +
> +       if (nvdimm_bus_add_badrange(pmem_region->priv->bus, aligned_addr,
> +                                   L1_CACHE_BYTES))
> +               return NOTIFY_DONE;
> +
> +       pr_debug("Add memory range (0x%llx -- 0x%llx) as bad range\n",
> +                aligned_addr, aligned_addr + L1_CACHE_BYTES);
> +
> +
> +       nvdimm_region_notify(pmem_region->region, NVDIMM_REVALIDATE_POISON);
> +
> +       return NOTIFY_OK;
> +}
> +
> +static struct notifier_block mce_ue_nb = {
> +       .notifier_call = handle_mce_ue
> +};
> +
>  static int of_pmem_region_probe(struct platform_device *pdev)
>  {
>         struct of_pmem_private *priv;
>         struct device_node *np;
>         struct nvdimm_bus *bus;
> +       struct of_pmem_region *pmem_region;
> +       struct nd_region_desc *ndr_desc;
>         bool is_volatile;
>         int i;
>
> @@ -58,32 +127,49 @@ static int of_pmem_region_probe(struct platform_device *pdev)
>                         is_volatile ? "volatile" : "non-volatile",  np);
>
>         for (i = 0; i < pdev->num_resources; i++) {
> -               struct nd_region_desc ndr_desc;
>                 struct nd_region *region;
>
> -               /*
> -                * NB: libnvdimm copies the data from ndr_desc into it's own
> -                * structures so passing a stack pointer is fine.
> -                */
> -               memset(&ndr_desc, 0, sizeof(ndr_desc));
> -               ndr_desc.attr_groups = region_attr_groups;
> -               ndr_desc.numa_node = dev_to_node(&pdev->dev);
> -               ndr_desc.target_node = ndr_desc.numa_node;
> -               ndr_desc.res = &pdev->resource[i];
> -               ndr_desc.of_node = np;
> -               set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
> +               ndr_desc = kzalloc(sizeof(struct nd_region_desc), GFP_KERNEL);
> +               if (!ndr_desc) {
> +                       nvdimm_bus_unregister(priv->bus);
> +                       kfree(priv);
> +                       return -ENOMEM;
> +               }
> +
> +               ndr_desc->attr_groups = region_attr_groups;
> +               ndr_desc->numa_node = dev_to_node(&pdev->dev);
> +               ndr_desc->target_node = ndr_desc->numa_node;
> +               ndr_desc->res = &pdev->resource[i];
> +               ndr_desc->of_node = np;
> +               set_bit(ND_REGION_PAGEMAP, &ndr_desc->flags);
>
>                 if (is_volatile)
> -                       region = nvdimm_volatile_region_create(bus, &ndr_desc);
> +                       region = nvdimm_volatile_region_create(bus, ndr_desc);
>                 else
> -                       region = nvdimm_pmem_region_create(bus, &ndr_desc);
> +                       region = nvdimm_pmem_region_create(bus, ndr_desc);
>
> -               if (!region)
> +               if (!region) {
>                         dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
> -                                       ndr_desc.res, np);
> -               else
> -                       dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
> -                                       ndr_desc.res, np);
> +                                       ndr_desc->res, np);
> +                       continue;
> +               }
> +
> +               dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
> +                       ndr_desc->res, np);
> +
> +               pmem_region = kzalloc(sizeof(struct of_pmem_region),
> +                                     GFP_KERNEL);
> +               if (!pmem_region)
> +                       continue;
> +
> +               pmem_region->region_desc = ndr_desc;
> +               pmem_region->region = region;
> +               pmem_region->priv = priv;
> +
> +               /* Save regions registered for use by the notification code */
> +               mutex_lock(&pmem_region_lock);
> +               list_add_tail(&pmem_region->region_list, &pmem_regions);
> +               mutex_unlock(&pmem_region_lock);
>         }
>
>         return 0;
> @@ -92,6 +178,13 @@ static int of_pmem_region_probe(struct platform_device *pdev)
>  static int of_pmem_region_remove(struct platform_device *pdev)
>  {
>         struct of_pmem_private *priv = platform_get_drvdata(pdev);
> +       struct of_pmem_region *r, *t;
> +
> +       list_for_each_entry_safe(r, t, &pmem_regions, region_list) {
> +               list_del(&(r->region_list));
> +               kfree(r->region_desc);
> +               kfree(r);
> +       }
>
>         nvdimm_bus_unregister(priv->bus);
>         kfree(priv);
> @@ -113,7 +206,25 @@ static struct platform_driver of_pmem_region_driver = {
>         },
>  };
>
> -module_platform_driver(of_pmem_region_driver);
> +static int __init of_pmem_init(void)
> +{
> +       int ret;
> +
> +       ret = platform_driver_register(&of_pmem_region_driver);
> +       if (!ret)
> +               mce_register_notifier(&mce_ue_nb);
> +
> +       return ret;
> +}
> +module_init(of_pmem_init);
> +
> +static void __exit of_pmem_exit(void)
> +{
> +       mce_unregister_notifier(&mce_ue_nb);
> +       platform_driver_unregister(&of_pmem_region_driver);
> +}
> +module_exit(of_pmem_exit);
> +
>  MODULE_DEVICE_TABLE(of, of_pmem_region_match);
>  MODULE_LICENSE("GPL");
>  MODULE_AUTHOR("IBM Corporation");
> --
> 2.21.0
>


More information about the Linuxppc-dev mailing list