[PATCH 1/6 v5] Kernel DLPAR Infrastructure
Nathan Fontenot
nfont at austin.ibm.com
Tue Nov 3 03:27:25 EST 2009
Benjamin Herrenschmidt wrote:
> On Wed, 2009-10-28 at 15:53 -0500, Nathan Fontenot wrote:
>> This patch provides the kernel DLPAR infrastructure in a new file named
>> dlpar.c. The functionality provided is for acquiring and releasing a resource
>> from firmware and the parsing of information returned from the
>> ibm,configure-connector rtas call. Additionally this exports the pSeries
>> reconfiguration notifier chain so that it can be invoked when device tree
>> updates are made.
>>
>> Signed-off-by: Nathan Fontenot <nfont at austin.ibm.com>
>> ---
>
> Hi Nathan !
>
> Finally I get to review this stuff :-)
>
Thanks!
>> +#define CFG_CONN_WORK_SIZE 4096
>> +static char workarea[CFG_CONN_WORK_SIZE];
>> +static DEFINE_SPINLOCK(workarea_lock);
>
> So I'm not a huge fan of this workarea static. First a static is in
> effect a global name (as far as System.map etc... are concerned) so it
> would warrant a better name. Then, do we really want that 4K of BSS
> taken even on platforms that don't do dlpar ? Any reason why you don't
> just pop a free page with __get_free_page() inside of
> configure_connector() ?
>
I'm not either; having a static buffer and a lock feels like overkill
for this. I tried kmalloc, but that didn't work. I'll try using
__get_free_page.
>> +struct cc_workarea {
>> + u32 drc_index;
>> + u32 zero;
>> + u32 name_offset;
>> + u32 prop_length;
>> + u32 prop_offset;
>> +};
>> +
>> +static struct property *parse_cc_property(char *workarea)
>> +{
>> + struct property *prop;
>> + struct cc_workarea *ccwa;
>> + char *name;
>> + char *value;
>> +
>> + prop = kzalloc(sizeof(*prop), GFP_KERNEL);
>> + if (!prop)
>> + return NULL;
>> +
>> + ccwa = (struct cc_workarea *)workarea;
>> + name = workarea + ccwa->name_offset;
>> + prop->name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> + if (!prop->name) {
>> + kfree(prop);
>> + return NULL;
>> + }
>> +
>> + strcpy(prop->name, name);
>> +
>> + prop->length = ccwa->prop_length;
>> + value = workarea + ccwa->prop_offset;
>> + prop->value = kzalloc(prop->length, GFP_KERNEL);
>> + if (!prop->value) {
>> + kfree(prop->name);
>> + kfree(prop);
>> + return NULL;
>> + }
>> +
>> + memcpy(prop->value, value, prop->length);
>> + return prop;
>> +}
>> +
>> +static void free_property(struct property *prop)
>> +{
>> + kfree(prop->name);
>> + kfree(prop->value);
>> + kfree(prop);
>> +}
>> +
>> +static struct device_node *parse_cc_node(char *work_area)
>> +{
>
> const char* maybe ?
sure.
>
>> + struct device_node *dn;
>> + struct cc_workarea *ccwa;
>> + char *name;
>> +
>> + dn = kzalloc(sizeof(*dn), GFP_KERNEL);
>> + if (!dn)
>> + return NULL;
>> +
>> + ccwa = (struct cc_workarea *)work_area;
>> + name = work_area + ccwa->name_offset;
>
> I'm wondering whether work_area should be a struct cc_workarea * in the
> first place with a char data[] at the end, but that would mean probably
> tweaking the offsets... no big deal, up to you.
>
I'll look into that. Anything that makes this easier to understand is good.
>> + dn->full_name = kzalloc(strlen(name) + 1, GFP_KERNEL);
>> + if (!dn->full_name) {
>> + kfree(dn);
>> + return NULL;
>> + }
>> +
>> + strcpy(dn->full_name, name);
>
> kstrdup ?
yep, should have used kstrdup.
>
> .../...
>
>> +#define NEXT_SIBLING 1
>> +#define NEXT_CHILD 2
>> +#define NEXT_PROPERTY 3
>> +#define PREV_PARENT 4
>> +#define MORE_MEMORY 5
>> +#define CALL_AGAIN -2
>> +#define ERR_CFG_USE -9003
>> +
>> +struct device_node *configure_connector(u32 drc_index)
>> +{
>
> It's a global exported function, I'd rather you call it
> dlpar_configure_connector()
>
ok.
>> + struct device_node *dn;
>> + struct device_node *first_dn = NULL;
>> + struct device_node *last_dn = NULL;
>> + struct property *property;
>> + struct property *last_property = NULL;
>> + struct cc_workarea *ccwa;
>> + int cc_token;
>> + int rc;
>> +
>> + cc_token = rtas_token("ibm,configure-connector");
>> + if (cc_token == RTAS_UNKNOWN_SERVICE)
>> + return NULL;
>> +
>> + spin_lock(&workarea_lock);
>> +
>> + ccwa = (struct cc_workarea *)&workarea[0];
>> + ccwa->drc_index = drc_index;
>> + ccwa->zero = 0;
>
> Popping a free page with gfp (or just kmalloc'ing 4K) would avoid the
> need for the lock too.
yes, see comment at beginning.
>
>> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> + while (rc) {
>> + switch (rc) {
>> + case NEXT_SIBLING:
>> + dn = parse_cc_node(workarea);
>> + if (!dn)
>> + goto cc_error;
>> +
>> + dn->parent = last_dn->parent;
>> + last_dn->sibling = dn;
>> + last_dn = dn;
>> + break;
>> +
>> + case NEXT_CHILD:
>> + dn = parse_cc_node(workarea);
>> + if (!dn)
>> + goto cc_error;
>> +
>> + if (!first_dn)
>> + first_dn = dn;
>> + else {
>> + dn->parent = last_dn;
>> + if (last_dn)
>> + last_dn->child = dn;
>> + }
>> +
>> + last_dn = dn;
>> + break;
>> +
>> + case NEXT_PROPERTY:
>> + property = parse_cc_property(workarea);
>> + if (!property)
>> + goto cc_error;
>> +
>> + if (!last_dn->properties)
>> + last_dn->properties = property;
>> + else
>> + last_property->next = property;
>> +
>> + last_property = property;
>> + break;
>> +
>> + case PREV_PARENT:
>> + last_dn = last_dn->parent;
>> + break;
>> +
>> + case CALL_AGAIN:
>> + break;
>> +
>> + case MORE_MEMORY:
>> + case ERR_CFG_USE:
>> + default:
>> + printk(KERN_ERR "Unexpected Error (%d) "
>> + "returned from configure-connector\n", rc);
>> + goto cc_error;
>> + }
>> +
>> + rc = rtas_call(cc_token, 2, 1, NULL, workarea, NULL);
>> + }
>> +
>> + spin_unlock(&workarea_lock);
>> + return first_dn;
>> +
>> +cc_error:
>> + spin_unlock(&workarea_lock);
>> +
>> + if (first_dn)
>> + free_cc_nodes(first_dn);
>> +
>> + return NULL;
>> +}
>> +
>> +static struct device_node *derive_parent(const char *path)
>> +{
>> + struct device_node *parent;
>> + char parent_path[128];
>> + int parent_path_len;
>> +
>> + parent_path_len = strrchr(path, '/') - path + 1;
>> + strlcpy(parent_path, path, parent_path_len);
>> +
>> + parent = of_find_node_by_path(parent_path);
>> +
>> + return parent;
>> +}
>
> This ...
>
>> +static int add_one_node(struct device_node *dn)
>> +{
>> + struct proc_dir_entry *ent;
>> + int rc;
>> +
>> + of_node_set_flag(dn, OF_DYNAMIC);
>> + kref_init(&dn->kref);
>> + dn->parent = derive_parent(dn->full_name);
>> +
>> + rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> + PSERIES_RECONFIG_ADD, dn);
>> + if (rc == NOTIFY_BAD) {
>> + printk(KERN_ERR "Failed to add device node %s\n",
>> + dn->full_name);
>> + return -ENOMEM; /* For now, safe to assume kmalloc failure */
>> + }
>> +
>> + of_attach_node(dn);
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> + ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
>> + if (ent)
>> + proc_device_tree_add_node(dn, ent);
>> +#endif
>> +
>> + of_node_put(dn->parent);
>> + return 0;
>> +}
>
> ... and this ...
>
>> +int add_device_tree_nodes(struct device_node *dn)
>> +{
>> + struct device_node *child = dn->child;
>> + struct device_node *sibling = dn->sibling;
>> + int rc;
>> +
>> + dn->child = NULL;
>> + dn->sibling = NULL;
>> + dn->parent = NULL;
>> +
>> + rc = add_one_node(dn);
>> + if (rc)
>> + return rc;
>> +
>> + if (child) {
>> + rc = add_device_tree_nodes(child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + if (sibling)
>> + rc = add_device_tree_nodes(sibling);
>> +
>> + return rc;
>> +}
>
> ... and this ...
>
>> +static int remove_one_node(struct device_node *dn)
>> +{
>> + struct device_node *parent = dn->parent;
>> + struct property *prop = dn->properties;
>> +
>> +#ifdef CONFIG_PROC_DEVICETREE
>> + while (prop) {
>> + remove_proc_entry(prop->name, dn->pde);
>> + prop = prop->next;
>> + }
>> +
>> + if (dn->pde)
>> + remove_proc_entry(dn->pde->name, parent->pde);
>> +#endif
>> +
>> + blocking_notifier_call_chain(&pSeries_reconfig_chain,
>> + PSERIES_RECONFIG_REMOVE, dn);
>> + of_detach_node(dn);
>> + of_node_put(dn); /* Must decrement the refcount */
>> +
>> + return 0;
>> +}
>
> ... and this ...
>
>> +static int _remove_device_tree_nodes(struct device_node *dn)
>> +{
>> + int rc;
>> +
>> + if (dn->child) {
>> + rc = _remove_device_tree_nodes(dn->child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + if (dn->sibling) {
>> + rc = _remove_device_tree_nodes(dn->sibling);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + rc = remove_one_node(dn);
>> + return rc;
>> +}
>
> ... repeat myself ...
>
>> +int remove_device_tree_nodes(struct device_node *dn)
>> +{
>> + int rc;
>> +
>> + if (dn->child) {
>> + rc = _remove_device_tree_nodes(dn->child);
>> + if (rc)
>> + return rc;
>> + }
>> +
>> + rc = remove_one_node(dn);
>> + return rc;
>> +}
>
> ... should probably all go to something like drivers/of/dynamic.c or at
> least for now arch/powerpc/kernel/of_dynamic.c along with everything
> related to dynamically adding and removing nodes. I see that potentially
> useful for more than just DLPAR (though DLPAR is the only user right
> now) and should also all be prefixed with of_*
I agree, there should be at least a powerpc generic implementation of these
routines. The reason I put them here is that I am doing some oddities with
the next, child, and sibling pointers since they point to items not yet in
the device tree.
I saw that Grant Likely is doing updates to all of the of_* stuff right now,
would it be ok to have these routines here, renamed as dlpar_*, and look
to merge them in with Grant's updates when he finishes?
>
>> +#define DR_ENTITY_SENSE 9003
>> +#define DR_ENTITY_PRESENT 1
>> +#define DR_ENTITY_UNUSABLE 2
>> +#define ALLOCATION_STATE 9003
>> +#define ALLOC_UNUSABLE 0
>> +#define ALLOC_USABLE 1
>> +#define ISOLATION_STATE 9001
>> +#define ISOLATE 0
>> +#define UNISOLATE 1
>> +
>> +int acquire_drc(u32 drc_index)
>> +{
>> + int dr_status, rc;
>> +
>> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> + DR_ENTITY_SENSE, drc_index);
>> + if (rc || dr_status != DR_ENTITY_UNUSABLE)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
>> + if (rc)
>> + return rc;
>> +
>> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> + if (rc) {
>> + rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> + return rc;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +int release_drc(u32 drc_index)
>> +{
>> + int dr_status, rc;
>> +
>> + rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
>> + DR_ENTITY_SENSE, drc_index);
>> + if (rc || dr_status != DR_ENTITY_PRESENT)
>> + return -1;
>> +
>> + rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
>> + if (rc)
>> + return rc;
>> +
>> + rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
>> + if (rc) {
>> + rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
>> + return rc;
>> + }
>> +
>> + return 0;
>> +}
>
> Both above should have a dlpar_* prefix
will do.
>
>> +static int pseries_dlpar_init(void)
>> +{
>> + if (!machine_is(pseries))
>> + return 0;
>> +
>> + return 0;
>> +}
>> +device_initcall(pseries_dlpar_init);
>
> What's the point ? :-)
Yeah, it's a bit odd-looking, but later patches actually add code to the init routine
to set up memory probe/release and cpu probe/release handlers.
I'll look to add ifdefs around the initcall for cases where no work is to be done.
-Nathan Fontenot
>
> Cheers
> Ben.
>
More information about the Linuxppc-dev
mailing list