[Skiboot] [PATCH v2 2/2] prd: PRD framework

Neelesh Gupta neelegup at linux.vnet.ibm.com
Thu Feb 12 16:35:24 AEDT 2015


On 02/12/2015 09:36 AM, Stewart Smith wrote:
> Neelesh Gupta <neelegup at linux.vnet.ibm.com> writes:
>> diff --git a/hw/prd.c b/hw/prd.c
>> new file mode 100644
>> index 0000000..b574558
>> --- /dev/null
>> +++ b/hw/prd.c
>> @@ -0,0 +1,293 @@
>> +struct prd_node {
>> +	struct list_node	link;
>> +	uint32_t		proc;
>> +	struct opal_prd_msg	prd_msg;
>> +};
>> +
>> +static LIST_HEAD(prd_free_list);
>> +static LIST_HEAD(prd_in_use_list);
>> +static uint32_t token;
>> +static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(void);
>> +static struct lock prd_lock = LOCK_UNLOCKED;
>> +
>> +/* Entry from the HW below */
>> +void prd_interrupt(uint32_t proc, enum opal_prd_msg_type type)
>> +{
>> +	struct prd_node *node;
>> +
>> +	lock(&prd_lock);
>> +	node = list_pop(&prd_free_list, struct prd_node, link);
>> +	if (!node) { /* Free list exhausted */
>> +		node = zalloc(sizeof(*node));
>> +		if (!node) {
>> +			prlog(PR_ERR, "Failed to allocate prd node\n");
>> +			unlock(&prd_lock);
>> +			return;
>> +		}
>> +	}
>> +
>> +	node->proc = proc;
>> +	node->prd_msg.type = type;
>> +	node->prd_msg.token = ++token;
>> +
>> +	list_add_tail(&prd_in_use_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	if (prd_functions[type])
>> +		prd_functions[type]();
>> +}
>> +
>> +static int prd_msg_attn_ack(void)
>> +{
>> +	struct prd_node *node_attn, *node_ack;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node_ack, link)
>> +		if (node_ack->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN_ACK)
>> +			break;
>> +
>> +	if (!node_ack) {
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	list_for_each(&prd_in_use_list, node_attn, link)
>> +		/* prd node of ATTN type that matches the token of ATTN_ACK */
>> +		if (node_attn->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN &&
>> +		    node_attn->prd_msg.token == node_ack->prd_msg.token)
>> +			break;
>> +
>> +	if (!node_attn) {
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	/* ATTN acknowledged by the host, unmask the IPOLL */
>> +	rc = xscom_write(node_attn->proc, PRD_IPOLL_MASK_REG,
>> +			 ~(node_ack->prd_msg.attn_ack.ipoll_ack) &
>> +			 PRD_IPOLL_MASK);
>> +
>> +	/* Done. Now move both the ATTN & ATTN_ACK nodes to the free list */
>> +	list_del(&node_attn->link);
>> +	list_add_tail(&prd_free_list, &node_attn->link);
>> +
>> +	list_del(&node_ack->link);
>> +	list_add_tail(&prd_free_list, &node_ack->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +static int prd_msg_attn(void)
>> +{
>> +	uint64_t status, mask;
>> +	struct prd_node *node;
>> +	uint64_t *prd_msg;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_ATTN)
>> +			break;
>> +	unlock(&prd_lock);
>> +
>> +	if (!node)
>> +		return OPAL_RESOURCE;
>> +
>> +	rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to read the ipoll status\n");
>> +		goto exit;
>> +	}
>> +
>> +	/* Mask IPOLL for all the bits set in the 'status' */
>> +	mask = status & PRD_IPOLL_MASK;
>> +	rc = xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to mask the IPOLL\n");
>> +		goto exit;
>> +	}
>> +
>> +	/* Fill up the attention fields */
>> +	node->prd_msg.attn.proc = node->proc;		/* params[1] */
>> +	node->prd_msg.attn.ipoll_status = status;	/* params[2] */
>> +	node->prd_msg.attn.ipoll_mask = mask;		/* params[3] */
>> +
>> +	prd_msg = (uint64_t *)&node->prd_msg;
>> +
>> +	rc = opal_queue_msg(OPAL_PRD_MSG, NULL, NULL, prd_msg[0], prd_msg[1],
>> +			    prd_msg[2], prd_msg[3]);
>> +	if (rc) {
>> +		prlog(PR_ERR, "Failed to queue up the ATTN\n");
>> +		goto exit;
>> +	}
>> +
>> +	return 0;
>> +
>> +	/* In the error case, delete the node from 'in_use' list and add it
>> +	 * to the 'free' list as the ACK is never going to come from the host
>> +	 */
>> +exit:
>> +	lock(&prd_lock);
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +static int prd_msg_finish(void)
>> +{
>> +	struct proc_chip *chip;
>> +	struct prd_node *node;
>> +	int rc;
>> +
>> +	/* Mask the interrupts on all the cores */
>> +	for_each_chip(chip) {
>> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG, PRD_IPOLL_MASK);
>> +		if (rc)
>> +			prlog(PR_ERR, "Failed to mask the IPOLL on %d chip\n",
>> +			      chip->id);
>> +	}
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_FINI)
>> +			break;
>> +
>> +	if (!node) { /* should not happen though */
>> +		unlock(&prd_lock);
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return 0;
>> +}
>> +
>> +static int prd_msg_init(void)
>> +{
>> +	struct proc_chip *chip;
>> +	struct prd_node *node;
>> +	/* XXX We will use it for enabling the functionalities
>> +	 * uint32_t version;
>> +	 */
>> +	uint64_t ipoll;
>> +	int rc;
>> +
>> +	lock(&prd_lock);
>> +	list_for_each(&prd_in_use_list, node, link)
>> +		if (node->prd_msg.type == OPAL_PRD_MSG_TYPE_INIT)
>> +			break;
>> +	unlock(&prd_lock);
> Why are we doing this lookup instead of having prd_msg_XXX() take a
> prd_msg as a parameter?
>
> It seems as though we're maintaining the prd_in_use list for no real
> reason? We only add one to it per interrupt and then immediately
> process it, and the same goes for an OPAL call.

The node gets processed when Linux makes an OPAL call; in practice that
should be immediate, but it depends on when Linux consumes the message.
Moreover, I put the list in thinking that there could be simultaneous ATTNs
from multiple procs, or interrupts of different 'prd_msg_type' values,
pending at the same time.
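
That said, the handlers could still take the node as a parameter (found or
popped by the caller) and keep the list purely for bookkeeping. A rough,
untested sketch of what that would look like, abbreviated from the
prd_msg_attn() in this patch (the opal_queue_msg() part is left out):

static int (*prd_functions[OPAL_PRD_MSG_TYPE_MAX])(struct prd_node *);

static int prd_msg_attn(struct prd_node *node)
{
	uint64_t status, mask;
	int rc;

	/* The caller hands us the node it has just queued, so the
	 * list_for_each() lookup goes away. */
	rc = xscom_read(node->proc, PRD_ERROR_STATUS, &status);
	if (rc)
		return rc;

	/* Mask IPOLL for all the bits set in 'status', as before */
	mask = status & PRD_IPOLL_MASK;
	return xscom_write(node->proc, PRD_IPOLL_MASK_REG, mask);
}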

>
>> +
>> +	if (!node)
>> +		return OPAL_RESOURCE;
>> +
>> +	ipoll = node->prd_msg.init.ipoll;
>> +	/* Unmask these ATTNs which are supported */
>> +	for_each_chip(chip) {
>> +		rc = xscom_write(chip->id, PRD_IPOLL_MASK_REG,
>> +				 ~ipoll & PRD_IPOLL_MASK);
>> +		if (rc) {
>> +			prlog(PR_ERR, "Failed to unmask the IPOLL on %d chip\n",
>> +			      chip->id);
>> +			rc = OPAL_HARDWARE;
>> +			goto exit;
>> +		}
>> +	}
>> +
>> +	memset(prd_functions, 0, sizeof(prd_functions));
>> +
>> +	/*
>> +	 * XXX
>> +	 * version = node->prd_msg.init.version;
>> +	 *
>> +	 * Use the version to initialise the prd_functions[]()
>> +	 * supported by the application, otherwise NULL.
>> +	 * Currently, supporting 'ATTN' & 'ATTN_ACK' in default
>> +	 */
>> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN] = prd_msg_attn;
>> +	prd_functions[OPAL_PRD_MSG_TYPE_ATTN_ACK] = prd_msg_attn_ack;
>> +
>> +exit:
>> +	lock(&prd_lock);
>> +	list_del(&node->link);
>> +	list_add_tail(&prd_free_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return rc;
>> +}
>> +
>> +/* Entry from the host above */
>> +static int64_t opal_prd_msg(uint64_t *buffer)
> Why not struct opal_prd_msg *buffer as arg?

Yes, will change.
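
i.e. roughly (sketch of the changed entry point, reusing the existing
locking and list handling from the patch):

static int64_t opal_prd_msg(struct opal_prd_msg *msg)
{
	struct prd_node *node;

	lock(&prd_lock);
	node = list_pop(&prd_free_list, struct prd_node, link);
	/* ... zalloc() fallback as in the patch ... */

	/* With a typed pointer there is no intermediate memcpy()
	 * into a local struct; copy straight into the node. */
	memcpy(&node->prd_msg, msg, sizeof(node->prd_msg));
	list_add_tail(&prd_in_use_list, &node->link);
	unlock(&prd_lock);

	return prd_functions[node->prd_msg.type]();
}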

>
>> +{
>> +	struct opal_prd_msg prd_msg;
>> +	struct prd_node *node;
>> +
>> +	memcpy(&prd_msg, buffer, sizeof(prd_msg));
>> +
>> +	if (!prd_functions[prd_msg.type])
>> +		return OPAL_UNSUPPORTED;
> Please range/validity check anything that's coming in through an OPAL
> call as much as possible.

Yes, will add these checks.
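
At a minimum, validating the type before it is used as an array index,
something like (sketch):

	/* 'type' comes straight from the host, so range-check it
	 * before indexing prd_functions[] with it */
	if (prd_msg.type >= OPAL_PRD_MSG_TYPE_MAX)
		return OPAL_PARAMETER;
	if (!prd_functions[prd_msg.type])
		return OPAL_UNSUPPORTED;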

>
>> +	lock(&prd_lock);
>> +	node = list_pop(&prd_free_list, struct prd_node, link);
>> +	if (!node) { /* Free list exhausted */
>> +		node = zalloc(sizeof(*node));
>> +		if (!node) {
>> +			prlog(PR_ERR, "Failed to allocate prd node\n");
>> +			unlock(&prd_lock);
>> +			return OPAL_NO_MEM;
>> +		}
>> +	}
> Why not just dynamically allocate everything? Why have a free list at
> all if we're just going to allocate until ENOMEM?

We allocate a sufficient number of nodes during init() and add them to the
free list to cater for multiple in-flight messages; that should normally not
exhaust the free list, and it reduces fragmentation. If the list is
exhausted, that is not an error: we dynamically allocate another node, which
is then added to and maintained in the list like the rest.

>
> Also, I see extra allocation but nowhere where these extra ones are freed?

These are never freed; once processed they are added to and kept on the
'free' list along with the preallocated nodes.

>
>> +	memcpy(&node->prd_msg, buffer, sizeof(node->prd_msg));
>> +	list_add_tail(&prd_in_use_list, &node->link);
>> +	unlock(&prd_lock);
>> +
>> +	return prd_functions[node->prd_msg.type]();
>> +}
>> +opal_call(OPAL_PRD_MSG, opal_prd_msg, 1);
>> +
>> +void prd_init(void)
>> +{
>> +	struct prd_node *node;
>> +	int i;
>> +
>> +	node = zalloc(sizeof(*node) * OPAL_PRD_MSG_TYPE_MAX);
>> +	if (!node)
>> +		return;
>> +
>> +	for (i = 0; i < OPAL_PRD_MSG_TYPE_MAX; i++)
>> +		list_add_tail(&prd_free_list, &node[i].link);
>> +
>> +	/* Basic init and finish functions */
>> +	prd_functions[OPAL_PRD_MSG_TYPE_INIT] = prd_msg_init;
>> +	prd_functions[OPAL_PRD_MSG_TYPE_FINI] = prd_msg_finish;
>> +}
> What's the logic behind preallocating OPAL_PRD_MSG_TYPE_MAX amount? Why
> not just 1 or some other number?

The idea is that this many types of PRD messages can be handled
simultaneously without having to fall back to dynamic allocation when they
arrive: just pop a node from the 'free' list, move it to the 'in_use' list,
and move it back to the 'free' list when done.
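
Condensed from the code above, the intended lifecycle of a node is:

	/* arrival (interrupt or OPAL call) */
	lock(&prd_lock);
	node = list_pop(&prd_free_list, struct prd_node, link);
	if (!node)
		node = zalloc(sizeof(*node));	/* pool grows, never shrinks */
	list_add_tail(&prd_in_use_list, &node->link);
	unlock(&prd_lock);

	/* completion in the handler */
	lock(&prd_lock);
	list_del(&node->link);
	list_add_tail(&prd_free_list, &node->link);
	unlock(&prd_lock);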

>
>> --- a/include/platform.h
>> +++ b/include/platform.h
>> @@ -100,6 +100,8 @@ struct platform {
>>   	 */
>>   	void		(*external_irq)(unsigned int chip_id);
>>   
>> +	void		(*local_irq)(unsigned int chip_id);
>> +
>>   	/*
>>   	 * nvram ops.
>>   	 *
> Why is this a platform op? It seems identical for the time being... or
> is it just missing in rhesus?

Aren't the PRD interrupts platform dependent? They reach OPAL through the
same channel, but their nature could differ between platforms..?
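
For instance, each platform would point the op at its own decode routine --
a hypothetical sketch (the handler name and the decode step are made up
here; only prd_interrupt() is from this patch):

static void rhesus_local_irq(unsigned int chip_id)
{
	/* platform-specific decode of the local interrupt source,
	 * then hand PRD attentions to the common framework */
	prd_interrupt(chip_id, OPAL_PRD_MSG_TYPE_ATTN);
}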

Neelesh.



