[PATCH v4.7 6/7] net/ncsi: Rework the channel monitoring

Gavin Shan gwshan at linux.vnet.ibm.com
Wed Sep 28 17:18:03 AEST 2016


On Wed, Sep 28, 2016 at 03:20:32PM +0930, Joel Stanley wrote:
>On Wed, 2016-09-28 at 12:48 +1000, Gavin Shan wrote:
>> The original NCSI channel monitoring was implemented based on a
>> backoff algorithm: the GLS response should be received in the
>> specified interval. Otherwise, the channel is regarded as dead
>> and failover should be taken if current channel is an active one.
>> There are several problems in the implementation: (A) On BCM5718,
>> we found when the IID (Instance ID) in the GLS command packet
>> changes from 255 to 1, the response corresponding to IID#1 never
>> comes in. It means we cannot make the unfair judgement that the
>> channel is dead when one response is missed. (B) The code's
>> readability should be improved. (C) We should do failover when
>> current channel is active one and the channel monitoring should
>> be marked as disabled before doing failover.
>> 
>> This reworks the channel monitoring to address all above issues.
>> The fields for channel monitoring is put into separate struct
>> and the state of channel monitoring is predefined. The channel
>> is regarded alive if the network controller responses to one of
>> two GLS commands or both of them in 5 seconds.
>> 
>> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
>> ---
>>  net/ncsi/internal.h    | 12 +++++++++---
>>  net/ncsi/ncsi-manage.c | 46 ++++++++++++++++++++++++++------------
>> --------
>>  net/ncsi/ncsi-rsp.c    |  2 +-
>>  3 files changed, 36 insertions(+), 24 deletions(-)
>> 
>> diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
>> index a2e3af9..d9b3705 100644
>> --- a/net/ncsi/internal.h
>> +++ b/net/ncsi/internal.h
>> @@ -187,9 +187,15 @@ struct ncsi_channel {
>>  	struct ncsi_channel_mode    modes[NCSI_MODE_MAX];
>>  	struct ncsi_channel_filter  *filters[NCSI_FILTER_MAX];
>>  	struct ncsi_channel_stats   stats;
>> -	struct timer_list           timer;	/* Link monitor
>> timer  */
>> -	bool                        enabled;	/* Timer is
>> enabled    */
>> -	unsigned int                timeout;	/* Times of
>> timeout    */
>> +	struct {
>> +		struct timer_list   timer;	/* Link monitor
>> timer  */
>> +		bool                enabled;	/* Timer is
>> enabled    */
>> +		unsigned int        state;	/* Link monitor
>> state  */
>
>These comments don't add much value.
>

Joel, thanks for your comments. I agree and will drop those comments
in next revision.

>> +#define NCSI_CHANNEL_MONITOR_START	0
>> +#define NCSI_CHANNEL_MONITOR_RETRY	1
>> +#define NCSI_CHANNEL_MONITOR_WAIT	2
>> +#define NCSI_CHANNEL_MONITOR_WAIT_MAX	5
>> +	} monitor;
>>  	struct list_head            node;
>>  	struct list_head            link;
>>  };
>> diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
>> index 3019432..5813ebe 100644
>> --- a/net/ncsi/ncsi-manage.c
>> +++ b/net/ncsi/ncsi-manage.c
>> @@ -165,13 +165,13 @@ static void ncsi_channel_monitor(unsigned long
>> data)
>>  	struct ncsi_dev_priv *ndp = np->ndp;
>>  	struct ncsi_cmd_arg nca;
>>  	bool enabled;
>> -	unsigned int timeout;
>> +	unsigned int monitor_state;
>>  	unsigned long flags;
>>  	int state, ret;
>>  
>>  	spin_lock_irqsave(&nc->lock, flags);
>> -	timeout = nc->timeout;
>> -	enabled = nc->enabled;
>> +	monitor_state = nc->monitor.state;
>> +	enabled = nc->monitor.enabled;
>>  	spin_unlock_irqrestore(&nc->lock, flags);
>>  
>>  	if (!enabled || !list_empty(&nc->link))
>> @@ -181,7 +181,9 @@ static void ncsi_channel_monitor(unsigned long
>> data)
>>  	if (state != NCSI_CHANNEL_INACTIVE && state !=
>> NCSI_CHANNEL_ACTIVE)
>>  		return;
>>  
>> -	if (!(timeout % 2)) {
>> +	switch (monitor_state) {
>> +	case NCSI_CHANNEL_MONITOR_START:
>> +	case NCSI_CHANNEL_MONITOR_RETRY:
>>  		nca.ndp = ndp;
>>  		nca.package = np->id;
>>  		nca.channel = nc->id;
>> @@ -193,15 +195,19 @@ static void ncsi_channel_monitor(unsigned long
>> data)
>>  				   ret);
>>  			return;
>>  		}
>> -	}
>>  
>> -	if (timeout + 1 >= 3) {
>> +		break;
>> +	case NCSI_CHANNEL_MONITOR_WAIT ...
>> NCSI_CHANNEL_MONITOR_WAIT_MAX:
>
>It's not clear why you have the MAX_WAIT state if it's doing the same
>thing as WAIT.
>

@nc->monitor.state tracks two types of information: (A) the current state
of the channel monitor; (B) the time since last GLS reponse was received
successfully. The general idea behind is: we have two GLS commands sent
and at least one response should be received in 5 seconds, which is
represented by NCSI_CHANNEL_MONITOR_WAIT_MAX.

>> +		break;
>> +	default:
>>  		if (!(ndp->flags & NCSI_DEV_HWA) &&
>> -		    state == NCSI_CHANNEL_ACTIVE)
>> +		    state == NCSI_CHANNEL_ACTIVE) {
>>  			ncsi_report_link(ndp, true);
>> +			ndp->flags |= NCSI_DEV_RESHUFFLE;
>> +		}
>>  
>>  		spin_lock_irqsave(&ndp->lock, flags);
>> -		atomic_set(&nc->state, NCSI_CHANNEL_INACTIVE);
>> +		atomic_set(&nc->state, NCSI_CHANNEL_ACTIVE);
>>  		list_add_tail_rcu(&nc->link, &ndp->channel_queue);
>>  		spin_unlock_irqrestore(&ndp->lock, flags);
>>  		ncsi_process_next_channel(ndp);
>> @@ -209,10 +215,9 @@ static void ncsi_channel_monitor(unsigned long
>> data)
>>  	}
>>  
>>  	spin_lock_irqsave(&nc->lock, flags);
>> -	nc->timeout = timeout + 1;
>> -	nc->enabled = true;
>> +	nc->monitor.state++;
>>  	spin_unlock_irqrestore(&nc->lock, flags);
>> -	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout /
>> 2)));
>> +	mod_timer(&nc->monitor.timer, jiffies + HZ);
>>  }
>>  
>>  void ncsi_start_channel_monitor(struct ncsi_channel *nc)
>> @@ -220,12 +225,12 @@ void ncsi_start_channel_monitor(struct
>> ncsi_channel *nc)
>>  	unsigned long flags;
>>  
>>  	spin_lock_irqsave(&nc->lock, flags);
>> -	WARN_ON_ONCE(nc->enabled);
>> -	nc->timeout = 0;
>> -	nc->enabled = true;
>> +	WARN_ON_ONCE(nc->monitor.enabled);
>> +	nc->monitor.enabled = true;
>> +	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
>>  	spin_unlock_irqrestore(&nc->lock, flags);
>>  
>> -	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout /
>> 2)));
>> +	mod_timer(&nc->monitor.timer, jiffies + HZ);
>>  }
>>  
>>  void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
>> @@ -233,14 +238,14 @@ void ncsi_stop_channel_monitor(struct
>> ncsi_channel *nc)
>>  	unsigned long flags;
>>  
>>  	spin_lock_irqsave(&nc->lock, flags);
>> -	if (!nc->enabled) {
>> +	if (!nc->monitor.enabled) {
>>  		spin_unlock_irqrestore(&nc->lock, flags);
>>  		return;
>>  	}
>> -	nc->enabled = false;
>> +	nc->monitor.enabled = false;
>>  	spin_unlock_irqrestore(&nc->lock, flags);
>>  
>> -	del_timer_sync(&nc->timer);
>> +	del_timer_sync(&nc->monitor.timer);
>>  }
>>  
>>  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
>> @@ -269,8 +274,9 @@ struct ncsi_channel *ncsi_add_channel(struct
>> ncsi_package *np, unsigned char id)
>>  	nc->id = id;
>>  	nc->package = np;
>>  	atomic_set(&nc->state, NCSI_CHANNEL_INACTIVE);
>> -	nc->enabled = false;
>> -	setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned
>> long)nc);
>> +	nc->monitor.enabled = false;
>> +	setup_timer(&nc->monitor.timer,
>> +		    ncsi_channel_monitor, (unsigned long)nc);
>>  	spin_lock_init(&nc->lock);
>>  	INIT_LIST_HEAD(&nc->link);
>>  	for (index = 0; index < NCSI_CAP_MAX; index++)
>> diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
>> index 1429e8b..eec88c7 100644
>> --- a/net/ncsi/ncsi-rsp.c
>> +++ b/net/ncsi/ncsi-rsp.c
>> @@ -322,7 +322,7 @@ static int ncsi_rsp_handler_gls(struct
>> ncsi_request *nr)
>>  
>>  	/* Reset the channel monitor if it has been enabled */
>>  	spin_lock_irqsave(&nc->lock, flags);
>> -	nc->timeout = 0;
>> +	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
>>  	spin_unlock_irqrestore(&nc->lock, flags);
>>  
>>  	return 0;
>> 
>



More information about the openbmc mailing list