[PATCH v4.7 6/7] net/ncsi: Rework the channel monitoring

Joel Stanley joel at jms.id.au
Wed Sep 28 15:50:32 AEST 2016


On Wed, 2016-09-28 at 12:48 +1000, Gavin Shan wrote:
> The original NCSI channel monitoring was implemented based on a
> backoff algorithm: the GLS response should be received in the
> specified interval. Otherwise, the channel is regarded as dead
> and failover should be taken if current channel is an active one.
> There are several problems in the implementation: (A) On BCM5718,
> we found when the IID (Instance ID) in the GLS command packet
> changes from 255 to 1, the response corresponding to IID#1 never
> comes in. It means we cannot make the unfair judgement that the
> channel is dead when one response is missed. (B) The code's
> readability should be improved. (C) We should do failover when
> current channel is active one and the channel monitoring should
> be marked as disabled before doing failover.
> 
> This reworks the channel monitoring to address all above issues.
> The fields for channel monitoring is put into separate struct
> and the state of channel monitoring is predefined. The channel
> is regarded alive if the network controller responses to one of
> two GLS commands or both of them in 5 seconds.
> 
> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
> ---
>  net/ncsi/internal.h    | 12 +++++++++---
>  net/ncsi/ncsi-manage.c | 46 ++++++++++++++++++++++++++------------
> --------
>  net/ncsi/ncsi-rsp.c    |  2 +-
>  3 files changed, 36 insertions(+), 24 deletions(-)
> 
> diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
> index a2e3af9..d9b3705 100644
> --- a/net/ncsi/internal.h
> +++ b/net/ncsi/internal.h
> @@ -187,9 +187,15 @@ struct ncsi_channel {
>  	struct ncsi_channel_mode    modes[NCSI_MODE_MAX];
>  	struct ncsi_channel_filter  *filters[NCSI_FILTER_MAX];
>  	struct ncsi_channel_stats   stats;
> -	struct timer_list           timer;	/* Link monitor
> timer  */
> -	bool                        enabled;	/* Timer is
> enabled    */
> -	unsigned int                timeout;	/* Times of
> timeout    */
> +	struct {
> +		struct timer_list   timer;	/* Link monitor
> timer  */
> +		bool                enabled;	/* Timer is
> enabled    */
> +		unsigned int        state;	/* Link monitor
> state  */

These comments don't add much value.

> +#define NCSI_CHANNEL_MONITOR_START	0
> +#define NCSI_CHANNEL_MONITOR_RETRY	1
> +#define NCSI_CHANNEL_MONITOR_WAIT	2
> +#define NCSI_CHANNEL_MONITOR_WAIT_MAX	5
> +	} monitor;
>  	struct list_head            node;
>  	struct list_head            link;
>  };
> diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
> index 3019432..5813ebe 100644
> --- a/net/ncsi/ncsi-manage.c
> +++ b/net/ncsi/ncsi-manage.c
> @@ -165,13 +165,13 @@ static void ncsi_channel_monitor(unsigned long
> data)
>  	struct ncsi_dev_priv *ndp = np->ndp;
>  	struct ncsi_cmd_arg nca;
>  	bool enabled;
> -	unsigned int timeout;
> +	unsigned int monitor_state;
>  	unsigned long flags;
>  	int state, ret;
>  
>  	spin_lock_irqsave(&nc->lock, flags);
> -	timeout = nc->timeout;
> -	enabled = nc->enabled;
> +	monitor_state = nc->monitor.state;
> +	enabled = nc->monitor.enabled;
>  	spin_unlock_irqrestore(&nc->lock, flags);
>  
>  	if (!enabled || !list_empty(&nc->link))
> @@ -181,7 +181,9 @@ static void ncsi_channel_monitor(unsigned long
> data)
>  	if (state != NCSI_CHANNEL_INACTIVE && state !=
> NCSI_CHANNEL_ACTIVE)
>  		return;
>  
> -	if (!(timeout % 2)) {
> +	switch (monitor_state) {
> +	case NCSI_CHANNEL_MONITOR_START:
> +	case NCSI_CHANNEL_MONITOR_RETRY:
>  		nca.ndp = ndp;
>  		nca.package = np->id;
>  		nca.channel = nc->id;
> @@ -193,15 +195,19 @@ static void ncsi_channel_monitor(unsigned long
> data)
>  				   ret);
>  			return;
>  		}
> -	}
>  
> -	if (timeout + 1 >= 3) {
> +		break;
> +	case NCSI_CHANNEL_MONITOR_WAIT ...
> NCSI_CHANNEL_MONITOR_WAIT_MAX:

It's not clear why you have the MAX_WAIT state if it's doing the same
thing as WAIT.

> +		break;
> +	default:
>  		if (!(ndp->flags & NCSI_DEV_HWA) &&
> -		    state == NCSI_CHANNEL_ACTIVE)
> +		    state == NCSI_CHANNEL_ACTIVE) {
>  			ncsi_report_link(ndp, true);
> +			ndp->flags |= NCSI_DEV_RESHUFFLE;
> +		}
>  
>  		spin_lock_irqsave(&ndp->lock, flags);
> -		atomic_set(&nc->state, NCSI_CHANNEL_INACTIVE);
> +		atomic_set(&nc->state, NCSI_CHANNEL_ACTIVE);
>  		list_add_tail_rcu(&nc->link, &ndp->channel_queue);
>  		spin_unlock_irqrestore(&ndp->lock, flags);
>  		ncsi_process_next_channel(ndp);
> @@ -209,10 +215,9 @@ static void ncsi_channel_monitor(unsigned long
> data)
>  	}
>  
>  	spin_lock_irqsave(&nc->lock, flags);
> -	nc->timeout = timeout + 1;
> -	nc->enabled = true;
> +	nc->monitor.state++;
>  	spin_unlock_irqrestore(&nc->lock, flags);
> -	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout /
> 2)));
> +	mod_timer(&nc->monitor.timer, jiffies + HZ);
>  }
>  
>  void ncsi_start_channel_monitor(struct ncsi_channel *nc)
> @@ -220,12 +225,12 @@ void ncsi_start_channel_monitor(struct
> ncsi_channel *nc)
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&nc->lock, flags);
> -	WARN_ON_ONCE(nc->enabled);
> -	nc->timeout = 0;
> -	nc->enabled = true;
> +	WARN_ON_ONCE(nc->monitor.enabled);
> +	nc->monitor.enabled = true;
> +	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
>  	spin_unlock_irqrestore(&nc->lock, flags);
>  
> -	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout /
> 2)));
> +	mod_timer(&nc->monitor.timer, jiffies + HZ);
>  }
>  
>  void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
> @@ -233,14 +238,14 @@ void ncsi_stop_channel_monitor(struct
> ncsi_channel *nc)
>  	unsigned long flags;
>  
>  	spin_lock_irqsave(&nc->lock, flags);
> -	if (!nc->enabled) {
> +	if (!nc->monitor.enabled) {
>  		spin_unlock_irqrestore(&nc->lock, flags);
>  		return;
>  	}
> -	nc->enabled = false;
> +	nc->monitor.enabled = false;
>  	spin_unlock_irqrestore(&nc->lock, flags);
>  
> -	del_timer_sync(&nc->timer);
> +	del_timer_sync(&nc->monitor.timer);
>  }
>  
>  struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
> @@ -269,8 +274,9 @@ struct ncsi_channel *ncsi_add_channel(struct
> ncsi_package *np, unsigned char id)
>  	nc->id = id;
>  	nc->package = np;
>  	atomic_set(&nc->state, NCSI_CHANNEL_INACTIVE);
> -	nc->enabled = false;
> -	setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned
> long)nc);
> +	nc->monitor.enabled = false;
> +	setup_timer(&nc->monitor.timer,
> +		    ncsi_channel_monitor, (unsigned long)nc);
>  	spin_lock_init(&nc->lock);
>  	INIT_LIST_HEAD(&nc->link);
>  	for (index = 0; index < NCSI_CAP_MAX; index++)
> diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
> index 1429e8b..eec88c7 100644
> --- a/net/ncsi/ncsi-rsp.c
> +++ b/net/ncsi/ncsi-rsp.c
> @@ -322,7 +322,7 @@ static int ncsi_rsp_handler_gls(struct
> ncsi_request *nr)
>  
>  	/* Reset the channel monitor if it has been enabled */
>  	spin_lock_irqsave(&nc->lock, flags);
> -	nc->timeout = 0;
> +	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
>  	spin_unlock_irqrestore(&nc->lock, flags);
>  
>  	return 0;
> 


More information about the openbmc mailing list