[PATCH net-next v3 6/6] net/ncsi: Configure multi-package, multi-channel modes with failover

Samuel Mendoza-Jonas sam at mendozajonas.com
Fri Nov 9 13:17:21 AEDT 2018


On Thu, 2018-11-08 at 22:48 +0000, Justin.Lee1 at Dell.com wrote:
> Hi Samuel,
> 
> For multi-package and multi-channel case, channel seems to be select correctly. Expect that,
> I still see the timing issue for back-to-back netlink command. Due to that, channel might be
> set to invisible state. Please refer to ncsi0 and ncsi2 below. The channel state is set to 3.
> 
> cat /sys/kernel/debug/ncsi_protocol/ncsi_device_status
> IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> =====================================================================
>   2   eth2   ncsi0  000 000 1  1  1  1  1  0  0  3  0  1  1  1  0  1
>   2   eth2   ncsi1  000 001 0  0  1  1  1  0  0  1  0  1  1  1  0  1
>   2   eth2   ncsi2  001 000 1  0  1  1  1  1  0  3  0  1  1  1  0  1
>   2   eth2   ncsi3  001 001 1  0  1  1  1  1  0  2  1  1  1  1  0  1
> =====================================================================
> MP: Multi-mode Package  WP: Whitelist Package
> MC: Multi-mode Channel  WC: Whitelist Channel
> PC: Primary Channel     CS: Channel State IA/A/IV 1/2/3
> PS: Poll Status         LS: Link Status
> RU: Running             CR: Carrier OK
> NQ: Queue Stopped       HA: Hardware Arbitration
> 
> The timing issue is not only happening in application. If I use using the following way
> to send the request, I can see the issue as well. 
> 
> ncsi_netlink -l 2 -a 0x01 -m; ncsi_netlink -l 2 -p 0 -b 0x03 -m; ncsi_netlink -l 2 -p 1 -b 0x00 -m;
> ncsi_netlink -l 2 -a 0x03 -m; ncsi_netlink -l 2 -p 0 -b 0x00 -m; ncsi_netlink -l 2 -p 1 -b 0x03 -m;

This actually recreates for me as well; I see now what you mean about
channels getting stuck in the invisible state. I believe I've narrowed
down the issue. I've pasted an additional patch below if you are able to
test on your machine.

> 
> 
> Also, there is one issue below for non-multi-package/non-multi-channel case.
> 
> Thanks,
> Justin
> 
> 
> > @@ -1008,32 +1164,49 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
> >  
> >  			ncm = &nc->modes[NCSI_MODE_LINK];
> >  			if (ncm->data[2] & 0x1) {
> > -				spin_unlock_irqrestore(&nc->lock, flags);
> >  				found = nc;
> > -				goto out;
> > +				with_link = true;
> >  			}
> >  
> > -			spin_unlock_irqrestore(&nc->lock, flags);
> > +			/* If multi_channel is enabled configure all valid
> > +			 * channels whether or not they currently have link
> > +			 * so they will have AENs enabled.
> > +			 */
> > +			if (with_link || np->multi_channel) {
> > +				spin_lock_irqsave(&ndp->lock, flags);
> > +				list_add_tail_rcu(&nc->link,
> > +						  &ndp->channel_queue);
> > +				spin_unlock_irqrestore(&ndp->lock, flags);
> > +
> > +				netdev_dbg(ndp->ndev.dev,
> > +					   "NCSI: Channel %u added to queue (link %s)\n",
> > +					   nc->id,
> > +					   ncm->data[2] & 0x1 ? "up" : "down");
> > +			}
> > +
> > +			spin_unlock_irqrestore(&nc->lock, cflags);
> > +
> > +			if (with_link && !np->multi_channel)
> > +				break;
> 
> The line needs to change to "goto found". If not, all channels with link will be added
> even if the multi-channel is not enabled for that package. The ncsi1 below is enabled.
> There is no netlink command sent to enable multi-package or multi-channel.
> 
> IFIDX IFNAME NAME   PID CID RX TX MP MC WP WC PC CS PS LS RU CR NQ HA
> =====================================================================
>   2   eth2   ncsi0  000 000 1  1  0  0  1  1  0  2  1  1  1  1  0  1
>   2   eth2   ncsi1  000 001 1  0  0  0  1  1  0  2  1  1  1  1  0  1
>   2   eth2   ncsi2  001 000 0  0  0  0  1  1  0  1  0  1  1  1  0  1
>   2   eth2   ncsi3  001 001 0  0  0  0  1  1  0  1  0  1  1  1  0  1
> =====================================================================
> MP: Multi-mode Package  WP: Whitelist Package
> MC: Multi-mode Channel  WC: Whitelist Channel
> PC: Primary Channel     CS: Channel State IA/A/IV 1/2/3
> PS: Poll Status         LS: Link Status
> RU: Running             CR: Carrier OK
> NQ: Queue Stopped       HA: Hardware Arbitration
> 
> >  		}
> > +		if (with_link && !ndp->multi_package)
> > +			break;
> >  	}
> 
> found:

This *may* be part of the above issue, I don't see this in normal
operation. The combination of (with_link && !np->multi_channel) and
(with_link && !ndp->multi_package) should prevent additional channels
being added without the need for 'goto found'. Please let me know if you
still see it with the extra patch.

> 
> After applying this change, I notice that if there is no link available to BMC when BMC
> starts, NC-SI can't properly configure channel once I plug in the Ethernet cable. 
> 
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_aen_handler_lsc() - pkg 0 ch 0 state up
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_aen_handler_lsc() - had_link 0, has_link 1, chained 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_stop_channel_monitor() - pkg 0 ch 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_process_next_channel()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_process_next_channel() - pkg 0 ch 0 INVISIBLE
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_process_next_channel() - suspending pkg 0 ch 0
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0400 select
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0403 dc
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0404 deselect
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0405 done
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_rsp_handler_dp() - pkg 0 ch 0 INACTIVE
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_rsp_handler_dp() - pkg 0 ch 1 INACTIVE
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_dev_work()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 state 0406 deselect
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_suspend_channel() - pkg 0 ch 0 INACTIVE
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_process_next_channel()
> npcm7xx-emc f0825000.eth eth2: NCSI: ncsi_process_next_channel() - No more channels to process
> npcm7xx-emc f0825000.eth eth2: NCSI interface down

Good find, there was a corner case in the LSC AEN handler changes that
led to this, I've fixed this in the patch as well. Thanks for testing!


>From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Samuel Mendoza-Jonas <sam at mendozajonas.com>
Date: Fri, 9 Nov 2018 13:11:03 +1100
Subject: [PATCH] net/ncsi: Reset state fixes, single-channel LSC

---
 net/ncsi/ncsi-aen.c    |  8 +++++---
 net/ncsi/ncsi-manage.c | 19 +++++++++++++++----
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index 39c2e9eea2ba..034cb1dc5566 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -93,14 +93,16 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
 	if ((had_link == has_link) || chained)
 		return 0;
 
-	if (!ndp->multi_package && !nc->package->multi_channel) {
-		if (had_link)
-			ndp->flags |= NCSI_DEV_RESHUFFLE;
+	if (!ndp->multi_package && !nc->package->multi_channel && had_link) {
+		ndp->flags |= NCSI_DEV_RESHUFFLE;
 		ncsi_stop_channel_monitor(nc);
 		spin_lock_irqsave(&ndp->lock, flags);
 		list_add_tail_rcu(&nc->link, &ndp->channel_queue);
 		spin_unlock_irqrestore(&ndp->lock, flags);
 		return ncsi_process_next_channel(ndp);
+	} else {
+		/* Configured channel came up */
+		return 0;
 	}
 
 	if (had_link) {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index fa3c2144f5ba..92e59f07f9a7 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1063,17 +1063,17 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 	case ncsi_dev_state_config_done:
 		netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
 			   nc->id);
+		spin_lock_irqsave(&nc->lock, flags);
+		nc->state = NCSI_CHANNEL_ACTIVE;
+
 		if (ndp->flags & NCSI_DEV_RESET) {
 			/* A reset event happened during config, start it now */
-			spin_lock_irqsave(&nc->lock, flags);
 			nc->reconfigure_needed = false;
 			spin_unlock_irqrestore(&nc->lock, flags);
-			nd->state = ncsi_dev_state_functional;
 			ncsi_reset_dev(nd);
 			break;
 		}
 
-		spin_lock_irqsave(&nc->lock, flags);
 		if (nc->reconfigure_needed) {
 			/* This channel's configuration has been updated
 			 * part-way during the config state - start the
@@ -1092,7 +1092,6 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
 			break;
 		}
 
-		nc->state = NCSI_CHANNEL_ACTIVE;
 		if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
 			hot_nc = nc;
 		} else {
@@ -1803,6 +1802,18 @@ int ncsi_reset_dev(struct ncsi_dev *nd)
 			spin_unlock_irqrestore(&ndp->lock, flags);
 			return 0;
 		}
+	} else {
+		switch (nd->state) {
+		case ncsi_dev_state_suspend_done:
+		case ncsi_dev_state_config_done:
+		case ncsi_dev_state_functional:
+			/* Ok */
+			break;
+		default:
+			/* Current reset operation happening */
+			spin_unlock_irqrestore(&ndp->lock, flags);
+			return 0;
+		}
 	}
 
 	if (!list_empty(&ndp->channel_queue)) {
-- 
2.19.1





More information about the openbmc mailing list