[PATCH] arch/ppc/8xx_io/enet.c, version 2

Tue Oct 29 02:17:07 EST 2002

Hi Jocke,

I just had a look at the code. So far it is running fine. on my board. :)
Following question:
In scc-enet_rx you use invalidate_dcache_range() for the skb data area.
Why don't you do the same in scc_enet_init() for the first set of skb's?
Isn't it required too at this place.

Thanks, Stephan

> -----Original Message-----
> From: owner-linuxppc-embedded at lists.linuxppc.org
> [mailto:owner-linuxppc-embedded at lists.linuxppc.org]On Behalf Of Joakim
> Tjernlund
> Sent: Donnerstag, 24. Oktober 2002 16:24
> To: linuxppc-embedded at lists.linuxppc.org
> Cc: scop at digitel.com.br; thomas at corelatus.com
> Subject: [PATCH] arch/ppc/8xx_io/enet.c, version 2
>
>
>
> Hi
>
> This is the second version of my patch that removes the expensive
> memcpy of
> received
> ethernet frames in interrupt context.
>
> I have 1 report(from Ricardo Scop) of 20% increase in
> packets/second, packet
> size 1500 when
> applied to 8260 FEC(needs to be applied manually). But min packet size
> decreased with 10 %.
> This version should fix the 10% decrease case.
>
> This patch could be adapted 8xx_io/fec.c and 8260_io/enet.c and
> 8260/fcc_enet.c with little effort.
>
> Better fix a bug in set_multicast_list(), move the dmi list forward when
>     walking it(dmi = dmi->next;)
>
> New stuff:
>    - Configrable: copy small packets or pass them directly, see
> COPY_SMALL_FRAMES in code.
>    - Collision reporting fix form Thomas Lange.
>    - Don't pass receive frames which has error upwards.
>    - Report RX_OV errors as fifo errors, not crc errors.
>
> Please test and report any problems and performace improvements.
>
>         Jocke
>
> --- arch/ppc/8xx_io/enet.c.org	Mon Oct 21 14:35:59 2002
> +++ arch/ppc/8xx_io/enet.c	Thu Oct 24 15:48:25 2002
> @@ -34,7 +34,6 @@
>  #include <linux/ioport.h>
>  #include <linux/slab.h>
>  #include <linux/interrupt.h>
> -#include <linux/pci.h>
>  #include <linux/init.h>
>  #include <linux/delay.h>
>  #include <linux/netdevice.h>
> @@ -86,6 +85,14 @@
>   * All functions are directly controlled using I/O pins.  See
> <asm/commproc.h>.
>   */
>
> +/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
> packets
> + * at a small performance hit. Note performance testing needed */
> +#define COPY_SMALL_FRAMES 1
> +
> +#ifdef COPY_SMALL_FRAMES
> +  #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
> internal use */
> +#endif
> +
>  /* The transmitter timeout
>   */
>  #define TX_TIMEOUT	(2*HZ)
> @@ -97,19 +104,17 @@
>   * the skbuffer directly.
>   */
>  #ifdef CONFIG_ENET_BIG_BUFFERS
> -#define CPM_ENET_RX_PAGES	32
> -#define CPM_ENET_RX_FRSIZE	2048
> -#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE		64	/* Must be power of two */
> -#define TX_RING_MOD_MASK	63	/*   for this to work */
> +  #define RX_RING_SIZE		64
> +  #define TX_RING_SIZE		64	/* Must be power of
> two for this to work */
>  #else
> -#define CPM_ENET_RX_PAGES	4
> -#define CPM_ENET_RX_FRSIZE	2048
> -#define CPM_ENET_RX_FRPPG	(PAGE_SIZE / CPM_ENET_RX_FRSIZE)
> -#define RX_RING_SIZE		(CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
> -#define TX_RING_SIZE		8	/* Must be power of two */
> -#define TX_RING_MOD_MASK	7	/*   for this to work */
> +  #define RX_RING_SIZE		8
> +  #define TX_RING_SIZE		8	/* Must be power of
> two for this to work */
> +#endif
> +#define TX_RING_MOD_MASK	(TX_RING_SIZE-1)
> +
> +#define CPM_ENET_RX_FRSIZE	1600 /* must be a multiple of cache line */
> +#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
> +    #error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
>  #endif
>
>  /* The CPM stores dest/src/type, data, and checksum for receive packets.
> @@ -143,7 +148,7 @@
>  	/* Virtual addresses for the receive buffers because we can't
>  	 * do a __va() on them anymore.
>  	 */
> -	unsigned char *rx_vaddr[RX_RING_SIZE];
> +	void    *rx_vaddr[RX_RING_SIZE];
>  	struct	net_device_stats stats;
>  	uint	tx_full;
>  	spinlock_t lock;
> @@ -370,11 +375,11 @@
>
>  		cep->stats.tx_packets++;
>
> -		/* Deferred means some collisions occurred during transmit,
> -		 * but we eventually sent the packet OK.
> -		 */
> -		if (bdp->cbd_sc & BD_ENET_TX_DEF)
> -			cep->stats.collisions++;
> +		/* Check retry counter, i.e. collision counter */
> +		if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
> +			/* Note that counter cannot go higher than 15 */
> +			cep->stats.collisions+=(bdp->cbd_sc &
> BD_ENET_TX_RCMASK)>>2;
> +		}
>
>  		/* Free the sk buffer associated with this last transmit.
>  		*/
> @@ -449,6 +454,7 @@
>  	struct	scc_enet_private *cep;
>  	volatile cbd_t	*bdp;
>  	struct	sk_buff *skb;
> +	struct	sk_buff *skb_tmp;
>  	ushort	pkt_len;
>
>  	cep = (struct scc_enet_private *)dev->priv;
> @@ -461,7 +467,8 @@
>  for (;;) {
>  	if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
>  		break;
> -
> +
> +#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
> BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
>  #ifndef final_version
>  	/* Since we have allocated space to hold a complete frame, both
>  	 * the first and last indicators should be set.
> @@ -470,51 +477,62 @@
>  		(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
>  			printk("CPM ENET: rcv is not first+last\n");
>  #endif
> -
> -	/* Frame too long or too short.
> -	*/
> -	if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
> -		cep->stats.rx_length_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame alignment */
> -		cep->stats.rx_frame_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
> -		cep->stats.rx_crc_errors++;
> -	if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
> -		cep->stats.rx_crc_errors++;
> -
> -	/* Report late collisions as a frame error.
> -	 * On this error, the BD is closed, but we don't know what we
> -	 * have in the buffer.  So, just drop this frame on the floor.
> -	 */
> -	if (bdp->cbd_sc & BD_ENET_RX_CL) {
> -		cep->stats.rx_frame_errors++;
> -	}
> -	else {
> -
> +	if(bdp->cbd_sc & RX_BD_ERRORS){ /* Receive errors ? */
> +		cep->stats.rx_errors++;
> +		if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
> /* Frame too long or
> too short. */
> +			cep->stats.rx_length_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_NO)	/* Frame
> alignment */
> +			cep->stats.rx_frame_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_CR)	/* CRC Error */
> +			cep->stats.rx_crc_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_OV)	/* FIFO overrun */
> +			cep->stats.rx_fifo_errors++;
> +		if (bdp->cbd_sc & BD_ENET_RX_CL)        /* Late collision */
> +			cep->stats.collisions++;
> +	} else {
>  		/* Process the incoming frame.
>  		*/
>  		cep->stats.rx_packets++;
>  		pkt_len = bdp->cbd_datlen;
>  		cep->stats.rx_bytes += pkt_len;
> -
> -		/* This does 16 byte alignment, much more than we need.
> -		 * The packet length includes FCS, but we don't want to
> -		 * include that when passing upstream as it messes up
> -		 * bridging applications.
> -		 */
> -		skb = dev_alloc_skb(pkt_len-4);
> -
> -		if (skb == NULL) {
> +		pkt_len -= 4; /* The packet length includes FCS,
> but we don't want to
> +			       * include that when passing upstream
> as it messes up
> +			       * bridging applications. Is this
> still true ???? */
> +#ifdef COPY_SMALL_FRAMES
> +		/* Allocate the next buffer now so we are sure to
> have one when needed
> +		 * This does 16 byte alignment, exactly what we
> need(L1_CACHE aligned). */
> +		if(pkt_len < RX_COPYBREAK)
> +			skb_tmp = __dev_alloc_skb(pkt_len,
> GFP_ATOMIC|GFP_DMA);
> +		else
> +#endif
> +			skb_tmp =
> __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC|GFP_DMA);
> +
> +		if (skb_tmp == NULL) {
>  			printk("%s: Memory squeeze, dropping
> packet.\n", dev->name);
>  			cep->stats.rx_dropped++;
> -		}
> -		else {
> +
> +		} else {
> +			skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
> +			invalidate_dcache_range((unsigned long) skb->data,
> +						(unsigned long)
> skb->data + pkt_len);
> +
> +#ifdef COPY_SMALL_FRAMES
> +			if(pkt_len < RX_COPYBREAK) {
> +				typeof(skb) skb_swap = skb;
> +				memcpy(skb_put(skb_tmp, pkt_len),
> skb->data, pkt_len);
> +				/* swap the skb and skb_tmp */
> +				skb = skb_tmp;
> +				skb_tmp = skb_swap;
> +			}
> +			else
> +#endif
> +			{
> +				skb_put(skb, pkt_len);  /* Make room */
> +				bdp->cbd_bufaddr = __pa(skb_tmp->data);
> +				cep->rx_vaddr[bdp -
> cep->rx_bd_base] = skb_tmp;
> +			}
>  			skb->dev = dev;
> -			skb_put(skb,pkt_len-4);	/* Make room */
> -			eth_copy_and_sum(skb,
> -				cep->rx_vaddr[bdp - cep->rx_bd_base],
> -				pkt_len-4, 0);
> -			skb->protocol=eth_type_trans(skb,dev);
> +			skb->protocol=eth_type_trans(skb, dev);
>  			netif_rx(skb);
>  		}
>  	}
> @@ -608,7 +626,7 @@
>
>  			dmi = dev->mc_list;
>
> -			for (i=0; i<dev->mc_count; i++) {
> +			for (i=0; i<dev->mc_count; i++, dmi = dmi->next) {
>
>  				/* Only support group multicast for now.
>  				*/
> @@ -647,8 +665,7 @@
>  	struct net_device *dev;
>  	struct scc_enet_private *cep;
>  	int i, j, k;
> -	unsigned char	*eap, *ba;
> -	dma_addr_t	mem_addr;
> +	unsigned char	*eap;
>  	bd_t		*bd;
>  	volatile	cbd_t		*bdp;
>  	volatile	cpm8xx_t	*cp;
> @@ -839,22 +856,13 @@
>
>  	bdp = cep->rx_bd_base;
>  	k = 0;
> -	for (i=0; i<CPM_ENET_RX_PAGES; i++) {
> -
> -		/* Allocate a page.
> -		*/
> -		ba = (unsigned char *)consistent_alloc(GFP_KERNEL,
> PAGE_SIZE, &mem_addr);
> -
> -		/* Initialize the BD for every fragment in the page.
> -		*/
> -		for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
> -			bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
> -			bdp->cbd_bufaddr = mem_addr;
> -			cep->rx_vaddr[k++] = ba;
> -			mem_addr += CPM_ENET_RX_FRSIZE;
> -			ba += CPM_ENET_RX_FRSIZE;
> -			bdp++;
> -		}
> +	/* Initialize the BDs. */
> +	for (j=0; j < RX_RING_SIZE; j++) {
> +		struct	sk_buff * skb = __dev_alloc_skb(CPM_ENET_RX_FRSIZE,
> GFP_ATOMIC|GFP_DMA);
> +		bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
> +		bdp->cbd_bufaddr = __pa(skb->data);
> +		cep->rx_vaddr[k++] = skb;
> +		bdp++;
>  	}
>
>  	/* Set the last buffer to wrap.
>
>
>
>

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/