[PATCH] arch/ppc/8xx_io/enet.c, version 2
Joakim Tjernlund
Joakim.Tjernlund at lumentis.se
Fri Oct 25 00:23:31 EST 2002
Hi
This is the second version of my patch that removes the expensive memcpy of
received
ethernet frames in interrupt context.
I have one report (from Ricardo Scop) of a 20% increase in packets/second at
packet size 1500 when applied to the 8260 FEC (needs to be applied manually),
but performance for minimum-size packets decreased by 10%.
This version should fix the 10% decrease case.
This patch could be adapted to 8xx_io/fec.c, 8260_io/enet.c and
8260/fcc_enet.c with little effort.
Also fixes a bug in set_multicast_list(): the dmi list pointer is now
advanced while walking the list (dmi = dmi->next;).
New stuff:
- Configurable: copy small packets or pass them directly; see
COPY_SMALL_FRAMES in the code.
- Collision reporting fix from Thomas Lange.
- Don't pass received frames which have errors upwards.
- Report RX_OV errors as fifo errors, not crc errors.
Please test and report any problems and performance improvements.
Jocke
--- arch/ppc/8xx_io/enet.c.org Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c Thu Oct 24 15:48:25 2002
@@ -34,7 +34,6 @@
#include <linux/ioport.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
-#include <linux/pci.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
@@ -86,6 +85,14 @@
* All functions are directly controlled using I/O pins. See
<asm/commproc.h>.
*/
+/* Define COPY_SMALL_FRAMES if you want to save buffer memory for small
packets
+ * at a small performance hit. Note performance testing needed */
+#define COPY_SMALL_FRAMES 1
+
+#ifdef COPY_SMALL_FRAMES
+ #define RX_COPYBREAK (256-16) /* dev_alloc_skb() adds 16 bytes for
internal use */
+#endif
+
/* The transmitter timeout
*/
#define TX_TIMEOUT (2*HZ)
@@ -97,19 +104,17 @@
* the skbuffer directly.
*/
#ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES 32
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE 64 /* Must be power of two */
-#define TX_RING_MOD_MASK 63 /* for this to work */
+ #define RX_RING_SIZE 64
+ #define TX_RING_SIZE 64 /* Must be power of two for this to work */
#else
-#define CPM_ENET_RX_PAGES 4
-#define CPM_ENET_RX_FRSIZE 2048
-#define CPM_ENET_RX_FRPPG (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
-#define TX_RING_SIZE 8 /* Must be power of two */
-#define TX_RING_MOD_MASK 7 /* for this to work */
+ #define RX_RING_SIZE 8
+ #define TX_RING_SIZE 8 /* Must be power of two for this to work */
+#endif
+#define TX_RING_MOD_MASK (TX_RING_SIZE-1)
+
+#define CPM_ENET_RX_FRSIZE 1600 /* must be a multiple of cache line */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+ #error CPM_ENET_RX_FRSIZE must be a multiple of L1 cache size
#endif
/* The CPM stores dest/src/type, data, and checksum for receive packets.
@@ -143,7 +148,7 @@
/* Virtual addresses for the receive buffers because we can't
* do a __va() on them anymore.
*/
- unsigned char *rx_vaddr[RX_RING_SIZE];
+ void *rx_vaddr[RX_RING_SIZE];
struct net_device_stats stats;
uint tx_full;
spinlock_t lock;
@@ -370,11 +375,11 @@
cep->stats.tx_packets++;
- /* Deferred means some collisions occurred during transmit,
- * but we eventually sent the packet OK.
- */
- if (bdp->cbd_sc & BD_ENET_TX_DEF)
- cep->stats.collisions++;
+ /* Check retry counter, i.e. collision counter */
+ if (bdp->cbd_sc & BD_ENET_TX_RCMASK){
+ /* Note that counter cannot go higher than 15 */
+ cep->stats.collisions+=(bdp->cbd_sc & BD_ENET_TX_RCMASK)>>2;
+ }
/* Free the sk buffer associated with this last transmit.
*/
@@ -449,6 +454,7 @@
struct scc_enet_private *cep;
volatile cbd_t *bdp;
struct sk_buff *skb;
+ struct sk_buff *skb_tmp;
ushort pkt_len;
cep = (struct scc_enet_private *)dev->priv;
@@ -461,7 +467,8 @@
for (;;) {
if (bdp->cbd_sc & BD_ENET_RX_EMPTY)
break;
-
+
+#define RX_BD_ERRORS (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_NO |
BD_ENET_RX_CR | BD_ENET_RX_OV | BD_ENET_RX_CL)
#ifndef final_version
/* Since we have allocated space to hold a complete frame, both
* the first and last indicators should be set.
@@ -470,51 +477,62 @@
(BD_ENET_RX_FIRST | BD_ENET_RX_LAST))
printk("CPM ENET: rcv is not first+last\n");
#endif
-
- /* Frame too long or too short.
- */
- if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
- cep->stats.rx_length_errors++;
- if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */
- cep->stats.rx_frame_errors++;
- if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */
- cep->stats.rx_crc_errors++;
- if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */
- cep->stats.rx_crc_errors++;
-
- /* Report late collisions as a frame error.
- * On this error, the BD is closed, but we don't know what we
- * have in the buffer. So, just drop this frame on the floor.
- */
- if (bdp->cbd_sc & BD_ENET_RX_CL) {
- cep->stats.rx_frame_errors++;
- }
- else {
-
+ if(bdp->cbd_sc & RX_BD_ERRORS){ /* Receive errors ? */
+ cep->stats.rx_errors++;
+ if (bdp->cbd_sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) /* Frame too long or
too short. */
+ cep->stats.rx_length_errors++;
+ if (bdp->cbd_sc & BD_ENET_RX_NO) /* Frame alignment */
+ cep->stats.rx_frame_errors++;
+ if (bdp->cbd_sc & BD_ENET_RX_CR) /* CRC Error */
+ cep->stats.rx_crc_errors++;
+ if (bdp->cbd_sc & BD_ENET_RX_OV) /* FIFO overrun */
+ cep->stats.rx_fifo_errors++;
+ if (bdp->cbd_sc & BD_ENET_RX_CL) /* Late collision */
+ cep->stats.collisions++;
+ } else {
/* Process the incoming frame.
*/
cep->stats.rx_packets++;
pkt_len = bdp->cbd_datlen;
cep->stats.rx_bytes += pkt_len;
-
- /* This does 16 byte alignment, much more than we need.
- * The packet length includes FCS, but we don't want to
- * include that when passing upstream as it messes up
- * bridging applications.
- */
- skb = dev_alloc_skb(pkt_len-4);
-
- if (skb == NULL) {
+ pkt_len -= 4; /* The packet length includes FCS, but we don't want to
+ * include that when passing upstream as it messes up
+ * bridging applications. Is this still true ???? */
+#ifdef COPY_SMALL_FRAMES
+ /* Allocate the next buffer now so we are sure to have one when needed
+ * This does 16 byte alignment, exactly what we need(L1_CACHE aligned). */
+ if(pkt_len < RX_COPYBREAK)
+ skb_tmp = __dev_alloc_skb(pkt_len, GFP_ATOMIC|GFP_DMA);
+ else
+#endif
+ skb_tmp = __dev_alloc_skb(CPM_ENET_RX_FRSIZE, GFP_ATOMIC|GFP_DMA);
+
+ if (skb_tmp == NULL) {
printk("%s: Memory squeeze, dropping packet.\n", dev->name);
cep->stats.rx_dropped++;
- }
- else {
+
+ } else {
+ skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
+ invalidate_dcache_range((unsigned long) skb->data,
+ (unsigned long) skb->data + pkt_len);
+
+#ifdef COPY_SMALL_FRAMES
+ if(pkt_len < RX_COPYBREAK) {
+ typeof(skb) skb_swap = skb;
+ memcpy(skb_put(skb_tmp, pkt_len), skb->data, pkt_len);
+ /* swap the skb and skb_tmp */
+ skb = skb_tmp;
+ skb_tmp = skb_swap;
+ }
+ else
+#endif
+ {
+ skb_put(skb, pkt_len); /* Make room */
+ bdp->cbd_bufaddr = __pa(skb_tmp->data);
+ cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
+ }
skb->dev = dev;
- skb_put(skb,pkt_len-4); /* Make room */
- eth_copy_and_sum(skb,
- cep->rx_vaddr[bdp - cep->rx_bd_base],
- pkt_len-4, 0);
- skb->protocol=eth_type_trans(skb,dev);
+ skb->protocol=eth_type_trans(skb, dev);
netif_rx(skb);
}
}
@@ -608,7 +626,7 @@
dmi = dev->mc_list;
- for (i=0; i<dev->mc_count; i++) {
+ for (i=0; i<dev->mc_count; i++, dmi = dmi->next) {
/* Only support group multicast for now.
*/
@@ -647,8 +665,7 @@
struct net_device *dev;
struct scc_enet_private *cep;
int i, j, k;
- unsigned char *eap, *ba;
- dma_addr_t mem_addr;
+ unsigned char *eap;
bd_t *bd;
volatile cbd_t *bdp;
volatile cpm8xx_t *cp;
@@ -839,22 +856,13 @@
bdp = cep->rx_bd_base;
k = 0;
- for (i=0; i<CPM_ENET_RX_PAGES; i++) {
-
- /* Allocate a page.
- */
- ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
-
- /* Initialize the BD for every fragment in the page.
- */
- for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
- bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
- bdp->cbd_bufaddr = mem_addr;
- cep->rx_vaddr[k++] = ba;
- mem_addr += CPM_ENET_RX_FRSIZE;
- ba += CPM_ENET_RX_FRSIZE;
- bdp++;
- }
+ /* Initialize the BDs. */
+ for (j=0; j < RX_RING_SIZE; j++) {
+ struct sk_buff * skb = __dev_alloc_skb(CPM_ENET_RX_FRSIZE,
GFP_ATOMIC|GFP_DMA);
+ bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
+ bdp->cbd_bufaddr = __pa(skb->data);
+ cep->rx_vaddr[k++] = skb;
+ bdp++;
}
/* Set the last buffer to wrap.
** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/
More information about the Linuxppc-embedded
mailing list