Subject: [patch 15/16] AXON PCIe Network (apnet) driver

High level description:

Provides an ethernet-like interface per Axon. The Virtual Network
interface will be called apnet?. Primary functions like memory
allocation, interrupts (mailbox and msi), and DMA transfers are handled
by the base Axon driver, which is not discussed in this patch.

Pre-allocates receive SKBs and populates the data pointer in the
descriptors for the remote side to DMA into. The NAPI poll function uses
netif_receive_skb to indicate availability of the packet to the network
stack and allocates a new receive SKB for the slot.

Always pushes the data to the remote side, meaning the transmit function
of the local system locates a free descriptor on the remote system and
uses DMA to send the packet. The transmit function sends a message
(interrupt) to the remote system that acts as a receive interrupt.

The APNET driver requests from the base driver a shared memory area to
store the buffer descriptor configuration and the receive descriptors.
It needs approximately 2KB of shared memory for the default
configuration of 128 descriptors of 16 bytes each.

MAC addresses are generated randomly using random_ether_addr(), and the
driver supports the set_mac_address function to change the default MAC.
This might be useful to enable the bonding driver over apnet and to ease
dhcpd set-up as well.

The shared memory area is laid out as follows.

    -----------------
    | Configuration |
    -----------------
    | Descriptor 1  |
    -----------------
    | Descriptor 2  |
    -----------------
    | ...........   |
    -----------------
    | Descriptor n-1|
    -----------------
    | Descriptor n  |
    -----------------

    ------------------------------------------------------------------
    |             |              |                                   |
    |Ctrl/Sts 32b | Pkt. len 32b | Physical Address of pkt data 64b  |
    |             |              |                                   |
    ------------------------------------------------------------------
                            Sample Descriptor

Signed-off-by: Murali Iyer

Index: linux-2.6.23.1/drivers/net/apnet.c
===================================================================
--- /dev/null
+++ linux-2.6.23.1/drivers/net/apnet.c
@@ -0,0 +1,1045 @@
+/*
+ * Virtual ethernet device driver for IBM AXON PCIe interface
+ *
+ * Copyright (C) IBM Corporation, 2007, 2008
+ *
+ * Authors: Murali Iyer
+ *
+ * Acronym - APNET (Axon PCIe Network)
+ *
+ * APNET device driver.
+ * This driver registers as a net device and provides ethernet like interface
+ * per Axon found on the system. Driver supports little and big endian systems.
+ * The Virtual Network interface will be called apnet?.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "apnet.h" + +#define DRV_VERSION "1.00" +#define DRV_NAME "apnet" +#define DRV_DESC "IBM AXON PCIe Network Driver" + +MODULE_DESCRIPTION(DRV_DESC); +MODULE_VERSION(DRV_VERSION); +MODULE_AUTHOR("Murali Iyer "); +MODULE_LICENSE("GPL"); + +/* number of Axon devices found in the system for network interfaces */ +static int num_apnet_if; + +/* Store apnet's private data per interface */ +static struct apnet *apnet_interface[NUM_APNET]; + +/** + * apnet_ethtool_get_drvinfo - called by ethtool + * @ndev: network interface device structure + * @drvinfo: driver information + * + * Fills in device driver information for "ethtool -i apnet?" option + */ +static void apnet_ethtool_get_drvinfo(struct net_device *ndev, + struct ethtool_drvinfo *drvinfo) +{ + /* clear drvinfo structure and fill in the details */ + memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); + strncpy(drvinfo->driver, DRV_NAME, 32); + strncpy(drvinfo->version, DRV_VERSION, 32); + strncpy(drvinfo->fw_version, "not applicable", 32); + strncpy(drvinfo->bus_info, "PCIe interface", ETHTOOL_BUSINFO_LEN); +} + +/** + * apnet_ethtool_get_ringparam - called by ethtool + * @ndev: network interface device structure + * @rp: rx/tx ring paramater + * + * Fills in rx/tx descriptors information for "ethtool -g apnet?" 
option + */ +static void apnet_ethtool_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *rp) +{ + struct apnet *adev = netdev_priv(ndev); + + rp->rx_max_pending = adev->rx_cfg.slots; + rp->rx_pending = adev->rx_slot; + rp->tx_max_pending = adev->tx_cfg.slots; + rp->tx_pending = adev->tx_slot; +} + +/* match with the struct apnet_stats */ +static const char apnet_ethtool_stats[sizeof(struct apnet_stats) / sizeof(u64)]\ + [ETH_GSTRING_LEN] = { + "TX DMA Error", + "RX Poll", + "RX SKBs alloc", +}; + +/** + * apnet_ethtool_get_stats_count - called by ethtool + * @ndev: network interface device structure + * + * Returns number of statistics elements in apnet for + * "ethtool -S apnet?" option. + */ +static int apnet_ethtool_get_stats_count(struct net_device *ndev) +{ + return sizeof(struct apnet_stats) / sizeof(u64); +} + +/** + * apnet_ethtool_get_strings - called by ethtool + * @ndev: network interface device structure + * @stringsset: requested stringset type + * @buf: buffer to store strings + * + * Supported string set is ETH_SS_STATS. Fills in the statistics string + * in the provided buffer. Called by "ethtool -S apnet?" option. + */ +static void apnet_ethtool_get_strings(struct net_device *ndev, + u32 stringset, u8 *buf) +{ + if (stringset == ETH_SS_STATS) + memcpy(buf, &apnet_ethtool_stats, sizeof(apnet_ethtool_stats)); +} + +/** + * apnet_ethtool_get_stats - called by ethtool + * @ndev: network interface device structure + * @estats: ethtool statistics stucture + * @stats: buffer to fille in status values + * + * Fills in the statistics values collected by the driver to the + * provided stats buffer called by "ethtool -S apnet?" option. 
+ */ +static void apnet_ethtool_get_stats(struct net_device *ndev, + struct ethtool_stats *estats, u64 *stats) +{ + struct apnet *adev = netdev_priv(ndev); + + memcpy(stats, &adev->astats, sizeof(adev->astats)); +} + +static const struct ethtool_ops apnet_ethtool_ops = { + .get_drvinfo = apnet_ethtool_get_drvinfo, + .get_ringparam = apnet_ethtool_get_ringparam, + .get_stats_count = apnet_ethtool_get_stats_count, + .get_strings = apnet_ethtool_get_strings, + .get_ethtool_stats = apnet_ethtool_get_stats, +}; + +/** + * apnet_get_stats - get interface statistics + * @ndev: network interface device structure + * + * Returns network statistics collected by apnet interface. + */ +static struct net_device_stats *apnet_get_stats(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + struct net_device_stats *nstats = &adev->nstats; + return nstats; +} + +/** + * apnet_wait_rx_complete - called during device close + * @ndev - net device structure + * + * This function called during device close, it waits for any pending + * rx transfers, in other words complete in flight tx packets of remote. + */ +static void apnet_wait_rx_complete(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + struct apnet_bd *bd; + __be16 slot; + int timeout; + + /* wait for any pending receive packets to complete */ + for (slot = 0; slot < adev->rx_cfg.slots; slot++) { + bd = adev->rx_bd + slot; + timeout = 10; + while (timeout--) { + if (read32be(&bd->ctrl) & APNET_TX_IN_FLIGHT) { + pr_debug("%s: waiting to complete receive for " + "rx slot %d.\n", ndev->name, slot); + schedule_timeout_interruptible(\ + msecs_to_jiffies(10)); + } else + break; + } + if (!timeout) + pr_err("%s: rx slot %d didn't get picked.\n", + ndev->name, slot); + } + return; +} + +/** + * apnet_get_rx_bd - get next valid receive buffer descriptor + * @adev: apnet device structure + * + * returns bd if valid rx bd present else NULL. 
+ * Scans Receive Buffer Descriptors from the current slot and returns + * free available rx descriptor and also updates slot number. + */ +static struct apnet_bd *apnet_get_rx_bd(struct apnet *adev) +{ + struct apnet_bd *bd; + __be16 rx_slots = adev->rx_cfg.slots; + + while (rx_slots) { + if (adev->rx_slot >= adev->rx_cfg.slots) + adev->rx_slot = 0; + bd = adev->rx_bd + adev->rx_slot; + /* return first available slot */ + if (read32be(&bd->ctrl) & APNET_RX_CTRL_FULL) + break; + adev->rx_slot++; + rx_slots--; + } + if (rx_slots) + return bd; + else + return NULL; +} + +/* apnet_rx_msg - call back function when interruppted by remote system + * @msg_type - reason for call back + * @p_data - private data, usually pointer to the net_device structure + * + * returns 0 + * This is the call back function that the base driver calls when remote + * system sends message / interrupt for apnet interface +*/ +static int apnet_rx_msg(int msg_type, void *p_data) +{ + struct net_device *ndev = p_data; + struct apnet *adev = netdev_priv(ndev); + + /* protect if called even after notify disabled */ + spin_lock(&adev->rx_lock); + if (adev->astats.in_rx_poll) + goto ret; + + /* don't schedule if there are no packets to receive */ + if (!apnet_get_rx_bd(adev)) + goto ret; + + axon_disable_notify_callback(APNET_MSG_TYPE, adev->if_num); + adev->astats.in_rx_poll = 1; + wmb(); /* make sure update completes */ + netif_rx_schedule(adev->ndev); +ret: + spin_unlock(&adev->rx_lock); + return 0; + +} + +/** + * apnet_free_rx_ring - Frees any pre allocated skbs in rx buffer + * @ndev: net device structure + * + * returns none + */ +static void apnet_free_rx_ring(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + __be16 slot; + + /* free pre-allocated rx skbs */ + for (slot = 0; slot < adev->rx_cfg.slots; slot++) + if (adev->rx_skb[slot]) { + dev_kfree_skb_any(adev->rx_skb[slot]); + adev->rx_skb[slot] = NULL; + } + return; +} + +/** + * apnet_alloc_rx_skb - pre allocate a 
skb for receive + * @ndev - net device structure + * @slot - receive slot number for the skb + * + * return 0 on success, <0 on error + */ +static int apnet_alloc_rx_skb(struct net_device *ndev, __be16 slot) +{ + struct apnet *adev = netdev_priv(ndev); + struct sk_buff *skb; + struct apnet_bd *bd = adev->rx_bd; + dma_addr_t handle; + + adev->rx_skb[slot] = NULL; + skb = dev_alloc_skb(adev->rx_cfg.mtu); + if (!skb) { + pr_err("%s: out of skbs.\n", ndev->name); + return -ENOMEM; + } + bd += slot; + + if (axon_dma_map_single(adev->if_num, skb->data, adev->rx_cfg.mtu, + DMA_FROM_DEVICE, &handle)) { + pr_err("%s: out of dma mappings.\n", ndev->name); + dev_kfree_skb_any(skb); + adev->rx_skb[slot] = NULL; + return -ENOMEM; + } + write64be(handle, &bd->data_ptr); + adev->rx_skb[slot] = skb; + adev->astats.inflight_rx_skbs++; + return 0; +} + +/** + * apnet_poll - NAPI receive poll function + * @ndev - net device structure + * @budget - number of packets to process + * + * returns 0 if there are no more packets to process, otherwise 1 + * This is the main receive function that retrives and passes receive + * packets to the network stack. Updates various status, enables remote + * notification call back if there are no more packets in the buffer. + */ +static int apnet_poll(struct net_device *ndev, int *budget) +{ + int npkts = 0, quota = min(ndev->quota, *budget); + __be32 plen, data; + struct sk_buff *skb; + struct apnet *adev = netdev_priv(ndev); + struct apnet_bd *bd = apnet_get_rx_bd(adev); + + while (npkts < quota) { + if (!bd) + break; /* no valid packets to receive */ + plen = read32be(&bd->data_len); + if (unlikely(plen > adev->rx_cfg.mtu)) { + pr_err("%s: RX MTU size error. %d > %d\n", \ + ndev->name, plen, adev->rx_cfg.mtu); + adev->nstats.rx_errors++; + goto release_slot; + } + /* Will ever be empty? 
*/ + if (!(adev->rx_skb[adev->rx_slot])) { + pr_err("%s: Trying to receive empty skb at slot %d\n",\ + ndev->name, adev->rx_slot); + adev->nstats.rx_dropped++; + goto release_slot; + } + skb = adev->rx_skb[adev->rx_slot]; + mb(); /* make sure rx packet in memory */ + skb_put(skb, plen); + skb->dev = ndev; + skb->protocol = eth_type_trans(skb, ndev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + netif_receive_skb(skb); + adev->astats.inflight_rx_skbs--; + axon_dma_unmap_single(adev->if_num, bd->data_ptr, + adev->rx_cfg.mtu, DMA_FROM_DEVICE); + if (apnet_alloc_rx_skb(ndev, adev->rx_slot)) + adev->nstats.rx_dropped++; + adev->nstats.rx_packets++; + adev->nstats.rx_bytes += plen; +release_slot: + data = read32be(&bd->ctrl); + data &= ~APNET_RX_CTRL_FULL; + write32be(data, &bd->ctrl); + npkts++; /* includes good and dropped pkts */ + /* update to the next rx slot */ + adev->rx_slot++; + bd = apnet_get_rx_bd(adev); + } + *budget -= npkts; + ndev->quota -= npkts; + if (!bd) { + axon_enable_notify_callback(APNET_MSG_TYPE, adev->if_num); + netif_rx_complete(ndev); + adev->astats.in_rx_poll = 0; + mb(); /* make sure update completes */ + return 0; /* no more packets */ + } + return 1; /* more pkts are in rx buffer */ +} + +/** + * apnet_tx_ring_status - check remote system ready + * @adev: apnet device structure + * + * returns 0 if ready otherwise returns error + */ +static int apnet_tx_ring_status(struct apnet *adev) +{ + struct apnet_cfg *tx_cfg = adev->tx_base; + int rc = 0; + + if (read32be(&tx_cfg->sig) != APNET_SIG) + rc |= APNET_SIG_MISMATCH; + if (!(read16be(&tx_cfg->sts) & APNET_RX_ENABLE)) + rc |= APNET_RX_ENABLE; + return rc; +} + +/** + * apnet_get_tx_bd - get next valid transmit buffer descriptor + * @adev: apnet device structure + * + * returns bd if free tx bd present else NULL. + * Scans Transmit Buffer Descriptors from the current slot and returns + * free available tx descriptor and also updates slot number. 
+ */ +static struct apnet_bd *apnet_get_tx_bd(struct apnet *adev) +{ + struct apnet_bd *bd; + __be16 tx_slots = adev->tx_cfg.slots; + + while (tx_slots) { + if (adev->tx_slot >= adev->tx_cfg.slots) + adev->tx_slot = 0; + bd = adev->tx_bd + adev->tx_slot; + /* return first free slot */ + if (!(read32be(&bd->ctrl) & APNET_RX_CTRL_FULL)) + break; + adev->tx_slot++; + tx_slots--; + } + if (tx_slots) + return bd; + else + return NULL; +} + +/** + * apnet_tx_timout - watchdog time called when trasmit function fails + * @ndev - net device structure + * + * wakes up tx queue if free slot found. + */ +static void apnet_tx_timeout(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + + if (apnet_tx_ring_status(adev)) + return; + + if (adev->astats.tx_dma_err) /* return if we got DMA error */ + return; + + if (apnet_get_tx_bd(adev)) + netif_wake_queue(ndev); /* found free slot to xmit */ + else + axon_send_remote_notification(APNET_MSG_TYPE, adev->if_num); + return; +} + +/** + * apnet_tx_dma_complete - this function called when DMA completes + * @if_num - interface number + * @tx_status - apnet tx status structure. + * + * returns 0 on success + * This is a callback function that DMA engines calls back when it + * completes requested transfer. 
Status updated and remote notification + * sent to indicate packet availability + */ +static int apnet_tx_dma_complete(int if_num, void *tx_status) +{ + struct apnet_tx_status *tx_sts = tx_status; + + netif_tx_lock_bh(tx_sts->ndev); + axon_dma_unmap_single(if_num, + tx_sts->handle, + tx_sts->skb->len, + DMA_TO_DEVICE); + dev_kfree_skb(tx_sts->skb); + tx_sts->sts &= ~cpu_to_be32(APNET_RX_CTRL_FULL); + netif_tx_unlock_bh(tx_sts->ndev); + /* generate remote interrupt to indicate packet arrival */ + axon_send_remote_notification(APNET_MSG_TYPE, if_num); + return 0; +} + +/** + * apnet_wait_tx_dma_complete - called during device close + * @ndev - net device structure + * + * This function called during device close, it waits for any pending + * DMA transfer. + */ +static void apnet_wait_tx_dma_complete(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + __be16 slot; + int timeout; + + /* wait for any pending Tx DMAs to complete */ + for (slot = 0; slot < adev->tx_cfg.slots; slot++) { + timeout = 10; + while (timeout--) { + if (adev->tx_sts[slot].sts & \ + cpu_to_be32(APNET_RX_CTRL_FULL)) { + pr_debug("%s: waiting to complete DMA xfer to " + "tx slot %d.\n", ndev->name, slot); + schedule_timeout_interruptible(\ + msecs_to_jiffies(10)); + } else + break; + } + if (!timeout) + pr_err("%s: tx slot %d didn't complete dma.\n", + ndev->name, slot); + } + return; +} + +/** + * apnet_xmit_frame - Transmit function called by network stack + * @skb - Socket buffer holding packet to be transmitted + * @ndev - net device structure + * + * returns NETDEV_TX_OK on success, otherwise NETDEV_TX_BUSY + * This function called by network stack if it has any packet to transmit. + * Retrieves next free transmit slot, initiates DMA and updates status. + * Handles necessary error conditions as well. 
+ */ +static int apnet_xmit_frame(struct sk_buff *skb, struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + struct apnet_bd *bd; + unsigned int tx_slot; + __be32 control; + __be64 roffset; + int rc; + + rc = apnet_tx_ring_status(adev); + if (rc) { + if (rc & APNET_SIG_MISMATCH) + pr_info("%s: TX stopped, remote signature mismatch!\n", + ndev->name); + + if (rc & APNET_RX_ENABLE) + pr_info("%s: TX stopped, remote ring not ready!\n", + ndev->name); + goto error; + } + /* + * get next available free buf. desc. of the remote system, + * move the data, update descriptor and free skb. + */ + bd = apnet_get_tx_bd(adev); + if (!bd) { + pr_info("%s: TX stopped, remote buf full!\n", ndev->name); + goto error; + } + tx_slot = adev->tx_slot; + control = read32be(&bd->ctrl); + adev->tx_sts[tx_slot].sts = control; + /* Indicate xmit in progress */ + control |= APNET_TX_IN_FLIGHT; + write32be(control, &bd->ctrl); + write32be(skb->len, &bd->data_len); + adev->tx_sts[tx_slot].sts |= cpu_to_be32(APNET_RX_CTRL_FULL); + adev->tx_sts[tx_slot].skb = skb; + adev->tx_sts[tx_slot].ndev = ndev; + + /* get remote DMA-able address */ + roffset = read64be(&bd->data_ptr); + BUG_ON(!roffset); + /* Transfer data and indicate this slot has valid data */ + if (axon_dma_map_single(adev->if_num, skb->data, skb->len, + DMA_TO_DEVICE, &adev->tx_dma[tx_slot].local_buf)) { + pr_err("%s: dropping tx packet, out of dma mappings.\n", + ndev->name); + adev->nstats.tx_dropped++; + rc = NETDEV_TX_OK; + goto txerror; + } + adev->tx_sts[tx_slot].handle = adev->tx_dma[tx_slot].local_buf; + adev->tx_dma[tx_slot].remote_offset = roffset; + adev->tx_dma[tx_slot].local_status = adev->tx_sts_dma_handle + + (sizeof(struct apnet_tx_status) * tx_slot) + + offsetof(struct apnet_tx_status, sts); + adev->tx_dma[tx_slot].remote_status_offset = tx_slot * \ + sizeof(struct apnet_bd) + sizeof(struct apnet_cfg); + adev->tx_dma[tx_slot].length = skb->len; + adev->tx_dma[tx_slot].cb_func = apnet_tx_dma_complete; 
+ adev->tx_dma[tx_slot].cb_data = &adev->tx_sts[tx_slot]; + adev->tx_dma[tx_slot].flags = + AXON_APNET_DMA_CALLBACK | AXON_APNET_DMATYPE_PUT; + switch (axon_apnet_dma(adev->if_num, &adev->tx_dma[tx_slot])) { + case -EIO: /* give up, dma engine broken */ + adev->nstats.tx_dropped++; + netif_stop_queue(ndev); + adev->astats.tx_dma_err = 1; + mb(); /* make sure status updated */ + pr_err("%s: DMA Error, stopped TX queue\n", ndev->name); + return NETDEV_TX_OK; /*FIXME no better return? */ + + case -EAGAIN: + pr_debug("%s: Try DMA request later\n", ndev->name); + adev->nstats.tx_errors++; + return NETDEV_TX_BUSY; + + case -EIOCBQUEUED: /* this is normal */ + /* pr_debug("%s: DMA request queued\n", ndev->name); */ + rc = NETDEV_TX_OK; + break; + + default: + pr_debug("%s: Unknown return code for DMA request\n", \ + ndev->name); + return NETDEV_TX_BUSY; + } + +txerror: + adev->nstats.tx_packets++; /* includes failed packets */ + adev->nstats.tx_bytes += skb->len; + adev->tx_slot++; + ndev->trans_start = jiffies; + return rc; +error: + adev->nstats.tx_errors++; + netif_stop_queue(ndev); + return NETDEV_TX_BUSY; +} + +/** + * apnet_init_rx_ring - called by probe function. + * @ndev - net device structure + * + * returns 0 on success, <0 on failure + * Requests a portion of the shared memory allocated by the base driver, + * initializes configuration and RX buffer descriptor information and + * allocates SKBs for packet reception. + */ +static int apnet_init_rx_ring(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + int slot, rc = 0; + + /* Request shared memory for RX descriptors from base driver */ + adev->rxb_sz = sizeof(struct apnet_bd) * NUM_RX_SLOT; + adev->rx_base = axon_get_local_memory(adev->if_num, adev->rxb_sz); + if (!adev->rx_base) { + pr_err("%s: Error allocation shared memory rx buffer." 
+ "size %lu bytes.\n", ndev->name, adev->rxb_sz); + rc = -ENOMEM; + goto error; + } + pr_info("%s: %lu bytes mapped at 0x%p for RX descriptors\n", \ + ndev->name, adev->rxb_sz, adev->rx_base); + memset(adev->rx_base, 0, sizeof(struct apnet_bd) * NUM_RX_SLOT); + /* First BD slot holds configuration information. */ + if (sizeof (struct apnet_bd) != sizeof(struct apnet_cfg)) { + pr_err("%s: bd and cfg are different size.\n", ndev->name); + rc = -EPROTO; /* report better error */ + goto error; + } + /* first entry in the BD list holds configuration information */ + adev->rx_cfg_r = adev->rx_base; + /* second entry onwards holds buffer descriptors */ + adev->rx_bd = adev->rx_base + sizeof(struct apnet_bd); + /* RX buffer starts just after BD entries */ + adev->rx_slot = 0; /* initial inflight free slot */ + adev->rx_cfg.slots = NUM_RX_SLOT-1; + adev->rx_cfg.mtu = NUM_MAX_MTU; + adev->rx_cfg.sig = APNET_SIG; + adev->rx_cfg.ver = APNET_VER; + /* Save in big endian format to support hybrid system */ + write16be(adev->rx_cfg.slots, &adev->rx_cfg_r->slots); + write32be(adev->rx_cfg.mtu, &adev->rx_cfg_r->mtu); + write32be(adev->rx_cfg.sig, &adev->rx_cfg_r->sig); + write32be(adev->rx_cfg.ver, &adev->rx_cfg_r->ver); + + /* pre allocate rx skbs */ + for (slot = 0; slot < adev->rx_cfg.slots; slot++) { + rc = apnet_alloc_rx_skb(ndev, slot); + if (rc) + return rc; + } + pr_debug("%s: rx desc. size = %lu\n", ndev->name, \ + sizeof(struct apnet_bd)); + pr_debug("%s: rx descriptors = %d\n", ndev->name, adev->rx_cfg.slots); + pr_debug("%s: rx mtu size = %d\n", ndev->name, adev->rx_cfg.mtu); + pr_debug("%s: rx config at = 0x%p\n", ndev->name, adev->rx_cfg_r); + pr_debug("%s: rx desc. 
at = 0x%p\n", ndev->name, adev->rx_bd); + pr_debug("%s: rx skb = 0x%p\n", ndev->name, adev->rx_skb); + +error: + return rc; +} + +/** + * apnet_rx_ring - called by device open and close + * @ndev - net device structure + * @enable - set to enable the ring otherise disable + * + * Enable or Disable RX ring by chaning status field, xmit function + * checks for this field prior to sending packets. + */ +static void apnet_rx_ring(struct net_device *ndev, unsigned int enable) +{ + struct apnet *adev = netdev_priv(ndev); + __be16 sts; + + /* Set RX Enable bit in ring config status */ + sts = read16be(&adev->rx_cfg_r->sts); + if (enable) + sts |= APNET_RX_ENABLE; + else + sts &= ~APNET_RX_ENABLE; + write16be(sts, &adev->rx_cfg_r->sts); + pr_debug("%s: Updated rx ring status as %s\n", + ndev->name, enable ? "ready" : "not ready"); + return; +} + +/** + * apnet_get_tx_config - called by device open + * @ndev - net device structure + * + * returns 0 on success, <0 on failure + * Queries remote system's TX configuration and verifies with local system + * configuration. Updated local variables if it sees matching configuration. 
+ */ +static int apnet_get_tx_config(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + struct apnet_cfg *tx_cfg; + int rc = 0; + + /* get details of TX buffer */ + adev->tx_base = axon_get_remote_memory(adev->if_num); + if (!adev->tx_base) { + pr_debug("%s: Remote memory not available, check remote " + "driver.\n", ndev->name); + rc = -ENETUNREACH; + goto error; + } + tx_cfg = adev->tx_base; + pr_info("%s: TX descriptors mapped at 0x%p\n", ndev->name, tx_cfg); + adev->tx_cfg.slots = read16be(&tx_cfg->slots); + adev->tx_cfg.mtu = read32be(&tx_cfg->mtu); + adev->tx_cfg.sig = read32be(&tx_cfg->sig); + adev->tx_cfg.ver = read32be(&tx_cfg->ver); + if (adev->tx_cfg.sig != APNET_SIG || \ + adev->tx_cfg.ver != APNET_VER || \ + adev->tx_cfg.mtu != NUM_MAX_MTU || \ + adev->tx_cfg.slots != NUM_RX_SLOT-1) { + pr_err("%s: Remote Signature/Version/MTU/Slots " + "doesn't match.\n", ndev->name); + rc = -EPROTOTYPE; /* Protocol type mismatch */ + goto error; + } + adev->tx_bd = adev->tx_base + sizeof(struct apnet_bd); + /* TX buffer starts just after BD entries */ + adev->tx_slot = 0; /* initial free slot */ + pr_debug("%s: tx desc. size = %lu\n", ndev->name, + sizeof(struct apnet_bd)); + pr_debug("%s: tx descriptors = %d\n", ndev->name, adev->tx_cfg.slots); + pr_debug("%s: tx mtu size = %d\n", ndev->name, adev->tx_cfg.mtu); + pr_debug("%s: tx config at = 0x%p\n", ndev->name, tx_cfg); + pr_debug("%s: tx desc. at = 0x%p\n", ndev->name, adev->tx_bd); + return 0; + +error: + return rc; +} + +/** + * apnet_set_mac_addr - sets new MAC address for the interface + * @ndev - net device structure + * @sockaddr - socket address structure + * + * returns 0 on success, <0 on failure + * Allows to change MAC address of the interface. 
+ * example : ifconfig apnet0 hw ether 8a:c2:51:47:69:de + */ +static int apnet_set_mac_addr(struct net_device *ndev, void *sockaddr) +{ + struct sockaddr *addr = sockaddr; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EINVAL; + + pr_info("%s: Current MAC address %02X:%02X:%02X:%02X:%02X:%02X\n", + ndev->name, + ndev->dev_addr[0], ndev->dev_addr[1], + ndev->dev_addr[2], ndev->dev_addr[3], + ndev->dev_addr[4], ndev->dev_addr[5]); + memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len); + pr_info("%s: New MAC address %02X:%02X:%02X:%02X:%02X:%02X\n", + ndev->name, + ndev->dev_addr[0], ndev->dev_addr[1], + ndev->dev_addr[2], ndev->dev_addr[3], + ndev->dev_addr[4], ndev->dev_addr[5]); + return 0; +} + +/** + * apnet_change_mtu - allows to change MTU of the interface + * @ndev - net device structure + * @new_mtu - mtu size to be set + * + * returns 0 on success, <0 on failure + * currently this function is not supported and returns -EOPNOTSUPP + */ +static int apnet_change_mtu(struct net_device *ndev, int new_mtu) +{ + return -EOPNOTSUPP; /* FIXME this is not supported */ +} + +/** + * apnet_open - called when interface opened or brought up + * @ndev - net device structure + * + * returns 0 on success, <0 on failure + * Configures TX and RX rings, registers with call back function for + * notification and enables TX/RX queue. Returns appropriate errors. + */ +static int apnet_open(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + int rc; + + pr_debug("%s: open called\n", ndev->name); + /* configure tx related stuff */ + rc = apnet_get_tx_config(ndev); + if (rc) { + netif_tx_disable(ndev); + goto error; + } + + /* Register call back with the base driver */ + if (axon_register_notify_callback(APNET_MSG_TYPE, adev->if_num, ndev, + apnet_rx_msg)) { + pr_err("%s: reg. 
call back failed.\n", ndev->name); + rc = -ENETUNREACH; + goto error; + } + adev->astats.in_rx_poll = 0; + apnet_rx_ring(ndev, ENABLE); + netif_carrier_on(ndev); /* for rx */ + netif_wake_queue(ndev); /* for tx */ +error: + return rc; +} + +/** + * apnet_close - called when interface closed or brought down + * @ndev - net device structure + * + * returns 0 on success, <0 on failure + */ +static int apnet_close(struct net_device *ndev) +{ + struct apnet *adev = netdev_priv(ndev); + + pr_debug("%s: close called\n", ndev->name); + + axon_unregister_notify_callback(APNET_MSG_TYPE, adev->if_num); + apnet_rx_ring(ndev, DISABLE); + apnet_wait_rx_complete(ndev); + netif_carrier_off(ndev); /* for rx */ + ndev->trans_start = jiffies; /* prevent tx timeout */ + netif_tx_disable(ndev); /* for tx */ + apnet_wait_tx_dma_complete(ndev); + + return 0; +} + +/** + * FIXME : need a virtual axon bus to register properly. + * apnet_probe- called by device init + * @if_num - interface number + * + * returns 0 on success, <0 on failure + * Creates a ethernet like device structure, initializes and registers with + * network stack of the kernel. 
+ */ +static int __devinit apnet_probe(const int if_num) +{ + struct net_device *ndev; + struct apnet *adev; + int rc; + + /* Allocate net and private data structure */ + ndev = alloc_etherdev(sizeof(struct apnet)); + if (!ndev) { + pr_err("%s%d: Error creating ethernet device.\n", \ + DRV_NAME, if_num); + return -ENOMEM; + } + /* save ndev and fill in private data structure */ + adev = netdev_priv(ndev); + adev->ndev = ndev; + adev->if_num = if_num; + + spin_lock_init(&adev->rx_lock); + + /* Fill in netdev structure */ + ndev->open = &apnet_open; + ndev->stop = &apnet_close; + ndev->hard_start_xmit = &apnet_xmit_frame; + ndev->get_stats = &apnet_get_stats; + ndev->tx_timeout = &apnet_tx_timeout; + /* transmit time-out */ + ndev->watchdog_timeo = (5 * HZ); + + /* Allow ethernet packet overhead.ETH_HLEN is 14 */ + ndev->mtu = NUM_MAX_MTU - 16; + + /* Interrupt mitigation / NAPI support */ + ndev->poll = apnet_poll; + ndev->weight = min((NUM_RX_SLOT / 2), 128); + random_ether_addr(ndev->dev_addr); /* random MAC address */ + pr_info("%s%d: MAC address %02X:%02X:%02X:%02X:%02X:%02X\n", + DRV_NAME, if_num, + ndev->dev_addr[0], ndev->dev_addr[1], + ndev->dev_addr[2], ndev->dev_addr[3], + ndev->dev_addr[4], ndev->dev_addr[5]); + + ndev->change_mtu = &apnet_change_mtu; + ndev->set_mac_address = &apnet_set_mac_addr; + SET_ETHTOOL_OPS(ndev, &apnet_ethtool_ops); + + ndev->features |= NETIF_F_NO_CSUM; + + netif_carrier_off(ndev); + netif_stop_queue(ndev); + sprintf(ndev->name, "%s%%d", DRV_NAME); + rc = register_netdev(ndev); + if (rc) { + pr_err("%s: Cannot register net device.\n", ndev->name); + goto error; + } + /* configure rx related stuff */ + rc = apnet_init_rx_ring(ndev); + if (rc) + goto error1; + + rc = axon_dma_map_single(adev->if_num, adev->tx_sts, + sizeof(struct apnet_tx_status) * NUM_RX_SLOT, + DMA_BIDIRECTIONAL, &adev->tx_sts_dma_handle); + if (rc) { + pr_err("%s: out of dma mappings for tx status.\n", ndev->name); + goto error1; + } + apnet_rx_ring(ndev, 
DISABLE); + apnet_interface[if_num] = adev; + pr_info("Initialized %s interface\n", ndev->name); + return 0; + +error1: + unregister_netdev(adev->ndev); +error: + apnet_free_rx_ring(ndev); + free_netdev(ndev); + return rc; +} + +/** + * FIXME : need a virtual axon bus to remove properly. + * apnet_remove- called by device exit + * @if_num - interface number + * + * returns 0 on success, <0 on failure + * Undoes all the registration with the network stack of the kernel. + */ +static int __devexit apnet_remove(const int if_num) +{ + struct apnet *adev = apnet_interface[if_num]; + + axon_dma_unmap_single(adev->if_num, adev->tx_sts_dma_handle, + sizeof(adev->tx_sts), DMA_BIDIRECTIONAL); + apnet_free_rx_ring(adev->ndev); + unregister_netdev(adev->ndev); + free_netdev(adev->ndev); + apnet_interface[if_num] = NULL; + pr_info("Removing %s%d interface\n", DRV_NAME, if_num); + return 0; +} + +/** + * FIXME : need a virtual axon bus to simplify the init. + * apnet_init - driver init called by kernel + * + * Current design: gets number of devices found by the base driver and + * instantiates virtual ethernet devices. 
+ */ +static int __init apnet_init(void) +{ + int if_num, rc = 0; + + num_apnet_if = axon_get_num_devices(); + if (!num_apnet_if) { + pr_info("%s: Axon device(s) not found !\n", DRV_NAME); + return -ENODEV; + } + if (unlikely(num_apnet_if > NUM_APNET)) { + pr_info("%s: Maximum supported interfaces %d, found %d\n" + "limiting to %d interface(s)\n", \ + DRV_NAME, NUM_APNET, num_apnet_if, NUM_APNET); + num_apnet_if = NUM_APNET; + } + pr_info("%s - %s version %s, compatability version %d\n", \ + DRV_DESC, DRV_NAME, DRV_VERSION, APNET_VER); + for (if_num = 0; if_num < num_apnet_if; if_num++) { + pr_info("Instantiating apnet%d\n", if_num); + apnet_interface[if_num] = NULL; + rc = apnet_probe(if_num); + if (rc) { + pr_err("Error instantiating apnet%d\n", if_num); + continue; /* don't give up, other devices might work */ + } + } + return rc; +} +module_init(apnet_init); + +/** + * FIXME : need a virtual axon bus to simplify the init. + * apnet_exit - driver exit called by kernel + * + * Current design: calls remove function of the active interfaces + */ +static void __exit apnet_exit(void) +{ + int if_num; + + if (unlikely(!num_apnet_if)) { + pr_info("APNET interface(s) not found !\n"); + return; + } + for (if_num = 0; if_num < num_apnet_if; if_num++) { + if (apnet_interface[if_num] != NULL) + apnet_remove(if_num); + } + pr_info("All apnet interfaces removed\n"); + return; +} +module_exit(apnet_exit); Index: linux-2.6.23.1/drivers/net/apnet.h =================================================================== --- /dev/null +++ linux-2.6.23.1/drivers/net/apnet.h @@ -0,0 +1,187 @@ +/* + * Virtual ethernet device driver for IBM AXON PCIe interface + * + * Copyright (C) IBM Corporation, 2007, 2008 + * + * Authors: Murali Iyer + * + * Acronym - APNET (Axon PCIe Network) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at 
your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __APNET_H__ +#define __APNET_H__ + +/** + * Maximum number of APNET interfaces supported. Driver will query the + * hardware and limit to the number set here if it finds more devices. + * FIXME This hardcoded value will go away after base axon + * driver supports proper registration. + */ +#define NUM_APNET 4 + +/* Number of receive slots of MTU size for rx buffer. */ +#define NUM_RX_SLOT 128 + +/* simple check */ +#if NUM_RX_SLOT < 32 || NUM_RX_SLOT > 512 +#error Invalid number of slots, chose between 32 and 512 +#endif + +/* + * Maximum size of the packet (MTU) that this interface supports. + * Size mentioned here plus size of the skb structure and system + * cache line for performance will be requested from SLAB/SLUB. + * Example: configuring this to 64KB would get slab/slub allocation + * from 128K wasting most of the upper 64KB. + * Approximately 500 bytes of head room will take care of the above. +*/ +#define NUM_MAX_MTU 65016 + +enum { + DISABLE = 0, + ENABLE = 1, +}; + +/* Buffer descriptor structure for transmit and receive */ +struct apnet_bd{ + __be32 ctrl; /* Status / control bits */ + __be32 data_len; /* length of the packet */ + dma_addr_t data_ptr; /* address offset from start of the buffer */ +}; + +/* Definitions of status / control bits. 
*/ +enum { + /* set by remote to indicate descriptor has valid data and + cleared by local after retrieving data */ + APNET_RX_CTRL_FULL = 0x00000001, + APNET_TX_IN_FLIGHT = 0x00000002, +}; + +/* keep the same size of apnet_bd. first descriptor holds configuration */ +struct apnet_cfg{ + __be16 slots; /* Number of slots configured */ + __be16 sts; /* Status of the ring */ + __be32 mtu; /* Maximum MTU size supported */ + __be32 sig; /* Signature of APNET */ + __be32 ver; /* interface version number */ +}; + +#define APNET_RX_ENABLE 0x0001 /* RX ring valid bit */ +#define APNET_SIG_MISMATCH 0x8000 + +/* APNET signature that allows to validate remotes system's configuration. */ +#define APNET_SIG 0x41504e54 /* APNT signature */ + +/* Interface version number that could act as compatability or feature flag + * with future driver changes. Note that APNET_VER number is different from + * driver version number that changes more frequently. + * Version 1 - Software timer triggers rx_poll and memcpy to tx & rx packets + * Version 2 - Enables DMA and interrupts + */ +#define APNET_VER 0x00000002 + +/* for ethtool , match apnet_ethtool_stats entries*/ +struct apnet_stats{ + u64 tx_dma_err; /* DMA hardware error */ + u64 in_rx_poll; /* for rx poll locking */ + u64 inflight_rx_skbs; /* number of outstanding */ +}; + +struct apnet_tx_status{ + struct net_device *ndev; + __be32 sts; /* Status of tx descriptor */ + struct sk_buff *skb; + dma_addr_t handle; +}; + +struct apnet{ + struct net_device *ndev; /* network device */ + int if_num; /* interface number */ + + /* Transmit buffer related stuff, tx buffer in remote memory */ + void *tx_base; /* Remote/Transmit ring base */ + struct apnet_cfg __iomem tx_cfg; /* Configuration of tx buf */ + struct apnet_bd __iomem *tx_bd; /* IORemapped addr of tx bd */ + unsigned int tx_slot; /* In flight tx slot num */ + struct apnet_tx_status tx_sts[NUM_RX_SLOT]; /* In-flight tx sts */ + dma_addr_t tx_sts_dma_handle; /* dma handle of tx_sts */ + 
struct axon_apnet_dma_cb tx_dma[NUM_RX_SLOT]; /* TX DMA list */ + + /* Receive buffer related stuff, rx buffer in local memory */ + void *rx_base; /* Local/Receive ring base */ + struct apnet_cfg rx_cfg; /* Configuration of rx buf */ + struct apnet_cfg *rx_cfg_r; /* rx config for remote */ + struct apnet_bd *rx_bd; /* Virtual addr of rx bd */ + unsigned int rx_slot; /* In flight rx slot num */ + size_t rxb_sz; /* Size of rx cfg & desc */ + struct sk_buff *rx_skb[NUM_RX_SLOT]; + + /* Status */ + struct net_device_stats nstats; /* network status stuff */ + struct apnet_stats astats; /* debug / ethtool purposes */ + + spinlock_t rx_lock; /* used in rx func */ +}; + +/** + * Returns 16/32/64 bit big endian value. + * Works on both big & little endian systems as well as for + * local and ioremapped memory + */ +static inline __be16 read16be(__be16 *addr) +{ + __be16 tmp = be16_to_cpu(__raw_readw(addr)); + rmb(); /* make sure read ordering */ + return tmp; +} + +static inline void write16be(__be16 data, __be16 *addr) +{ + __raw_writew(cpu_to_be16(data), addr); + wmb(); /* make sure write ordering */ +} + +static inline __be32 read32be(__be32 *addr) +{ + __be32 tmp = be32_to_cpu(__raw_readl(addr)); + rmb(); /* make sure read ordering */ + return tmp; +} + +static inline void write32be(__be32 data, __be32 *addr) +{ + __raw_writel(cpu_to_be32(data), addr); + wmb(); /* make sure write ordering */ +} + +static inline __be64 read64be(__be64 *addr) +{ + __be64 tmp = be64_to_cpu(__raw_readq(addr)); + rmb(); /* make sure read ordering */ + return tmp; +} + +static inline void write64be(__be64 data, __be64 *addr) +{ + __raw_writeq(cpu_to_be64(data), addr); + wmb(); /* make sure write ordering */ +} + +#define pr_err(fmt, arg...) \ + printk(KERN_ERR fmt, ##arg) + +#endif /* __APNET_H__ */ --