[PATCH 1/2] ucc_geth: Move freeing of TX packets to NAPI context.

Li Yang leoli at freescale.com
Fri Mar 27 21:50:09 EST 2009


On Thu, Mar 26, 2009 at 8:54 PM, Joakim Tjernlund
<Joakim.Tjernlund at transmode.se> wrote:
> Also set NAPI weight to 64 as this is a common value.
> This will make the system a lot more responsive while
> ping flooding the ucc_geth ethernet interface.
>
> Signed-off-by: Joakim Tjernlund <Joakim.Tjernlund at transmode.se>
> ---
>  drivers/net/ucc_geth.c |   32 ++++++++++++--------------------
>  drivers/net/ucc_geth.h |    1 -
>  2 files changed, 12 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
> index 097aed8..7fc91aa 100644
> --- a/drivers/net/ucc_geth.c
> +++ b/drivers/net/ucc_geth.c
> @@ -3214,7 +3214,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
>                dev->stats.tx_packets++;
>
>                /* Free the sk buffer associated with this TxBD */
> -               dev_kfree_skb_irq(ugeth->
> +               dev_kfree_skb(ugeth->
>                                  tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]]);
>                ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL;
>                ugeth->skb_dirtytx[txQ] =
> @@ -3248,9 +3248,16 @@ static int ucc_geth_poll(struct napi_struct *napi, int budget)
>        for (i = 0; i < ug_info->numQueuesRx; i++)
>                howmany += ucc_geth_rx(ugeth, i, budget - howmany);
>
> +       /* Tx event processing */
> +       spin_lock(&ugeth->lock);
> +       for (i = 0; i < ug_info->numQueuesTx; i++) {
> +               ucc_geth_tx(ugeth->dev, i);
> +       }
> +       spin_unlock(&ugeth->lock);
> +
>        if (howmany < budget) {
>                netif_rx_complete(napi);
> -               setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS);
> +               setbits32(ugeth->uccf->p_uccm, UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>        }
>
>        return howmany;
> @@ -3264,8 +3271,6 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>        struct ucc_geth_info *ug_info;
>        register u32 ucce;
>        register u32 uccm;
> -       register u32 tx_mask;
> -       u8 i;
>
>        ugeth_vdbg("%s: IN", __func__);
>
> @@ -3279,27 +3284,14 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
>        out_be32(uccf->p_ucce, ucce);
>
>        /* check for receive events that require processing */
> -       if (ucce & UCCE_RX_EVENTS) {
> +       if (ucce & (UCCE_RX_EVENTS | UCCE_TX_EVENTS)) {
>                if (netif_rx_schedule_prep(&ugeth->napi)) {
> -                       uccm &= ~UCCE_RX_EVENTS;
> +                       uccm &= ~(UCCE_RX_EVENTS | UCCE_TX_EVENTS);
>                        out_be32(uccf->p_uccm, uccm);
>                        __netif_rx_schedule(&ugeth->napi);
>                }
>        }
>
> -       /* Tx event processing */
> -       if (ucce & UCCE_TX_EVENTS) {
> -               spin_lock(&ugeth->lock);
> -               tx_mask = UCC_GETH_UCCE_TXB0;
> -               for (i = 0; i < ug_info->numQueuesTx; i++) {
> -                       if (ucce & tx_mask)
> -                               ucc_geth_tx(dev, i);
> -                       ucce &= ~tx_mask;
> -                       tx_mask <<= 1;
> -               }
> -               spin_unlock(&ugeth->lock);
> -       }
> -
>        /* Errors and other events */
>        if (ucce & UCCE_OTHER) {
>                if (ucce & UCC_GETH_UCCE_BSY)
> @@ -3733,7 +3725,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
>        dev->netdev_ops = &ucc_geth_netdev_ops;
>        dev->watchdog_timeo = TX_TIMEOUT;
>        INIT_WORK(&ugeth->timeout_work, ucc_geth_timeout_work);
> -       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT);
> +       netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, 64);
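
For reference, with the hunks above applied the poll routine ends up looking
roughly like the following.  The body is taken from the hunks; the opening
declarations are assumed context and may not match the file exactly:

    static int ucc_geth_poll(struct napi_struct *napi, int budget)
    {
            /* Assumed prologue -- not shown in the hunks above */
            struct ucc_geth_private *ugeth =
                    container_of(napi, struct ucc_geth_private, napi);
            struct ucc_geth_info *ug_info = ugeth->ug_info;
            int howmany = 0;
            int i;

            /* RX processing, bounded by the NAPI budget */
            for (i = 0; i < ug_info->numQueuesRx; i++)
                    howmany += ucc_geth_rx(ugeth, i, budget - howmany);

            /* TX completion now runs here instead of in the IRQ handler */
            spin_lock(&ugeth->lock);
            for (i = 0; i < ug_info->numQueuesTx; i++) {
                    ucc_geth_tx(ugeth->dev, i);
            }
            spin_unlock(&ugeth->lock);

            if (howmany < budget) {
                    /* Done for now: leave polling mode and unmask both
                     * RX and TX events again. */
                    netif_rx_complete(napi);
                    setbits32(ugeth->uccf->p_uccm,
                              UCCE_RX_EVENTS | UCCE_TX_EVENTS);
            }

            return howmany;
    }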

It doesn't make sense to have a larger NAPI budget than the size of the RX
BD ring.  You can't have more BDs than RX_BD_RING_LEN in the backlog for
napi_poll to process.  Increase RX_BD_RING_LEN if you want to
increase UCC_GETH_DEV_WEIGHT.  However, please also provide a
performance comparison for this kind of change.  Thanks
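
If the goal is simply to keep the budget from exceeding what the ring can
hold, something along these lines would keep the two in sync (a rough
sketch against the names used above, not a tested change):

    /* Sketch only: clamp whatever weight is requested to the RX BD
     * ring length, so the NAPI poll budget can never exceed the number
     * of BDs the ring holds.  min_t() comes from <linux/kernel.h>; the
     * call site mirrors the probe hunk quoted above. */
    netif_napi_add(dev, &ugeth->napi, ucc_geth_poll,
                   min_t(int, UCC_GETH_DEV_WEIGHT, RX_BD_RING_LEN));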

- Leo


