Jumbo Frames, sil24 SATA driver, and kswapd0 page allocation failures

Jonathan Haws Jonathan.Haws at sdl.usu.edu
Wed Aug 19 08:56:44 EST 2009


> If the hardware supports it, the best way to deal with it is to set
> up
> the driver so that it only ever deals in single pages.  

I am working on fixing the driver to support NETIF_F_SG and have changed how it receives packets to follow how the e1000 driver does it.

Here is where I am at:

When I get the first part of the frame, I allocate an skb for the packet.  I call dev->page = alloc_page(GFP_ATOMIC) to allocate a page for the 4080 bytes coming from the MAL.

I then set up a DMA mapping for that page to get the data out of the MAL (the original code simply used dma_map_single, but I need a page).

Once the DMA map has been set up and the data transferred, I call skb_fill_page_desc() to put the data into the skb.  I then wrote a function called emac_consume_page, which unmaps the DMA mapping, frees the page, and updates the lengths in the skb.

The relevant source code is at the end of this email.

My problem is this:

When I run this code, it appears to create the fragmented packet just fine, but when it passes it up the stack, the kernel spits out these bugs, one after another:

BUG: Bad page state in process swapper  pfn:0ee9b
page:c051f360 flags:(null) count:-3 mapcount:0 mapping:(null) index:766
Call Trace:
[c032bc30] [c0006ef0] show_stack+0x44/0x16c (unreliable)
[c032bc70] [c006c438] bad_page+0x94/0x130
[c032bc90] [c006d4a0] get_page_from_freelist+0x458/0x4d4
[c032bd20] [c006d5f4] __alloc_pages_nodemask+0xd8/0x4f8
[c032bda0] [c01a1174] emac_poll_rx+0x300/0x9c8
[c032bdf0] [c019cb64] mal_poll+0xa8/0x1ec
[c032be20] [c01cf218] net_rx_action+0x9c/0x1b4
[c032be50] [c0039678] __do_softirq+0xc4/0x148
[c032be90] [c0004d18] do_softirq+0x78/0x80
[c032bea0] [c0039264] irq_exit+0x64/0x7c
[c032beb0] [c0005210] do_IRQ+0x9c/0xb4
[c032bed0] [c000fa7c] ret_from_except+0x0/0x18
[c032bf90] [c000808c] cpu_idle+0xdc/0xec
[c032bfb0] [c00028fc] rest_init+0x70/0x84
[c032bfc0] [c02e0864] start_kernel+0x240/0x2c4
[c032bff0] [c0002254] start_here+0x44/0xb0
BUG: Bad page state in process swapper  pfn:0ee8c
page:c051f180 flags:(null) count:-3 mapcount:0 mapping:(null) index:757
Call Trace:
[c032bc30] [c0006ef0] show_stack+0x44/0x16c (unreliable)
[c032bc70] [c006c438] bad_page+0x94/0x130
[c032bc90] [c006d4a0] get_page_from_freelist+0x458/0x4d4
[c032bd20] [c006d5f4] __alloc_pages_nodemask+0xd8/0x4f8
[c032bda0] [c01a1174] emac_poll_rx+0x300/0x9c8
[c032bdf0] [c019cb64] mal_poll+0xa8/0x1ec
[c032be20] [c01cf218] net_rx_action+0x9c/0x1b4
[c032be50] [c0039678] __do_softirq+0xc4/0x148
[c032be90] [c0004d18] do_softirq+0x78/0x80
[c032bea0] [c0039264] irq_exit+0x64/0x7c
[c032beb0] [c0005210] do_IRQ+0x9c/0xb4
[c032bed0] [c000fa7c] ret_from_except+0x0/0x18
[c032bf90] [c000808c] cpu_idle+0xdc/0xec
[c032bfb0] [c00028fc] rest_init+0x70/0x84
[c032bfc0] [c02e0864] start_kernel+0x240/0x2c4
[c032bff0] [c0002254] start_here+0x44/0xb0

I know that I am missing something when it comes to allocating the pages for the fragments, but when I compare my methodology to the e1000 driver, the two appear to be functionally the same.

Any ideas?  I can send the entire source file for the driver if need be.

Thanks!

Jonathan


Here is the source:

/*
 * RX poll routine (excerpt as posted; sections replaced by "..." were elided
 * by the author, so variables such as skb, slot, ctrl, len and received are
 * declared and set in code that is not shown here).
 */
static int emac_poll_rx(void *param, int budget)
{

... /* Other code is here */

/* Hand a completed skb to the network stack. */
push_packet:
	skb->dev = dev->ndev;
	skb->protocol = eth_type_trans(skb, dev->ndev);
	emac_rx_csum(dev, skb, ctrl);

	/* Count frames the stack reports as dropped. */
	if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
		++dev->estats.rx_dropped_stack;
next:
	++dev->stats.rx_packets;
skip:
	dev->stats.rx_bytes += len;
	/* Advance to the next ring slot, wrapping at NUM_RX_BUFF. */
	slot = (slot + 1) % NUM_RX_BUFF;
	--budget;
	++received;
	continue;
/* Scatter-gather path: the frame spans more than one RX descriptor. */
sg:
if (ctrl & MAL_RX_CTRL_FIRST) {
	/* A first fragment must not arrive while another frame is in progress. */
	BUG_ON(dev->rx_sg_skb);
	if (unlikely(emac_alloc_rx_skb2(dev, slot, GFP_ATOMIC))) {
		/* No replacement buffer: count the drop and reuse the slot as is. */
		DBG(dev, "rx OOM %d (%d) (%d)" NL, slot, dev->rx_skb_size, len);
		++dev->estats.rx_dropped_oom;
		emac_recycle_rx_skb(dev, slot, 0);
	} else {
		dev->rx_sg_skb = skb;
		/*
		 * NOTE(review): dev->page was just set by emac_alloc_rx_skb2(),
		 * which allocated it and mapped it into this slot's descriptor.
		 * It is therefore the replacement page handed to the hardware,
		 * not a page shown to contain the len received bytes - confirm
		 * which page actually holds the data.
		 */
		skb_fill_page_desc(dev->rx_sg_skb, 0, dev->page, 0, len);
		emac_consume_page(dev, len, slot);
		/*
		 * NOTE(review): the reason for adding ETH_HLEN is not visible in
		 * this excerpt - presumably it offsets the header pull done
		 * later by eth_type_trans(); confirm.
		 */
		dev->rx_sg_skb->len += ETH_HLEN;
	}
} else if (!emac_rx_sg_append(dev, slot) && (ctrl & MAL_RX_CTRL_LAST)) {
	/* Last fragment appended successfully: detach the assembled skb. */
	skb = dev->rx_sg_skb;
	dev->rx_sg_skb = NULL;

	/* Any error bit other than a lone TAH checksum error drops the frame. */
	ctrl &= EMAC_BAD_RX_MASK;
	if (unlikely(ctrl && ctrl != EMAC_RX_TAH_BAD_CSUM)) {
		emac_parse_rx_error(dev, ctrl);
		++dev->estats.rx_dropped_error;
		dev_kfree_skb(skb);
		len = 0;
	} else
		goto push_packet;
}

... /* Other code is here */
} /* end of emac_poll_rx */

/*
 * Allocate a fresh skb and a fresh page for RX ring slot @slot, map the page
 * for device-to-memory DMA and hand the descriptor back to the hardware.
 *
 * @dev:   EMAC instance owning the RX ring.
 * @slot:  index of the RX descriptor to re-arm.
 * @flags: allocation flags passed to alloc_skb() and alloc_page().
 *
 * Returns 0 on success or -ENOMEM if the skb, the page or the DMA mapping
 * could not be obtained.  Nothing in @dev is modified on failure, so the
 * caller can still recycle whatever buffer the slot currently holds.
 */
static inline int emac_alloc_rx_skb2(struct emac_instance *dev, int slot,
				    gfp_t flags)
{
	struct sk_buff *skb;
	struct page *page;
	dma_addr_t dma;

	skb = alloc_skb(242, flags);
	if (unlikely(!skb))
		return -ENOMEM;

	page = alloc_page(flags);
	DBG(dev, "emac_alloc_skb2: page %p" NL, page);
	if (unlikely(!page))
		goto err_free_skb;

	dma = dma_map_page(&dev->ofdev->dev, page, 0, 4096, DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(&dev->ofdev->dev, dma)))
		goto err_free_page;

	/* Every resource is in hand: only now commit them to the ring slot. */
	dev->rx_skb[slot] = skb;
	dev->page = page;
	dev->rx_desc[slot].data_len = 0;
	dev->rx_desc[slot].data_ptr = dma;

	/* Descriptor contents must be visible before it is marked EMPTY. */
	wmb();
	dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY |
	    (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);

	return 0;

err_free_page:
	__free_page(page);
err_free_skb:
	dev_kfree_skb(skb);
	return -ENOMEM;
} /* end of emac_alloc_rx_skb2 */

/*
 * Finish handing the current RX page over to the scatter-gather skb.
 *
 * @dev:    EMAC instance; dev->rx_sg_skb is the skb being assembled.
 * @length: number of bytes the fragment contributes to the skb.
 * @slot:   RX descriptor whose DMA address is unmapped.
 *
 * Unmaps the slot's DMA address, clears dev->page and adds @length to the
 * len, data_len and truesize of dev->rx_sg_skb.
 *
 * The page is deliberately NOT freed here.  Callers attach it to the skb with
 * skb_fill_page_desc() before calling this function; that helper takes no
 * extra reference, so the reference obtained from alloc_page() now belongs to
 * the skb and is dropped when the skb is freed.  Freeing the page here as
 * well releases it twice, which shows up as "Bad page state" with a negative
 * page count.
 */
static inline void emac_consume_page(struct emac_instance* dev, int length, int slot)
{
	dma_unmap_page(&dev->ofdev->dev, dev->rx_desc[slot].data_ptr, 4096, DMA_FROM_DEVICE);
	wmb();
	/* Ownership has moved to the skb fragment; just forget our pointer. */
	dev->page = NULL;
	dev->rx_sg_skb->len += length;
	dev->rx_sg_skb->data_len += length;
	dev->rx_sg_skb->truesize += length;
}

/*
 * Append the buffer of RX ring slot @slot to the scatter-gather skb being
 * assembled in dev->rx_sg_skb.
 *
 * Returns 0 when a page fragment was added, -ENOMEM when no page could be
 * allocated, and -1 (after recycling the slot) when no skb is in progress or
 * the frame would exceed dev->max_mtu; in the latter case the partial skb is
 * freed as well.
 */
static inline int emac_rx_sg_append(struct emac_instance *dev, int slot)
{
	if (likely(dev->rx_sg_skb != NULL)) {
		/* Bytes the hardware reported for this descriptor. */
		int len = dev->rx_desc[slot].data_len;
		int tot_len = dev->rx_sg_skb->len + len;

		if (unlikely(tot_len + 2 > dev->max_mtu)) {
			/* Oversized frame: drop what has been collected so far. */
			++dev->estats.rx_dropped_mtu;
			dev_kfree_skb(dev->rx_sg_skb);
			dev->rx_sg_skb = NULL;
		} else {
			dev->page = alloc_page(GFP_ATOMIC);
			if(unlikely(!dev->page))
			{
				/*
				 * NOTE(review): this path returns without re-arming
				 * or recycling the descriptor and leaves
				 * dev->rx_sg_skb set - confirm the caller copes.
				 */
				return -ENOMEM;
			}
			/* Map the new page and give the descriptor back to the hardware. */
			dev->rx_desc[slot].data_ptr = dma_map_page(&dev->ofdev->dev, dev->page, 0, 4096, DMA_FROM_DEVICE);
			dev->rx_desc[slot].data_len = 0;
			wmb();
			dev->rx_desc[slot].ctrl = MAL_RX_CTRL_EMPTY | (slot == (NUM_RX_BUFF - 1) ? MAL_RX_CTRL_WRAP : 0);
			/*
			 * NOTE(review): the page attached below is the one allocated
			 * just above and already handed to the hardware, not a page
			 * that held the len bytes read at the top of this function;
			 * emac_consume_page() then unmaps this slot's DMA address
			 * while the descriptor is marked EMPTY.  Confirm where the
			 * received data actually lives.  nr_frags is also not
			 * bounds-checked here.
			 */
			skb_fill_page_desc(dev->rx_sg_skb, skb_shinfo(dev->rx_sg_skb)->nr_frags, dev->page, 0, len);
			emac_consume_page(dev, len, slot);
			return 0;
		}
	}
	/* No skb in progress (or it was just dropped): reuse the slot's buffer. */
	emac_recycle_rx_skb(dev, slot, 0);
	return -1;
} /* end of emac_rx_sg_append */


More information about the Linuxppc-dev mailing list