[RFC PATCH 1/2] USB: add HCD_BOUNCE_BUFFERS host controller driver flag
Alan Stern
stern at rowland.harvard.edu
Thu Feb 4 06:40:11 EST 2010
On Wed, 3 Feb 2010, Albert Herranz wrote:
> The HCD_BOUNCE_BUFFERS USB host controller driver flag can be enabled
> to instruct the USB stack to always bounce USB buffers to/from coherent
> memory buffers _just_ before/after a host controller transmission.
>
> This setting allows overcoming some platform-specific limitations.
>
> For example, the Nintendo Wii video game console is a NOT_COHERENT_CACHE
> platform that is unable to safely perform non-32 bit uncached writes
> to RAM because the byte enables are not connected to the bus.
> Thus, in that platform, "coherent" DMA buffers cannot be directly used
> by the kernel code unless it guarantees that all write accesses
> to said buffers are done in 32 bit chunks (which is not the case in the
> USB subsystem).
>
> To avoid this unwanted behaviour, HCD_BOUNCE_BUFFERS can be enabled in
> the HCD driver, causing buffer allocations to be satisfied from normal
> memory; only at the very last moment, just before the actual transfer,
> are buffers copied to/from their corresponding DMA coherent bounce
> buffers.
>
> Note that HCD_LOCAL_MEM doesn't help in solving this problem as in that
> case buffers may be allocated from coherent memory in the first place
> and thus potentially accessed in non-32 bit chunks by USB drivers.
This description sounds hopelessly confused. Maybe you're just
misusing the term "coherent". The patch itself doesn't affect the
coherent DMA mappings anyway; it affects the streaming mappings. Or to
put it another way, what's the justification for replacing a call to
dma_map_single() with a call to dma_alloc_coherent()?

Since the patch doesn't affect any of the coherent mappings (see for
example the calls to dma_pool_create() in ehci-mem.c), I don't see how
it can possibly do what you claim.
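
For reference, the two kinds of mapping being conflated look roughly
like this (a minimal sketch of the generic DMA API, not code from the
patch; dev, buf and len are assumed to be set up by the caller):

	#include <linux/dma-mapping.h>

	/* Streaming mapping: an ordinary (cached) kernel buffer is handed
	 * to the device for the duration of a single transfer. */
	static int send_streaming(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t handle = dma_map_single(dev, buf, len,
						   DMA_TO_DEVICE);

		if (dma_mapping_error(dev, handle))
			return -EAGAIN;
		/* ... give 'handle' to the controller, wait for it ... */
		dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
		return 0;
	}

	/* Coherent mapping: memory is allocated consistent/uncached up
	 * front and stays mapped for the lifetime of the allocation. */
	static int send_coherent(struct device *dev, size_t len)
	{
		dma_addr_t handle;
		void *vaddr = dma_alloc_coherent(dev, len, &handle,
						 GFP_KERNEL);

		if (!vaddr)
			return -ENOMEM;
		/* ... CPU fills vaddr, controller reads via handle ... */
		dma_free_coherent(dev, len, vaddr, handle);
		return 0;
	}

The dma_pool_create() allocations in ehci-mem.c are of the second kind;
the mappings this patch replaces are of the first.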
> +/**
> + * hcd_memcpy32_to_coherent - copy data to a bounce buffer
> + * @dst: destination dma bounce buffer
> + * @src: source buffer
> + * @len: number of bytes to copy
> + *
> + * This function copies @len bytes from @src to @dst in 32 bit chunks.
> + * The caller must guarantee that the @dst buffer length is a multiple
> + * of 4 bytes and is greater than or equal to @len.
> + */
> +static void *hcd_memcpy32_to_coherent(void *dst, const void *src, size_t len)
> +{
> + u32 *q = dst, *p = (void *)src;
> + u8 *s;
> +
> + while (len >= 4) {
> + *q++ = *p++;
> + len -= 4;
> + }
> + s = (u8 *)p;
> + switch (len) {
> + case 3:
> + *q = s[0] << 24 | s[1] << 16 | s[2] << 8;
> + break;
> + case 2:
> + *q = s[0] << 24 | s[1] << 16;
> + break;
> + case 1:
> + *q = s[0] << 24;
> + break;
> + default:
> + break;
> + }
> + return dst;
> +}
What happens if somebody tries to use this code on a little-endian CPU?
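
One endian-neutral way to handle the tail, while still issuing only a
single 32-bit store into the bounce buffer, would be something like
this (untested sketch, reusing the q/s/len variables from the function
above):

	if (len) {
		u32 tmp = 0;

		memcpy(&tmp, s, len);	/* assemble tail bytes in cached memory */
		*q = tmp;		/* one aligned 32-bit write to the bounce buffer */
	}

Because memcpy() fills the low-address bytes of tmp and the full-word
store writes them back in the same positions, the bytes land in memory
in the same order regardless of CPU endianness.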
> static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
> gfp_t mem_flags)
> {
> @@ -1274,53 +1441,80 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
> if (is_root_hub(urb->dev))
> return 0;
>
> - if (usb_endpoint_xfer_control(&urb->ep->desc)
> - && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> - if (hcd->self.uses_dma) {
> - urb->setup_dma = dma_map_single(
> - hcd->self.controller,
> - urb->setup_packet,
> - sizeof(struct usb_ctrlrequest),
> - DMA_TO_DEVICE);
> - if (dma_mapping_error(hcd->self.controller,
> - urb->setup_dma))
> - return -EAGAIN;
> - } else if (hcd->driver->flags & HCD_LOCAL_MEM)
> - ret = hcd_alloc_coherent(
> - urb->dev->bus, mem_flags,
> + if (usb_endpoint_xfer_control(&urb->ep->desc)) {
> + if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> + if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> + urb->setup_dma = 0;
> + ret = hcd_bounce_to_coherent(
> + hcd->self.controller, mem_flags,
> &urb->setup_dma,
> (void **)&urb->setup_packet,
> sizeof(struct usb_ctrlrequest),
> DMA_TO_DEVICE);
> + } else if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> + if (hcd->self.uses_dma) {
> + urb->setup_dma = dma_map_single(
> + hcd->self.controller,
> + urb->setup_packet,
> + sizeof(struct usb_ctrlrequest),
> + DMA_TO_DEVICE);
> + if (dma_mapping_error(hcd->self.controller,
> + urb->setup_dma))
> + return -EAGAIN;
> + } else if (hcd->driver->flags & HCD_LOCAL_MEM)
> + ret = hcd_alloc_coherent(
> + urb->dev->bus, mem_flags,
> + &urb->setup_dma,
> + (void **)&urb->setup_packet,
> + sizeof(struct usb_ctrlrequest),
> + DMA_TO_DEVICE);
> + }
> }
>
> dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> - if (ret == 0 && urb->transfer_buffer_length != 0
> - && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> - if (hcd->self.uses_dma) {
> - urb->transfer_dma = dma_map_single (
> - hcd->self.controller,
> - urb->transfer_buffer,
> - urb->transfer_buffer_length,
> - dir);
> - if (dma_mapping_error(hcd->self.controller,
> - urb->transfer_dma))
> - return -EAGAIN;
> - } else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> - ret = hcd_alloc_coherent(
> - urb->dev->bus, mem_flags,
> + if (ret == 0 && urb->transfer_buffer_length != 0) {
> + if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> + if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP))
> + urb->transfer_dma = 0;
> + ret = hcd_bounce_to_coherent(
> + hcd->self.controller, mem_flags,
> &urb->transfer_dma,
> &urb->transfer_buffer,
> urb->transfer_buffer_length,
> dir);
>
> - if (ret && usb_endpoint_xfer_control(&urb->ep->desc)
> - && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> - hcd_free_coherent(urb->dev->bus,
> - &urb->setup_dma,
> - (void **)&urb->setup_packet,
> - sizeof(struct usb_ctrlrequest),
> - DMA_TO_DEVICE);
> + if (ret && usb_endpoint_xfer_control(&urb->ep->desc))
> + hcd_bounce_from_coherent(hcd->self.controller,
> + &urb->setup_dma,
> + (void **)&urb->setup_packet,
> + sizeof(struct usb_ctrlrequest),
> + DMA_TO_DEVICE);
> + } else if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> + if (hcd->self.uses_dma) {
> + urb->transfer_dma = dma_map_single(
> + hcd->self.controller,
> + urb->transfer_buffer,
> + urb->transfer_buffer_length,
> + dir);
> + if (dma_mapping_error(hcd->self.controller,
> + urb->transfer_dma))
> + return -EAGAIN;
> + } else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> + ret = hcd_alloc_coherent(
> + urb->dev->bus, mem_flags,
> + &urb->transfer_dma,
> + &urb->transfer_buffer,
> + urb->transfer_buffer_length,
> + dir);
> +
> + if (ret &&
> + usb_endpoint_xfer_control(&urb->ep->desc))
> + hcd_free_coherent(urb->dev->bus,
> + &urb->setup_dma,
> + (void **)&urb->setup_packet,
> + sizeof(struct usb_ctrlrequest),
> + DMA_TO_DEVICE);
> + }
> }
> }
> return ret;
It seems that every time somebody comes up with a new kind of
memory-access restriction, this function grows by a factor of 2. After
a few more iterations it will be larger than the rest of the kernel!
There must be a better way to structure the requirements here.
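
For instance, the setup-packet and transfer-buffer paths are nearly
identical; something along these lines (purely hypothetical, the helper
name is invented) would keep the decision logic in one place instead of
duplicating it:

	static int map_one_buffer(struct usb_hcd *hcd, struct urb *urb,
			gfp_t mem_flags, dma_addr_t *dma, void **buf,
			size_t len, enum dma_data_direction dir,
			int premapped)
	{
		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS)
			return hcd_bounce_to_coherent(hcd->self.controller,
					mem_flags, dma, buf, len, dir);
		if (premapped)
			return 0;	/* caller supplied the DMA address */
		if (hcd->self.uses_dma) {
			*dma = dma_map_single(hcd->self.controller, *buf,
					len, dir);
			return dma_mapping_error(hcd->self.controller,
					*dma) ? -EAGAIN : 0;
		}
		if (hcd->driver->flags & HCD_LOCAL_MEM)
			return hcd_alloc_coherent(urb->dev->bus, mem_flags,
					dma, buf, len, dir);
		return 0;	/* PIO controller: nothing to map */
	}

Then map_urb_for_dma() would call it once for the setup packet and once
for the transfer buffer, and only the error unwinding of the first
mapping would remain in the caller.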
Alan Stern