[RFC PATCH 1/2] USB: add HCD_BOUNCE_BUFFERS host controller driver flag

Alan Stern stern at rowland.harvard.edu
Thu Feb 4 06:40:11 EST 2010


On Wed, 3 Feb 2010, Albert Herranz wrote:

> The HCD_BOUNCE_BUFFERS USB host controller driver flag can be enabled
> to instruct the USB stack to always bounce USB buffers to/from coherent
> memory buffers _just_ before/after a host controller transmission.
> 
> This setting allows overcoming some platform-specific limitations.
> 
> For example, the Nintendo Wii video game console is a NOT_COHERENT_CACHE
> platform that is unable to safely perform non-32 bit uncached writes
> to RAM because the byte enables are not connected to the bus.
> Thus, on that platform, "coherent" DMA buffers cannot be directly used
> by the kernel code unless it guarantees that all write accesses
> to said buffers are done in 32 bit chunks (which is not the case in the
> USB subsystem).
> 
> To avoid this unwanted behaviour, HCD_BOUNCE_BUFFERS can be enabled at
> the HCD controller, causing buffer allocations to be satisfied from
> normal memory and, only at the very last moment, before the actual
> transfer, buffers get copied to/from their corresponding DMA coherent
> bounce buffers.
> 
> Note that HCD_LOCAL_MEM doesn't help solve this problem, because in
> that case buffers may be allocated from coherent memory in the first
> place and thus potentially accessed in non-32 bit chunks by USB drivers.

This description sounds hopelessly confused.  Maybe you're just
misusing the term "coherent".  The patch itself doesn't affect the
coherent DMA mappings anyway; it affects the streaming mappings.  Or to
put it another way, what's the justification for replacing a call to
dma_map_single() with a call to dma_alloc_coherent()?

Since the patch doesn't affect any of the coherent mappings (see for 
example the calls to dma_pool_create() in ehci-mem.c), I don't see how 
it can possibly do what you claim.
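
For reference, the two kinds of mapping look roughly like this (a
minimal sketch with generic names, not code from the patch):

	/* Streaming mapping: the buffer is ordinary kernel memory.
	 * The device owns it only for the duration of one transfer,
	 * and the DMA API does whatever cache maintenance is needed
	 * at map/unmap time. */
	buf = kmalloc(len, GFP_KERNEL);
	dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	/* ... carry out the transfer ... */
	dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);

	/* Coherent mapping: the buffer lives in coherent (often
	 * uncached) memory for its whole lifetime; the CPU and the
	 * device can both access it at any time. */
	buf = dma_alloc_coherent(dev, len, &dma, GFP_KERNEL);
	/* ... use buf for as long as needed ... */
	dma_free_coherent(dev, len, buf, dma);

Your patch replaces the first style with the second on the I/O path,
which is why the justification matters.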

> +/**
> + * hcd_memcpy32_to_coherent - copy data to a bounce buffer
> + * @dst: destination dma bounce buffer
> + * @src: source buffer
> + * @len: number of bytes to copy
> + *
> + * This function copies @len bytes from @src to @dst in 32 bit chunks.
> + * The caller must guarantee that @dst is 4 byte aligned and that the
> + * @dst buffer can hold @len bytes rounded up to a multiple of 4.
> + */
> +static void *hcd_memcpy32_to_coherent(void *dst, const void *src, size_t len)
> +{
> +	u32 *q = dst, *p = (void *)src;
> +	u8 *s;
> +
> +	while (len >= 4) {
> +		*q++ = *p++;
> +		len -= 4;
> +	}
> +	s = (u8 *)p;
> +	switch (len) {
> +	case 3:
> +		*q = s[0] << 24 | s[1] << 16 | s[2] << 8;
> +		break;
> +	case 2:
> +		*q = s[0] << 24 | s[1] << 16;
> +		break;
> +	case 1:
> +		*q = s[0] << 24;
> +		break;
> +	default:
> +		break;
> +	}
> +	return dst;
> +}

What happens if somebody tries to use this code on a little-endian CPU?
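
The shifts build the trailing word MSB-first, so the byte layout in
the bounce buffer comes out right only on a big-endian CPU.  One
endianness-neutral way to handle the tail (just a sketch, untested)
is to stage the leftover bytes through a temporary word, so the store
to the bounce buffer is still a single aligned 32-bit access:

	if (len) {
		u32 tmp = 0;

		/* len is 1, 2 or 3 here; memcpy() places the bytes
		 * at the same offsets a plain byte copy would,
		 * regardless of endianness. */
		memcpy(&tmp, s, len);
		*q = tmp;	/* one 32-bit write */
	}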

>  static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
>  			   gfp_t mem_flags)
>  {
> @@ -1274,53 +1441,80 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
>  	if (is_root_hub(urb->dev))
>  		return 0;
>  
> -	if (usb_endpoint_xfer_control(&urb->ep->desc)
> -	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> -		if (hcd->self.uses_dma) {
> -			urb->setup_dma = dma_map_single(
> -					hcd->self.controller,
> -					urb->setup_packet,
> -					sizeof(struct usb_ctrlrequest),
> -					DMA_TO_DEVICE);
> -			if (dma_mapping_error(hcd->self.controller,
> -						urb->setup_dma))
> -				return -EAGAIN;
> -		} else if (hcd->driver->flags & HCD_LOCAL_MEM)
> -			ret = hcd_alloc_coherent(
> -					urb->dev->bus, mem_flags,
> +	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
> +		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> +			if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> +				urb->setup_dma = 0;
> +			ret = hcd_bounce_to_coherent(
> +					hcd->self.controller, mem_flags,
>  					&urb->setup_dma,
>  					(void **)&urb->setup_packet,
>  					sizeof(struct usb_ctrlrequest),
>  					DMA_TO_DEVICE);
> +		} else if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> +			if (hcd->self.uses_dma) {
> +				urb->setup_dma = dma_map_single(
> +						hcd->self.controller,
> +						urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +				if (dma_mapping_error(hcd->self.controller,
> +						      urb->setup_dma))
> +					return -EAGAIN;
> +			} else if (hcd->driver->flags & HCD_LOCAL_MEM)
> +				ret = hcd_alloc_coherent(
> +						urb->dev->bus, mem_flags,
> +						&urb->setup_dma,
> +						(void **)&urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +		}
>  	}
>  
>  	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> -	if (ret == 0 && urb->transfer_buffer_length != 0
> -	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> -		if (hcd->self.uses_dma) {
> -			urb->transfer_dma = dma_map_single (
> -					hcd->self.controller,
> -					urb->transfer_buffer,
> -					urb->transfer_buffer_length,
> -					dir);
> -			if (dma_mapping_error(hcd->self.controller,
> -						urb->transfer_dma))
> -				return -EAGAIN;
> -		} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> -			ret = hcd_alloc_coherent(
> -					urb->dev->bus, mem_flags,
> +	if (ret == 0 && urb->transfer_buffer_length != 0) {
> +		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> +			if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP))
> +				urb->transfer_dma = 0;
> +			ret = hcd_bounce_to_coherent(
> +					hcd->self.controller, mem_flags,
>  					&urb->transfer_dma,
>  					&urb->transfer_buffer,
>  					urb->transfer_buffer_length,
>  					dir);
>  
> -			if (ret && usb_endpoint_xfer_control(&urb->ep->desc)
> -			    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> -				hcd_free_coherent(urb->dev->bus,
> -					&urb->setup_dma,
> -					(void **)&urb->setup_packet,
> -					sizeof(struct usb_ctrlrequest),
> -					DMA_TO_DEVICE);
> +			if (ret && usb_endpoint_xfer_control(&urb->ep->desc))
> +				hcd_bounce_from_coherent(hcd->self.controller,
> +						&urb->setup_dma,
> +						(void **)&urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +		} else if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> +			if (hcd->self.uses_dma) {
> +				urb->transfer_dma = dma_map_single(
> +						hcd->self.controller,
> +						urb->transfer_buffer,
> +						urb->transfer_buffer_length,
> +						dir);
> +				if (dma_mapping_error(hcd->self.controller,
> +						      urb->transfer_dma))
> +					return -EAGAIN;
> +			} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> +				ret = hcd_alloc_coherent(
> +						urb->dev->bus, mem_flags,
> +						&urb->transfer_dma,
> +						&urb->transfer_buffer,
> +						urb->transfer_buffer_length,
> +						dir);
> +
> +				if (ret &&
> +				    usb_endpoint_xfer_control(&urb->ep->desc))
> +					hcd_free_coherent(urb->dev->bus,
> +						&urb->setup_dma,
> +						(void **)&urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +			}
>  		}
>  	}
>  	return ret;

It seems that every time somebody comes up with a new kind of 
memory-access restriction, this function grows by a factor of 2.  After 
a few more iterations it will be larger than the rest of the kernel!

There must be a better way to structure the requirements here.
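
For instance (purely a sketch, with made-up names), the policy choice
could live in one small helper so that the setup packet and the data
buffer share a single code path, and each new restriction is handled
in exactly one place:

	static int hcd_map_one(struct usb_hcd *hcd, gfp_t mem_flags,
			       dma_addr_t *dma, void **buf, size_t len,
			       enum dma_data_direction dir, bool premapped)
	{
		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS)
			return hcd_bounce_to_coherent(hcd->self.controller,
						      mem_flags, dma, buf,
						      len, dir);
		if (premapped)
			return 0;	/* caller supplied the DMA address */
		if (hcd->self.uses_dma) {
			*dma = dma_map_single(hcd->self.controller, *buf,
					      len, dir);
			if (dma_mapping_error(hcd->self.controller, *dma))
				return -EAGAIN;
			return 0;
		}
		if (hcd->driver->flags & HCD_LOCAL_MEM)
			return hcd_alloc_coherent(&hcd->self, mem_flags,
						  dma, buf, len, dir);
		return 0;	/* PIO controller: nothing to map */
	}

Then map_urb_for_dma() shrinks to two hcd_map_one() calls plus the
error unwinding, instead of two copies of the whole decision tree.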

Alan Stern


