[Skiboot] [PATCH] core/pci: Introduce virtual device and filter cache

Alistair Popple alistair at popple.id.au
Mon Sep 12 10:59:18 AEST 2016


Hi Gavin,

On Thu, 8 Sep 2016 05:19:32 PM Gavin Shan wrote:
> This introduces virtual device and filter cache to speed up the
> searching for PCI virtual device and config space register filter
> in PCI config access path. With this applied, the original bandwidth
> is regained during the NPU1 bandwidth testing.

Have you done any profiling that shows the virtual pci filters are the source 
of the slow down? I am quite confused why this is an issue as once the links 
are setup nothing should be touching the virtual config spaces (apart from a 
watchdog thread running once a second in the nvidia device driver).

I am concerned the slow down may be due to some other reason such as link 
training not work correctly so would like some confirmation that slow config 
space access is what is causing this. Also if the slow down is due to frequent 
config space accesses then we should be looking to reduce the frequency of 
accesses. There is probably also plenty of scope to further optimise these 
code paths as they were assumed to be on a fairly slow path when written.

Regars,

Alistair

> Original bandwidth before PCI virtual device patchset is applied:
> 
> garrison# pwd
> /home/alistair/NVIDIA_CUDA-7.5_Samples/1_Utilities/bandwidthTest
> garrison# ./bandwidthTest --memory=pinned
>     :
>  Host to Device Bandwidth, 1 Device(s)
>  PINNED Memory Transfers
>    Transfer Size (Bytes)	Bandwidth(MB/s)
>    33554432			33205.6
> 
> With PCI virtual device patchset and this one is applied:
> 
>  Host to Device Bandwidth, 1 Device(s)
>  PINNED Memory Transfers
>    Transfer Size (Bytes)	Bandwidth(MB/s)
>    33554432			33080.0
> 
> Signed-off-by: Gavin Shan <gwshan at linux.vnet.ibm.com>
> ---
>  core/pci-virt.c    | 23 +++++++++++++++++++++--
>  include/pci-virt.h |  1 +
>  include/pci.h      |  1 +
>  3 files changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/core/pci-virt.c b/core/pci-virt.c
> index b531470..663225a 100644
> --- a/core/pci-virt.c
> +++ b/core/pci-virt.c
> @@ -55,6 +55,12 @@ static struct pci_cfg_reg_filter *pci_virt_find_filter(
>  	if (!pvd || !len || start >= pvd->cfg_size)
>  		return NULL;
>  
> +	/* Check the cache first */
> +	pcrf = pvd->cached_pcrf;
> +	if (pcrf && start < (pcrf->start + pcrf->len) &&
> +	    (start + len) > pcrf->start)
> +		return pcrf;
> +
>  	/* Return filter if there is overlapped region. We don't
>  	 * require strict matching for more flexibility. It also
>  	 * means the associated handler should validate the register
> @@ -62,8 +68,12 @@ static struct pci_cfg_reg_filter *pci_virt_find_filter(
>  	 */
>  	list_for_each(&pvd->pcrf, pcrf, link) {
>  		if (start < (pcrf->start + pcrf->len) &&
> -		    (start + len) > pcrf->start)
> +		    (start + len) > pcrf->start) {
> +			/* Update the cache */
> +			pvd->cached_pcrf = pcrf;
> +
>  			return pcrf;
> +		}
>  	}
>  
>  	return NULL;
> @@ -111,9 +121,18 @@ struct pci_virt_device *pci_virt_find_device(struct phb 
*phb,
>  {
>  	struct pci_virt_device *pvd;
>  
> +	/* Check the cached virtual device firstly */
> +	pvd = phb->cached_pvd;
> +	if (pvd && pvd->bdfn == bdfn)
> +		return pvd;
> +
>  	list_for_each(&phb->virt_devices, pvd, node) {
> -		if (pvd->bdfn == bdfn)
> +		if (pvd->bdfn == bdfn) {
> +			/* Update the cache */
> +			phb->cached_pvd = pvd;
> +
>  			return pvd;
> +		}
>  	}
>  
>  	return NULL;
> diff --git a/include/pci-virt.h b/include/pci-virt.h
> index 7c787cf..d1427bc 100644
> --- a/include/pci-virt.h
> +++ b/include/pci-virt.h
> @@ -30,6 +30,7 @@ struct pci_virt_device {
>  	uint32_t		bdfn;
>  	uint32_t		cfg_size;
>  	uint8_t			*config[PCI_VIRT_CFG_MAX];
> +	struct pci_cfg_reg_filter	*cached_pcrf;
>  	struct list_head	pcrf;
>  	struct list_node	node;
>  	void			*data;
> diff --git a/include/pci.h b/include/pci.h
> index 92e3dce..c326a88 100644
> --- a/include/pci.h
> +++ b/include/pci.h
> @@ -324,6 +324,7 @@ struct phb {
>  	enum phb_type		phb_type;
>  	struct lock		lock;
>  	struct list_head	devices;
> +	struct pci_virt_device	*cached_pvd;
>  	struct list_head	virt_devices;
>  	const struct phb_ops	*ops;
>  	struct pci_lsi_state	lstate;
> 



More information about the Skiboot mailing list