[PATCH v2 01/10] cxl: Drop commands if the PCI channel is not in normal state

Cyril Bur cyrilbur at gmail.com
Tue Aug 11 13:31:58 AEST 2015


On Tue, 28 Jul 2015 15:28:34 +1000
Daniel Axtens <dja at axtens.net> wrote:

> If the PCI channel has gone down, don't attempt to poke the hardware.
> 
> We need to guard every time cxl_whatever_(read|write) is called. This
> is because a call to those functions will dereference an offset into an
> mmio register, and the mmio mappings get invalidated in the EEH
> teardown.
> 

Hey Daniel, keeping in mind I can't exactly test it, and that I don't know the
ins and outs of CAPI, the code looks nice and it makes sence to me.

Some very minor points,

Otherwise

Acked-by: Cyril Bur <cyrilbur at gmail.com>

> Check in the read/write functions in the header.
> We give them the same semantics as usual PCI operations:
>  - a write to a channel that is down is ignored.
>  - a read from a channel that is down returns all fs.
> 
> Also, we try to access the MMIO space of a vPHB device as part of the
> PCI disable path. Because that's a read that bypasses most of our usual
> checks, we handle it explicitly.
> 
> As far as user visible warnings go:
>  - Check link state in file ops, return -EIO if down.
>  - Be reasonably quiet if there's an error in a teardown path,
>    or when we already know the hardware is going down.
>  - Throw a big WARN if someone tries to start a CXL operation
>    while the card is down. This gives a useful stacktrace for
>    debugging whatever is doing that.
> 
> Signed-off-by: Daniel Axtens <dja at axtens.net>
> ---
>  drivers/misc/cxl/context.c |  6 +++-
>  drivers/misc/cxl/cxl.h     | 34 ++++++++++++++++------
>  drivers/misc/cxl/file.c    | 19 +++++++++++++
>  drivers/misc/cxl/native.c  | 71 ++++++++++++++++++++++++++++++++++++++++++++--
>  drivers/misc/cxl/vphb.c    | 26 +++++++++++++++++
>  5 files changed, 144 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
> index 1287148629c0..615842115848 100644
> --- a/drivers/misc/cxl/context.c
> +++ b/drivers/misc/cxl/context.c
> @@ -193,7 +193,11 @@ int __detach_context(struct cxl_context *ctx)
>  	if (status != STARTED)
>  		return -EBUSY;
>  
> -	WARN_ON(cxl_detach_process(ctx));
> +	/* Only warn if we detached while the link was OK.
> +	 * If detach fails when hw is down, we don't care.
> +	 */
> +	WARN_ON(cxl_detach_process(ctx) &&
> +		cxl_adapter_link_ok(ctx->afu->adapter));
>  	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
>  	put_pid(ctx->pid);
>  	cxl_ctx_put();
> diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
> index 4fd66cabde1e..47eadbcfd379 100644
> --- a/drivers/misc/cxl/cxl.h
> +++ b/drivers/misc/cxl/cxl.h
> @@ -531,6 +531,14 @@ struct cxl_process_element {
>  	__be32 software_state;
>  } __packed;
>  
> +static inline bool cxl_adapter_link_ok(struct cxl *cxl)
> +{
> +	struct pci_dev *pdev;
> +
> +	pdev = to_pci_dev(cxl->dev.parent);
> +	return (pdev->error_state == pci_channel_io_normal);
> +}
> +

In the process of reviewing these patches I read the style guide in furthur
detail and (it doesn't 100% commit one way or the other but) it suggests it may
be wise to get GCC choose if it should inline or not, unless you have an reason 
(the macro replacment below being a good example)... Just a thought.

>  static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
>  {
>  	WARN_ON(!cpu_has_feature(CPU_FTR_HVMODE));
> @@ -538,9 +546,11 @@ static inline void __iomem *_cxl_p1_addr(struct cxl *cxl, cxl_p1_reg_t reg)
>  }
>  
>  #define cxl_p1_write(cxl, reg, val) \
> -	out_be64(_cxl_p1_addr(cxl, reg), val)
> +	if (cxl_adapter_link_ok(cxl)) \
> +		out_be64(_cxl_p1_addr(cxl, reg), val)
>  #define cxl_p1_read(cxl, reg) \
> -	in_be64(_cxl_p1_addr(cxl, reg))
> +	(cxl_adapter_link_ok(cxl) ? in_be64(_cxl_p1_addr(cxl, reg)) \
> +	 : (~0ULL))
>  
>  static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg)
>  {
> @@ -549,9 +559,11 @@ static inline void __iomem *_cxl_p1n_addr(struct cxl_afu *afu, cxl_p1n_reg_t reg
>  }
>  
>  #define cxl_p1n_write(afu, reg, val) \
> -	out_be64(_cxl_p1n_addr(afu, reg), val)
> +	if (cxl_adapter_link_ok(afu->adapter)) \
> +		out_be64(_cxl_p1n_addr(afu, reg), val)
>  #define cxl_p1n_read(afu, reg) \
> -	in_be64(_cxl_p1n_addr(afu, reg))
> +	(cxl_adapter_link_ok(afu->adapter) ? in_be64(_cxl_p1n_addr(afu, reg)) \
> +	 : (~0ULL))
>  

In the interest of safety and consistency, you might want braces around afu
when you dereference it. ie (afu)->adapter.

>  static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg)
>  {
> @@ -559,15 +571,21 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg
>  }
>  
>  #define cxl_p2n_write(afu, reg, val) \
> -	out_be64(_cxl_p2n_addr(afu, reg), val)
> +	if (cxl_adapter_link_ok(afu->adapter)) \
> +		out_be64(_cxl_p2n_addr(afu, reg), val)
>  #define cxl_p2n_read(afu, reg) \
> -	in_be64(_cxl_p2n_addr(afu, reg))
> +	(cxl_adapter_link_ok(afu->adapter) ? in_be64(_cxl_p2n_addr(afu, reg)) \
> +	 : (~0ULL))
>  
>  
>  #define cxl_afu_cr_read64(afu, cr, off) \
> -	in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off))
> +	(cxl_adapter_link_ok(afu->adapter) ? \
> +	 in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) : \
> +	 (~0ULL))
>  #define cxl_afu_cr_read32(afu, cr, off) \
> -	in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off))
> +	(cxl_adapter_link_ok(afu->adapter) ? \
> +	 in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) : \
> +	 0xffffffff)

Previous comment applies here, the existing code has (afu)->


So a birdy has informed me these are going to become inlines, you can
therefore disregard those comments. I am much more in favour of inlines!

>  u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off);
>  u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off);
>  
> diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
> index e3f4b69527a9..b99b4fa84ebd 100644
> --- a/drivers/misc/cxl/file.c
> +++ b/drivers/misc/cxl/file.c
> @@ -73,6 +73,11 @@ static int __afu_open(struct inode *inode, struct file *file, bool master)
>  	if (!afu->current_mode)
>  		goto err_put_afu;
>  
> +	if (!cxl_adapter_link_ok(adapter)) {
> +		rc = -EIO;
> +		goto err_put_afu;
> +	}
> +
>  	if (!(ctx = cxl_context_alloc())) {
>  		rc = -ENOMEM;
>  		goto err_put_afu;
> @@ -238,6 +243,9 @@ long afu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  	if (ctx->status == CLOSED)
>  		return -EIO;
>  
> +	if (!cxl_adapter_link_ok(ctx->afu->adapter))
> +		return -EIO;
> +
>  	pr_devel("afu_ioctl\n");
>  	switch (cmd) {
>  	case CXL_IOCTL_START_WORK:
> @@ -265,6 +273,9 @@ int afu_mmap(struct file *file, struct vm_area_struct *vm)
>  	if (ctx->status != STARTED)
>  		return -EIO;
>  
> +	if (!cxl_adapter_link_ok(ctx->afu->adapter))
> +		return -EIO;
> +
>  	return cxl_context_iomap(ctx, vm);
>  }
>  
> @@ -309,6 +320,9 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
>  	int rc;
>  	DEFINE_WAIT(wait);
>  
> +	if (!cxl_adapter_link_ok(ctx->afu->adapter))
> +		return -EIO;
> +
>  	if (count < CXL_READ_MIN_SIZE)
>  		return -EINVAL;
>  
> @@ -319,6 +333,11 @@ ssize_t afu_read(struct file *file, char __user *buf, size_t count,
>  		if (ctx_event_pending(ctx))
>  			break;
>  
> +		if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
> +			rc = -EIO;
> +			goto out;
> +		}
> +
>  		if (file->f_flags & O_NONBLOCK) {
>  			rc = -EAGAIN;
>  			goto out;
> diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
> index 10567f245818..16948915eb0d 100644
> --- a/drivers/misc/cxl/native.c
> +++ b/drivers/misc/cxl/native.c
> @@ -41,6 +41,12 @@ static int afu_control(struct cxl_afu *afu, u64 command,
>  			rc = -EBUSY;
>  			goto out;
>  		}
> +		if (!cxl_adapter_link_ok(afu->adapter)) {
> +			afu->enabled = enabled;
> +			rc = -EIO;
> +			goto out;
> +		}
> +
>  		pr_devel_ratelimited("AFU control... (0x%.16llx)\n",
>  				     AFU_Cntl | command);
>  		cpu_relax();
> @@ -85,6 +91,10 @@ int __cxl_afu_reset(struct cxl_afu *afu)
>  
>  int cxl_afu_check_and_enable(struct cxl_afu *afu)
>  {
> +	if (!cxl_adapter_link_ok(afu->adapter)) {
> +		WARN(1, "Refusing to enable afu while link down!\n");
> +		return -EIO;
> +	}
>  	if (afu->enabled)
>  		return 0;
>  	return afu_enable(afu);
> @@ -103,6 +113,12 @@ int cxl_psl_purge(struct cxl_afu *afu)
>  
>  	pr_devel("PSL purge request\n");
>  
> +	if (!cxl_adapter_link_ok(afu->adapter)) {
> +		dev_warn(&afu->dev, "PSL Purge called with link down, ignoring\n");
> +		rc = -EIO;
> +		goto out;
> +	}
> +
>  	if ((AFU_Cntl & CXL_AFU_Cntl_An_ES_MASK) != CXL_AFU_Cntl_An_ES_Disabled) {
>  		WARN(1, "psl_purge request while AFU not disabled!\n");
>  		cxl_afu_disable(afu);
> @@ -119,6 +135,11 @@ int cxl_psl_purge(struct cxl_afu *afu)
>  			rc = -EBUSY;
>  			goto out;
>  		}
> +		if (!cxl_adapter_link_ok(afu->adapter)) {
> +			rc = -EIO;
> +			goto out;
> +		}
> +
>  		dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
>  		pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx  PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr);
>  		if (dsisr & CXL_PSL_DSISR_TRANS) {
> @@ -215,6 +236,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
>  			dev_warn(&adapter->dev, "WARNING: CXL adapter wide TLBIA timed out!\n");
>  			return -EBUSY;
>  		}
> +		if (!cxl_adapter_link_ok(adapter))
> +			return -EIO;
>  		cpu_relax();
>  	}
>  
> @@ -224,6 +247,8 @@ int cxl_tlb_slb_invalidate(struct cxl *adapter)
>  			dev_warn(&adapter->dev, "WARNING: CXL adapter wide SLBIA timed out!\n");
>  			return -EBUSY;
>  		}
> +		if (!cxl_adapter_link_ok(adapter))
> +			return -EIO;
>  		cpu_relax();
>  	}
>  	return 0;
> @@ -240,6 +265,11 @@ int cxl_afu_slbia(struct cxl_afu *afu)
>  			dev_warn(&afu->dev, "WARNING: CXL AFU SLBIA timed out!\n");
>  			return -EBUSY;
>  		}
> +		/* If the adapter has gone down, we can assume that we
> +		 * will PERST it and that will invalidate everything.
> +		 */
> +		if (!cxl_adapter_link_ok(afu->adapter))
> +			return -EIO;
>  		cpu_relax();
>  	}
>  	return 0;
> @@ -279,6 +309,8 @@ static void slb_invalid(struct cxl_context *ctx)
>  	cxl_p1_write(adapter, CXL_PSL_SLBIA, CXL_TLB_SLB_IQ_LPIDPID);
>  
>  	while (1) {
> +		if (!cxl_adapter_link_ok(adapter))
> +			break;
>  		slbia = cxl_p1_read(adapter, CXL_PSL_SLBIA);
>  		if (!(slbia & CXL_TLB_SLB_P))
>  			break;
> @@ -308,6 +340,11 @@ static int do_process_element_cmd(struct cxl_context *ctx,
>  			rc = -EBUSY;
>  			goto out;
>  		}
> +		if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
> +			dev_warn(&ctx->afu->dev, "WARNING: Device link down, aborting Process Element Command!\n");
> +			rc = -EIO;
> +			goto out;
> +		}
>  		state = be64_to_cpup(ctx->afu->sw_command_status);
>  		if (state == ~0ULL) {
>  			pr_err("cxl: Error adding process element to AFU\n");
> @@ -355,8 +392,13 @@ static int terminate_process_element(struct cxl_context *ctx)
>  
>  	mutex_lock(&ctx->afu->spa_mutex);
>  	pr_devel("%s Terminate pe: %i started\n", __func__, ctx->pe);
> -	rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
> -				    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
> +	/* We could be asked to terminate when the hw is down.  That

                                                               ^
I'm notoriously bad for having a low care threshhold but I do have a high care
threshhold for consistency. Double space after period? Are you sure?

> +	 * should always succeed: it's not running if the hw has gone
> +	 * away and is being reset.
> +	 */
> +	if (cxl_adapter_link_ok(ctx->afu->adapter))
> +		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_TERMINATE,
> +					    CXL_PE_SOFTWARE_STATE_V | CXL_PE_SOFTWARE_STATE_T);
>  	ctx->elem->software_state = 0;	/* Remove Valid bit */
>  	pr_devel("%s Terminate pe: %i finished\n", __func__, ctx->pe);
>  	mutex_unlock(&ctx->afu->spa_mutex);
> @@ -369,7 +411,14 @@ static int remove_process_element(struct cxl_context *ctx)
>  
>  	mutex_lock(&ctx->afu->spa_mutex);
>  	pr_devel("%s Remove pe: %i started\n", __func__, ctx->pe);
> -	if (!(rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0)))
> +
> +	/* We could be asked to remove when the hw is down.  Again, if

Also here                                                   ^

> +	 * the hw is down, the PE is gone, so we succeed.
> +	 */
> +	if (cxl_adapter_link_ok(ctx->afu->adapter))
> +		rc = do_process_element_cmd(ctx, CXL_SPA_SW_CMD_REMOVE, 0);
> +
> +	if (!rc)
>  		ctx->pe_inserted = false;
>  	slb_invalid(ctx);
>  	pr_devel("%s Remove pe: %i finished\n", __func__, ctx->pe);
> @@ -614,6 +663,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
>  	if (!(mode & afu->modes_supported))
>  		return -EINVAL;
>  
> +	if (!cxl_adapter_link_ok(afu->adapter)) {
> +		WARN(1, "Device link is down, refusing to activate!\n");
> +		return -EIO;
> +	}
> +
>  	if (mode == CXL_MODE_DIRECTED)
>  		return activate_afu_directed(afu);
>  	if (mode == CXL_MODE_DEDICATED)
> @@ -624,6 +678,11 @@ int cxl_afu_activate_mode(struct cxl_afu *afu, int mode)
>  
>  int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr)
>  {
> +	if (!cxl_adapter_link_ok(ctx->afu->adapter)) {
> +		WARN(1, "Device link is down, refusing to attach process!\n");
> +		return -EIO;
> +	}
> +
>  	ctx->kernel = kernel;
>  	if (ctx->afu->current_mode == CXL_MODE_DIRECTED)
>  		return attach_afu_directed(ctx, wed, amr);
> @@ -668,6 +727,12 @@ int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
>  {
>  	u64 pidtid;
>  
> +	/* If the adapter has gone away, we can't get any meaningful
> +	 * information.
> +	 */
> +	if (!cxl_adapter_link_ok(afu->adapter))
> +		return -EIO;
> +
>  	info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
>  	info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
>  	info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
> diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
> index 2eba002b580b..2930911c1e42 100644
> --- a/drivers/misc/cxl/vphb.c
> +++ b/drivers/misc/cxl/vphb.c
> @@ -138,6 +138,26 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn,
>  	return 0;
>  }
>  
> +
> +static inline bool cxl_config_link_ok(struct pci_bus *bus)
> +{
> +	struct pci_controller *phb;
> +	struct cxl_afu *afu;
> +
> +	/* Config space IO is based on phb->cfg_addr, which is based on
> +	 * afu_desc_mmio. This isn't safe to read/write when the link

And yet not here         ^

> +	 * goes down, as EEH tears down MMIO space.
> +	 *
> +	 * Check if the link is OK before proceeding.
> +	 */
> +
> +	phb = pci_bus_to_host(bus);
> +	if (phb == NULL)
> +		return false;
> +	afu = (struct cxl_afu *)phb->private_data;
> +	return cxl_adapter_link_ok(afu->adapter);
> +}
> +
>  static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>  				int offset, int len, u32 *val)
>  {
> @@ -150,6 +170,9 @@ static int cxl_pcie_read_config(struct pci_bus *bus, unsigned int devfn,
>  	if (rc)
>  		return rc;
>  
> +	if (!cxl_config_link_ok(bus))
> +		return PCIBIOS_DEVICE_NOT_FOUND;
> +
>  	/* Can only read 32 bits */
>  	*val = (in_le32(ioaddr) >> shift) & mask;
>  	return PCIBIOS_SUCCESSFUL;
> @@ -167,6 +190,9 @@ static int cxl_pcie_write_config(struct pci_bus *bus, unsigned int devfn,
>  	if (rc)
>  		return rc;
>  
> +	if (!cxl_config_link_ok(bus))
> +		return PCIBIOS_DEVICE_NOT_FOUND;
> +
>  	/* Can only write 32 bits so do read-modify-write */
>  	mask <<= shift;
>  	val <<= shift;



More information about the Linuxppc-dev mailing list