[PATCH v3 23/27] powerpc/powernv/pmem: Add debug IOCTLs

Alastair D'Silva alastair at au1.ibm.com
Thu Mar 12 15:24:30 AEDT 2020


On Wed, 2020-03-04 at 16:21 +0100, Frederic Barrat wrote:
> 
> Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> > From: Alastair D'Silva <alastair at d-silva.org>
> > 
> > These IOCTLs provide low level access to the card to aid in
> > debugging
> > controller/FPGA firmware.
> > 
> > Signed-off-by: Alastair D'Silva <alastair at d-silva.org>
> > ---
> >   arch/powerpc/platforms/powernv/pmem/Kconfig |   6 +
> >   arch/powerpc/platforms/powernv/pmem/ocxl.c  | 249
> > ++++++++++++++++++++
> >   include/uapi/nvdimm/ocxl-pmem.h             |  32 +++
> >   3 files changed, 287 insertions(+)
> > 
> > diff --git a/arch/powerpc/platforms/powernv/pmem/Kconfig
> > b/arch/powerpc/platforms/powernv/pmem/Kconfig
> > index c5d927520920..3f44429d70c9 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/Kconfig
> > +++ b/arch/powerpc/platforms/powernv/pmem/Kconfig
> > @@ -12,4 +12,10 @@ config OCXL_PMEM
> >   
> >   	  Select N if unsure.
> >   
> > +config OCXL_PMEM_DEBUG
> > +	bool "OpenCAPI Persistent Memory debugging"
> > +	depends on OCXL_PMEM
> > +	help
> > +	  Enables low level IOCTLs for OpenCAPI Persistent Memory
> > firmware development
> > +
> >   endif
> > diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > index e01f6f9fc180..d4ce5e9e0521 100644
> > --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> > @@ -1050,6 +1050,235 @@ int req_controller_health_perf(struct
> > ocxlpmem *ocxlpmem)
> >   				      GLOBAL_MMIO_HCI_REQ_HEALTH_PERF);
> >   }
> >   
> > +#ifdef CONFIG_OCXL_PMEM_DEBUG
> > +/**
> > + * enable_fwdebug() - Enable FW debug on the controller
> > + * @ocxlpmem: the device metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int enable_fwdebug(const struct ocxlpmem *ocxlpmem)
> > +{
> > +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCI,
> > +				      OCXL_LITTLE_ENDIAN,
> > +				      GLOBAL_MMIO_HCI_FW_DEBUG);
> > +}
> > +
> > +/**
> > + * disable_fwdebug() - Disable FW debug on the controller
> > + * @ocxlpmem: the device metadata
> > + * Return: 0 on success, negative on failure
> > + */
> > +static int disable_fwdebug(const struct ocxlpmem *ocxlpmem)
> > +{
> > +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu,
> > GLOBAL_MMIO_HCIC,
> > +				      OCXL_LITTLE_ENDIAN,
> > +				      GLOBAL_MMIO_HCI_FW_DEBUG);
> > +}
> > +
> > +static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem,
> > +			     struct ioctl_ocxl_pmem_fwdebug __user
> > *uarg)
> > +{
> > +	struct ioctl_ocxl_pmem_fwdebug args;
> > +	u64 val;
> > +	int i;
> > +	int rc;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	// Buffer size must be a multiple of 8
> > +	if ((args.buf_size & 0x07))
> > +		return -EINVAL;
> > +
> > +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> > +		return -EINVAL;
> > +
> > +	mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +	rc = enable_fwdebug(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_FW_DEBUG);
> > +	if (rc)
> > +		goto out;
> > +
> > +	// Write DebugAction & FunctionCode
> > +	val = ((u64)args.debug_action << 56) | ((u64)args.function_code
> > << 40);
> > +
> > +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +				      ocxlpmem-
> > >admin_command.request_offset + 0x08,
> > +				      OCXL_LITTLE_ENDIAN, val);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +				      ocxlpmem-
> > >admin_command.request_offset + 0x10,
> > +				      OCXL_LITTLE_ENDIAN,
> > args.debug_parameter_1);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +				      ocxlpmem-
> > >admin_command.request_offset + 0x18,
> > +				      OCXL_LITTLE_ENDIAN,
> > args.debug_parameter_2);
> > +	if (rc)
> > +		goto out;
> > +
> > +	for (i = 0x20; i < 0x38; i += 0x08)
> > +		rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > +					      ocxlpmem-
> > >admin_command.request_offset + i,
> > +					      OCXL_LITTLE_ENDIAN, 0);
> > +	if (rc)
> > +		goto out;
> 
> rc is the for loop body. The rc test is not.
> 
Whoops :)

> 
> > +
> > +
> > +	// Populate admin command buffer
> > +	if (args.buf_size) {
> > +		for (i = 0; i < args.buf_size; i += sizeof(u64)) {
> > +			u64 val;
> > +
> > +			if (copy_from_user(&val, &args.buf[i],
> > sizeof(u64)))
> > +				return -EFAULT;
> 
> need to get rc and goto out because of the mutex
> 
Ok

> 
> > +
> > +			rc = ocxl_global_mmio_write64(ocxlpmem-
> > >ocxl_afu,
> > +						      ocxlpmem-
> > >admin_command.data_offset + i,
> > +						      OCXL_HOST_ENDIAN,
> > val);
> > +			if (rc)
> > +				goto out;
> > +		}
> > +	}
> > +
> > +	rc = admin_command_execute(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_complete_timeout(ocxlpmem,
> > +					    ocxlpmem-
> > >timeouts[ADMIN_COMMAND_FW_DEBUG]);
> > +	if (rc < 0)
> > +		goto out;
> > +
> > +	rc = admin_response(ocxlpmem);
> > +	if (rc < 0)
> > +		goto out;
> > +	if (rc != STATUS_SUCCESS) {
> > +		warn_status(ocxlpmem, "Unexpected status from FW
> > Debug", rc);
> > +		goto out;
> > +	}
> > +
> > +	if (args.buf_size) {
> > +		for (i = 0; i < args.buf_size; i += sizeof(u64)) {
> > +			u64 val;
> > +
> > +			rc = ocxl_global_mmio_read64(ocxlpmem-
> > >ocxl_afu,
> > +						     ocxlpmem-
> > >admin_command.data_offset + i,
> > +						     OCXL_HOST_ENDIAN,
> > &val);
> > +			if (rc)
> > +				goto out;
> > +
> > +			if (copy_to_user(&args.buf[i], &val,
> > sizeof(u64))) {
> > +				rc = -EFAULT;
> > +				goto out;
> > +			}
> > +		}
> > +	}
> > +
> > +	rc = admin_response_handled(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = disable_fwdebug(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +out:
> > +	mutex_unlock(&ocxlpmem->admin_command.lock);
> > +	return rc;
> > +}
> > +
> > +static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
> > +{
> > +	int rc;
> > +
> > +	mutex_lock(&ocxlpmem->admin_command.lock);
> > +
> > +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_SHUTDOWN);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_execute(ocxlpmem);
> > +	if (rc)
> > +		goto out;
> > +
> > +	rc = admin_command_complete_timeout(ocxlpmem,
> > ADMIN_COMMAND_SHUTDOWN);
> > +	if (rc < 0) {
> > +		dev_warn(&ocxlpmem->dev, "Shutdown timed out\n");
> > +		goto out;
> > +	}
> > +
> > +	rc = 0;
> > +	goto out;
> 
> We can remove that goto.

Ok

> 
> No admin_response_handled()? Is that shutting down the full adapter
> and 
> we have nobody to talk to? What happens next?
> 

That's an oversight, we should call admin_response_handled().

> 
> > +
> > +out:
> > +	mutex_unlock(&ocxlpmem->admin_command.lock);
> > +	return rc;
> > +}
> > +
> > +static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
> > +				struct ioctl_ocxl_pmem_mmio __user
> > *uarg)
> > +{
> > +	struct scm_ioctl_mmio args;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	return ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> > args.address,
> > +					OCXL_LITTLE_ENDIAN, args.val);
> > +}
> > +
> > +static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
> > +				     struct ioctl_ocxl_pmem_mmio __user
> > *uarg)
> > +{
> > +	struct ioctl_ocxl_pmem_mmio args;
> > +	int rc;
> > +
> > +	if (copy_from_user(&args, uarg, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, args.address,
> > +				     OCXL_LITTLE_ENDIAN, &args.val);
> > +	if (rc)
> > +		return rc;
> > +
> > +	if (copy_to_user(uarg, &args, sizeof(args)))
> > +		return -EFAULT;
> > +
> > +	return 0;
> > +}
> > +#else /* CONFIG_OCXL_PMEM_DEBUG */
> > +static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem,
> > +			     struct ioctl_ocxl_pmem_fwdebug __user
> > *uarg)
> > +{
> > +	return -EPERM;
> > +}
> > +
> > +static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
> > +{
> > +	return -EPERM;
> > +}
> > +
> > +static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
> > +				struct ioctl_ocxl_pmem_mmio __user
> > *uarg)
> > +{
> > +	return -EPERM;
> > +}
> > +
> > +static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
> > +			       struct ioctl_ocxl_pmem_mmio __user
> > *uarg)
> > +{
> > +	return -EPERM;
> > +}
> 
> The 'else' clause could be dropped, the ioctls will return EINVAL,
> which 
> is fine, I think.
> 
> 

Ok

> 
> > +#endif /* CONFIG_OCXL_PMEM_DEBUG */
> > +
> >   static long file_ioctl(struct file *file, unsigned int cmd,
> > unsigned long args)
> >   {
> >   	struct ocxlpmem *ocxlpmem = file->private_data;
> > @@ -1091,6 +1320,26 @@ static long file_ioctl(struct file *file,
> > unsigned int cmd, unsigned long args)
> >   	case IOCTL_OCXL_PMEM_REQUEST_HEALTH:
> >   		rc = req_controller_health_perf(ocxlpmem);
> >   		break;
> > +
> > +	case IOCTL_OCXL_PMEM_FWDEBUG:
> > +		rc = ioctl_fwdebug(ocxlpmem,
> > +				   (struct ioctl_ocxl_pmem_fwdebug
> > __user *)args);
> > +		break;
> > +
> > +	case IOCTL_OCXL_PMEM_SHUTDOWN:
> > +		rc = ioctl_shutdown(ocxlpmem);
> > +		break;
> > +
> > +	case IOCTL_OCXL_PMEM_MMIO_WRITE:
> > +		rc = ioctl_mmio_write(ocxlpmem,
> > +				      (struct ioctl_ocxl_pmem_mmio
> > __user *)args);
> > +		break;
> > +
> > +	case IOCTL_OCXL_PMEM_MMIO_READ:
> > +		rc = ioctl_mmio_read(ocxlpmem,
> > +				     (struct ioctl_ocxl_pmem_mmio
> > __user *)args);
> > +		break;
> > +
> >   	}
> >   
> >   	return rc;
> > diff --git a/include/uapi/nvdimm/ocxl-pmem.h
> > b/include/uapi/nvdimm/ocxl-pmem.h
> > index 0d03abb44001..e20a4f8be82a 100644
> > --- a/include/uapi/nvdimm/ocxl-pmem.h
> > +++ b/include/uapi/nvdimm/ocxl-pmem.h
> > @@ -6,6 +6,28 @@
> >   #include <linux/types.h>
> >   #include <linux/ioctl.h>
> >   
> > +enum ocxlpmem_fwdebug_action {
> > +	OCXL_PMEM_FWDEBUG_READ_CONTROLLER_MEMORY = 0x01,
> > +	OCXL_PMEM_FWDEBUG_WRITE_CONTROLLER_MEMORY = 0x02,
> > +	OCXL_PMEM_FWDEBUG_ENABLE_FUNCTION = 0x03,
> > +	OCXL_PMEM_FWDEBUG_DISABLE_FUNCTION = 0x04,
> > +	OCXL_PMEM_FWDEBUG_GET_PEL = 0x05, // Retrieve Persistent Error
> > Log
> > +};
> > +
> > +struct ioctl_ocxl_pmem_buffer_info {
> > +	__u32	admin_command_buffer_size; // out
> > +	__u32	near_storage_buffer_size; // out
> > +};
> > +
> > +struct ioctl_ocxl_pmem_fwdebug { // All args are inputs
> > +	enum ocxlpmem_fwdebug_action debug_action;
> 
> More kernel ABI problems. My interpretation of the "enumeration 
> specifiers" section of C99 is that we can't rely on the size of the
> enum.
> 

Ok

> 
> > +	__u16 function_code;
> > +	__u16 buf_size; // Size of optional data buffer
> > +	__u64 debug_parameter_1;
> > +	__u64 debug_parameter_2;
> > +	__u8 *buf; // Pointer to optional in/out data buffer
> > +};
> > +
> >   #define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
> >   #define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
> >   #define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
> > @@ -66,6 +88,11 @@ struct ioctl_ocxl_pmem_controller_stats {
> >   	__u64 cache_write_latency; /* nanoseconds */
> >   };
> >   
> > +struct ioctl_ocxl_pmem_mmio {
> > +	__u64 address; /* Offset in global MMIO space */
> > +	__u64 val; /* value to write/was read */
> > +};
> 
> Can we group all the debug data structures together in the header
> file, 
> with a comment indicating that they may not be available in the
> kernel, 
> depending on the config?
> 

Ok

>    Fred
> 
> 
> > +
> >   struct ioctl_ocxl_pmem_eventfd {
> >   	__s32 eventfd;
> >   	__u32 reserved;
> > @@ -92,4 +119,9 @@ struct ioctl_ocxl_pmem_eventfd {
> >   #define IOCTL_OCXL_PMEM_EVENT_CHECK			_IOR(OC
> > XL_PMEM_MAGIC, 0x07, __u64)
> >   #define IOCTL_OCXL_PMEM_REQUEST_HEALTH			_IO(OCX
> > L_PMEM_MAGIC, 0x08)
> >   
> > +#define IOCTL_OCXL_PMEM_FWDEBUG		_IOWR(OCXL_PMEM_MAGIC,
> > 0xf0, struct ioctl_ocxl_pmem_fwdebug)
> > +#define IOCTL_OCXL_PMEM_MMIO_WRITE	_IOW(OCXL_PMEM_MAGIC, 0xf1,
> > struct ioctl_ocxl_pmem_mmio)
> > +#define IOCTL_OCXL_PMEM_MMIO_READ	_IOWR(OCXL_PMEM_MAGIC, 0xf2,
> > struct ioctl_ocxl_pmem_mmio)
> > +#define IOCTL_OCXL_PMEM_SHUTDOWN	_IO(OCXL_PMEM_MAGIC, 0xf3)
> > +
> >   #endif /* _UAPI_OCXL_SCM_H */
> > 
-- 
Alastair D'Silva
Open Source Developer
Linux Technology Centre, IBM Australia
mob: 0423 762 819



More information about the Linuxppc-dev mailing list