[PATCH v3 23/27] powerpc/powernv/pmem: Add debug IOCTLs

Frederic Barrat fbarrat at linux.ibm.com
Thu Mar 5 02:21:50 AEDT 2020



Le 21/02/2020 à 04:27, Alastair D'Silva a écrit :
> From: Alastair D'Silva <alastair at d-silva.org>
> 
> These IOCTLs provide low level access to the card to aid in debugging
> controller/FPGA firmware.
> 
> Signed-off-by: Alastair D'Silva <alastair at d-silva.org>
> ---
>   arch/powerpc/platforms/powernv/pmem/Kconfig |   6 +
>   arch/powerpc/platforms/powernv/pmem/ocxl.c  | 249 ++++++++++++++++++++
>   include/uapi/nvdimm/ocxl-pmem.h             |  32 +++
>   3 files changed, 287 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/pmem/Kconfig b/arch/powerpc/platforms/powernv/pmem/Kconfig
> index c5d927520920..3f44429d70c9 100644
> --- a/arch/powerpc/platforms/powernv/pmem/Kconfig
> +++ b/arch/powerpc/platforms/powernv/pmem/Kconfig
> @@ -12,4 +12,10 @@ config OCXL_PMEM
>   
>   	  Select N if unsure.
>   
> +config OCXL_PMEM_DEBUG
> +	bool "OpenCAPI Persistent Memory debugging"
> +	depends on OCXL_PMEM
> +	help
> +	  Enables low level IOCTLs for OpenCAPI Persistent Memory firmware development
> +
>   endif
> diff --git a/arch/powerpc/platforms/powernv/pmem/ocxl.c b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> index e01f6f9fc180..d4ce5e9e0521 100644
> --- a/arch/powerpc/platforms/powernv/pmem/ocxl.c
> +++ b/arch/powerpc/platforms/powernv/pmem/ocxl.c
> @@ -1050,6 +1050,235 @@ int req_controller_health_perf(struct ocxlpmem *ocxlpmem)
>   				      GLOBAL_MMIO_HCI_REQ_HEALTH_PERF);
>   }
>   
> +#ifdef CONFIG_OCXL_PMEM_DEBUG
> +/**
> + * enable_fwdebug() - Enable FW debug on the controller
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int enable_fwdebug(const struct ocxlpmem *ocxlpmem)
> +{
> +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCI,
> +				      OCXL_LITTLE_ENDIAN,
> +				      GLOBAL_MMIO_HCI_FW_DEBUG);
> +}
> +
> +/**
> + * disable_fwdebug() - Disable FW debug on the controller
> + * @ocxlpmem: the device metadata
> + * Return: 0 on success, negative on failure
> + */
> +static int disable_fwdebug(const struct ocxlpmem *ocxlpmem)
> +{
> +	return ocxl_global_mmio_set64(ocxlpmem->ocxl_afu, GLOBAL_MMIO_HCIC,
> +				      OCXL_LITTLE_ENDIAN,
> +				      GLOBAL_MMIO_HCI_FW_DEBUG);
> +}
> +
> +static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem,
> +			     struct ioctl_ocxl_pmem_fwdebug __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_fwdebug args;
> +	u64 val;
> +	int i;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	// Buffer size must be a multiple of 8
> +	if ((args.buf_size & 0x07))
> +		return -EINVAL;
> +
> +	if (args.buf_size > ocxlpmem->admin_command.data_size)
> +		return -EINVAL;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = enable_fwdebug(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_FW_DEBUG);
> +	if (rc)
> +		goto out;
> +
> +	// Write DebugAction & FunctionCode
> +	val = ((u64)args.debug_action << 56) | ((u64)args.function_code << 40);
> +
> +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +				      ocxlpmem->admin_command.request_offset + 0x08,
> +				      OCXL_LITTLE_ENDIAN, val);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +				      ocxlpmem->admin_command.request_offset + 0x10,
> +				      OCXL_LITTLE_ENDIAN, args.debug_parameter_1);
> +	if (rc)
> +		goto out;
> +
> +	rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +				      ocxlpmem->admin_command.request_offset + 0x18,
> +				      OCXL_LITTLE_ENDIAN, args.debug_parameter_2);
> +	if (rc)
> +		goto out;
> +
> +	for (i = 0x20; i < 0x38; i += 0x08)
> +		rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +					      ocxlpmem->admin_command.request_offset + i,
> +					      OCXL_LITTLE_ENDIAN, 0);
> +	if (rc)
> +		goto out;


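Only the assignment to rc is the for loop body; the rc test is not. As
written, only the result of the last write is checked, so the loop needs
braces around both statements.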


> +
> +
> +	// Populate admin command buffer
> +	if (args.buf_size) {
> +		for (i = 0; i < args.buf_size; i += sizeof(u64)) {
> +			u64 val;
> +
> +			if (copy_from_user(&val, &args.buf[i], sizeof(u64)))
> +				return -EFAULT;


This needs to set rc = -EFAULT and goto out instead of returning directly,
because the mutex is held at this point.


> +
> +			rc = ocxl_global_mmio_write64(ocxlpmem->ocxl_afu,
> +						      ocxlpmem->admin_command.data_offset + i,
> +						      OCXL_HOST_ENDIAN, val);
> +			if (rc)
> +				goto out;
> +		}
> +	}
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem,
> +					    ocxlpmem->timeouts[ADMIN_COMMAND_FW_DEBUG]);
> +	if (rc < 0)
> +		goto out;
> +
> +	rc = admin_response(ocxlpmem);
> +	if (rc < 0)
> +		goto out;
> +	if (rc != STATUS_SUCCESS) {
> +		warn_status(ocxlpmem, "Unexpected status from FW Debug", rc);
> +		goto out;
> +	}
> +
> +	if (args.buf_size) {
> +		for (i = 0; i < args.buf_size; i += sizeof(u64)) {
> +			u64 val;
> +
> +			rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu,
> +						     ocxlpmem->admin_command.data_offset + i,
> +						     OCXL_HOST_ENDIAN, &val);
> +			if (rc)
> +				goto out;
> +
> +			if (copy_to_user(&args.buf[i], &val, sizeof(u64))) {
> +				rc = -EFAULT;
> +				goto out;
> +			}
> +		}
> +	}
> +
> +	rc = admin_response_handled(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = disable_fwdebug(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +}
> +
> +static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
> +{
> +	int rc;
> +
> +	mutex_lock(&ocxlpmem->admin_command.lock);
> +
> +	rc = admin_command_request(ocxlpmem, ADMIN_COMMAND_SHUTDOWN);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_execute(ocxlpmem);
> +	if (rc)
> +		goto out;
> +
> +	rc = admin_command_complete_timeout(ocxlpmem, ADMIN_COMMAND_SHUTDOWN);
> +	if (rc < 0) {
> +		dev_warn(&ocxlpmem->dev, "Shutdown timed out\n");
> +		goto out;
> +	}
> +
> +	rc = 0;
> +	goto out;


We can remove that goto; execution falls through to the 'out' label anyway.

No admin_response_handled()? Is that shutting down the full adapter and 
we have nobody to talk to? What happens next?


> +
> +out:
> +	mutex_unlock(&ocxlpmem->admin_command.lock);
> +	return rc;
> +}
> +
> +static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
> +				struct ioctl_ocxl_pmem_mmio __user *uarg)
> +{
> +	struct scm_ioctl_mmio args;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	return ocxl_global_mmio_write64(ocxlpmem->ocxl_afu, args.address,
> +					OCXL_LITTLE_ENDIAN, args.val);
> +}
> +
> +static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
> +				     struct ioctl_ocxl_pmem_mmio __user *uarg)
> +{
> +	struct ioctl_ocxl_pmem_mmio args;
> +	int rc;
> +
> +	if (copy_from_user(&args, uarg, sizeof(args)))
> +		return -EFAULT;
> +
> +	rc = ocxl_global_mmio_read64(ocxlpmem->ocxl_afu, args.address,
> +				     OCXL_LITTLE_ENDIAN, &args.val);
> +	if (rc)
> +		return rc;
> +
> +	if (copy_to_user(uarg, &args, sizeof(args)))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +#else /* CONFIG_OCXL_PMEM_DEBUG */
> +static int ioctl_fwdebug(struct ocxlpmem *ocxlpmem,
> +			     struct ioctl_ocxl_pmem_fwdebug __user *uarg)
> +{
> +	return -EPERM;
> +}
> +
> +static int ioctl_shutdown(struct ocxlpmem *ocxlpmem)
> +{
> +	return -EPERM;
> +}
> +
> +static int ioctl_mmio_write(struct ocxlpmem *ocxlpmem,
> +				struct ioctl_ocxl_pmem_mmio __user *uarg)
> +{
> +	return -EPERM;
> +}
> +
> +static int ioctl_mmio_read(struct ocxlpmem *ocxlpmem,
> +			       struct ioctl_ocxl_pmem_mmio __user *uarg)
> +{
> +	return -EPERM;
> +}


The '#else' clause could be dropped; the ioctls would then return EINVAL,
which is fine, I think.



> +#endif /* CONFIG_OCXL_PMEM_DEBUG */
> +
>   static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
>   {
>   	struct ocxlpmem *ocxlpmem = file->private_data;
> @@ -1091,6 +1320,26 @@ static long file_ioctl(struct file *file, unsigned int cmd, unsigned long args)
>   	case IOCTL_OCXL_PMEM_REQUEST_HEALTH:
>   		rc = req_controller_health_perf(ocxlpmem);
>   		break;
> +
> +	case IOCTL_OCXL_PMEM_FWDEBUG:
> +		rc = ioctl_fwdebug(ocxlpmem,
> +				   (struct ioctl_ocxl_pmem_fwdebug __user *)args);
> +		break;
> +
> +	case IOCTL_OCXL_PMEM_SHUTDOWN:
> +		rc = ioctl_shutdown(ocxlpmem);
> +		break;
> +
> +	case IOCTL_OCXL_PMEM_MMIO_WRITE:
> +		rc = ioctl_mmio_write(ocxlpmem,
> +				      (struct ioctl_ocxl_pmem_mmio __user *)args);
> +		break;
> +
> +	case IOCTL_OCXL_PMEM_MMIO_READ:
> +		rc = ioctl_mmio_read(ocxlpmem,
> +				     (struct ioctl_ocxl_pmem_mmio __user *)args);
> +		break;
> +
>   	}
>   
>   	return rc;
> diff --git a/include/uapi/nvdimm/ocxl-pmem.h b/include/uapi/nvdimm/ocxl-pmem.h
> index 0d03abb44001..e20a4f8be82a 100644
> --- a/include/uapi/nvdimm/ocxl-pmem.h
> +++ b/include/uapi/nvdimm/ocxl-pmem.h
> @@ -6,6 +6,28 @@
>   #include <linux/types.h>
>   #include <linux/ioctl.h>
>   
> +enum ocxlpmem_fwdebug_action {
> +	OCXL_PMEM_FWDEBUG_READ_CONTROLLER_MEMORY = 0x01,
> +	OCXL_PMEM_FWDEBUG_WRITE_CONTROLLER_MEMORY = 0x02,
> +	OCXL_PMEM_FWDEBUG_ENABLE_FUNCTION = 0x03,
> +	OCXL_PMEM_FWDEBUG_DISABLE_FUNCTION = 0x04,
> +	OCXL_PMEM_FWDEBUG_GET_PEL = 0x05, // Retrieve Persistent Error Log
> +};
> +
> +struct ioctl_ocxl_pmem_buffer_info {
> +	__u32	admin_command_buffer_size; // out
> +	__u32	near_storage_buffer_size; // out
> +};
> +
> +struct ioctl_ocxl_pmem_fwdebug { // All args are inputs
> +	enum ocxlpmem_fwdebug_action debug_action;


More kernel ABI problems. My interpretation of the "enumeration 
specifiers" section of C99 is that we can't rely on the size of the enum.


> +	__u16 function_code;
> +	__u16 buf_size; // Size of optional data buffer
> +	__u64 debug_parameter_1;
> +	__u64 debug_parameter_2;
> +	__u8 *buf; // Pointer to optional in/out data buffer
> +};
> +
>   #define OCXL_PMEM_ERROR_LOG_ACTION_RESET	(1 << (32-32))
>   #define OCXL_PMEM_ERROR_LOG_ACTION_CHKFW	(1 << (53-32))
>   #define OCXL_PMEM_ERROR_LOG_ACTION_REPLACE	(1 << (54-32))
> @@ -66,6 +88,11 @@ struct ioctl_ocxl_pmem_controller_stats {
>   	__u64 cache_write_latency; /* nanoseconds */
>   };
>   
> +struct ioctl_ocxl_pmem_mmio {
> +	__u64 address; /* Offset in global MMIO space */
> +	__u64 val; /* value to write/was read */
> +};


Can we group all the debug data structures together in the header file, 
with a comment indicating that they may not be available in the kernel, 
depending on the config?

   Fred


> +
>   struct ioctl_ocxl_pmem_eventfd {
>   	__s32 eventfd;
>   	__u32 reserved;
> @@ -92,4 +119,9 @@ struct ioctl_ocxl_pmem_eventfd {
>   #define IOCTL_OCXL_PMEM_EVENT_CHECK			_IOR(OCXL_PMEM_MAGIC, 0x07, __u64)
>   #define IOCTL_OCXL_PMEM_REQUEST_HEALTH			_IO(OCXL_PMEM_MAGIC, 0x08)
>   
> +#define IOCTL_OCXL_PMEM_FWDEBUG		_IOWR(OCXL_PMEM_MAGIC, 0xf0, struct ioctl_ocxl_pmem_fwdebug)
> +#define IOCTL_OCXL_PMEM_MMIO_WRITE	_IOW(OCXL_PMEM_MAGIC, 0xf1, struct ioctl_ocxl_pmem_mmio)
> +#define IOCTL_OCXL_PMEM_MMIO_READ	_IOWR(OCXL_PMEM_MAGIC, 0xf2, struct ioctl_ocxl_pmem_mmio)
> +#define IOCTL_OCXL_PMEM_SHUTDOWN	_IO(OCXL_PMEM_MAGIC, 0xf3)
> +
>   #endif /* _UAPI_OCXL_SCM_H */
> 



More information about the Linuxppc-dev mailing list