[PATCH] powerpc/pseries: add lparctl driver for platform-specific functions

Nathan Lynch nathanl at linux.ibm.com
Sat Jul 30 10:04:58 AEST 2022


This adds a chardev+ioctl-based interface for user space to access pseries
platform-specific functions which don't easily fit elsewhere.

The immediate motivation is to unbreak librtas[1] with kernel lockdown
enabled. librtas provides a thin root-only user-space API, allowing nearly
direct access to RTAS functions. Since its inception, some of librtas's
APIs have used /dev/mem to allocate buffers that are addressable by RTAS
for use with the powerpc-specific rtas() syscall. This design likely would
not be our first choice today, but it has served adequately for about two
decades without much change, and librtas has a non-negligible number of
existing users, including powerpc-utils, ppc64-diag, lsvpd, lscpu
(util-linux), and several non-OSS programs. With lockdown enabled, /dev/mem
access is prohibited, preventing communication with the management console
and breaking associated functions such as DLPAR and partition migration.

So the task is to provide new lockdown-compatible interfaces for librtas to
prefer over the legacy /dev/mem+sys_rtas(), allowing it to maintain its own
user-facing ABIs as much as possible. This means that we make different
interface choices than we would were we writing everything from scratch. In
the ioctl commands added here, we do not map RTAS error statuses to Linux
errno values, because the existing users of librtas's system parameter APIs
expect the RTAS status codes. Instead, the ioctl succeeds if the kernel
manages to call the RTAS function at all, and passes the RTAS status back
to user space in the argument buffer.

Add the ability to retrieve and change system parameters as defined by
PAPR. Along with proposed implementation changes to librtas[2], this
effectively fixes librtas's rtas_get_sysparm() and rtas_set_sysparm() APIs
for existing users with lockdown. This is enough to get HMC communication
working via the proprietary RSCT stack, enabling LPM, processor DLPAR,
memory DLPAR, and various other use cases.

While this starts with RTAS-implemented functions, there's no reason it
couldn't host facilities that rely on hcalls or other PAPR-specified
interfaces. It could be an alternative to adding more key=value lines to
/proc/powrpc/lparcfg. Hence the generic name 'lparctl'.

Utilities tested (ppc64le kernel and user space):
* ppc64_cpu --run-mode (powerpc-utils, gets/sets processor diag run mode)
* serv_config (powerpc-utils, gets/sets various system and LPAR policies)
* lscpu (util-linux, retrieves processor topology)
* RSCT (proprietary, retrieves HMC connection details)

Future work to unbreak more librtas APIs may include:
* VPD retrieval via ibm,get-vpd
* RTAS error injection
* indicator query/manipulation for diagnostics

[1] https://github.com/ibm-power-utilities/librtas
[2] https://github.com/ibm-power-utilities/librtas/pull/26

Signed-off-by: Nathan Lynch <nathanl at linux.ibm.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   1 +
 arch/powerpc/include/uapi/asm/lparctl.h       |  63 +++++++
 arch/powerpc/platforms/pseries/Makefile       |   2 +-
 arch/powerpc/platforms/pseries/lparctl.c      | 171 ++++++++++++++++++
 4 files changed, 236 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/uapi/asm/lparctl.h
 create mode 100644 arch/powerpc/platforms/pseries/lparctl.c

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index fcab013e47c9..029de1b7ebdb 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,7 @@ Code  Seq#    Include File                                           Comments
                                                                      <mailto:vgo at ratio.de>
 0xB1  00-1F                                                          PPPoX
                                                                      <mailto:mostrows at styx.uwaterloo.ca>
+0xB2  01-02  arch/powerpc/include/uapi/asm/lparctl.h                 powerpc/pseries lparctl API
 0xB3  00     linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h                                            <mailto:linux-gpio at vger.kernel.org>
 0xB5  00-0F  uapi/linux/rpmsg.h                                      <mailto:linux-remoteproc at vger.kernel.org>
diff --git a/arch/powerpc/include/uapi/asm/lparctl.h b/arch/powerpc/include/uapi/asm/lparctl.h
new file mode 100644
index 000000000000..42e1ee5fe3c8
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/lparctl.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef POWERPC_UAPI_LPARCTL_H
+#define POWERPC_UAPI_LPARCTL_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define LPARCTL_IOCTL_BASE 0xb2
+
+#define LPARCTL_IO(nr)         _IO(LPARCTL_IOCTL_BASE, nr)
+#define LPARCTL_IOR(nr, type)  _IOR(LPARCTL_IOCTL_BASE, nr, type)
+#define LPARCTL_IOW(nr, type)  _IOW(LPARCTL_IOCTL_BASE, nr, type)
+#define LPARCTL_IOWR(nr, type) _IOWR(LPARCTL_IOCTL_BASE, nr, type)
+
+/**
+ * struct lparctl_get_system_parameter - System parameter retrieval.
+ *
+ * @rtas_status: (output) The result of the ibm,get-system-parameter RTAS
+ *               call attempted by the kernel.
+ * @token: (input) System parameter token as specified in PAPR+ 7.3.16
+ *         "System Parameters Option".
+ * @data: (input and output) If applicable, any required input data ("OS
+ *        Service Entitlement Status" appears to be the only system
+ *        parameter that requires this). If @rtas_status is zero on return
+ *        from ioctl(), contains the value of the specified parameter. The
+ *        first two bytes contain the (big-endian) length of valid data in
+ *        both cases. If @rtas_status is not zero the contents are
+ *        indeterminate.
+ */
+struct lparctl_get_system_parameter {
+	__s32 rtas_status;
+	__u16 token;
+	union {
+		__be16 length;
+		__u8   data[4002];
+	};
+};
+
+#define LPARCTL_GET_SYSPARM LPARCTL_IOWR(0x01, struct lparctl_get_system_parameter)
+
+/**
+ * struct lparctl_set_system_parameter - System parameter update.
+ *
+ * @rtas_status: (output) The result of the ibm,get-system-parameter RTAS
+ *               call attempted by the kernel.
+ * @token: (input) System parameter token as specified in PAPR+ 7.3.16
+ *         "System Parameters Option".
+ * @data: (input) The desired value for the parameter corresponding to
+ *        @token. The first two bytes contain the (big-endian) length of
+ *        valid data.
+ */
+struct lparctl_set_system_parameter {
+	__s32 rtas_status;
+	__u16 token;
+	union {
+		__be16 length;
+		__u8   data[1026];
+	};
+};
+
+#define LPARCTL_SET_SYSPARM LPARCTL_IOWR(0x02, struct lparctl_set_system_parameter)
+
+#endif /* POWERPC_UAPI_LPARCTL_H */
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 14e143b946a3..8fff7ed10d7c 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC64)			:= $(NO_MINIMAL_TOC)
 ccflags-$(CONFIG_PPC_PSERIES_DEBUG)	+= -DDEBUG
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
-			   of_helpers.o \
+			   of_helpers.o lparctl.o \
 			   setup.o iommu.o event_sources.o ras.o \
 			   firmware.o power.o dlpar.o mobility.o rng.o \
 			   pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/lparctl.c b/arch/powerpc/platforms/pseries/lparctl.c
new file mode 100644
index 000000000000..11a33b8d9234
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/lparctl.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Character device for accessing pseries/PAPR platform-specific
+ * facilities.
+ */
+#define pr_fmt(fmt) "lparctl: " fmt
+
+#include <linux/build_bug.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/miscdevice.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <asm/lparctl.h>
+#include <asm/machdep.h>
+#include <asm/rtas.h>
+
+/**
+ * lparctl_get_sysparm() - Query a PAPR system parameter.
+ *
+ * Retrieve the value of the parameter indicated by the @token member of
+ * the &struct lparctl_get_system_parameter at @arg. If available and
+ * accessible, the value of the parameter is copied to the @data member of
+ * the &struct lparctl_get_system_parameter at @arg, and its @rtas_status
+ * field is set to zero. Otherwise, the @rtas_status member reflects the
+ * most recent RTAS call status, and the contents of @data are
+ * indeterminate.
+ *
+ * Non-zero RTAS call statuses are not translated to conventional errno
+ * values. Only kernel issues or API misuse result in an error at the
+ * syscall level. This is to serve the needs of legacy software which
+ * historically has accessed system parameters via the rtas() syscall,
+ * which has similar behavior.
+ *
+ * Return:
+ * * 0 - OK. Caller must examine the @rtas_status member of the returned
+ *       &struct lparctl_get_system_parameter to determine whether a parameter
+ *       value was copied out.
+ * * -EINVAL - The copied-in &struct lparctl_get_system_parameter.rtas_status
+ *             is non-zero.
+ * * -EFAULT - The supplied @arg is a bad address.
+ * * -ENOMEM - Allocation failure.
+ */
+static long lparctl_get_sysparm(struct lparctl_get_system_parameter __user *argp)
+{
+	struct lparctl_get_system_parameter *gsp;
+	long ret;
+	int fwrc;
+
+	/*
+	 * Special case to allow user space to probe the command.
+	 */
+	if (argp == NULL)
+		return 0;
+
+	gsp = memdup_user(argp, sizeof(*gsp));
+	if (IS_ERR(gsp)) {
+		ret = PTR_ERR(gsp);
+		goto err_return;
+	}
+
+	ret = -EINVAL;
+	if (gsp->rtas_status != 0)
+		goto err_free;
+
+	do {
+		static_assert(sizeof(gsp->data) <= sizeof(rtas_data_buf));
+
+		spin_lock(&rtas_data_buf_lock);
+		memset(rtas_data_buf, 0, sizeof(rtas_data_buf));
+		memcpy(rtas_data_buf, gsp->data, sizeof(gsp->data));
+		fwrc = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				 NULL, gsp->token, __pa(rtas_data_buf),
+				 sizeof(gsp->data));
+		if (fwrc == 0)
+			memcpy(gsp->data, rtas_data_buf, sizeof(gsp->data));
+		spin_unlock(&rtas_data_buf_lock);
+	} while (rtas_busy_delay(fwrc));
+
+	gsp->rtas_status = fwrc;
+	ret = 0;
+	if (copy_to_user(argp, gsp, sizeof(*gsp)))
+		ret = -EFAULT;
+err_free:
+	kfree(gsp);
+err_return:
+	return ret;
+}
+
+static long lparctl_set_sysparm(struct lparctl_set_system_parameter __user *argp)
+{
+	struct lparctl_set_system_parameter *ssp;
+	long ret;
+	int fwrc;
+
+	/*
+	 * Special case to allow user space to probe the command.
+	 */
+	if (argp == NULL)
+		return 0;
+
+	ssp = memdup_user(argp, sizeof(*ssp));
+	if (IS_ERR(ssp)) {
+		ret = PTR_ERR(ssp);
+		goto err_return;
+	}
+
+	ret = -EINVAL;
+	if (ssp->rtas_status != 0)
+		goto err_free;
+
+	do {
+		static_assert(sizeof(ssp->data) <= sizeof(rtas_data_buf));
+
+		spin_lock(&rtas_data_buf_lock);
+		memset(rtas_data_buf, 0, sizeof(rtas_data_buf));
+		memcpy(rtas_data_buf, ssp->data, sizeof(ssp->data));
+		fwrc = rtas_call(rtas_token("ibm,set-system-parameter"), 2, 1,
+				 NULL, ssp->token, __pa(rtas_data_buf));
+		spin_unlock(&rtas_data_buf_lock);
+	} while (rtas_busy_delay(fwrc));
+
+	ret = 0;
+	if (copy_to_user(&argp->rtas_status, &fwrc, sizeof(argp->rtas_status)))
+		ret = -EFAULT;
+err_free:
+	kfree(ssp);
+err_return:
+	return ret;
+}
+
+static long lparctl_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	long ret;
+
+	switch (ioctl) {
+	case LPARCTL_GET_SYSPARM:
+		ret = lparctl_get_sysparm(argp);
+		break;
+	case LPARCTL_SET_SYSPARM:
+		ret = lparctl_set_sysparm(argp);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+	return ret;
+}
+
+static const struct file_operations lparctl_ops = {
+	.unlocked_ioctl = lparctl_dev_ioctl,
+	.llseek		= noop_llseek,
+};
+
+static struct miscdevice lparctl_dev = {
+	MISC_DYNAMIC_MINOR,
+	"lparctl",
+	&lparctl_ops,
+};
+
+static __init int lparctl_init(void)
+{
+	int ret;
+
+	ret = misc_register(&lparctl_dev);
+
+	return ret;
+}
+machine_device_initcall(pseries, lparctl_init);
-- 
2.35.1



More information about the Linuxppc-dev mailing list