[PATCH v5 2/8] lkdtm/powerpc: Add test to hijack a patch mapping

Tue Jul 13 15:31:07 AEST 2021

When live patching with STRICT_KERNEL_RWX the CPU doing the patching
must temporarily remap the page(s) containing the patch site with +W
permissions. While this temporary mapping is in use, another CPU could
write to the same mapping and maliciously alter kernel text. Implement a
LKDTM test to attempt to exploit such an opening during code patching.
The test is implemented on powerpc and requires LKDTM built into the
kernel (building LKDTM as a module is insufficient).

The LKDTM "hijack" test works as follows:

  1. A CPU executes an infinite loop to patch an instruction. This is
     the "patching" CPU.
  2. Another CPU attempts to write to the address of the temporary
     mapping used by the "patching" CPU. This other CPU is the
     "hijacker" CPU. The hijack either fails with a fault/error or
     succeeds, in which case some kernel text is now overwritten.

The virtual address of the temporary patch mapping is provided via an
LKDTM-specific accessor to the hijacker CPU. This test assumes a
hypothetical situation where this address was leaked previously.

How to run the test:

	mount -t debugfs none /sys/kernel/debug
	(echo HIJACK_PATCH > /sys/kernel/debug/provoke-crash/DIRECT)

A passing test indicates that it is not possible to overwrite kernel
text from another CPU by using the temporary mapping established by
a CPU for patching.

Signed-off-by: Christopher M. Riedl <cmr at linux.ibm.com>

---

v5:  * Use `u32*` instead of `struct ppc_inst*` based on new series in
       upstream.

v4:  * Separate the powerpc and x86_64 bits into individual patches.
     * Use __put_kernel_nofault() when attempting to hijack the mapping.
     * Use raw_smp_processor_id() to avoid triggering the BUG() when
       calling smp_processor_id() in preemptible code. The only thing
       that matters is that one of the threads is bound to a different
       CPU; we are not using smp_processor_id() to access any per-cpu
       data or similar where preemption should be disabled.
     * Rework the patching_cpu() kthread stop condition to avoid:
       https://lwn.net/Articles/628628/
---
 drivers/misc/lkdtm/core.c  |   1 +
 drivers/misc/lkdtm/lkdtm.h |   1 +
 drivers/misc/lkdtm/perms.c | 134 +++++++++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+)

diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 8024b6a5cc7fc..fbcb95eda337b 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -147,6 +147,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(WRITE_RO),
 	CRASHTYPE(WRITE_RO_AFTER_INIT),
 	CRASHTYPE(WRITE_KERN),
+	CRASHTYPE(HIJACK_PATCH),
 	CRASHTYPE(REFCOUNT_INC_OVERFLOW),
 	CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
 	CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 99f90d3e5e9cb..87e7e6136d962 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -62,6 +62,7 @@ void lkdtm_EXEC_USERSPACE(void);
 void lkdtm_EXEC_NULL(void);
 void lkdtm_ACCESS_USERSPACE(void);
 void lkdtm_ACCESS_NULL(void);
+void lkdtm_HIJACK_PATCH(void);
 
 /* refcount.c */
 void lkdtm_REFCOUNT_INC_OVERFLOW(void);
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 2dede2ef658f3..39e7456852229 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 #include <linux/mman.h>
 #include <linux/uaccess.h>
+#include <linux/kthread.h>
 #include <asm/cacheflush.h>
 
 /* Whether or not to fill the target memory area with do_nothing(). */
@@ -222,6 +223,139 @@ void lkdtm_ACCESS_NULL(void)
 	pr_err("FAIL: survived bad write\n");
 }
 
+#if (IS_BUILTIN(CONFIG_LKDTM) && defined(CONFIG_STRICT_KERNEL_RWX) && \
+	defined(CONFIG_PPC))
+/*
+ * Dummy function whose address serves as the patch site for this test.
+ */
+static void patching_target(void)
+{
+	return;
+}
+
+#include <asm/code-patching.h>
+static const u32 *patch_site = (const u32 *)&patching_target;	/* address the test patches */
+
+static inline int lkdtm_do_patch(u32 data)	/* patch @data over patch_site */
+{
+	return patch_instruction((u32 *)patch_site, ppc_inst(data));
+}
+
+static inline u32 lkdtm_read_patch_site(void)	/* value currently at patch_site */
+{
+	return READ_ONCE(*patch_site);
+}
+
+/* Returns true if storing @data at @addr completed, false if it took the err path */
+static inline bool lkdtm_try_write(u32 data, u32 *addr)
+{
+	__put_kernel_nofault(addr, &data, u32, err);
+	return true;
+
+err:	/* only reachable through the label handed to __put_kernel_nofault() */
+	return false;
+}
+
+static int lkdtm_patching_cpu(void *data)
+{
+	int err = 0;
+	u32 val = 0xdeadbeef;
+
+	pr_info("starting patching_cpu=%d\n", raw_smp_processor_id());
+	/* Re-patch until an error, a stop request, or someone else changes the site. */
+	do {
+		err = lkdtm_do_patch(val);
+	} while (lkdtm_read_patch_site() == val && !err && !kthread_should_stop());
+	if (err)
+		pr_warn("XFAIL: patch_instruction returned error: %d\n", err);
+	/* Set the state before each check, or kthread_stop()'s wakeup can be lost. */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return err;
+}
+
+/* Start a patching kthread on another CPU, then try to write via its patch mapping. */
+void lkdtm_HIJACK_PATCH(void)
+{
+	struct task_struct *patching_kthrd;
+	int patching_cpu, hijacker_cpu, attempts;
+	unsigned long addr;
+	bool hijacked;
+	const u32 bad_data = 0xbad00bad;
+	const u32 original_insn = lkdtm_read_patch_site();
+
+	if (!IS_ENABLED(CONFIG_SMP)) {
+		pr_err("XFAIL: this test requires CONFIG_SMP\n");
+		return;
+	}
+
+	if (num_online_cpus() < 2) {
+		pr_warn("XFAIL: this test requires at least two cpus\n");
+		return;
+	}
+
+	hijacker_cpu = raw_smp_processor_id();
+	patching_cpu = cpumask_any_but(cpu_online_mask, hijacker_cpu);
+
+	patching_kthrd = kthread_create_on_node(&lkdtm_patching_cpu, NULL,
+						cpu_to_node(patching_cpu),
+						"lkdtm_patching_cpu");
+	if (IS_ERR(patching_kthrd)) {
+		pr_err("XFAIL: failed to create patching kthread: %ld\n", PTR_ERR(patching_kthrd));
+		return;
+	}
+	kthread_bind(patching_kthrd, patching_cpu);
+	wake_up_process(patching_kthrd);
+
+	addr = offset_in_page(patch_site) | read_cpu_patching_addr(patching_cpu);
+	pr_info("starting hijacker_cpu=%d\n", hijacker_cpu);
+	for (attempts = 0; attempts < 100000; ++attempts) {
+		/* Try to write to the other CPU's temp patch mapping */
+		hijacked = lkdtm_try_write(bad_data, (u32 *)addr);
+		if (hijacked) {
+			if (kthread_stop(patching_kthrd)) {
+				pr_info("hijack attempts: %d\n", attempts);
+				pr_err("XFAIL: error stopping patching cpu\n");
+				return;
+			}
+			break;
+		}
+	}
+	pr_info("hijack attempts: %d\n", attempts);
+	if (hijacked) {
+		if (lkdtm_read_patch_site() == bad_data)
+			pr_err("overwrote kernel text\n");
+		/*
+		 * The patching cpu may already have overwritten our value again;
+		 * the write still did not fault, so it counts as a hijack.
+		 */
+		pr_err("FAIL: wrote to another cpu's patching area\n");
+	} else {
+		kthread_stop(patching_kthrd);
+	}
+
+	/* Restore the original data to be able to run the test again */
+	lkdtm_do_patch(original_insn);
+}
+
+#else
+
+void lkdtm_HIJACK_PATCH(void)	/* stub: logs each unmet config requirement */
+{
+	if (!IS_ENABLED(CONFIG_PPC))
+		pr_err("XFAIL: this test only runs on powerpc\n");
+	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+		pr_err("XFAIL: this test requires CONFIG_STRICT_KERNEL_RWX\n");
+	if (!IS_BUILTIN(CONFIG_LKDTM))
+		pr_err("XFAIL: this test requires CONFIG_LKDTM=y (not =m!)\n");
+}
+
+#endif
+
 void __init lkdtm_perms_init(void)
 {
 	/* Make sure we can write to __ro_after_init values during __init */
-- 
2.26.1