[PATCH v2 0/4] powerpc/64: syscalls in C

Nicholas Piggin npiggin at gmail.com
Thu Aug 29 20:45:27 AEST 2019


Nicholas Piggin's on August 29, 2019 7:38 pm:
> Christophe Leroy's on August 28, 2019 7:55 pm:
>> 
>> 
>>> On 28/08/2019 at 11:49, Nicholas Piggin wrote:
>>> Christophe Leroy's on August 28, 2019 7:06 pm:
>>>>
>>>>
>>>>> On 27/08/2019 at 15:55, Nicholas Piggin wrote:
>>>>> Accounted for some feedback.
>>>>>
>>>>> Nicholas Piggin (4):
>>>>>     powerpc: convert to copy_thread_tls
>>>>>     powerpc/64: remove support for kernel-mode syscalls
>>>>>     powerpc/64: system call remove non-volatile GPR save optimisation
>>>>>     powerpc/64: system call implement the bulk of the logic in C
>>>>
>>>> Would it be possible to split in the following parts:
>>>>
>>>> 1/ Implement in C whatever can be implemented without removing
>>>> non-volatile GPR save optimisation
>>>> 2/ Remove non-volatile GPR save optimisation
>>>> 3/ Implement in C everything else
>>> 
>>> Hmm. I'll have a look but I would rather not go back and add the
>>> intermediate state I was hoping to avoid. I'll think about it and
>>> if it's not too difficult I will try to add something. I have an
>>> idea.
>>> 
>>> With your nvregs performance test on ppc32, are you doing the
>>> nvgpr restore? The fast path should be able to avoid that.
>> 
>> I only added the SAVE_NVGPRS call in the syscall entry macro just after 
>> the saving of volatile regs, and changed the trap from \trapno+1 to \trapno
> 
> So... this actually seems to work. Haven't booted it, but the compiler
> seems to do what we want.
> 

Here's a really quick start for ppc32. The interrupt handling is
different enough that it may be hard to merge entirely with ppc64,
but it's not really much code anyway.

Unfortunately we can't restore the full register set using the same
method, because there are other registers like LR and CR to handle,
so the exit path still has to return a code to the asm.

---
 arch/powerpc/kernel/Makefile     |   2 +-
 arch/powerpc/kernel/syscall_32.c | 167 +++++++++++++++++++++++++++++++
 2 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kernel/syscall_32.c

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 7f53cc07f933..83d5808654ec 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -107,7 +107,7 @@ extra-y				+= vmlinux.lds
 
 obj-$(CONFIG_RELOCATABLE)	+= reloc_$(BITS).o
 
-obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o
+obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o early_32.o syscall_32.o
 obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
diff --git a/arch/powerpc/kernel/syscall_32.c b/arch/powerpc/kernel/syscall_32.c
new file mode 100644
index 000000000000..ff37edac76c8
--- /dev/null
+++ b/arch/powerpc/kernel/syscall_32.c
@@ -0,0 +1,167 @@
+#include <linux/err.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+
+typedef long (*syscall_fn)(long, long, long, long, long, long);
+
+register unsigned long r31 asm("r31");
+register unsigned long r30 asm("r30");
+register unsigned long r29 asm("r29");
+register unsigned long r28 asm("r28");
+register unsigned long r27 asm("r27");
+register unsigned long r26 asm("r26");
+register unsigned long r25 asm("r25");
+register unsigned long r24 asm("r24");
+register unsigned long r23 asm("r23");
+register unsigned long r22 asm("r22");
+register unsigned long r21 asm("r21");
+register unsigned long r20 asm("r20");
+register unsigned long r19 asm("r19");
+register unsigned long r18 asm("r18");
+register unsigned long r17 asm("r17");
+register unsigned long r16 asm("r16");
+register unsigned long r15 asm("r15");
+register unsigned long r14 asm("r14");
+register unsigned long r13 asm("r13");
+
+static void save_nvgprs(struct pt_regs *regs)
+{
+	if (!(regs->trap & 1))	/* NVGPRs already saved (full regs)? */
+		return;
+	regs->trap &= ~0x1;	/* mark the register set as full */
+
+	asm volatile("stmw	13, %0" : : "m"(regs->gpr[13]) : "memory");
+}
+
+long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs)
+{
+	unsigned long ti_flags;
+	syscall_fn f;
+
+	BUG_ON(irqs_disabled());
+	BUG_ON(!(regs->msr & MSR_PR));
+	BUG_ON(!(regs->msr & MSR_EE));
+
+	ti_flags = current_thread_info()->flags;
+	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+		/*
+		 * We use the return value of do_syscall_trace_enter() as the
+		 * syscall number. If the syscall was rejected for any reason
+		 * do_syscall_trace_enter() returns an invalid syscall number
+		 * and the test below against NR_syscalls will fail.
+		 */
+		save_nvgprs(regs);
+		r0 = do_syscall_trace_enter(regs);
+	}
+
+	if (unlikely(r0 >= NR_syscalls))
+		return -ENOSYS;
+
+	/* May be faster to do array_index_nospec? */
+	barrier_nospec();
+
+	f = (void *)sys_call_table[r0];
+
+	return f(r3, r4, r5, r6, r7, r8);
+}
+
+static inline void load_dbcr0(void)
+{
+	/* TODO: reload the thread's DBCR0 for 4xx/BookE debug */
+}
+
+unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs)
+{
+	unsigned long *ti_flagsp = &current_thread_info()->flags;
+	unsigned long ti_flags;
+	unsigned long ret = 0;
+
+	regs->result = r3;
+
+	/* Check whether the syscall is issued inside a restartable sequence */
+	rseq_syscall(regs);
+
+	ti_flags = *ti_flagsp;
+
+	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+			r3 = -r3;
+			regs->ccr |= 0x10000000; /* Set SO bit in CR */
+		}
+	}
+
+	if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+		if (ti_flags & _TIF_RESTOREALL)
+			ret = _TIF_RESTOREALL;
+		else
+			regs->gpr[3] = r3;
+		clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+	} else {
+		regs->gpr[3] = r3;
+	}
+
+	if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+		save_nvgprs(regs);
+		do_syscall_trace_leave(regs);
+	}
+
+	local_irq_disable();
+	ti_flags = READ_ONCE(*ti_flagsp);
+	while (unlikely(ti_flags & _TIF_USER_WORK_MASK)) {
+		local_irq_enable();
+		if (ti_flags & _TIF_NEED_RESCHED) {
+			schedule();
+		} else {
+			save_nvgprs(regs);
+			/*
+			 * SIGPENDING must restore signal handler function
+			 * argument GPRs, and some non-volatiles (e.g., r1).
+			 * Restore all for now. This could be made lighter.
+			 */
+			if (ti_flags & _TIF_SIGPENDING)
+				ret |= _TIF_RESTOREALL;
+			do_notify_resume(regs, ti_flags);
+		}
+		local_irq_disable();
+		ti_flags = READ_ONCE(*ti_flagsp);
+	}
+
+	WARN_ON(!(regs->msr & MSR_EE)); /* don't do this */
+
+	/* Tell lockdep IRQs are being enabled when we RFI */
+	trace_hardirqs_on();
+
+#if 0
+	if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) {
+		struct thread_struct *t = &current->thread;
+
+		/*
+		 * If the process has its own DBCR0 value, load it up. The
+		 * internal debug mode bit tells us that dbcr0 should be
+		 * loaded.
+		 */
+		if (unlikely(t->debug.dbcr0 & DBCR0_IDM))
+			load_dbcr0();
+	}
+
+	if (IS_ENABLED(CONFIG_4xx) && !mmu_has_feature(MMU_FTR_TYPE_47x)) {
+		if (unlikely(icache_44x_need_flush))
+			flush_icache_44x();
+	}
+
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_32))
+		kuep_unlock();
+
+	kuap_check();
+#endif
+
+	account_cpu_user_exit();
+
+	return ret;
+}
-- 
2.22.0
