[PATCH V5 09/14] powerpc/vas: Update CSB and notify process for fault CRBs

Fri Feb 7 16:46:16 AEDT 2020

On Wed, 2020-01-22 at 00:17 -0800, Haren Myneni wrote:
> For each fault CRB, update fault address in CRB (fault_storage_addr)
> and translation error status in CSB so that user space can touch the
> fault address and resend the request. If the user space passed invalid
> CSB address send signal to process with SIGSEGV.
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev at linux.vnet.ibm.com>
> Signed-off-by: Haren Myneni <haren at linux.ibm.com>
> ---
>  arch/powerpc/platforms/powernv/vas-fault.c | 116
> +++++++++++++++++++++++++++++
>  1 file changed, 116 insertions(+)
> 
> diff --git a/arch/powerpc/platforms/powernv/vas-fault.c
> b/arch/powerpc/platforms/powernv/vas-fault.c
> index 5c2cada..2cfab0c 100644
> --- a/arch/powerpc/platforms/powernv/vas-fault.c
> +++ b/arch/powerpc/platforms/powernv/vas-fault.c
> @@ -11,6 +11,7 @@
>  #include <linux/slab.h>
>  #include <linux/uaccess.h>
>  #include <linux/kthread.h>
> +#include <linux/sched/signal.h>
>  #include <linux/mmu_context.h>
>  #include <asm/icswx.h>
>  
> @@ -26,6 +27,120 @@
>  #define VAS_FAULT_WIN_FIFO_SIZE	(4 << 20)
>  
>  /*
> + * Update the CSB to indicate a translation error.
> + *
> + * If the fault is in the CSB address itself or if we are unable to
> + * update the CSB, send a signal to the process, because we have no
> + * other way of notifying the user process.
> + *
> + * Remaining settings in the CSB are based on wait_for_csb() of
> + * NX-GZIP.
> + */
> +static void update_csb(struct vas_window *window,
> +			struct coprocessor_request_block *crb)
> +{
> +	int rc;
> +	struct pid *pid;
> +	void __user *csb_addr;
> +	struct task_struct *tsk;
> +	struct kernel_siginfo info;
> +	struct coprocessor_status_block csb;
> +
> +	/*
> +	 * NX user space windows can not be opened for task->mm=NULL
> +	 * and faults will not be generated for kernel requests.
> +	 */
> +	if (!window->mm || !window->user_win)
> +		return;
> +
> +	csb_addr = (void *)be64_to_cpu(crb->csb_addr);
> +
> +	csb.cc = CSB_CC_TRANSLATION;
> +	csb.ce = CSB_CE_TERMINATION;
> +	csb.cs = 0;
> +	csb.count = 0;
> +
> +	/*
> +	 * Returns the fault address in CPU format since it is passed with
> +	 * signal. But if the user space expects BE format, need changes.
> +	 * i.e either kernel (here) or user should convert to CPU format.
> +	 * Not both!
> +	 */
> +	csb.address = be64_to_cpu(crb->stamp.nx.fault_storage_addr);

This looks wrong and I don't understand the comment. You need to convert this
back to be64 to write it to csb.address. ie.

  csb.address = cpu_to_be64(be64_to_cpu(crb->stamp.nx.fault_storage_addr));

Which I think you can just avoid the endian conversion all together.

> +	csb.flags = 0;
> +
> +	pid = window->pid;
> +	tsk = get_pid_task(pid, PIDTYPE_PID);
> +	/*
> +	 * Send window will be closed after processing all NX requests
> +	 * and process exits after closing all windows. In multi-thread
> +	 * applications, thread may not exists, but does not close FD
> +	 * (means send window) upon exit. Parent thread (tgid) can use
> +	 * and close the window later.
> +	 * pid and mm references are taken when window is opened by
> +	 * process (pid). So tgid is used only when child thread opens
> +	 * a window and exits without closing it in multithread tasks.
> +	 */
> +	if (!tsk) {
> +		pid = window->tgid;
> +		tsk = get_pid_task(pid, PIDTYPE_PID);
> +		/*
> +		 * Parent thread will be closing window during its exit.
> +		 * So should not get here.
> +		 */
> +		if (!tsk)
> +			return;
> +	}
> +
> +	/* Return if the task is exiting. */
> +	if (tsk->flags & PF_EXITING) {
> +		put_task_struct(tsk);
> +		return;
> +	}
> +
> +	use_mm(window->mm);
> +	rc = copy_to_user(csb_addr, &csb, sizeof(csb));
> +	/*
> +	 * User space polls on csb.flags (first byte). So add barrier
> +	 * then copy first byte with csb flags update.
> +	 */
> +	smp_mb();
> +	if (!rc) {
> +		csb.flags = CSB_V;
> +		rc = copy_to_user(csb_addr, &csb, sizeof(u8));
> +	}
> +	unuse_mm(window->mm);
> +	put_task_struct(tsk);
> +
> +	/* Success */
> +	if (!rc)
> +		return;
> +
> +	pr_err("Invalid CSB address 0x%p signalling pid(%d)\n",
> +			csb_addr, pid_vnr(pid));

This is a userspace error, not a kernel error. This should not be a pr_err().

Userspace could spam the console with this.

> +
> +	clear_siginfo(&info);
> +	info.si_signo = SIGSEGV;
> +	info.si_errno = EFAULT;
> +	info.si_code = SEGV_MAPERR;
> +	info.si_addr = csb_addr;
> +
> +	/*
> +	 * process will be polling on csb.flags after request is sent to
> +	 * NX. So generally CSB update should not fail except when an
> +	 * application does not follow the process properly. So an error
> +	 * message will be displayed and leave it to user space whether
> +	 * to ignore or handle this signal.
> +	 */
> +	rcu_read_lock();
> +	rc = kill_pid_info(SIGSEGV, &info, pid);
> +	rcu_read_unlock();

why the rcu_read_un/lock() here?

> +
> +	pr_devel("%s(): pid %d kill_proc_info() rc %d\n", __func__,
> +			pid_vnr(pid), rc);
> +}
> +
> +/*
>   * Process CRBs that we receive on the fault window.
>   */
>  irqreturn_t vas_fault_handler(int irq, void *data)
> @@ -104,6 +219,7 @@ irqreturn_t vas_fault_handler(int irq, void *data)
>  			return IRQ_HANDLED;
>  		}
>  
> +		update_csb(window, crb);
>  	} while (true);
>  
>  	return IRQ_HANDLED;