[RFC PATCH 1/2] KVM: PPC: Book3S HV: Make virtual processor area registration more robust

Alexander Graf agraf at suse.de
Tue Jan 17 00:04:29 EST 2012


On 20.12.2011, at 11:22, Paul Mackerras wrote:

> The PAPR API allows three sorts of per-virtual-processor areas to be
> registered (VPA, SLB shadow buffer, and dispatch trace log), and
> furthermore, these can be registered and unregistered for another
> virtual CPU.  Currently we just update the vcpu fields pointing to
> these areas at the time of registration or unregistration.  If this
> is done on another vcpu, there is the possibility that the target vcpu
> is using those fields at the time and could end up using a bogus
> pointer and corrupting memory.
> 
> This fixes the race by making the target cpu itself do the update, so
> we can be sure that the update happens at a time when the fields aren't
> being used.  These are updated from a set of 'next_*' fields, which
> are protected by a spinlock.  (We could have just taken the spinlock
> when using the vpa, slb_shadow or dtl fields, but that would mean
> taking the spinlock on every guest entry and exit.)
> 
> The code in do_h_register_vpa now takes the spinlock and updates the
> 'next_*' fields.  There is also a set of '*_pending' flags to indicate
> that an update is pending.
> 
> This also changes 'struct dtl' (which was undefined) to 'struct dtl_entry',
> which is what the rest of the kernel uses.
> 
> Signed-off-by: Paul Mackerras <paulus at samba.org>
> ---
> arch/powerpc/include/asm/kvm_host.h |   15 +++-
> arch/powerpc/kvm/book3s_hv.c        |  167 +++++++++++++++++++++++++----------
> 2 files changed, 131 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 1cb6e52..b1126c1 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -82,7 +82,7 @@ struct kvm_vcpu;
> 
> struct lppaca;
> struct slb_shadow;
> -struct dtl;
> +struct dtl_entry;
> 
> struct kvm_vm_stat {
> 	u32 remote_tlb_flush;
> @@ -449,9 +449,18 @@ struct kvm_vcpu_arch {
> 	u32 last_inst;
> 
> 	struct lppaca *vpa;
> +	struct lppaca *next_vpa;
> 	struct slb_shadow *slb_shadow;
> -	struct dtl *dtl;
> -	struct dtl *dtl_end;
> +	struct slb_shadow *next_slb_shadow;
> +	struct dtl_entry *dtl;
> +	struct dtl_entry *dtl_end;
> +	struct dtl_entry *dtl_ptr;
> +	struct dtl_entry *next_dtl;
> +	struct dtl_entry *next_dtl_end;
> +	u8 vpa_pending;
> +	u8 slb_shadow_pending;
> +	u8 dtl_pending;
> +	spinlock_t vpa_update_lock;
> 
> 	wait_queue_head_t *wqp;
> 	struct kvmppc_vcore *vcore;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index c11d960..6f6e88d 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -140,7 +140,7 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
> {
> 	struct kvm *kvm = vcpu->kvm;
> 	unsigned long len, nb;
> -	void *va;
> +	void *va, *free_va, *tvpa, *dtl, *ss;
> 	struct kvm_vcpu *tvcpu;
> 	int err = H_PARAMETER;
> 
> @@ -152,6 +152,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
> 	flags &= 7;
> 	if (flags == 0 || flags == 4)

This could probably use a new variable name. Also, what do 0 and 4 mean? Constant defines would be nice here.

> 		return H_PARAMETER;
> +	free_va = va = NULL;
> +	len = 0;
> 	if (flags < 4) {
> 		if (vpa & 0x7f)
> 			return H_PARAMETER;
> @@ -165,65 +167,122 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,

[pasted from real source]
>                 va = kvmppc_pin_guest_page(kvm, vpa, &nb);

Here you're pinning the page, setting va to that temporarily available address.

[...]

> 			len = *(unsigned short *)(va + 4);

This is a condition on (flags <= 1). We bail out on flags == 0 a few lines up. Just move this whole thing into the respective function handlers.

> 		else
> 			len = *(unsigned int *)(va + 4);

va + 4 isn't really descriptive. Is this a defined struct? Why not actually define one which you can just read data from? Or at least make this a define too. Reading random numbers in code is barely readable.

> +		free_va = va;

Now free_va is the temporarily available address.

> 		if (len > nb)
> 			goto out_unpin;
> -		switch (flags) {
> -		case 1:		/* register VPA */
> -			if (len < 640)
> -				goto out_unpin;
> -			if (tvcpu->arch.vpa)
> -				kvmppc_unpin_guest_page(kvm, vcpu->arch.vpa);
> -			tvcpu->arch.vpa = va;
> -			init_vpa(vcpu, va);
> -			break;
> -		case 2:		/* register DTL */
> -			if (len < 48)
> -				goto out_unpin;
> -			len -= len % 48;
> -			if (tvcpu->arch.dtl)
> -				kvmppc_unpin_guest_page(kvm, vcpu->arch.dtl);
> -			tvcpu->arch.dtl = va;
> -			tvcpu->arch.dtl_end = va + len;
> +	}
> +
> +	spin_lock(&tvcpu->arch.vpa_update_lock);
> +
> +	switch (flags) {
> +	case 1:		/* register VPA */

Yeah, these could also use defines :)


> +		if (len < 640)
> 			break;
> -		case 3:		/* register SLB shadow buffer */
> -			if (len < 16)
> -				goto out_unpin;
> -			if (tvcpu->arch.slb_shadow)
> -				kvmppc_unpin_guest_page(kvm, vcpu->arch.slb_shadow);
> -			tvcpu->arch.slb_shadow = va;
> +		free_va = tvcpu->arch.next_vpa;
> +		tvcpu->arch.next_vpa = va;

Now you're setting next_vpa to this temporarily available address? But next_vpa will be used after va is getting free'd, no? Or is that why you have free_va?

Wouldn't it be easier to just map it every time we actually use it and only shove the GPA around? We could basically save ourselves a lot of the logic here.


Alex



More information about the Linuxppc-dev mailing list