[PATCH 4/5] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit

Alexander Graf agraf at suse.de
Sat Dec 24 00:23:30 EST 2011


On 15.12.2011, at 13:03, Paul Mackerras wrote:

> This changes the implementation of kvm_vm_ioctl_get_dirty_log() for
> Book3s HV guests to use the hardware C (changed) bits in the guest
> hashed page table.  Since this makes the implementation quite different
> from the Book3s PR case, this moves the existing implementation from
> book3s.c to book3s_pr.c and creates a new implementation in book3s_hv.c.
> That implementation calls kvmppc_hv_get_dirty_log() to do the actual
> work by calling kvm_test_clear_dirty on each page.  It iterates over
> the HPTEs, clearing the C bit if set, and returns 1 if any C bit was
> set (including the saved C bit in the rmap entry).
> 
> Signed-off-by: Paul Mackerras <paulus at samba.org>
> ---
> arch/powerpc/include/asm/kvm_book3s.h |    2 +
> arch/powerpc/kvm/book3s.c             |   39 ------------------
> arch/powerpc/kvm/book3s_64_mmu_hv.c   |   69 +++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/book3s_hv.c          |   37 +++++++++++++++++
> arch/powerpc/kvm/book3s_pr.c          |   39 ++++++++++++++++++
> 5 files changed, 147 insertions(+), 39 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 6ececb4..aa795cc 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -158,6 +158,8 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
> 			long pte_index, unsigned long pteh, unsigned long ptel);
> extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
> 			long pte_index, unsigned long pteh, unsigned long ptel);
> +extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
> +			struct kvm_memory_slot *memslot);
> 
> extern void kvmppc_entry_trampoline(void);
> extern void kvmppc_hv_entry_trampoline(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 6bf7e05..7d54f4e 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -477,45 +477,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
> 	return 0;
> }
> 
> -/*
> - * Get (and clear) the dirty memory log for a memory slot.
> - */
> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> -				      struct kvm_dirty_log *log)
> -{
> -	struct kvm_memory_slot *memslot;
> -	struct kvm_vcpu *vcpu;
> -	ulong ga, ga_end;
> -	int is_dirty = 0;
> -	int r;
> -	unsigned long n;
> -
> -	mutex_lock(&kvm->slots_lock);
> -
> -	r = kvm_get_dirty_log(kvm, log, &is_dirty);
> -	if (r)
> -		goto out;
> -
> -	/* If nothing is dirty, don't bother messing with page tables. */
> -	if (is_dirty) {
> -		memslot = id_to_memslot(kvm->memslots, log->slot);
> -
> -		ga = memslot->base_gfn << PAGE_SHIFT;
> -		ga_end = ga + (memslot->npages << PAGE_SHIFT);
> -
> -		kvm_for_each_vcpu(n, vcpu, kvm)
> -			kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
> -
> -		n = kvm_dirty_bitmap_bytes(memslot);
> -		memset(memslot->dirty_bitmap, 0, n);
> -	}
> -
> -	r = 0;
> -out:
> -	mutex_unlock(&kvm->slots_lock);
> -	return r;
> -}
> -
> void kvmppc_decrementer_func(unsigned long data)
> {
> 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 926e2b9..783cd35 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -870,6 +870,75 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
> 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
> }
> 
> +static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
> +{
> +	struct revmap_entry *rev = kvm->arch.revmap;
> +	unsigned long head, i, j;
> +	unsigned long *hptep;
> +	int ret = 0;
> +
> + retry:
> +	lock_rmap(rmapp);
> +	if (*rmapp & KVMPPC_RMAP_CHANGED) {
> +		*rmapp &= ~KVMPPC_RMAP_CHANGED;
> +		ret = 1;
> +	}
> +	if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
> +		unlock_rmap(rmapp);
> +		return ret;
> +	}
> +
> +	i = head = *rmapp & KVMPPC_RMAP_INDEX;
> +	do {
> +		hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
> +		j = rev[i].forw;
> +
> +		if (!(hptep[1] & HPTE_R_C))
> +			continue;
> +
> +		if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
> +			/* unlock rmap before spinning on the HPTE lock */
> +			unlock_rmap(rmapp);
> +			while (hptep[0] & HPTE_V_HVLOCK)
> +				cpu_relax();
> +			goto retry;
> +		}
> +
> +		/* Now check and modify the HPTE */
> +		if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
> +			/* need to make it temporarily absent to clear C */
> +			hptep[0] |= HPTE_V_ABSENT;
> +			kvmppc_invalidate_hpte(kvm, hptep, i);
> +			hptep[1] &= ~HPTE_R_C;
> +			eieio();
> +			hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
> +			rev[i].guest_rpte |= HPTE_R_C;
> +			ret = 1;
> +		}
> +		hptep[0] &= ~HPTE_V_HVLOCK;
> +	} while ((i = j) != head);
> +
> +	unlock_rmap(rmapp);
> +	return ret;
> +}
> +
> +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> +	unsigned long i;
> +	unsigned long *rmapp, *map;
> +
> +	preempt_disable();
> +	rmapp = memslot->rmap;
> +	map = memslot->dirty_bitmap;
> +	for (i = 0; i < memslot->npages; ++i) {
> +		if (kvm_test_clear_dirty(kvm, rmapp))
> +			__set_bit_le(i, map);

So if I read things correctly, this is the only case you're setting pages as dirty. What if you have the following:

  guest adds HTAB entry x
  guest writes to page mapped by x
  guest removes HTAB entry x
  host fetches dirty log

You can replace "removes" by "is overwritten by another mapping" if you like.


Alex

PS: Always CC kvm at vger for stuff that others might want to review (basically all patches)



More information about the Linuxppc-dev mailing list