[PATCH 4/5] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit
Alexander Graf
agraf at suse.de
Sat Dec 24 00:23:30 EST 2011
On 15.12.2011, at 13:03, Paul Mackerras wrote:
> This changes the implementation of kvm_vm_ioctl_get_dirty_log() for
> Book3s HV guests to use the hardware C (changed) bits in the guest
> hashed page table. Since this makes the implementation quite different
> from the Book3s PR case, this moves the existing implementation from
> book3s.c to book3s_pr.c and creates a new implementation in book3s_hv.c.
> That implementation calls kvmppc_hv_get_dirty_log() to do the actual
> work by calling kvm_test_clear_dirty on each page. It iterates over
> the HPTEs, clearing the C bit if set, and returns 1 if any C bit was
> set (including the saved C bit in the rmap entry).
>
> Signed-off-by: Paul Mackerras <paulus at samba.org>
> ---
> arch/powerpc/include/asm/kvm_book3s.h | 2 +
> arch/powerpc/kvm/book3s.c | 39 ------------------
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 69 +++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/book3s_hv.c | 37 +++++++++++++++++
> arch/powerpc/kvm/book3s_pr.c | 39 ++++++++++++++++++
> 5 files changed, 147 insertions(+), 39 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 6ececb4..aa795cc 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -158,6 +158,8 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
> long pte_index, unsigned long pteh, unsigned long ptel);
> extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
> long pte_index, unsigned long pteh, unsigned long ptel);
> +extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
> + struct kvm_memory_slot *memslot);
>
> extern void kvmppc_entry_trampoline(void);
> extern void kvmppc_hv_entry_trampoline(void);
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 6bf7e05..7d54f4e 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -477,45 +477,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> -/*
> - * Get (and clear) the dirty memory log for a memory slot.
> - */
> -int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
> - struct kvm_dirty_log *log)
> -{
> - struct kvm_memory_slot *memslot;
> - struct kvm_vcpu *vcpu;
> - ulong ga, ga_end;
> - int is_dirty = 0;
> - int r;
> - unsigned long n;
> -
> - mutex_lock(&kvm->slots_lock);
> -
> - r = kvm_get_dirty_log(kvm, log, &is_dirty);
> - if (r)
> - goto out;
> -
> - /* If nothing is dirty, don't bother messing with page tables. */
> - if (is_dirty) {
> - memslot = id_to_memslot(kvm->memslots, log->slot);
> -
> - ga = memslot->base_gfn << PAGE_SHIFT;
> - ga_end = ga + (memslot->npages << PAGE_SHIFT);
> -
> - kvm_for_each_vcpu(n, vcpu, kvm)
> - kvmppc_mmu_pte_pflush(vcpu, ga, ga_end);
> -
> - n = kvm_dirty_bitmap_bytes(memslot);
> - memset(memslot->dirty_bitmap, 0, n);
> - }
> -
> - r = 0;
> -out:
> - mutex_unlock(&kvm->slots_lock);
> - return r;
> -}
> -
> void kvmppc_decrementer_func(unsigned long data)
> {
> struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 926e2b9..783cd35 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -870,6 +870,75 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
> kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
> }
>
> +static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
> +{
> + struct revmap_entry *rev = kvm->arch.revmap;
> + unsigned long head, i, j;
> + unsigned long *hptep;
> + int ret = 0;
> +
> + retry:
> + lock_rmap(rmapp);
> + if (*rmapp & KVMPPC_RMAP_CHANGED) {
> + *rmapp &= ~KVMPPC_RMAP_CHANGED;
> + ret = 1;
> + }
> + if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
> + unlock_rmap(rmapp);
> + return ret;
> + }
> +
> + i = head = *rmapp & KVMPPC_RMAP_INDEX;
> + do {
> + hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4));
> + j = rev[i].forw;
> +
> + if (!(hptep[1] & HPTE_R_C))
> + continue;
> +
> + if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
> + /* unlock rmap before spinning on the HPTE lock */
> + unlock_rmap(rmapp);
> + while (hptep[0] & HPTE_V_HVLOCK)
> + cpu_relax();
> + goto retry;
> + }
> +
> + /* Now check and modify the HPTE */
> + if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) {
> + /* need to make it temporarily absent to clear C */
> + hptep[0] |= HPTE_V_ABSENT;
> + kvmppc_invalidate_hpte(kvm, hptep, i);
> + hptep[1] &= ~HPTE_R_C;
> + eieio();
> + hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
> + rev[i].guest_rpte |= HPTE_R_C;
> + ret = 1;
> + }
> + hptep[0] &= ~HPTE_V_HVLOCK;
> + } while ((i = j) != head);
> +
> + unlock_rmap(rmapp);
> + return ret;
> +}
> +
> +long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
> +{
> + unsigned long i;
> + unsigned long *rmapp, *map;
> +
> + preempt_disable();
> + rmapp = memslot->rmap;
> + map = memslot->dirty_bitmap;
> + for (i = 0; i < memslot->npages; ++i) {
> + if (kvm_test_clear_dirty(kvm, rmapp))
> + __set_bit_le(i, map);
So if I read things correctly, this is the only case you're setting pages as dirty. What if you have the following:
guest adds HTAB entry x
guest writes to page mapped by x
guest removes HTAB entry x
host fetches dirty log
You can replace "removes" by "is overwritten by another mapping" if you like.
Alex
PS: Always CC kvm at vger for stuff that other might want to review (basically all patches)
More information about the Linuxppc-dev
mailing list