[PATCH V2 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module
David Matlack
dmatlack at google.com
Tue Jul 12 02:57:10 AEST 2016
On Mon, Jul 11, 2016 at 12:08 AM, Suraj Jitindar Singh
<sjitindarsingh at gmail.com> wrote:
> This patch introduces new halt polling functionality into the kvm_hv kernel
> module. When a vcore is idle it will poll for some period of time before
> scheduling itself out.
Is there any way to reuse the existing halt-polling code? Having two
copies risks them diverging over time.
>
> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
> idle) we schedule ourselves out to allow something else to run. If we then
> need to wake up very quickly (for example, because an interrupt arrives),
> we must wait until we are scheduled to run again.
>
> Implement halt polling so that when a vcore is idle, and before scheduling
> ourselves, we poll for vcpus in the runnable_threads list which have
> pending exceptions or which leave the ceded state. If we poll successfully
> then we can get back into the guest very quickly without ever scheduling
> ourselves, otherwise we schedule ourselves out as before.
>
> Testing this patch with a TCP round-robin test between two guests using
> virtio network interfaces showed a decrease in round-trip time from
> ~140us to ~115us. A performance gain is only seen when transitions out of
> and back into the guest are frequent and short-lived; otherwise polling
> provides no net benefit. The polling interval adapts dynamically: it is
> reduced when we are frequently scheduled out for long periods, and
> increased when polling frequently succeeds. The rate at which the polling
> interval grows or shrinks, and the maximum polling interval, can be set
> through module parameters.
>
> Based on the implementation in the generic kvm module by Wanpeng Li and
> Paolo Bonzini, and on direction from Paul Mackerras.
>
> ---
> Change Log:
>
> V1 -> V2:
> - Nothing
>
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh at gmail.com>
> ---
> arch/powerpc/include/asm/kvm_book3s.h | 1 +
> arch/powerpc/include/asm/kvm_host.h | 1 +
> arch/powerpc/kvm/book3s_hv.c | 115 +++++++++++++++++++++++++++++-----
> arch/powerpc/kvm/trace_hv.h | 22 +++++++
> 4 files changed, 125 insertions(+), 14 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 151f817..c261f52 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
> ulong pcr;
> ulong dpdes; /* doorbell state (POWER8) */
> ulong conferring_threads;
> + unsigned int halt_poll_ns;
> };
>
> struct kvmppc_vcpu_book3s {
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 02d06e9..610f393 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -294,6 +294,7 @@ struct kvm_arch {
> #define VCORE_SLEEPING 3
> #define VCORE_RUNNING 4
> #define VCORE_EXITING 5
> +#define VCORE_POLLING 6
>
> /*
> * Struct used to manage memory for a virtual processor area
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 3bcf9e6..0d8ce14 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
> MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
> #endif
>
> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
> +
> +/* Factor by which the vcore halt poll interval is grown, default is to double
> + */
> +static unsigned int halt_poll_ns_grow = 2;
> +module_param(halt_poll_ns_grow, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
> +
> +/* Factor by which the vcore halt poll interval is shrunk, default is to reset
> + */
> +static unsigned int halt_poll_ns_shrink;
> +module_param(halt_poll_ns_shrink, int, S_IRUGO);
> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
> +
> static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
> static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
>
> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
> finish_wait(&vcpu->arch.cpu_run, &wait);
> }
>
> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> + /* 10us base */
> + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
> + vc->halt_poll_ns = 10000;
> + else
> + vc->halt_poll_ns *= halt_poll_ns_grow;
> +
> + if (vc->halt_poll_ns > halt_poll_max_ns)
> + vc->halt_poll_ns = halt_poll_max_ns;
> +}
> +
> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
> +{
> + if (halt_poll_ns_shrink == 0)
> + vc->halt_poll_ns = 0;
> + else
> + vc->halt_poll_ns /= halt_poll_ns_shrink;
> +}
> +
> +/* Check to see if any of the runnable vcpus on the vcore have pending
> + * exceptions or are no longer ceded
> + */
> +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
> +{
> + struct kvm_vcpu *vcpu;
> + int i;
> +
> + for_each_runnable_thread(i, vcpu, vc) {
> + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> /*
> * All the vcpus in this vcore are idle, so wait for a decrementer
> * or external interrupt to one of the vcpus. vc->lock is held.
> */
> static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
> {
> - struct kvm_vcpu *vcpu;
> - int do_sleep = 1, i;
> + int do_sleep = 1;
> + ktime_t cur, start;
> + u64 block_ns;
> DECLARE_SWAITQUEUE(wait);
>
> - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> + /* Poll for pending exceptions and ceded state */
> + cur = start = ktime_get();
> + if (vc->halt_poll_ns) {
> + ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
>
> - /*
> - * Check one last time for pending exceptions and ceded state after
> - * we put ourselves on the wait queue
> - */
> - for_each_runnable_thread(i, vcpu, vc) {
> - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
> - do_sleep = 0;
> - break;
> - }
> + vc->vcore_state = VCORE_POLLING;
> + spin_unlock(&vc->lock);
> +
> + do {
> + if (kvmppc_vcore_check_block(vc)) {
> + do_sleep = 0;
> + break;
> + }
> + cur = ktime_get();
> + } while (ktime_before(cur, stop));
> +
> + spin_lock(&vc->lock);
> + vc->vcore_state = VCORE_INACTIVE;
> +
> + if (!do_sleep)
> + goto out;
> }
>
> - if (!do_sleep) {
> + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> +
> + if (kvmppc_vcore_check_block(vc)) {
> finish_swait(&vc->wq, &wait);
> - return;
> + do_sleep = 0;
> + goto out;
> }
>
> vc->vcore_state = VCORE_SLEEPING;
> @@ -2656,6 +2723,26 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
> spin_lock(&vc->lock);
> vc->vcore_state = VCORE_INACTIVE;
> trace_kvmppc_vcore_blocked(vc, 1);
> +
> + cur = ktime_get();
> +
> +out:
> + block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
> +
> + if (halt_poll_max_ns) {
> + if (block_ns <= vc->halt_poll_ns)
> + ;
> + /* We slept and blocked for longer than the max halt time */
> + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
> + shrink_halt_poll_ns(vc);
> + /* We slept and our poll time is too small */
> + else if (vc->halt_poll_ns < halt_poll_max_ns &&
> + block_ns < halt_poll_max_ns)
> + grow_halt_poll_ns(vc);
> + } else
> + vc->halt_poll_ns = 0;
> +
> + trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
> }
>
> static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
> index 33d9daf..fb21990 100644
> --- a/arch/powerpc/kvm/trace_hv.h
> +++ b/arch/powerpc/kvm/trace_hv.h
> @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
> __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
> );
>
> +TRACE_EVENT(kvmppc_vcore_wakeup,
> + TP_PROTO(int do_sleep, __u64 ns),
> +
> + TP_ARGS(do_sleep, ns),
> +
> + TP_STRUCT__entry(
> + __field(__u64, ns)
> + __field(int, waited)
> + __field(pid_t, tgid)
> + ),
> +
> + TP_fast_assign(
> + __entry->ns = ns;
> + __entry->waited = do_sleep;
> + __entry->tgid = current->tgid;
> + ),
> +
> + TP_printk("%s time %lld ns, tgid=%d",
> + __entry->waited ? "wait" : "poll",
> + __entry->ns, __entry->tgid)
> +);
> +
> TRACE_EVENT(kvmppc_run_vcpu_enter,
> TP_PROTO(struct kvm_vcpu *vcpu),
>
> --
> 2.5.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
More information about the Linuxppc-dev
mailing list