[PATCH V4 3/5] kvm/ppc/book3s_hv: Implement halt polling in the kvm_hv kernel module
Suraj Jitindar Singh
sjitindarsingh at gmail.com
Thu Jul 21 19:24:59 AEST 2016
On 20/07/16 04:58, David Matlack wrote:
> On Tue, Jul 19, 2016 at 1:12 AM, Suraj Jitindar Singh
> <sjitindarsingh at gmail.com> wrote:
>> This patch introduces halt polling into the kvm_hv kernel module. When a
>> vcore is idle it will poll for a period of time before scheduling itself
>> out.
>>
>> When all of the runnable vcpus on a vcore have ceded (and thus the vcore is
>> idle) we schedule ourselves out to allow something else to run. If we then
>> need to wake up quickly (for example because an interrupt arrives), we have
>> to wait until we are scheduled in again.
>>
>> Implement halt polling so that when a vcore is idle, and before scheduling
>> ourselves, we poll for vcpus in the runnable_threads list which have
>> pending exceptions or which leave the ceded state. If we poll successfully
>> then we can get back into the guest very quickly without ever scheduling
>> ourselves, otherwise we schedule ourselves out as before.
>>
>> Testing of this patch with a TCP round-robin test between two guests with
>> virtio network interfaces found a decrease in round-trip time of ~15us on
>> average. A performance gain is only seen when we go out of and back into
>> the guest often and quickly; otherwise there is no net benefit from the
>> polling. The polling interval is adjusted such that when we are
>> often scheduled out for long periods of time it is reduced, and when we
>> often poll successfully it is increased. The rate at which the polling
>> interval increases or decreases, and the maximum polling interval, can
>> be set through module parameters.
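[ Just a usage note, not part of the patch: these are ordinary module
parameters, so (assuming kvm_hv is built as a module rather than
built-in) they can be set at load time, and halt_poll_max_ns is also
root-writable at runtime, e.g. with example values:

	modprobe kvm_hv halt_poll_max_ns=200000 halt_poll_ns_grow=2

	echo 200000 > /sys/module/kvm_hv/parameters/halt_poll_max_ns

halt_poll_ns_grow and halt_poll_ns_shrink are read-only once loaded. ]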
>>
>> Based on the implementation in the generic kvm module by Wanpeng Li and
>> Paolo Bonzini, and on direction from Paul Mackerras.
>>
>> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh at gmail.com>
>> ---
>> arch/powerpc/include/asm/kvm_book3s.h | 1 +
>> arch/powerpc/include/asm/kvm_host.h | 1 +
>> arch/powerpc/kvm/book3s_hv.c | 116 ++++++++++++++++++++++++++++++----
>> arch/powerpc/kvm/trace_hv.h | 22 +++++++
>> 4 files changed, 126 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
>> index 151f817..c261f52 100644
>> --- a/arch/powerpc/include/asm/kvm_book3s.h
>> +++ b/arch/powerpc/include/asm/kvm_book3s.h
>> @@ -102,6 +102,7 @@ struct kvmppc_vcore {
>> ulong pcr;
>> ulong dpdes; /* doorbell state (POWER8) */
>> ulong conferring_threads;
>> + unsigned int halt_poll_ns;
>> };
>>
>> struct kvmppc_vcpu_book3s {
>> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
>> index 02d06e9..610f393 100644
>> --- a/arch/powerpc/include/asm/kvm_host.h
>> +++ b/arch/powerpc/include/asm/kvm_host.h
>> @@ -294,6 +294,7 @@ struct kvm_arch {
>> #define VCORE_SLEEPING 3
>> #define VCORE_RUNNING 4
>> #define VCORE_EXITING 5
>> +#define VCORE_POLLING 6
>>
>> /*
>> * Struct used to manage memory for a virtual processor area
>> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
>> index 3bcf9e6..a9de1d4 100644
>> --- a/arch/powerpc/kvm/book3s_hv.c
>> +++ b/arch/powerpc/kvm/book3s_hv.c
>> @@ -94,6 +94,23 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
>> MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
>> #endif
>>
>> +/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
>> +static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
>> +module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
>> +MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
>> +
>> +/* Factor by which the vcore halt poll interval is grown, default is to double
>> + */
>> +static unsigned int halt_poll_ns_grow = 2;
>> +module_param(halt_poll_ns_grow, uint, S_IRUGO);
>> +MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
>> +
>> +/* Factor by which the vcore halt poll interval is shrunk, default is to reset
>> + */
>> +static unsigned int halt_poll_ns_shrink;
>> +module_param(halt_poll_ns_shrink, uint, S_IRUGO);
>> +MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
>> +
>> static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
>> static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
>>
>> @@ -2620,32 +2637,82 @@ static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
>> finish_wait(&vcpu->arch.cpu_run, &wait);
>> }
>>
>> +static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
>> +{
>> + /* 10us base */
>> + if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
>> + vc->halt_poll_ns = 10000;
>> + else
>> + vc->halt_poll_ns *= halt_poll_ns_grow;
>> +
>> + if (vc->halt_poll_ns > halt_poll_max_ns)
>> + vc->halt_poll_ns = halt_poll_max_ns;
>> +}
>> +
>> +static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
>> +{
>> + if (halt_poll_ns_shrink == 0)
>> + vc->halt_poll_ns = 0;
>> + else
>> + vc->halt_poll_ns /= halt_poll_ns_shrink;
>> +}
>> +
>> +/* Check to see if any of the runnable vcpus on the vcore have pending
>> + * exceptions or are no longer ceded
>> + */
>> +static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
>> +{
>> + struct kvm_vcpu *vcpu;
>> + int i;
>> +
>> + for_each_runnable_thread(i, vcpu, vc) {
>> + if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded)
>> + return 1;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> /*
>> * All the vcpus in this vcore are idle, so wait for a decrementer
>> * or external interrupt to one of the vcpus. vc->lock is held.
>> */
>> static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>> {
>> - struct kvm_vcpu *vcpu;
>> - int do_sleep = 1, i;
>> + int do_sleep = 1;
>> + ktime_t cur, start;
>> + u64 block_ns;
>> DECLARE_SWAITQUEUE(wait);
>>
>> - prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
>> + /* Poll for pending exceptions and ceded state */
>> + cur = start = ktime_get();
>> + if (vc->halt_poll_ns) {
>> + ktime_t stop = ktime_add_ns(start, vc->halt_poll_ns);
>>
>> - /*
>> - * Check one last time for pending exceptions and ceded state after
>> - * we put ourselves on the wait queue
>> - */
>> - for_each_runnable_thread(i, vcpu, vc) {
>> - if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
>> - do_sleep = 0;
>> - break;
>> - }
>> + vc->vcore_state = VCORE_POLLING;
>> + spin_unlock(&vc->lock);
>> +
>> + do {
>> + if (kvmppc_vcore_check_block(vc)) {
>> + do_sleep = 0;
>> + break;
>> + }
>> + cur = ktime_get();
>> + } while (ktime_before(cur, stop));
> The generic halt-polling implementation only polls if there are no
> other threads waiting to run (single_task_running() == 1). Should/Can
> we do the same here?
Thanks for catching that; yes, I think we should do the same here.
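Something like the following should do it, I think (just a sketch of the
polling loop, mirroring what the generic implementation in
virt/kvm/kvm_main.c does; single_task_running() is declared in
linux/sched.h):

	/* Poll, but only while there is nothing else to run on this cpu */
	do {
		if (kvmppc_vcore_check_block(vc)) {
			do_sleep = 0;
			break;
		}
		cur = ktime_get();
	} while (single_task_running() && ktime_before(cur, stop));

That way we stop polling and go to sleep as soon as another task becomes
runnable on this cpu instead of burning the rest of the poll interval.
Will fix in the next revision.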
>
>> +
>> + spin_lock(&vc->lock);
>> + vc->vcore_state = VCORE_INACTIVE;
>> +
>> + if (!do_sleep)
>> + goto out;
>> }
>>
>> - if (!do_sleep) {
>> + prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
>> +
>> + if (kvmppc_vcore_check_block(vc)) {
>> finish_swait(&vc->wq, &wait);
>> - return;
>> + do_sleep = 0;
>> + goto out;
>> }
>>
>> vc->vcore_state = VCORE_SLEEPING;
>> @@ -2656,6 +2723,27 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>> spin_lock(&vc->lock);
>> vc->vcore_state = VCORE_INACTIVE;
>> trace_kvmppc_vcore_blocked(vc, 1);
>> +
>> + cur = ktime_get();
>> +
>> +out:
>> + block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
>> +
>> + /* Adjust poll time */
>> + if (halt_poll_max_ns) {
>> + if (block_ns <= vc->halt_poll_ns)
>> + ;
>> + /* We slept and blocked for longer than the max halt time */
>> + else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
>> + shrink_halt_poll_ns(vc);
>> + /* We slept and our poll time is too small */
>> + else if (vc->halt_poll_ns < halt_poll_max_ns &&
>> + block_ns < halt_poll_max_ns)
>> + grow_halt_poll_ns(vc);
>> + } else
>> + vc->halt_poll_ns = 0;
>> +
>> + trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
>> }
>>
>> static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>> diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
>> index 33d9daf..fb21990 100644
>> --- a/arch/powerpc/kvm/trace_hv.h
>> +++ b/arch/powerpc/kvm/trace_hv.h
>> @@ -432,6 +432,28 @@ TRACE_EVENT(kvmppc_vcore_blocked,
>> __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
>> );
>>
>> +TRACE_EVENT(kvmppc_vcore_wakeup,
>> + TP_PROTO(int do_sleep, __u64 ns),
>> +
>> + TP_ARGS(do_sleep, ns),
>> +
>> + TP_STRUCT__entry(
>> + __field(__u64, ns)
>> + __field(int, waited)
>> + __field(pid_t, tgid)
>> + ),
>> +
>> + TP_fast_assign(
>> + __entry->ns = ns;
>> + __entry->waited = do_sleep;
>> + __entry->tgid = current->tgid;
>> + ),
>> +
>> + TP_printk("%s time %lld ns, tgid=%d",
>> + __entry->waited ? "wait" : "poll",
>> + __entry->ns, __entry->tgid)
>> +);
>> +
>> TRACE_EVENT(kvmppc_run_vcpu_enter,
>> TP_PROTO(struct kvm_vcpu *vcpu),
>>
>> --
>> 2.5.5
>>