[PATCH v2 2/4] powerpc/pseries: Implement CONFIG_PARAVIRT_TIME_ACCOUNTING

Nicholas Piggin npiggin at gmail.com
Fri Sep 2 18:53:14 AEST 2022


CONFIG_VIRT_CPU_ACCOUNTING_GEN under pseries does not provide stolen
time accounting unless CONFIG_PARAVIRT_TIME_ACCOUNTING is enabled.
Implement this using the VPA accumulated wait counters.

Note this will not work on current KVM hosts because KVM does not
implement the VPA dispatch counters (yet). It could be implemented
with the dispatch trace log as it is for VIRT_CPU_ACCOUNTING_NATIVE,
but that is not necessary for the more limited accounting provided
by PARAVIRT_TIME_ACCOUNTING, and it is more expensive, complex, and
has downsides like potential log wrap.

>From Shrikanth:

  [...] it was tested on Power10 [PowerVM] Shared LPAR. system has two
  LPAR. we will call first one LPAR1 and second one as LPAR2. Test was
  carried out in SMT=1. Similar observation was seen in SMT=8 as well.

  LPAR config header from each LPAR is below. LPAR1 is twice as big as
  LPAR2. Since Both are sharing the same underlying hardware, work
  stealing will happen when both the LPAR's are contending for the same
  resource.

  LPAR1:
  type=Shared mode=Uncapped smt=Off lcpu=40 cpus=40 ent=20.00
  LPAR2:
  type=Shared mode=Uncapped smt=Off lcpu=20 cpus=40 ent=10.00

  mpstat was used to check for the utilization. stress-ng has been used
  as the workload. Few cases are tested. when the both LPAR are idle
  there is no steal time. when LPAR1 starts running at 100% which
  consumes all of the physical resource, steal time starts to get
  accounted.  With LPAR1 running at 100% and LPAR2 starts running, steal
  time starts increasing. This is as expected. When the LPAR2 Load is
  increased further, steal time increases further.

  Case 1: 0% LPAR1; 0% LPAR2
   %usr  %nice   %sys %iowait  %irq  %soft %steal %guest %gnice  %idle
   0.00   0.00   0.05   0.00   0.00   0.00   0.00   0.00   0.00  99.95

  Case 2: 100% LPAR1; 0% LPAR2
   %usr  %nice   %sys %iowait  %irq  %soft %steal %guest %gnice  %idle
  97.68   0.00   0.00   0.00   0.00   0.00   2.32   0.00   0.00   0.00

  Case 3: 100% LPAR1; 50% LPAR2
   %usr  %nice   %sys %iowait  %irq  %soft %steal %guest %gnice  %idle
  86.34   0.00   0.10   0.00   0.00   0.03  13.54   0.00   0.00   0.00

  Case 4: 100% LPAR1; 100% LPAR2
   %usr  %nice   %sys %iowait  %irq  %soft %steal %guest %gnice  %idle
  78.54   0.00   0.07   0.00   0.00   0.02  21.36   0.00   0.00   0.00

  Case 5: 50% LPAR1; 100% LPAR2
   %usr  %nice   %sys %iowait  %irq  %soft %steal %guest %gnice  %idle
  49.37   0.00   0.00   0.00   0.00   0.00   1.17   0.00   0.00  49.47

  Patch is accounting for the steal time and basic tests are holding
  good.

Tested-by: Shrikanth Hegde <sshegde at linux.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin at gmail.com>
---
 .../admin-guide/kernel-parameters.txt         |  6 +++---
 arch/powerpc/include/asm/paravirt.h           | 12 ++++++++++++
 arch/powerpc/include/asm/paravirt_api_clock.h |  1 +
 arch/powerpc/platforms/pseries/Kconfig        |  8 ++++++++
 arch/powerpc/platforms/pseries/lpar.c         | 11 +++++++++++
 arch/powerpc/platforms/pseries/setup.c        | 19 +++++++++++++++++++
 6 files changed, 54 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/paravirt_api_clock.h

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 426fa892d311..7172a91539f2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3741,9 +3741,9 @@
 			[X86,PV_OPS] Disable paravirtualized VMware scheduler
 			clock and use the default one.
 
-	no-steal-acc	[X86,PV_OPS,ARM64] Disable paravirtualized steal time
-			accounting. steal time is computed, but won't
-			influence scheduler behaviour
+	no-steal-acc	[X86,PV_OPS,ARM64,PPC/PSERIES] Disable paravirtualized
+			steal time accounting. steal time is computed, but
+			won't influence scheduler behaviour
 
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
 
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
index eb7df559ae74..f5ba1a3c41f8 100644
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -21,6 +21,18 @@ static inline bool is_shared_processor(void)
 	return static_branch_unlikely(&shared_processor);
 }
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 pseries_paravirt_steal_clock(int cpu);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+	return pseries_paravirt_steal_clock(cpu);
+}
+#endif
+
 /* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
 static inline u32 yield_count_of(int cpu)
 {
diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..65ac7cee0dad
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt_api_clock.h
@@ -0,0 +1 @@
+#include <asm/paravirt.h>
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index fb6499977f99..a3b4d99567cb 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -23,13 +23,21 @@ config PPC_PSERIES
 	select SWIOTLB
 	default y
 
+config PARAVIRT
+	bool
+
 config PARAVIRT_SPINLOCKS
 	bool
 
+config PARAVIRT_TIME_ACCOUNTING
+	select PARAVIRT
+	bool
+
 config PPC_SPLPAR
 	bool "Support for shared-processor logical partitions"
 	depends on PPC_PSERIES
 	select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+	select PARAVIRT_TIME_ACCOUNTING if VIRT_CPU_ACCOUNTING_GEN
 	default y
 	help
 	  Enabling this option will make the kernel run more efficiently
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index e6c117fb6491..97ef6499e501 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -660,6 +660,17 @@ static int __init vcpudispatch_stats_procfs_init(void)
 }
 
 machine_device_initcall(pseries, vcpudispatch_stats_procfs_init);
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+u64 pseries_paravirt_steal_clock(int cpu)
+{
+	struct lppaca *lppaca = &lppaca_of(cpu);
+
+	return be64_to_cpu(READ_ONCE(lppaca->enqueue_dispatch_tb)) +
+		be64_to_cpu(READ_ONCE(lppaca->ready_enqueue_tb));
+}
+#endif
+
 #endif /* CONFIG_PPC_SPLPAR */
 
 void vpa_init(int cpu)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 489f4c4df468..5e44c65a032c 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -80,6 +80,20 @@
 DEFINE_STATIC_KEY_FALSE(shared_processor);
 EXPORT_SYMBOL(shared_processor);
 
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+struct static_key paravirt_steal_enabled;
+struct static_key paravirt_steal_rq_enabled;
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+	steal_acc = false;
+	return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+#endif
+
 int CMO_PrPSP = -1;
 int CMO_SecPSP = -1;
 unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -834,6 +848,11 @@ static void __init pSeries_setup_arch(void)
 		if (lppaca_shared_proc(get_lppaca())) {
 			static_branch_enable(&shared_processor);
 			pv_spinlocks_init();
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+			static_key_slow_inc(&paravirt_steal_enabled);
+			if (steal_acc)
+				static_key_slow_inc(&paravirt_steal_rq_enabled);
+#endif
 		}
 
 		ppc_md.power_save = pseries_lpar_idle;
-- 
2.37.2



More information about the Linuxppc-dev mailing list