[PATCH v5 1/5] powerpc/code-patching: introduce patch_instructions()

Hari Bathini hbathini at linux.ibm.com
Sat Oct 7 03:22:57 AEDT 2023


Hi Christophe,


On 29/09/23 2:09 pm, Christophe Leroy wrote:
> 
> 
> On 28/09/2023 at 21:48, Hari Bathini wrote:
>> patch_instruction() entails setting up a pte, patching the instruction,
>> clearing the pte and flushing the tlb. If multiple instructions need
>> to be patched, every instruction would have to go through the above
>> drill unnecessarily. Instead, introduce a function, patch_instructions(),
>> that sets up the pte, clears the pte and flushes the tlb only once per
>> page range of instructions to be patched. This adds a slight overhead
>> to each patch_instruction() call while improving the patching time for
>> scenarios where more than one instruction needs to be patched.
> 
> On my powerpc8xx, this patch leads to an increase of about 8% in the
> time needed to activate the ftrace function tracer.
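
To make the batching idea concrete for anyone skimming the thread, it
amounts to roughly the following (a simplified sketch only --
map_patch_area()/unmap_patch_area() are stand-ins for the real
setup/teardown helpers in code-patching.c, and error handling,
instruction sync and the actual v5 signature are glossed over):

int patch_instructions_sketch(u32 *addr, u32 *code, size_t len)
{
	while (len > 0) {
		size_t plen = min_t(size_t, len,
				    PAGE_SIZE - offset_in_page(addr));
		/* set up the pte once for this page range */
		u32 *patch_addr = map_patch_area(addr);

		/* copy all instructions that fall within the page */
		memcpy(patch_addr, code, plen);

		/* clear the pte and flush the tlb once, not per instruction */
		unmap_patch_area(addr);

		addr += plen / sizeof(u32);
		code += plen / sizeof(u32);
		len -= plen;
	}
	return 0;
}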

Interesting! My observation on ppc64le was somewhat different.
With a single CPU, the average ticks per patch_instruction() call were
almost the same with and without the patch (~1580). Overall, activating
the function tracer was less than 0.6% slower with the patch than
without it.
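(Assuming the usual 512 MHz timebase on these systems, ~1580 ticks is
roughly 3 microseconds per call.)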

Ticks taken to activate the function tracer in 15 attempts without
this patch (avg: 108734089):
106619626
111712292
111030404
111021344
111313530
106253773
107156175
106887038
107215379
108646636
108040287
108311770
107842343
106894310
112066423

Ticks taken to activate the function tracer in 15 attempts with
this patch (avg: 109328578):
109378357
108794095
108595381
107622142
110689418
107287276
107132093
112540481
111311830
112608265
102883923
112054554
111762570
109874309
107393979
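
Going by the two averages, the overhead works out to
(109328578 - 108734089) / 108734089 ~= 0.55%.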

I used the patch below for the experiment:

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad4..0979d12d00c 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -19,6 +19,10 @@
  #include <asm/page.h>
  #include <asm/code-patching.h>
  #include <asm/inst.h>
+#include <asm/time.h>
+
+unsigned long patching_time;
+unsigned long num_times;

  static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
  {
@@ -353,7 +357,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
  	return err;
  }

-int patch_instruction(u32 *addr, ppc_inst_t instr)
+int ___patch_instruction(u32 *addr, ppc_inst_t instr)
  {
  	int err;
  	unsigned long flags;
@@ -376,6 +380,19 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)

  	return err;
  }
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+	u64 start;
+	int err;
+
+	start = get_tb();
+	err = ___patch_instruction(addr, instr);
+	patching_time += (get_tb() - start);
+	num_times++;
+
+	return err;
+}
  NOKPROBE_SYMBOL(patch_instruction);

  int patch_branch(u32 *addr, unsigned long target, int flags)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 1d4bc493b2f..f52694cfeab 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -35,6 +35,18 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
  #define KERNEL_ATTR_RW(_name) \
  static struct kobj_attribute _name##_attr = __ATTR_RW(_name)

+unsigned long patch_avgtime;
+extern unsigned long patching_time;
+extern unsigned long num_times;
+
+static ssize_t patching_avgtime_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	patch_avgtime = num_times ? patching_time / num_times : 0; /* avoid div-by-zero */
+	return sysfs_emit(buf, "%lu\n", patch_avgtime);
+}
+KERNEL_ATTR_RO(patching_avgtime);
+
  /* current uevent sequence number */
  static ssize_t uevent_seqnum_show(struct kobject *kobj,
  				  struct kobj_attribute *attr, char *buf)
@@ -250,6 +262,7 @@ struct kobject *kernel_kobj;
  EXPORT_SYMBOL_GPL(kernel_kobj);

  static struct attribute * kernel_attrs[] = {
+	&patching_avgtime_attr.attr,
  	&fscaps_attr.attr,
  	&uevent_seqnum_attr.attr,
  	&cpu_byteorder_attr.attr,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index abaaf516fca..5eb950bcab9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -50,6 +50,7 @@
  #include <linux/workqueue.h>

  #include <asm/setup.h> /* COMMAND_LINE_SIZE */
+#include <asm/time.h>

  #include "trace.h"
  #include "trace_output.h"
@@ -6517,6 +6518,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
  	bool had_max_tr;
  #endif
  	int ret = 0;
+	u64 start = 0;	/* avoid uninitialized use at 'out' on early exit */

  	mutex_lock(&trace_types_lock);

@@ -6536,6 +6538,10 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
  		ret = -EINVAL;
  		goto out;
  	}
+
+	pr_warn("Current tracer: %s, Changing to tracer: %s\n",
+		tr->current_trace->name, t->name);
+	start = get_tb();
  	if (t == tr->current_trace)
  		goto out;

@@ -6614,6 +6620,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
  	tr->current_trace->enabled++;
  	trace_branch_enable(tr);
   out:
+	pr_warn("Time taken to enable tracer is %llu\n", (get_tb() - start));
  	mutex_unlock(&trace_types_lock);

  	return ret;
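
For reference on how the numbers above were collected: switching the
tracer triggers the pr_warn() in tracing_set_tracer() (the total ticks
show up in dmesg), and the running average kept by the
patch_instruction() wrapper is read back through sysfs:

  # echo function > /sys/kernel/tracing/current_tracer
  # cat /sys/kernel/patching_avgtime

(Depending on where tracefs is mounted, the first path may be
/sys/kernel/debug/tracing instead.)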

Thanks
Hari

