[PATCH v5 1/5] powerpc/code-patching: introduce patch_instructions()
Christophe Leroy
christophe.leroy at csgroup.eu
Fri Sep 29 18:39:00 AEST 2023
Le 28/09/2023 à 21:48, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that sets up the pte, clears the pte and flushes the tlb only once per
> page range of instructions to be patched. This adds a slight overhead
> to patch_instruction() call while improving the patching time for
> scenarios where more than one instruction needs to be patched.
On my powerpc8xx, this patch increases the time needed to activate the
ftrace function tracer by about 8%.
The problem is that it makes patch_instruction() more complex.
Before your patch:
00000234 <patch_instruction>:
234: 48 00 00 6c b 2a0 <patch_instruction+0x6c>
238: 7c e0 00 a6 mfmsr r7
23c: 7c 51 13 a6 mtspr 81,r2
240: 3d 40 00 00 lis r10,0
242: R_PPC_ADDR16_HA .data
244: 39 4a 00 00 addi r10,r10,0
246: R_PPC_ADDR16_LO .data
248: 7c 69 1b 78 mr r9,r3
24c: 3d 29 40 00 addis r9,r9,16384
250: 81 0a 00 08 lwz r8,8(r10)
254: 55 29 00 26 clrrwi r9,r9,12
258: 81 4a 00 04 lwz r10,4(r10)
25c: 61 29 01 25 ori r9,r9,293
260: 91 28 00 00 stw r9,0(r8)
264: 55 49 00 26 clrrwi r9,r10,12
268: 50 6a 05 3e rlwimi r10,r3,0,20,31
26c: 90 8a 00 00 stw r4,0(r10)
270: 7c 00 50 6c dcbst 0,r10
274: 7c 00 04 ac hwsync
278: 7c 00 1f ac icbi 0,r3
27c: 7c 00 04 ac hwsync
280: 4c 00 01 2c isync
284: 38 60 00 00 li r3,0
288: 39 40 00 00 li r10,0
28c: 91 48 00 00 stw r10,0(r8)
290: 7c 00 4a 64 tlbie r9,r0
294: 7c 00 04 ac hwsync
298: 7c e0 01 24 mtmsr r7
29c: 4e 80 00 20 blr
2a0: 90 83 00 00 stw r4,0(r3)
2a4: 7c 00 18 6c dcbst 0,r3
2a8: 7c 00 04 ac hwsync
2ac: 7c 00 1f ac icbi 0,r3
2b0: 7c 00 04 ac hwsync
2b4: 4c 00 01 2c isync
2b8: 38 60 00 00 li r3,0
2bc: 4e 80 00 20 blr
2c0: 38 60 ff ff li r3,-1
2c4: 4b ff ff c4 b 288 <patch_instruction+0x54>
2c8: 38 60 ff ff li r3,-1
2cc: 4e 80 00 20 blr
After your patch:
0000020c <__do_patch_instructions>:
20c: 94 21 ff e0 stwu r1,-32(r1)
210: 3d 40 00 00 lis r10,0
212: R_PPC_ADDR16_HA .data
214: 93 81 00 10 stw r28,16(r1)
218: 93 c1 00 18 stw r30,24(r1)
21c: 93 a1 00 14 stw r29,20(r1)
220: 93 e1 00 1c stw r31,28(r1)
224: 39 4a 00 00 addi r10,r10,0
226: R_PPC_ADDR16_LO .data
228: 7c 69 1b 78 mr r9,r3
22c: 7c be 2b 79 mr. r30,r5
230: 3d 29 40 00 addis r9,r9,16384
234: 83 ea 00 04 lwz r31,4(r10)
238: 83 aa 00 08 lwz r29,8(r10)
23c: 55 29 00 26 clrrwi r9,r9,12
240: 61 29 01 25 ori r9,r9,293
244: 57 fc 00 26 clrrwi r28,r31,12
248: 91 3d 00 00 stw r9,0(r29)
24c: 50 7f 05 3e rlwimi r31,r3,0,20,31
250: 40 82 00 4c bne 29c <__do_patch_instructions+0x90>
254: 81 24 00 00 lwz r9,0(r4)
258: 91 3f 00 00 stw r9,0(r31)
25c: 7c 00 f8 6c dcbst 0,r31
260: 7c 00 04 ac hwsync
264: 7c 00 1f ac icbi 0,r3
268: 7c 00 04 ac hwsync
26c: 4c 00 01 2c isync
270: 38 60 00 00 li r3,0
274: 39 20 00 00 li r9,0
278: 91 3d 00 00 stw r9,0(r29)
27c: 7c 00 e2 64 tlbie r28,r0
280: 7c 00 04 ac hwsync
284: 83 81 00 10 lwz r28,16(r1)
288: 83 a1 00 14 lwz r29,20(r1)
28c: 83 c1 00 18 lwz r30,24(r1)
290: 83 e1 00 1c lwz r31,28(r1)
294: 38 21 00 20 addi r1,r1,32
298: 4e 80 00 20 blr
29c: 2c 06 00 00 cmpwi r6,0
2a0: 7c 08 02 a6 mflr r0
2a4: 90 01 00 24 stw r0,36(r1)
2a8: 40 82 00 24 bne 2cc <__do_patch_instructions+0xc0>
2ac: 7f e3 fb 78 mr r3,r31
2b0: 48 00 00 01 bl 2b0 <__do_patch_instructions+0xa4>
2b0: R_PPC_REL24 memcpy
2b4: 7c 9f f2 14 add r4,r31,r30
2b8: 7f e3 fb 78 mr r3,r31
2bc: 48 00 00 01 bl 2bc <__do_patch_instructions+0xb0>
2bc: R_PPC_REL24 flush_icache_range
2c0: 80 01 00 24 lwz r0,36(r1)
2c4: 7c 08 03 a6 mtlr r0
2c8: 4b ff ff a8 b 270 <__do_patch_instructions+0x64>
2cc: 80 84 00 00 lwz r4,0(r4)
2d0: 57 c5 f0 be srwi r5,r30,2
2d4: 7f e3 fb 78 mr r3,r31
2d8: 48 00 00 01 bl 2d8 <__do_patch_instructions+0xcc>
2d8: R_PPC_REL24 memset32
2dc: 4b ff ff d8 b 2b4 <__do_patch_instructions+0xa8>
2e0: 38 60 ff ff li r3,-1
2e4: 4b ff ff 90 b 274 <__do_patch_instructions+0x68>
...
00000310 <patch_instruction>:
310: 94 21 ff e0 stwu r1,-32(r1)
314: 90 81 00 08 stw r4,8(r1)
318: 48 00 00 40 b 358 <patch_instruction+0x48>
31c: 7c 08 02 a6 mflr r0
320: 90 01 00 24 stw r0,36(r1)
324: 93 e1 00 1c stw r31,28(r1)
328: 7f e0 00 a6 mfmsr r31
32c: 7c 51 13 a6 mtspr 81,r2
330: 38 c0 00 00 li r6,0
334: 38 81 00 08 addi r4,r1,8
338: 38 a0 00 00 li r5,0
33c: 4b ff fe d1 bl 20c <__do_patch_instructions>
340: 7f e0 01 24 mtmsr r31
344: 80 01 00 24 lwz r0,36(r1)
348: 83 e1 00 1c lwz r31,28(r1)
34c: 7c 08 03 a6 mtlr r0
350: 38 21 00 20 addi r1,r1,32
354: 4e 80 00 20 blr
358: 81 21 00 08 lwz r9,8(r1)
35c: 91 23 00 00 stw r9,0(r3)
360: 7c 00 18 6c dcbst 0,r3
364: 7c 00 04 ac hwsync
368: 7c 00 1f ac icbi 0,r3
36c: 7c 00 04 ac hwsync
370: 4c 00 01 2c isync
374: 38 60 00 00 li r3,0
378: 4b ff ff d8 b 350 <patch_instruction+0x40>
37c: 38 60 ff ff li r3,-1
380: 4b ff ff d0 b 350 <patch_instruction+0x40>
Christophe
>
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
> arch/powerpc/include/asm/code-patching.h | 1 +
> arch/powerpc/lib/code-patching.c | 93 +++++++++++++++++++++---
> 2 files changed, 85 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..43a4aedfa703 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
> int patch_branch(u32 *addr, unsigned long target, int flags);
> int patch_instruction(u32 *addr, ppc_inst_t instr);
> int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr);
>
> static inline unsigned long patch_site_addr(s32 *site)
> {
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..4ff002bc41f6 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
> flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
> }
>
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, bool repeat_instr)
> +{
> + unsigned long start = (unsigned long)patch_addr;
> +
> + /* Repeat instruction */
> + if (repeat_instr) {
> + ppc_inst_t instr = ppc_inst_read(code);
> +
> + if (ppc_inst_prefixed(instr)) {
> + u64 val = ppc_inst_as_ulong(instr);
> +
> + memset64((uint64_t *)patch_addr, val, len / 8);
> + } else {
> + u32 val = ppc_inst_val(instr);
> +
> + memset32(patch_addr, val, len / 4);
> + }
> + } else
> + memcpy(patch_addr, code, len);
> +
> + smp_wmb(); /* smp write barrier */
> + flush_icache_range(start, start + len);
> + return 0;
> +}
> +
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, bool repeat_instr)
> {
> int err;
> u32 *patch_addr;
> @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>
> orig_mm = start_using_temp_mm(patching_mm);
>
> - err = __patch_instruction(addr, instr, patch_addr);
> + /* Single instruction case. */
> + if (len == 0) {
> + err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
>
> - /* hwsync performed by __patch_instruction (sync) if successful */
> - if (err)
> - mb(); /* sync */
> + /* hwsync performed by __patch_instruction (sync) if successful */
> + if (err)
> + mb(); /* sync */
> + } else
> + err = __patch_instructions(patch_addr, code, len, repeat_instr);
>
> /* context synchronisation performed by __patch_instruction (isync or exception) */
> stop_using_temp_mm(patching_mm, orig_mm);
> @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> return err;
> }
>
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool repeat_instr)
> {
> int err;
> u32 *patch_addr;
> @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> if (radix_enabled())
> asm volatile("ptesync": : :"memory");
>
> - err = __patch_instruction(addr, instr, patch_addr);
> + /* Single instruction case. */
> + if (len == 0)
> + err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
> + else
> + err = __patch_instructions(patch_addr, code, len, repeat_instr);
>
> pte_clear(&init_mm, text_poke_addr, pte);
> flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>
> local_irq_save(flags);
> if (mm_patch_enabled())
> - err = __do_patch_instruction_mm(addr, instr);
> + err = __do_patch_instructions_mm(addr, &instr, 0, false);
> else
> - err = __do_patch_instruction(addr, instr);
> + err = __do_patch_instructions(addr, &instr, 0, false);
> local_irq_restore(flags);
>
> return err;
> }
> NOKPROBE_SYMBOL(patch_instruction);
>
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + *
> + * If repeat_instr is true, the same instruction is filled for
> + * 'len' bytes.
> + */
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)
> +{
> + unsigned long flags;
> + size_t plen;
> + int err;
> +
> + while (len > 0) {
> + plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
> +
> + local_irq_save(flags);
> + if (mm_patch_enabled())
> + err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
> + else
> + err = __do_patch_instructions(addr, code, plen, repeat_instr);
> + local_irq_restore(flags);
> + if (err)
> + break;
> +
> + len -= plen;
> + addr = addr + plen;
> + if (!repeat_instr)
> + code = code + plen;
> + }
> +
> + return err;
> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
> int patch_branch(u32 *addr, unsigned long target, int flags)
> {
> ppc_inst_t instr;
More information about the Linuxppc-dev
mailing list