[PATCH v5 1/5] powerpc/code-patching: introduce patch_instructions()

Christophe Leroy christophe.leroy at csgroup.eu
Fri Sep 29 18:39:00 AEST 2023



Le 28/09/2023 à 21:48, Hari Bathini a écrit :
> patch_instruction() entails setting up pte, patching the instruction,
> clearing the pte and flushing the tlb. If multiple instructions need
> to be patched, every instruction would have to go through the above
> drill unnecessarily. Instead, introduce function patch_instructions()
> that sets up the pte, clears the pte and flushes the tlb only once per
> page range of instructions to be patched. This adds a slight overhead
> to patch_instruction() call while improving the patching time for
> scenarios where more than one instruction needs to be patched.

On my powerpc8xx, this patch leads to an increase of about 8% of the 
time needed to activate ftrace function tracer.

The problem is it complexifies patch_instruction().

Before your patch:

00000234 <patch_instruction>:
  234:	48 00 00 6c 	b       2a0 <patch_instruction+0x6c>
  238:	7c e0 00 a6 	mfmsr   r7
  23c:	7c 51 13 a6 	mtspr   81,r2
  240:	3d 40 00 00 	lis     r10,0
			242: R_PPC_ADDR16_HA	.data
  244:	39 4a 00 00 	addi    r10,r10,0
			246: R_PPC_ADDR16_LO	.data
  248:	7c 69 1b 78 	mr      r9,r3
  24c:	3d 29 40 00 	addis   r9,r9,16384
  250:	81 0a 00 08 	lwz     r8,8(r10)
  254:	55 29 00 26 	clrrwi  r9,r9,12
  258:	81 4a 00 04 	lwz     r10,4(r10)
  25c:	61 29 01 25 	ori     r9,r9,293
  260:	91 28 00 00 	stw     r9,0(r8)
  264:	55 49 00 26 	clrrwi  r9,r10,12
  268:	50 6a 05 3e 	rlwimi  r10,r3,0,20,31
  26c:	90 8a 00 00 	stw     r4,0(r10)
  270:	7c 00 50 6c 	dcbst   0,r10
  274:	7c 00 04 ac 	hwsync
  278:	7c 00 1f ac 	icbi    0,r3
  27c:	7c 00 04 ac 	hwsync
  280:	4c 00 01 2c 	isync
  284:	38 60 00 00 	li      r3,0
  288:	39 40 00 00 	li      r10,0
  28c:	91 48 00 00 	stw     r10,0(r8)
  290:	7c 00 4a 64 	tlbie   r9,r0
  294:	7c 00 04 ac 	hwsync
  298:	7c e0 01 24 	mtmsr   r7
  29c:	4e 80 00 20 	blr
  2a0:	90 83 00 00 	stw     r4,0(r3)
  2a4:	7c 00 18 6c 	dcbst   0,r3
  2a8:	7c 00 04 ac 	hwsync
  2ac:	7c 00 1f ac 	icbi    0,r3
  2b0:	7c 00 04 ac 	hwsync
  2b4:	4c 00 01 2c 	isync
  2b8:	38 60 00 00 	li      r3,0
  2bc:	4e 80 00 20 	blr
  2c0:	38 60 ff ff 	li      r3,-1
  2c4:	4b ff ff c4 	b       288 <patch_instruction+0x54>
  2c8:	38 60 ff ff 	li      r3,-1
  2cc:	4e 80 00 20 	blr


After you patch:

0000020c <__do_patch_instructions>:
  20c:	94 21 ff e0 	stwu    r1,-32(r1)
  210:	3d 40 00 00 	lis     r10,0
			212: R_PPC_ADDR16_HA	.data
  214:	93 81 00 10 	stw     r28,16(r1)
  218:	93 c1 00 18 	stw     r30,24(r1)
  21c:	93 a1 00 14 	stw     r29,20(r1)
  220:	93 e1 00 1c 	stw     r31,28(r1)
  224:	39 4a 00 00 	addi    r10,r10,0
			226: R_PPC_ADDR16_LO	.data
  228:	7c 69 1b 78 	mr      r9,r3
  22c:	7c be 2b 79 	mr.     r30,r5
  230:	3d 29 40 00 	addis   r9,r9,16384
  234:	83 ea 00 04 	lwz     r31,4(r10)
  238:	83 aa 00 08 	lwz     r29,8(r10)
  23c:	55 29 00 26 	clrrwi  r9,r9,12
  240:	61 29 01 25 	ori     r9,r9,293
  244:	57 fc 00 26 	clrrwi  r28,r31,12
  248:	91 3d 00 00 	stw     r9,0(r29)
  24c:	50 7f 05 3e 	rlwimi  r31,r3,0,20,31
  250:	40 82 00 4c 	bne     29c <__do_patch_instructions+0x90>
  254:	81 24 00 00 	lwz     r9,0(r4)
  258:	91 3f 00 00 	stw     r9,0(r31)
  25c:	7c 00 f8 6c 	dcbst   0,r31
  260:	7c 00 04 ac 	hwsync
  264:	7c 00 1f ac 	icbi    0,r3
  268:	7c 00 04 ac 	hwsync
  26c:	4c 00 01 2c 	isync
  270:	38 60 00 00 	li      r3,0
  274:	39 20 00 00 	li      r9,0
  278:	91 3d 00 00 	stw     r9,0(r29)
  27c:	7c 00 e2 64 	tlbie   r28,r0
  280:	7c 00 04 ac 	hwsync
  284:	83 81 00 10 	lwz     r28,16(r1)
  288:	83 a1 00 14 	lwz     r29,20(r1)
  28c:	83 c1 00 18 	lwz     r30,24(r1)
  290:	83 e1 00 1c 	lwz     r31,28(r1)
  294:	38 21 00 20 	addi    r1,r1,32
  298:	4e 80 00 20 	blr
  29c:	2c 06 00 00 	cmpwi   r6,0
  2a0:	7c 08 02 a6 	mflr    r0
  2a4:	90 01 00 24 	stw     r0,36(r1)
  2a8:	40 82 00 24 	bne     2cc <__do_patch_instructions+0xc0>
  2ac:	7f e3 fb 78 	mr      r3,r31
  2b0:	48 00 00 01 	bl      2b0 <__do_patch_instructions+0xa4>
			2b0: R_PPC_REL24	memcpy
  2b4:	7c 9f f2 14 	add     r4,r31,r30
  2b8:	7f e3 fb 78 	mr      r3,r31
  2bc:	48 00 00 01 	bl      2bc <__do_patch_instructions+0xb0>
			2bc: R_PPC_REL24	flush_icache_range
  2c0:	80 01 00 24 	lwz     r0,36(r1)
  2c4:	7c 08 03 a6 	mtlr    r0
  2c8:	4b ff ff a8 	b       270 <__do_patch_instructions+0x64>
  2cc:	80 84 00 00 	lwz     r4,0(r4)
  2d0:	57 c5 f0 be 	srwi    r5,r30,2
  2d4:	7f e3 fb 78 	mr      r3,r31
  2d8:	48 00 00 01 	bl      2d8 <__do_patch_instructions+0xcc>
			2d8: R_PPC_REL24	memset32
  2dc:	4b ff ff d8 	b       2b4 <__do_patch_instructions+0xa8>
  2e0:	38 60 ff ff 	li      r3,-1
  2e4:	4b ff ff 90 	b       274 <__do_patch_instructions+0x68>

...

00000310 <patch_instruction>:
  310:	94 21 ff e0 	stwu    r1,-32(r1)
  314:	90 81 00 08 	stw     r4,8(r1)
  318:	48 00 00 40 	b       358 <patch_instruction+0x48>
  31c:	7c 08 02 a6 	mflr    r0
  320:	90 01 00 24 	stw     r0,36(r1)
  324:	93 e1 00 1c 	stw     r31,28(r1)
  328:	7f e0 00 a6 	mfmsr   r31
  32c:	7c 51 13 a6 	mtspr   81,r2
  330:	38 c0 00 00 	li      r6,0
  334:	38 81 00 08 	addi    r4,r1,8
  338:	38 a0 00 00 	li      r5,0
  33c:	4b ff fe d1 	bl      20c <__do_patch_instructions>
  340:	7f e0 01 24 	mtmsr   r31
  344:	80 01 00 24 	lwz     r0,36(r1)
  348:	83 e1 00 1c 	lwz     r31,28(r1)
  34c:	7c 08 03 a6 	mtlr    r0
  350:	38 21 00 20 	addi    r1,r1,32
  354:	4e 80 00 20 	blr
  358:	81 21 00 08 	lwz     r9,8(r1)
  35c:	91 23 00 00 	stw     r9,0(r3)
  360:	7c 00 18 6c 	dcbst   0,r3
  364:	7c 00 04 ac 	hwsync
  368:	7c 00 1f ac 	icbi    0,r3
  36c:	7c 00 04 ac 	hwsync
  370:	4c 00 01 2c 	isync
  374:	38 60 00 00 	li      r3,0
  378:	4b ff ff d8 	b       350 <patch_instruction+0x40>
  37c:	38 60 ff ff 	li      r3,-1
  380:	4b ff ff d0 	b       350 <patch_instruction+0x40>


Christophe

> 
> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
> ---
>   arch/powerpc/include/asm/code-patching.h |  1 +
>   arch/powerpc/lib/code-patching.c         | 93 +++++++++++++++++++++---
>   2 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
> index 3f881548fb61..43a4aedfa703 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>   int patch_branch(u32 *addr, unsigned long target, int flags);
>   int patch_instruction(u32 *addr, ppc_inst_t instr);
>   int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr);
>   
>   static inline unsigned long patch_site_addr(s32 *site)
>   {
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index b00112d7ad46..4ff002bc41f6 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
>   	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>   }
>   
> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
> +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, bool repeat_instr)
> +{
> +	unsigned long start = (unsigned long)patch_addr;
> +
> +	/* Repeat instruction */
> +	if (repeat_instr) {
> +		ppc_inst_t instr = ppc_inst_read(code);
> +
> +		if (ppc_inst_prefixed(instr)) {
> +			u64 val = ppc_inst_as_ulong(instr);
> +
> +			memset64((uint64_t *)patch_addr, val, len / 8);
> +		} else {
> +			u32 val = ppc_inst_val(instr);
> +
> +			memset32(patch_addr, val, len / 4);
> +		}
> +	} else
> +		memcpy(patch_addr, code, len);
> +
> +	smp_wmb();	/* smp write barrier */
> +	flush_icache_range(start, start + len);
> +	return 0;
> +}
> +
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, bool repeat_instr)
>   {
>   	int err;
>   	u32 *patch_addr;
> @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>   
>   	orig_mm = start_using_temp_mm(patching_mm);
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	/* Single instruction case. */
> +	if (len == 0) {
> +		err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
>   
> -	/* hwsync performed by __patch_instruction (sync) if successful */
> -	if (err)
> -		mb();  /* sync */
> +		/* hwsync performed by __patch_instruction (sync) if successful */
> +		if (err)
> +			mb();  /* sync */
> +	} else
> +		err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>   	/* context synchronisation performed by __patch_instruction (isync or exception) */
>   	stop_using_temp_mm(patching_mm, orig_mm);
> @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>   	return err;
>   }
>   
> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
> +/*
> + * A page is mapped and instructions that fit the page are patched.
> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
> + */
> +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool repeat_instr)
>   {
>   	int err;
>   	u32 *patch_addr;
> @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>   	if (radix_enabled())
>   		asm volatile("ptesync": : :"memory");
>   
> -	err = __patch_instruction(addr, instr, patch_addr);
> +	/* Single instruction case. */
> +	if (len == 0)
> +		err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
> +	else
> +		err = __patch_instructions(patch_addr, code, len, repeat_instr);
>   
>   	pte_clear(&init_mm, text_poke_addr, pte);
>   	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
> @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>   
>   	local_irq_save(flags);
>   	if (mm_patch_enabled())
> -		err = __do_patch_instruction_mm(addr, instr);
> +		err = __do_patch_instructions_mm(addr, &instr, 0, false);
>   	else
> -		err = __do_patch_instruction(addr, instr);
> +		err = __do_patch_instructions(addr, &instr, 0, false);
>   	local_irq_restore(flags);
>   
>   	return err;
>   }
>   NOKPROBE_SYMBOL(patch_instruction);
>   
> +/*
> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
> + *
> + * If repeat_instr is true, the same instruction is filled for
> + * 'len' bytes.
> + */
> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)
> +{
> +	unsigned long flags;
> +	size_t plen;
> +	int err;
> +
> +	while (len > 0) {
> +		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
> +
> +		local_irq_save(flags);
> +		if (mm_patch_enabled())
> +			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
> +		else
> +			err = __do_patch_instructions(addr, code, plen, repeat_instr);
> +		local_irq_restore(flags);
> +		if (err)
> +			break;
> +
> +		len -= plen;
> +		addr = addr + plen;
> +		if (!repeat_instr)
> +			code = code + plen;
> +	}
> +
> +	return err;
> +}
> +NOKPROBE_SYMBOL(patch_instructions);
> +
>   int patch_branch(u32 *addr, unsigned long target, int flags)
>   {
>   	ppc_inst_t instr;


More information about the Linuxppc-dev mailing list