[PATCH v5 1/5] powerpc/code-patching: introduce patch_instructions()

Hari Bathini hbathini at linux.ibm.com
Fri Oct 13 07:11:12 AEDT 2023


Thanks for the review, Christophe.

On 10/10/23 11:16 pm, Christophe Leroy wrote:
> 
> 
> Le 28/09/2023 à 21:48, Hari Bathini a écrit :
>> patch_instruction() entails setting up pte, patching the instruction,
>> clearing the pte and flushing the tlb. If multiple instructions need
>> to be patched, every instruction would have to go through the above
>> drill unnecessarily. Instead, introduce function patch_instructions()
>> that sets up the pte, clears the pte and flushes the tlb only once per
>> page range of instructions to be patched. This adds a slight overhead
>> to patch_instruction() call while improving the patching time for
>> scenarios where more than one instruction needs to be patched.
> 
> Not a "slight" but a "significant" overhead on PPC32.
> 
> Thinking about it once more I don't think it is a good idea to try and
> merge that into the existing code_patching logic which is really single
> instruction performance oriented.
> 
> Anyway, comments below.
> 
>>
>> Signed-off-by: Hari Bathini <hbathini at linux.ibm.com>
>> ---
>>    arch/powerpc/include/asm/code-patching.h |  1 +
>>    arch/powerpc/lib/code-patching.c         | 93 +++++++++++++++++++++---
>>    2 files changed, 85 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
>> index 3f881548fb61..43a4aedfa703 100644
>> --- a/arch/powerpc/include/asm/code-patching.h
>> +++ b/arch/powerpc/include/asm/code-patching.h
>> @@ -74,6 +74,7 @@ int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>>    int patch_branch(u32 *addr, unsigned long target, int flags);
>>    int patch_instruction(u32 *addr, ppc_inst_t instr);
>>    int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
>> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr);
> 
> I don't like void *, you can do to much nasty things with that.
> I think you want u32 *
> 
>>    
>>    static inline unsigned long patch_site_addr(s32 *site)
>>    {
>> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
>> index b00112d7ad46..4ff002bc41f6 100644
>> --- a/arch/powerpc/lib/code-patching.c
>> +++ b/arch/powerpc/lib/code-patching.c
>> @@ -278,7 +278,36 @@ static void unmap_patch_area(unsigned long addr)
>>    	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
>>    }
>>    
>> -static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>> +static int __patch_instructions(u32 *patch_addr, void *code, size_t len, bool repeat_instr)
>> +{
>> +	unsigned long start = (unsigned long)patch_addr;
>> +
>> +	/* Repeat instruction */
>> +	if (repeat_instr) {
>> +		ppc_inst_t instr = ppc_inst_read(code);
>> +
>> +		if (ppc_inst_prefixed(instr)) {
>> +			u64 val = ppc_inst_as_ulong(instr);
>> +
>> +			memset64((uint64_t *)patch_addr, val, len / 8);
> 
> Use u64 instead of uint64_t.
> 
>> +		} else {
>> +			u32 val = ppc_inst_val(instr);
>> +
>> +			memset32(patch_addr, val, len / 4);
>> +		}
>> +	} else
>> +		memcpy(patch_addr, code, len);
> 
> Missing braces, see
> https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces
> 
>> +
>> +	smp_wmb();	/* smp write barrier */
>> +	flush_icache_range(start, start + len);
>> +	return 0;
>> +}
>> +
>> +/*
>> + * A page is mapped and instructions that fit the page are patched.
>> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
>> + */
>> +static int __do_patch_instructions_mm(u32 *addr, void *code, size_t len, bool repeat_instr)
>>    {
>>    	int err;
>>    	u32 *patch_addr;
>> @@ -307,11 +336,15 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>>    
>>    	orig_mm = start_using_temp_mm(patching_mm);
>>    
>> -	err = __patch_instruction(addr, instr, patch_addr);
>> +	/* Single instruction case. */
>> +	if (len == 0) {
>> +		err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
> 
> Take care, you can't convert u32 * to ppc_inst_t that way, you have to
> use ppc_inst_read() otherwise you'll get odd result with prefixed
> instructions depending on endianness.
> 
>>    
>> -	/* hwsync performed by __patch_instruction (sync) if successful */
>> -	if (err)
>> -		mb();  /* sync */
>> +		/* hwsync performed by __patch_instruction (sync) if successful */
>> +		if (err)
>> +			mb();  /* sync */
> 
> Get this away, see my patch at
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/e88b154eaf2efd9ff177d472d3411dcdec8ff4f5.1696675567.git.christophe.leroy@csgroup.eu/
> 
>> +	} else
>> +		err = __patch_instructions(patch_addr, code, len, repeat_instr);
>>    
>>    	/* context synchronisation performed by __patch_instruction (isync or exception) */
>>    	stop_using_temp_mm(patching_mm, orig_mm);
>> @@ -328,7 +361,11 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
>>    	return err;
>>    }
>>    
>> -static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>> +/*
>> + * A page is mapped and instructions that fit the page are patched.
>> + * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
>> + */
>> +static int __do_patch_instructions(u32 *addr, void *code, size_t len, bool repeat_instr)
>>    {
>>    	int err;
>>    	u32 *patch_addr;
>> @@ -345,7 +382,11 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
>>    	if (radix_enabled())
>>    		asm volatile("ptesync": : :"memory");
>>    
>> -	err = __patch_instruction(addr, instr, patch_addr);
>> +	/* Single instruction case. */
>> +	if (len == 0)
>> +		err = __patch_instruction(addr, *(ppc_inst_t *)code, patch_addr);
> 
> Same, use ppc_inst_read() instead of this nasty casting.
> 
>> +	else
>> +		err = __patch_instructions(patch_addr, code, len, repeat_instr);
>>    
>>    	pte_clear(&init_mm, text_poke_addr, pte);
>>    	flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
>> @@ -369,15 +410,49 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>>    
>>    	local_irq_save(flags);
>>    	if (mm_patch_enabled())
>> -		err = __do_patch_instruction_mm(addr, instr);
>> +		err = __do_patch_instructions_mm(addr, &instr, 0, false);
>>    	else
>> -		err = __do_patch_instruction(addr, instr);
>> +		err = __do_patch_instructions(addr, &instr, 0, false);
>>    	local_irq_restore(flags);
>>    
>>    	return err;
>>    }
>>    NOKPROBE_SYMBOL(patch_instruction);
>>    
>> +/*
>> + * Patch 'addr' with 'len' bytes of instructions from 'code'.
>> + *
>> + * If repeat_instr is true, the same instruction is filled for
>> + * 'len' bytes.
>> + */
>> +int patch_instructions(void *addr, void *code, size_t len, bool repeat_instr)
> 
> I'd like to see code as a u32 *
> 
>> +{
>> +	unsigned long flags;
>> +	size_t plen;
>> +	int err;
> 
> Move those three variables inside the only block in which they are used.
> 
>> +
>> +	while (len > 0) {
>> +		plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
>> +
>> +		local_irq_save(flags);
>> +		if (mm_patch_enabled())
>> +			err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
>> +		else
>> +			err = __do_patch_instructions(addr, code, plen, repeat_instr);
>> +		local_irq_restore(flags);
>> +		if (err)
>> +			break;
> 
> replace by 'return err'
> 
>> +
>> +		len -= plen;
>> +		addr = addr + plen;
>> +		if (!repeat_instr)
>> +			code = code + plen;
>> +	}
>> +
>> +	return err;
> 
> If len is 0 err will be undefined. Is that expected ?
> 
> Replace by return 0;

Posted v6 
(https://lore.kernel.org/linuxppc-dev/20231012200310.235137-1-hbathini@linux.ibm.com/)
with code path for patch_instruction() & patch_instriuctions() unmerged
to avoid performance hit reported on ppc32. Also, addressed other review
comments.

Thanks
Hari


More information about the Linuxppc-dev mailing list