[PATCH v2 0/3] Add generic data patching functions

Tue Oct 17 17:56:55 AEDT 2023

On 17/10/23 5:39 pm, Christophe Leroy wrote:
> Le 16/10/2023 à 07:01, Benjamin Gray a écrit :
>> Currently patch_instruction() bases the write length on the value being
>> written. If the value looks like a prefixed instruction it writes 8 bytes,
>> otherwise it writes 4 bytes. This makes it potentially buggy to use for
>> writing arbitrary data, as if you want to write 4 bytes but it decides to
>> write 8 bytes it may clobber the following memory or be unaligned and
>> trigger an oops if it tries to cross a page boundary.
>>
>> To solve this, this series pulls out the size parameter to the 'top' of
>> the text patching logic, and propagates it through the various functions.
>>
>> The two sizes supported are int and long; this allows for patching
>> instructions and pointers on both ppc32 and ppc64. On ppc32 these are the
>> same size, so care is taken to only use the size parameter on static
>> functions, so the compiler can optimise it out entirely. Unfortunately
>> GCC trips over its own feet here and won't optimise in a way that is
>> optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX
>> (pmac32_defconfig).
>>
>> In the first case, patch_memory() is very large and can only be inlined
>> if a single function calls it. While the source only calls it in
>> patch_instruction(), an earlier optimisation pass inlined
>> patch_instruction() into patch_branch(), so now there are 'two' references
>> to patch_memory() and it cannot be inlined into patch_instruction().
>> Instead patch_instruction() becomes a single branch directly to
>> patch_memory().
>>
>> We can fix this by marking patch_instruction() as noinline, but this
>> prevents patch_memory() from being directly inlined into patch_branch()
>> when RWX is disabled and patch_memory() is very small.
>>
>> It may be possible to avoid this by merging together patch_instruction()
>> and patch_memory() on ppc32, but the only way I can think to do this
>> without duplicating the implementation involves using the preprocessor
>> to change if is_dword is a parameter or a local variable, which is gross.
> 
> What about:
> 
> diff --git a/arch/powerpc/include/asm/code-patching.h
> b/arch/powerpc/include/asm/code-patching.h
> index 7c6056bb1706..af89ef450c93 100644
> --- a/arch/powerpc/include/asm/code-patching.h
> +++ b/arch/powerpc/include/asm/code-patching.h
> @@ -72,7 +72,7 @@ static inline int create_branch(ppc_inst_t *instr,
> const u32 *addr,
>    int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
>    		       unsigned long target, int flags);
>    int patch_branch(u32 *addr, unsigned long target, int flags);
> -int patch_instruction(u32 *addr, ppc_inst_t instr);
> +int patch_memory(void *addr, unsigned long val, bool is_dword);
>    int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> 
>    /*
> @@ -87,24 +87,28 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
> 
>    #ifdef CONFIG_PPC64
> 
> -int patch_uint(void *addr, unsigned int val);
> -int patch_ulong(void *addr, unsigned long val);
> +int patch_instruction(u32 *addr, ppc_inst_t instr);
> 
>    #define patch_u64 patch_ulong
> 
>    #else
> 
> -static inline int patch_uint(u32 *addr, unsigned int val)
> +static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
>    {
> -	return patch_instruction(addr, ppc_inst(val));
> +	return patch_memory(addr, ppc_inst_val(instr), false);
>    }
> 
> +#endif
> +
>    static inline int patch_ulong(void *addr, unsigned long val)
>    {
> -	return patch_instruction(addr, ppc_inst(val));
> +	return patch_memory(addr, val, true);
>    }
> 
> -#endif
> +static inline int patch_uint(void *addr, unsigned int val)
> +{
> +	return patch_memory(addr, val, false);
> +}
> 
>    #define patch_u32 patch_uint
> 
> diff --git a/arch/powerpc/lib/code-patching.c
> b/arch/powerpc/lib/code-patching.c
> index 60289332412f..77418b2a4aa4 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -355,7 +355,7 @@ static int __do_patch_memory(void *addr, unsigned
> long val, bool is_dword)
>    	return err;
>    }
> 
> -static int patch_memory(void *addr, unsigned long val, bool is_dword)
> +int patch_memory(void *addr, unsigned long val, bool is_dword)
>    {
>    	int err;
>    	unsigned long flags;
> @@ -378,6 +378,7 @@ static int patch_memory(void *addr, unsigned long
> val, bool is_dword)
> 
>    	return err;
>    }
> +NOKPROBE_SYMBOL(patch_memory)
> 
>    #ifdef CONFIG_PPC64
> 
> @@ -390,26 +391,6 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
>    }
>    NOKPROBE_SYMBOL(patch_instruction)
> 
> -int patch_uint(void *addr, unsigned int val)
> -{
> -	return patch_memory(addr, val, false);
> -}
> -NOKPROBE_SYMBOL(patch_uint)
> -
> -int patch_ulong(void *addr, unsigned long val)
> -{
> -	return patch_memory(addr, val, true);
> -}
> -NOKPROBE_SYMBOL(patch_ulong)
> -
> -#else
> -
> -int patch_instruction(u32 *addr, ppc_inst_t instr)
> -{
> -	return patch_memory(addr, ppc_inst_val(instr), false);
> -}
> -NOKPROBE_SYMBOL(patch_instruction)
> -
>    #endif
> 
>    int patch_branch(u32 *addr, unsigned long target, int flags)
> 

Wouldn't every caller need to initialise the is_dword parameter in that 
case? It can't tell it's unused across a translation unit boundary 
without LTO.