[PATCH 5/5] Powerpc/Watchpoint: Fix length calculation for unaligned target

Christophe Leroy christophe.leroy at c-s.fr
Tue Jun 18 16:46:14 AEST 2019



Le 18/06/2019 à 06:27, Ravi Bangoria a écrit :
> Watchpoint match range is always doubleword(8 bytes) aligned on
> powerpc. If the given range is crossing doubleword boundary, we
> need to increase the length such that next doubleword also get
> covered. Ex,
> 
>            address   len = 6 bytes
>                  |=========.
>     |------------v--|------v--------|
>     | | | | | | | | | | | | | | | | |
>     |---------------|---------------|
>      <---8 bytes--->
> 
> In such case, current code configures hw as:
>    start_addr = address & ~HW_BREAKPOINT_ALIGN
>    len = 8 bytes
> 
> And thus read/write in last 4 bytes of the given range is ignored.
> Fix this by including next doubleword in the length. Watchpoint
> exception handler already ignores extraneous exceptions, so no
> changes required for that.
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria at linux.ibm.com>
> ---
>   arch/powerpc/include/asm/hw_breakpoint.h |  7 ++--
>   arch/powerpc/kernel/hw_breakpoint.c      | 44 +++++++++++++-----------
>   arch/powerpc/kernel/process.c            | 34 ++++++++++++++++--
>   3 files changed, 60 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
> index 8acbbdd4a2d5..749a357164d5 100644
> --- a/arch/powerpc/include/asm/hw_breakpoint.h
> +++ b/arch/powerpc/include/asm/hw_breakpoint.h
> @@ -34,6 +34,8 @@ struct arch_hw_breakpoint {
>   #define HW_BRK_TYPE_PRIV_ALL	(HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
>   				 HW_BRK_TYPE_HYP)
>   
> +#define HW_BREAKPOINT_ALIGN 0x7
> +
>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>   #include <linux/kdebug.h>
>   #include <asm/reg.h>
> @@ -45,8 +47,6 @@ struct pmu;
>   struct perf_sample_data;
>   struct task_struct;
>   
> -#define HW_BREAKPOINT_ALIGN 0x7
> -
>   extern int hw_breakpoint_slots(int type);
>   extern int arch_bp_generic_fields(int type, int *gen_bp_type);
>   extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
> @@ -76,7 +76,8 @@ static inline void hw_breakpoint_disable(void)
>   }
>   extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
>   int hw_breakpoint_handler(struct die_args *args);
> -
> +extern u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +		unsigned long *start_addr, unsigned long *end_addr);
>   extern int set_dawr(struct arch_hw_breakpoint *brk);
>   extern bool dawr_force_enable;
>   static inline bool dawr_enabled(void)
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 36bcf705df65..c122fd55aa44 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -126,6 +126,28 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
>   	return 0;
>   }
>   
> +/* Maximum len for DABR is 8 bytes and DAWR is 512 bytes */
> +static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
> +{
> +	u16 length_max = 8;
> +	u16 final_len;

You should be more consistent in naming. If one is called final_len, the 
other one should be called max_len.

> +	unsigned long start_addr, end_addr;
> +
> +	final_len = hw_breakpoint_get_final_len(hw, &start_addr, &end_addr);
> +
> +	if (dawr_enabled()) {
> +		length_max = 512;
> +		/* DAWR region can't cross 512 bytes boundary */
> +		if ((start_addr >> 9) != (end_addr >> 9))
> +			return -EINVAL;
> +	}
> +
> +	if (final_len > length_max)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +

Is many places, we have those numeric 512 and 9 shift. Could we replace 
them by some symbol, for instance DAWR_SIZE and DAWR_SHIFT ?

>   /*
>    * Validate the arch-specific HW Breakpoint register settings
>    */
> @@ -133,12 +155,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>   			     const struct perf_event_attr *attr,
>   			     struct arch_hw_breakpoint *hw)
>   {
> -	int length_max;
> -
>   	if (!ppc_breakpoint_available())
>   		return -ENODEV;
>   
> -	if (!bp)
> +	if (!bp || !attr->bp_len)
>   		return -EINVAL;
>   
>   	hw->type = HW_BRK_TYPE_TRANSLATE;
> @@ -160,23 +180,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>   	hw->address = attr->bp_addr;
>   	hw->len = attr->bp_len;
>   
> -	length_max = 8; /* DABR */
> -	if (dawr_enabled()) {
> -		length_max = 512 ; /* 64 doublewords */
> -		/* DAWR region can't cross 512 bytes boundary */
> -		if ((hw->address >> 9) != ((hw->address + hw->len - 1) >> 9))
> -			return -EINVAL;
> -	}
> -
> -	/*
> -	 * Since breakpoint length can be a maximum of length_max and
> -	 * breakpoint addresses are aligned to nearest double-word
> -	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address,
> -	 * the 'symbolsize' should satisfy the check below.
> -	 */
> -	if (hw->len > (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
> -		return -EINVAL;
> -	return 0;
> +	return hw_breakpoint_validate_len(hw);
>   }
>   
>   /*
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 265fac9fb3a4..159aaa70de46 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -802,9 +802,39 @@ static int disable_dawr(void)
>   	return 0;
>   }
>   
> +/*
> + * Watchpoint match range is always doubleword(8 bytes) aligned on
> + * powerpc. If the given range is crossing doubleword boundary, we
> + * need to increase the length such that next doubleword also get
> + * covered. Ex,
> + *
> + *          address   len = 6 bytes
> + *                |=========.
> + *   |------------v--|------v--------|
> + *   | | | | | | | | | | | | | | | | |
> + *   |---------------|---------------|
> + *    <---8 bytes--->
> + *
> + * In this case, we should configure hw as:
> + *   start_addr = address & ~HW_BREAKPOINT_ALIGN
> + *   len = 16 bytes
> + *
> + * @start_addr and @end_addr are inclusive.
> + */
> +u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
> +				unsigned long *start_addr,
> +				unsigned long *end_addr)
> +{
> +	*start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
> +	*end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
> +	return *end_addr - *start_addr + 1;
> +}

This function gives horrible code (a couple of unneeded store/re-read 
and read/re-read).

000006bc <hw_breakpoint_get_final_len>:
      6bc:	81 23 00 00 	lwz     r9,0(r3)
      6c0:	55 29 00 38 	rlwinm  r9,r9,0,0,28
      6c4:	91 24 00 00 	stw     r9,0(r4)
      6c8:	81 43 00 00 	lwz     r10,0(r3)
      6cc:	a1 23 00 06 	lhz     r9,6(r3)
      6d0:	38 6a ff ff 	addi    r3,r10,-1
      6d4:	7c 63 4a 14 	add     r3,r3,r9
      6d8:	60 63 00 07 	ori     r3,r3,7
      6dc:	90 65 00 00 	stw     r3,0(r5)
      6e0:	38 63 00 01 	addi    r3,r3,1
      6e4:	81 24 00 00 	lwz     r9,0(r4)
      6e8:	7c 69 18 50 	subf    r3,r9,r3
      6ec:	54 63 04 3e 	clrlwi  r3,r3,16
      6f0:	4e 80 00 20 	blr

Below code gives something better:

u16 hw_breakpoint_get_final_len(struct arch_hw_breakpoint *brk,
				unsigned long *start_addr,
				unsigned long *end_addr)
{
	unsigned long address = brk->address;
	unsigned long len = brk->len;
	unsigned long start = address & ~HW_BREAKPOINT_ALIGN;
	unsigned long end = (address + len - 1) | HW_BREAKPOINT_ALIGN;

	*start_addr = start;
	*end_addr = end;
	return end - start + 1;
}

000006bc <hw_breakpoint_get_final_len>:
      6bc:	81 43 00 00 	lwz     r10,0(r3)
      6c0:	a1 03 00 06 	lhz     r8,6(r3)
      6c4:	39 2a ff ff 	addi    r9,r10,-1
      6c8:	7d 28 4a 14 	add     r9,r8,r9
      6cc:	55 4a 00 38 	rlwinm  r10,r10,0,0,28
      6d0:	61 29 00 07 	ori     r9,r9,7
      6d4:	91 44 00 00 	stw     r10,0(r4)
      6d8:	20 6a 00 01 	subfic  r3,r10,1
      6dc:	91 25 00 00 	stw     r9,0(r5)
      6e0:	7c 63 4a 14 	add     r3,r3,r9
      6e4:	54 63 04 3e 	clrlwi  r3,r3,16
      6e8:	4e 80 00 20 	blr


And regardless, that's a pitty to have this function using pointers 
which are from local variables in the callers, as we loose the benefit 
of registers. Couldn't this function go in the .h as a static inline ? 
I'm sure the result would be worth it.

Christophe

> +
>   int set_dawr(struct arch_hw_breakpoint *brk)
>   {
>   	unsigned long dawr, dawrx, mrd;
> +	unsigned long start_addr, end_addr;
> +	u16 final_len;
>   
>   	if (brk->type == HW_BRK_TYPE_DISABLE)
>   		return disable_dawr();
> @@ -815,8 +845,8 @@ int set_dawr(struct arch_hw_breakpoint *brk)
>   	dawrx |= ((brk->type & HW_BRK_TYPE_TRANSLATE) >> 2) << (63 - 59);
>   	dawrx |= (brk->type & HW_BRK_TYPE_PRIV_ALL) >> 3;
>   
> -	/* brk->len is in bytes. */
> -	mrd = ((brk->len + 7) >> 3) - 1;
> +	final_len = hw_breakpoint_get_final_len(brk, &start_addr, &end_addr);
> +	mrd = ((final_len + 7) >> 3) - 1;
>   	dawrx |= (mrd & 0x3f) << (63 - 53);
>   
>   	if (ppc_md.set_dawr)
> 


More information about the Linuxppc-dev mailing list