[PATCH v8 23/30] powerpc: Add prefixed instructions to instruction data type
Christophe Leroy
christophe.leroy at csgroup.eu
Thu May 14 16:11:43 AEST 2020
Le 06/05/2020 à 05:40, Jordan Niethe a écrit :
> For powerpc64, redefine the ppc_inst type so both word and prefixed
> instructions can be represented. On powerpc32 the type will remain the
> same. Update places which had assumed instructions to be 4 bytes long.
>
> Reviewed-by: Alistair Popple <alistair at popple.id.au>
> Signed-off-by: Jordan Niethe <jniethe5 at gmail.com>
> ---
> v4: New to series
> v5: - Distinguish normal instructions from prefixed instructions with a
> 0xff marker for the suffix.
> - __patch_instruction() using std for prefixed instructions
> v6: - Return false instead of 0 in ppc_inst_prefixed()
> - Fix up types for ppc32 so it compiles
> - remove ppc_inst_write()
> - __patching_instruction(): move flush out of condition
> v8: - style
> - Define and use OP_PREFIX instead of '1' (back from v3)
> - __patch_instruction() fix for big endian
> ---
> arch/powerpc/include/asm/inst.h | 69 ++++++++++++++++++++++++---
> arch/powerpc/include/asm/kprobes.h | 2 +-
> arch/powerpc/include/asm/ppc-opcode.h | 3 ++
> arch/powerpc/include/asm/uaccess.h | 40 +++++++++++++++-
> arch/powerpc/include/asm/uprobes.h | 2 +-
> arch/powerpc/kernel/crash_dump.c | 2 +-
> arch/powerpc/kernel/optprobes.c | 42 ++++++++--------
> arch/powerpc/kernel/optprobes_head.S | 3 ++
> arch/powerpc/lib/code-patching.c | 19 ++++++--
> arch/powerpc/lib/feature-fixups.c | 5 +-
> arch/powerpc/lib/inst.c | 41 ++++++++++++++++
> arch/powerpc/lib/sstep.c | 4 +-
> arch/powerpc/xmon/xmon.c | 4 +-
> arch/powerpc/xmon/xmon_bpts.S | 2 +
> 14 files changed, 200 insertions(+), 38 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
> index 2f3c9d5bcf7c..7868b80b610e 100644
> --- a/arch/powerpc/include/asm/inst.h
> +++ b/arch/powerpc/include/asm/inst.h
> @@ -2,29 +2,79 @@
> #ifndef _ASM_INST_H
> #define _ASM_INST_H
>
> +#include <asm/ppc-opcode.h>
> /*
> * Instruction data type for POWER
> */
>
> struct ppc_inst {
> u32 val;
> +#ifdef __powerpc64__
CONFIG_PPC64 should be used instead. This is also reported by checkpatch.
> + u32 suffix;
> +#endif /* __powerpc64__ */
> } __packed;
>
> -#define ppc_inst(x) ((struct ppc_inst){ .val = x })
> -
> static inline u32 ppc_inst_val(struct ppc_inst x)
> {
> return x.val;
> }
>
> -static inline int ppc_inst_len(struct ppc_inst x)
> +static inline int ppc_inst_primary_opcode(struct ppc_inst x)
> {
> - return sizeof(struct ppc_inst);
> + return ppc_inst_val(x) >> 26;
What about using get_op() from asm/disassemble.h instead of hardcoding?
> }
>
> -static inline int ppc_inst_primary_opcode(struct ppc_inst x)
> +#ifdef __powerpc64__
Use CONFIG_PPC64
> +#define ppc_inst(x) ((struct ppc_inst){ .val = (x), .suffix = 0xff })
> +
> +#define ppc_inst_prefix(x, y) ((struct ppc_inst){ .val = (x), .suffix = (y) })
> +
> +static inline u32 ppc_inst_suffix(struct ppc_inst x)
> {
> - return ppc_inst_val(x) >> 26;
> + return x.suffix;
> +}
> +
> +static inline bool ppc_inst_prefixed(struct ppc_inst x)
> +{
> + return (ppc_inst_primary_opcode(x) == 1) && ppc_inst_suffix(x) != 0xff;
> +}
> +
> +static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
> +{
> + return ppc_inst_prefix(swab32(ppc_inst_val(x)),
> + swab32(ppc_inst_suffix(x)));
> +}
> +
> +static inline struct ppc_inst ppc_inst_read(const struct ppc_inst *ptr)
> +{
> + u32 val, suffix;
> +
> + val = *(u32 *)ptr;
> + if ((val >> 26) == 1) {
Don't hardcode, use ppc_inst_primary_opcode() and compare it to OP_PREFIX
Or use get_op() from asm/disassemble.h
> + suffix = *((u32 *)ptr + 1);
> + return ppc_inst_prefix(val, suffix);
> + } else {
> + return ppc_inst(val);
> + }
> +}
> +
> +static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
> +{
> + return *(u64 *)&x == *(u64 *)&y;
> +}
> +
> +#else
> +
> +#define ppc_inst(x) ((struct ppc_inst){ .val = x })
> +
> +static inline bool ppc_inst_prefixed(struct ppc_inst x)
> +{
> + return false;
> +}
> +
> +static inline u32 ppc_inst_suffix(struct ppc_inst x)
> +{
> + return 0;
> }
>
> static inline struct ppc_inst ppc_inst_swab(struct ppc_inst x)
> @@ -42,6 +92,13 @@ static inline bool ppc_inst_equal(struct ppc_inst x, struct ppc_inst y)
> return ppc_inst_val(x) == ppc_inst_val(y);
> }
>
> +#endif /* __powerpc64__ */
> +
> +static inline int ppc_inst_len(struct ppc_inst x)
> +{
> + return (ppc_inst_prefixed(x)) ? 8 : 4;
> +}
> +
> int probe_user_read_inst(struct ppc_inst *inst,
> struct ppc_inst *nip);
> int probe_kernel_read_inst(struct ppc_inst *inst,
> diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
> index 66b3f2983b22..4fc0e15e23a5 100644
> --- a/arch/powerpc/include/asm/kprobes.h
> +++ b/arch/powerpc/include/asm/kprobes.h
> @@ -43,7 +43,7 @@ extern kprobe_opcode_t optprobe_template_ret[];
> extern kprobe_opcode_t optprobe_template_end[];
>
> /* Fixed instruction size for powerpc */
> -#define MAX_INSN_SIZE 1
> +#define MAX_INSN_SIZE 2
> #define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */
> #define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry)
> #define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */
> diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
> index c1df75edde44..2a39c716c343 100644
> --- a/arch/powerpc/include/asm/ppc-opcode.h
> +++ b/arch/powerpc/include/asm/ppc-opcode.h
> @@ -158,6 +158,9 @@
> /* VMX Vector Store Instructions */
> #define OP_31_XOP_STVX 231
>
> +/* Prefixed Instructions */
> +#define OP_PREFIX 1
> +
> #define OP_31 31
> #define OP_LWZ 32
> #define OP_STFS 52
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index c0a35e4586a5..217897927926 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -105,11 +105,49 @@ static inline int __access_ok(unsigned long addr, unsigned long size,
> #define __put_user_inatomic(x, ptr) \
> __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
>
> +#ifdef __powerpc64__
Replace by CONFIG_PPC64
> +#define __get_user_instr(x, ptr) \
> +({ \
> + long __gui_ret = 0; \
> + unsigned long __gui_ptr = (unsigned long)ptr; \
> + struct ppc_inst __gui_inst; \
> + unsigned int prefix, suffix; \
> + __gui_ret = __get_user(prefix, (unsigned int __user *)__gui_ptr); \
__get_user() can be costly, especially with KUAP. I think you should
perform a 64-bit read and fall back to a 32-bit read only if the 64-bit
read failed.
> + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \
What about using get_op() from asm/disassemble.h instead of hardcoding?
> + __gui_ret = __get_user(suffix, \
> + (unsigned int __user *)__gui_ptr + 1); \
> + __gui_inst = ppc_inst_prefix(prefix, suffix); \
> + } else { \
> + __gui_inst = ppc_inst(prefix); \
> + } \
> + (x) = __gui_inst; \
> + __gui_ret; \
> +})
> +
> +#define __get_user_instr_inatomic(x, ptr) \
> +({ \
> + long __gui_ret = 0; \
> + unsigned long __gui_ptr = (unsigned long)ptr; \
> + struct ppc_inst __gui_inst; \
> + unsigned int prefix, suffix; \
> + __gui_ret = __get_user_inatomic(prefix, (unsigned int __user *)__gui_ptr); \
Same
> + if (!__gui_ret && (prefix >> 26) == OP_PREFIX) { \
> + __gui_ret = __get_user_inatomic(suffix, \
> + (unsigned int __user *)__gui_ptr + 1); \
> + __gui_inst = ppc_inst_prefix(prefix, suffix); \
> + } else { \
> + __gui_inst = ppc_inst(prefix); \
> + } \
> + (x) = __gui_inst; \
> + __gui_ret; \
> +})
> +#else
> #define __get_user_instr(x, ptr) \
> __get_user_nocheck((x).val, (u32 *)(ptr), sizeof(u32), true)
> -
> #define __get_user_instr_inatomic(x, ptr) \
> __get_user_nosleep((x).val, (u32 *)(ptr), sizeof(u32))
> +#endif
> +
> extern long __put_user_bad(void);
>
> /*
> diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
> index 7e3b329ba2d3..5bf65f5d44a9 100644
> --- a/arch/powerpc/include/asm/uprobes.h
> +++ b/arch/powerpc/include/asm/uprobes.h
> @@ -15,7 +15,7 @@
>
> typedef ppc_opcode_t uprobe_opcode_t;
>
> -#define MAX_UINSN_BYTES 4
> +#define MAX_UINSN_BYTES 8
> #define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES)
>
> /* The following alias is needed for reference from arch-agnostic code */
> diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
> index 72bafb47e757..735e89337398 100644
> --- a/arch/powerpc/kernel/crash_dump.c
> +++ b/arch/powerpc/kernel/crash_dump.c
> @@ -46,7 +46,7 @@ static void __init create_trampoline(unsigned long addr)
> * two instructions it doesn't require any registers.
> */
> patch_instruction(p, ppc_inst(PPC_INST_NOP));
> - patch_branch(++p, addr + PHYSICAL_START, 0);
> + patch_branch((void *)p + 4, addr + PHYSICAL_START, 0);
> }
>
> void __init setup_kdump_trampoline(void)
> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
> index 52c1ab3f85aa..a8e66603d12b 100644
> --- a/arch/powerpc/kernel/optprobes.c
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -162,43 +162,43 @@ void patch_imm32_load_insns(unsigned int val, kprobe_opcode_t *addr)
>
> /*
> * Generate instructions to load provided immediate 64-bit value
> - * to register 'r3' and patch these instructions at 'addr'.
> + * to register 'reg' and patch these instructions at 'addr'.
> */
> -void patch_imm64_load_insns(unsigned long val, kprobe_opcode_t *addr)
> +void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
I think this change should go in a separate patch.
> {
> - /* lis r3,(op)@highest */
> + /* lis reg,(op)@highest */
> patch_instruction((struct ppc_inst *)addr,
> - ppc_inst(PPC_INST_ADDIS | ___PPC_RT(3) |
> + ppc_inst(PPC_INST_ADDIS | ___PPC_RT(reg) |
> ((val >> 48) & 0xffff)));
> addr++;
>
> - /* ori r3,r3,(op)@higher */
> + /* ori reg,reg,(op)@higher */
> patch_instruction((struct ppc_inst *)addr,
> - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) |
> - ___PPC_RS(3) | ((val >> 32) & 0xffff)));
> + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
> + ___PPC_RS(reg) | ((val >> 32) & 0xffff)));
> addr++;
>
> - /* rldicr r3,r3,32,31 */
> + /* rldicr reg,reg,32,31 */
> patch_instruction((struct ppc_inst *)addr,
> - ppc_inst(PPC_INST_RLDICR | ___PPC_RA(3) |
> - ___PPC_RS(3) | __PPC_SH64(32) | __PPC_ME64(31)));
> + ppc_inst(PPC_INST_RLDICR | ___PPC_RA(reg) |
> + ___PPC_RS(reg) | __PPC_SH64(32) | __PPC_ME64(31)));
> addr++;
>
> - /* oris r3,r3,(op)@h */
> + /* oris reg,reg,(op)@h */
> patch_instruction((struct ppc_inst *)addr,
> - ppc_inst(PPC_INST_ORIS | ___PPC_RA(3) |
> - ___PPC_RS(3) | ((val >> 16) & 0xffff)));
> + ppc_inst(PPC_INST_ORIS | ___PPC_RA(reg) |
> + ___PPC_RS(reg) | ((val >> 16) & 0xffff)));
> addr++;
>
> - /* ori r3,r3,(op)@l */
> + /* ori reg,reg,(op)@l */
> patch_instruction((struct ppc_inst *)addr,
> - ppc_inst(PPC_INST_ORI | ___PPC_RA(3) |
> - ___PPC_RS(3) | (val & 0xffff)));
> + ppc_inst(PPC_INST_ORI | ___PPC_RA(reg) |
> + ___PPC_RS(reg) | (val & 0xffff)));
> }
>
> int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> {
> - struct ppc_inst branch_op_callback, branch_emulate_step;
> + struct ppc_inst branch_op_callback, branch_emulate_step, temp;
> kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
> long b_offset;
> unsigned long nip, size;
> @@ -249,7 +249,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> * Fixup the template with instructions to:
> * 1. load the address of the actual probepoint
> */
> - patch_imm64_load_insns((unsigned long)op, buff + TMPL_OP_IDX);
> + patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
>
> /*
> * 2. branch to optimized_callback() and emulate_step()
> @@ -282,7 +282,11 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> /*
> * 3. load instruction to be emulated into relevant register, and
> */
> - patch_imm32_load_insns(*p->ainsn.insn, buff + TMPL_INSN_IDX);
> + temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn);
> + patch_imm64_load_insns(ppc_inst_val(temp) |
> + ((u64)ppc_inst_suffix(temp) << 32),
> + 4,
So now we are also using r4? Is there an explanation somewhere of how this
works? This change seems unrelated to this patch; there is nothing in the
description about it. Can we suddenly use a new register without problems?
> + buff + TMPL_INSN_IDX);
What's the point of splitting this line into 4 lines? Can't it fit in 2
lines?
>
> /*
> * 4. branch back from trampoline
> diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
> index cf383520843f..ff8ba4d3824d 100644
> --- a/arch/powerpc/kernel/optprobes_head.S
> +++ b/arch/powerpc/kernel/optprobes_head.S
> @@ -94,6 +94,9 @@ optprobe_template_insn:
> /* 2, Pass instruction to be emulated in r4 */
> nop
> nop
> + nop
> + nop
> + nop
>
> .global optprobe_template_call_emulate
> optprobe_template_call_emulate:
> diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> index d946f7d6bb32..58b67b62d5d3 100644
> --- a/arch/powerpc/lib/code-patching.c
> +++ b/arch/powerpc/lib/code-patching.c
> @@ -24,13 +24,24 @@ static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr
> {
> int err = 0;
>
> - __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw");
> - if (err)
> - return err;
> + if (!ppc_inst_prefixed(instr)) {
> + __put_user_asm(ppc_inst_val(instr), patch_addr, err, "stw");
> + if (err)
> + return err;
This test should remain outside of the if/else, it doesn't need to be
duplicated.
> + } else {
> +#ifdef CONFIG_CPU_LITTLE_ENDIAN
> + __put_user_asm((u64)ppc_inst_suffix(instr) << 32 |
> + ppc_inst_val(instr), patch_addr, err, "std");
> +#else
> + __put_user_asm((u64)ppc_inst_val(instr) << 32 |
> + ppc_inst_suffix(instr), patch_addr, err, "std");
> +#endif /* CONFIG_CPU_LITTLE_ENDIAN */
> + if (err)
> + return err;
> + }
>
> asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
> "r" (exec_addr));
> -
Why remove the blank line ?
> return 0;
> }
>
> diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
> index 2bd2b752de4f..a8238eff3a31 100644
> --- a/arch/powerpc/lib/feature-fixups.c
> +++ b/arch/powerpc/lib/feature-fixups.c
> @@ -84,12 +84,13 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
> src = alt_start;
> dest = start;
>
> - for (; src < alt_end; src++, dest++) {
> + for (; src < alt_end; src = (void *)src + ppc_inst_len(ppc_inst_read(src)),
> + (dest = (void *)dest + ppc_inst_len(ppc_inst_read(dest)))) {
Can we do this outside the for() for readability ?
> if (patch_alt_instruction(src, dest, alt_start, alt_end))
> return 1;
> }
>
> - for (; dest < end; dest++)
> + for (; dest < end; dest = (void *)dest + ppc_inst_len(ppc_inst(PPC_INST_NOP)))
> raw_patch_instruction(dest, ppc_inst(PPC_INST_NOP));
>
> return 0;
> diff --git a/arch/powerpc/lib/inst.c b/arch/powerpc/lib/inst.c
> index 08dedd927268..eb6f9ee28ac6 100644
> --- a/arch/powerpc/lib/inst.c
> +++ b/arch/powerpc/lib/inst.c
> @@ -3,9 +3,49 @@
> * Copyright 2020, IBM Corporation.
> */
>
> +#include <asm/ppc-opcode.h>
> #include <linux/uaccess.h>
> #include <asm/inst.h>
>
> +#ifdef __powerpc64__
> +int probe_user_read_inst(struct ppc_inst *inst,
> + struct ppc_inst *nip)
> +{
> + unsigned int val, suffix;
> + int err;
> +
> + err = probe_user_read(&val, nip, sizeof(val));
A user read is costly with KUAP. Can we do a 64-bit read and perform a
32-bit read only when the 64-bit read fails?
> + if (err)
> + return err;
> + if ((val >> 26) == OP_PREFIX) {
What about using get_op() from asm/disassemble.h instead of hardcoding?
> + err = probe_user_read(&suffix, (void *)nip + 4,
4 or sizeof(unsigned int)? Why use both in the same line?
> + sizeof(unsigned int));
> + *inst = ppc_inst_prefix(val, suffix);
> + } else {
> + *inst = ppc_inst(val);
> + }
> + return err;
> +}
> +
> +int probe_kernel_read_inst(struct ppc_inst *inst,
> + struct ppc_inst *src)
> +{
> + unsigned int val, suffix;
> + int err;
> +
> + err = probe_kernel_read(&val, src, sizeof(val));
> + if (err)
> + return err;
> + if ((val >> 26) == OP_PREFIX) {
> + err = probe_kernel_read(&suffix, (void *)src + 4,
> + sizeof(unsigned int));
> + *inst = ppc_inst_prefix(val, suffix);
> + } else {
> + *inst = ppc_inst(val);
> + }
> + return err;
> +}
> +#else
> int probe_user_read_inst(struct ppc_inst *inst,
> struct ppc_inst *nip)
> {
> @@ -27,3 +67,4 @@ int probe_kernel_read_inst(struct ppc_inst *inst,
> *inst = ppc_inst(val);
> return err;
> }
> +#endif /* __powerpc64__ */
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index 95a56bb1ba3f..ecd756c346fd 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -1169,10 +1169,12 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
> unsigned long int imm;
> unsigned long int val, val2;
> unsigned int mb, me, sh;
> - unsigned int word;
> + unsigned int word, suffix;
> long ival;
>
> word = ppc_inst_val(instr);
> + suffix = ppc_inst_suffix(instr);
> +
> op->type = COMPUTE;
>
> opcode = ppc_inst_primary_opcode(instr);
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index 4d6980d51456..647b3829c4eb 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -758,8 +758,8 @@ static int xmon_bpt(struct pt_regs *regs)
>
> /* Are we at the trap at bp->instr[1] for some bp? */
> bp = in_breakpoint_table(regs->nip, &offset);
> - if (bp != NULL && offset == 4) {
> - regs->nip = bp->address + 4;
> + if (bp != NULL && (offset == 4 || offset == 8)) {
> + regs->nip = bp->address + offset;
> atomic_dec(&bp->ref_count);
> return 1;
> }
> diff --git a/arch/powerpc/xmon/xmon_bpts.S b/arch/powerpc/xmon/xmon_bpts.S
> index f3ad0ab50854..69726814cd27 100644
> --- a/arch/powerpc/xmon/xmon_bpts.S
> +++ b/arch/powerpc/xmon/xmon_bpts.S
> @@ -4,6 +4,8 @@
> #include <asm/asm-offsets.h>
> #include "xmon_bpts.h"
>
> +/* Prefixed instructions can not cross 64 byte boundaries */
> +.align 6
> .global bpt_table
> bpt_table:
> .space NBPTS * BPT_SIZE
>
Christophe
More information about the Linuxppc-dev
mailing list