[PATCH v3 2/4] powerpc/sstep: support emulation for vsx vector paired storage access instructions
Balamuruhan S
bala24 at linux.ibm.com
Fri Jul 31 18:16:35 AEST 2020
add emulate_step() changes to support vsx vector paired storage
access instructions that provides octword operands loads/stores
between storage and set of 64 Vector Scalar Registers (VSRs).
Suggested-by: Ravi Bangoria <ravi.bangoria at linux.ibm.com>
Suggested-by: Naveen N. Rao <naveen.n.rao at linux.vnet.ibm.com>
Signed-off-by: Balamuruhan S <bala24 at linux.ibm.com>
---
arch/powerpc/lib/sstep.c | 77 +++++++++++++++++++++++++++++++---------
1 file changed, 60 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 22147257d74d..01e1a3adc406 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -280,6 +280,19 @@ static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
up[1] = tmp;
break;
}
+ case 32: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[3]);
+ up[3] = tmp;
+ tmp = byterev_8(up[2]);
+ up[2] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+
#endif
default:
WARN_ON_ONCE(1);
@@ -710,6 +723,8 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
reg->d[0] = reg->d[1] = 0;
switch (op->element_size) {
+ case 32:
+ /* [p]lxvp[x] */
case 16:
/* whole vector; lxv[x] or lxvl[l] */
if (size == 0)
@@ -718,7 +733,7 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
rev = !rev;
if (rev)
- do_byte_reverse(reg, 16);
+ do_byte_reverse(reg, size);
break;
case 8:
/* scalar loads, lxvd2x, lxvdsx */
@@ -794,6 +809,20 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
size = GETSIZE(op->type);
switch (op->element_size) {
+ case 32:
+ /* [p]stxvp[x] */
+ if (size == 0)
+ break;
+ if (rev) {
+ /* reverse 32 bytes */
+ buf.d[0] = byterev_8(reg->d[3]);
+ buf.d[1] = byterev_8(reg->d[2]);
+ buf.d[2] = byterev_8(reg->d[1]);
+ buf.d[3] = byterev_8(reg->d[0]);
+ reg = &buf;
+ }
+ memcpy(mem, reg, size);
+ break;
case 16:
/* stxv, stxvx, stxvl, stxvll */
if (size == 0)
@@ -862,28 +891,35 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
bool cross_endian)
{
int reg = op->reg;
- u8 mem[16];
- union vsx_reg buf;
+ int i, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
int size = GETSIZE(op->type);
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
- emulate_vsx_load(op, &buf, mem, cross_endian);
+ nr_vsx_regs = size / sizeof(__vector128);
+ emulate_vsx_load(op, buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
- load_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++)
+ load_vsrn(reg + i, &buf[i].v);
} else {
- current->thread.fp_state.fpr[reg][0] = buf.d[0];
- current->thread.fp_state.fpr[reg][1] = buf.d[1];
+ for (i = 0; i < nr_vsx_regs; i++) {
+ current->thread.fp_state.fpr[reg + i][0] = buf[i].d[0];
+ current->thread.fp_state.fpr[reg + i][1] = buf[i].d[1];
+ }
}
} else {
if (regs->msr & MSR_VEC)
- load_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++)
+ load_vsrn(reg + i, &buf[i].v);
else
- current->thread.vr_state.vr[reg - 32] = buf.v;
+ for (i = 0; i < nr_vsx_regs; i++)
+ current->thread.vr_state.vr[reg - 32 + i] = buf[i].v;
}
preempt_enable();
return 0;
@@ -894,30 +930,37 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
bool cross_endian)
{
int reg = op->reg;
- u8 mem[16];
- union vsx_reg buf;
+ int i, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
int size = GETSIZE(op->type);
if (!address_ok(regs, ea, size))
return -EFAULT;
+ nr_vsx_regs = size / sizeof(__vector128);
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
if (regs->msr & MSR_FP) {
- store_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++)
+ store_vsrn(reg + i, &buf[i].v);
} else {
- buf.d[0] = current->thread.fp_state.fpr[reg][0];
- buf.d[1] = current->thread.fp_state.fpr[reg][1];
+ for (i = 0; i < nr_vsx_regs; i++) {
+ buf[i].d[0] = current->thread.fp_state.fpr[reg + i][0];
+ buf[i].d[1] = current->thread.fp_state.fpr[reg + i][1];
+ }
}
} else {
if (regs->msr & MSR_VEC)
- store_vsrn(reg, &buf);
+ for (i = 0; i < nr_vsx_regs; i++)
+ store_vsrn(reg + i, &buf[i].v);
else
- buf.v = current->thread.vr_state.vr[reg - 32];
+ for (i = 0; i < nr_vsx_regs; i++)
+ buf[i].v = current->thread.vr_state.vr[reg - 32 + i];
}
preempt_enable();
- emulate_vsx_store(op, &buf, mem, cross_endian);
+ emulate_vsx_store(op, buf, mem, cross_endian);
return copy_mem_out(mem, ea, size, regs);
}
#endif /* CONFIG_VSX */
--
2.24.1
More information about the Linuxppc-dev
mailing list