[PATCH] powerpc: handle VSX alignment faults correctly in little-endian mode
Neil Campbell
neilc at linux.vnet.ibm.com
Tue Dec 15 01:21:19 EST 2009
Neil Campbell wrote:
> This patch fixes the handling of VSX alignment faults in little-endian
> mode (the current code assumes the processor is in big-endian mode).
>
> The patch also makes the handlers clear the top 8 bytes of the register
> when handling an 8 byte VSX load.
For the interested, here is a test case that demonstrates the problem.
It should compile with something like:
gcc -m64 -Wa,-mregnames -fno-strict-aliasing -mcpu=power7 -mvsx vsx_le.c -o vsx_le
On an unpatched kernel it reports 8 failures for me; the patch fixes all 8 of these.
---
#include <stdio.h>
#include <string.h>
/* Running count of failed comparisons: incremented by do_compare(), printed and returned by main(). */
int fails = 0;
/*
 * LOAD_FUNC(name, inst) - define test_load_<name>(input, output, le).
 *
 * name: mnemonic of the VSX load under test.  It is also pasted into the
 *       function name and the asm labels, so every instantiation needs a
 *       distinct name.
 * inst: string holding the byte-reversed 32-bit encoding of
 *       "<name> vs32, r0, r15"; it is emitted with .long on the
 *       little-endian path.
 *
 * The generated function zeroes output[0..15], presets v0 (vs32) with 0xff
 * dummy bytes, executes the load from input, stores the register to output
 * with stvx and dumps the 16 output bytes to stderr.  When le == 1 the load
 * runs with the process switched to little-endian mode through
 * prctl(PR_SET_ENDIAN, PR_ENDIAN_LITTLE) (syscall 171, option 20, value 1);
 * the byte-reversed words after the load issue the same syscall with value 0
 * to return to big-endian mode.
 *
 * The macro definition ends on the closing brace: the final line carries no
 * line continuation, so the text that follows is not spliced into the macro.
 */
#define LOAD_FUNC(name,inst) \
void test_load_##name(char* input, char* output, int le) \
{ \
    int aligned = (0 == ((long)input & 15)); \
    const char* alignstr = aligned ? "aligned: " : "unaligned: "; \
    const char* modestr = le ? "(le)" : "(be)"; \
    int i; \
    char dummydata[16] __attribute__((__aligned__(16))) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; \
    \
    memset(output, 0, 16); \
    \
    /* The clobber list names every GPR that "sc" leaves volatile (r0, */ \
    /* r3-r12) plus ctr, in addition to the scratch registers r15/r16. */ \
    asm ( \
        "mr r15, %[address1]\n\t" \
        "mr r16, %[address2]\n\t" \
        "lvx v0, r0, %[address3]\n\t" /* set register to dummy values */ \
        "cmpwi %[le],1 \n\t" \
        "beq "#name"leversion \n\t" \
        #name" vs32, r0, r15\n\t" \
        "b " #name"store\n\t" \
        #name"leversion: \n\t" \
        "li r0, 171\n\t" \
        "li r3, 20\n\t" \
        "li r4, 1\n\t" \
        "sc\n\t" \
        ".long " inst "\n\t" \
        ".long 0xab000038\n\t" /*"li 0, 171\n\t"*/ \
        ".long 0x14006038\n\t" /*"li 3, 20\n\t"*/ \
        ".long 0x00008038\n\t" /*"li 4, 0\n\t"*/ \
        ".long 0x02000044\n\t" /*"sc\n\t"*/ \
        #name"store: \n\t" \
        "stvx v0,r0,r16 \n\t" \
        : \
        : [address1] "b" (input), [address2] "b" (output), [address3] "b" (dummydata), [le] "b" (le) \
        : "vs32", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r15", "r16", "cc", "ctr", "memory"); \
    \
    fprintf(stderr, #name" %s after %s ", alignstr, modestr); \
    for (i = 0; i < 16; ++i) \
    { \
        /* Print each byte as an unsigned value regardless of char signedness. */ \
        fprintf(stderr, " %x ", (unsigned char)output[i]); \
    } \
    fprintf(stderr, "\n"); \
}
/*
 * STORE_FUNC(name, inst) - define test_store_<name>(input, output, le).
 *
 * name: mnemonic of the VSX store under test.  It is also pasted into the
 *       function name and the asm labels, so every instantiation needs a
 *       distinct name.
 * inst: string holding the byte-reversed 32-bit encoding of
 *       "<name> vs32, r0, r15"; it is emitted with .long on the
 *       little-endian path.
 *
 * The generated function zeroes output[0..15], loads 16 bytes from input
 * into v0 (vs32) with lvx, executes the store to output and dumps the 16
 * output bytes to stderr.  When le == 1 the store runs with the process
 * switched to little-endian mode through
 * prctl(PR_SET_ENDIAN, PR_ENDIAN_LITTLE) (syscall 171, option 20, value 1);
 * the byte-reversed words after the store issue the same syscall with value 0
 * to return to big-endian mode.
 *
 * The macro definition ends on the closing brace: the final line carries no
 * line continuation, so the text that follows is not spliced into the macro.
 */
#define STORE_FUNC(name,inst) \
void test_store_##name(char* input, char* output, int le) \
{ \
    int aligned = (0 == ((long)output & 15)); \
    const char* alignstr = aligned ? "aligned: " : "unaligned: "; \
    const char* modestr = le ? "(le)" : "(be)"; \
    int i; \
    \
    memset(output, 0, 16); \
    \
    /* The clobber list names every GPR that "sc" leaves volatile (r0, */ \
    /* r3-r12) plus ctr, in addition to the scratch register r15. */ \
    asm ( \
        "mr r15, %[address2]\n\t" \
        "lvx v0, r0, %[address1]\n\t" \
        "cmpwi %[le],1 \n\t" \
        "beq "#name"leversion \n\t" \
        #name" vs32, r0, r15\n\t" \
        "b " #name"end\n\t" \
        #name"leversion: \n\t" \
        "li r0, 171\n\t" \
        "li r3, 20\n\t" \
        "li r4, 1\n\t" \
        "sc\n\t" \
        ".long " inst "\n\t" \
        ".long 0xab000038\n\t" /*"li 0, 171\n\t"*/ \
        ".long 0x14006038\n\t" /*"li 3, 20\n\t"*/ \
        ".long 0x00008038\n\t" /*"li 4, 0\n\t"*/ \
        ".long 0x02000044\n\t" /*"sc\n\t"*/ \
        #name"end: \n\t" \
        : \
        : [address1] "b" (input), [address2] "b" (output), [le] "b" (le) \
        : "vs32", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r15", "cc", "ctr", "memory"); \
    \
    fprintf(stderr, #name" %s after %s ", alignstr, modestr); \
    for (i = 0; i < 16; ++i) \
    { \
        /* Print each byte as an unsigned value regardless of char signedness. */ \
        fprintf(stderr, " %x ", (unsigned char)output[i]); \
    } \
    fprintf(stderr, "\n"); \
}
/*
 * Compare the first 16 bytes of buf1 and buf2.  Prints "PASS" to stderr when
 * they match; otherwise prints "FAIL" and increments the global failure
 * counter.
 */
void do_compare(char* buf1, char* buf2)
{
    int mismatch = (memcmp(buf1, buf2, 16) != 0);

    if (mismatch)
    {
        fprintf(stderr, "FAIL\n");
        ++fails;
        return;
    }

    fprintf(stderr, "PASS\n");
}
/*
 * Instantiate one test function per VSX instruction: test_store_<name> for
 * the stores and test_load_<name> for the loads.  The string argument is the
 * instruction's 32-bit encoding with its bytes reversed, which the macros
 * emit via .long on their little-endian path.
 */
STORE_FUNC(stxvw4x, "0x197f007c")
STORE_FUNC(stxvd2x, "0x997f007c")
STORE_FUNC(stxsdx, "0x997d007c")
LOAD_FUNC(lxvw4x, "0x197e007c")
LOAD_FUNC(lxvd2x, "0x997e007c")
LOAD_FUNC(lxsdx, "0x997c007c")
LOAD_FUNC(lxvdsx, "0x997a007c")
int main(int argc, char* argv[])
{
char inbuf[17] __attribute__((__aligned__(16))) = { -1, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf };
char alignedinbuf[16] __attribute__((__aligned__(16))) = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf };
char outbuf[17] __attribute__((__aligned__(16))) = { -1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
char alignedoutbuf[16] __attribute__((__aligned__(16))) = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
char alignedoutbuf2[16] __attribute__((__aligned__(16))) = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 };
test_store_stxvw4x(alignedinbuf, alignedoutbuf, 0);
test_store_stxvw4x(alignedinbuf, &outbuf[1], 0);
do_compare(alignedoutbuf, &outbuf[1]);
test_store_stxvw4x(alignedinbuf, alignedoutbuf, 1);
test_store_stxvw4x(alignedinbuf, &outbuf[1], 1);
do_compare(alignedoutbuf, &outbuf[1]);
fprintf(stderr, "\n");
test_store_stxvd2x(alignedinbuf, alignedoutbuf, 0);
test_store_stxvd2x(alignedinbuf, &outbuf[1], 0);
do_compare(alignedoutbuf, &outbuf[1]);
test_store_stxvd2x(alignedinbuf, alignedoutbuf, 1);
test_store_stxvd2x(alignedinbuf, &outbuf[1], 1);
do_compare(alignedoutbuf, &outbuf[1]);
fprintf(stderr, "\n");
test_store_stxsdx(alignedinbuf, alignedoutbuf, 0);
test_store_stxsdx(alignedinbuf, &outbuf[1], 0);
do_compare(alignedoutbuf, &outbuf[1]);
test_store_stxsdx(alignedinbuf, alignedoutbuf, 1);
test_store_stxsdx(alignedinbuf, &outbuf[1], 1);
do_compare(alignedoutbuf, &outbuf[1]);
fprintf(stderr, "\n");
test_load_lxvw4x(alignedinbuf, alignedoutbuf, 0);
test_load_lxvw4x(&inbuf[1], alignedoutbuf2, 0);
do_compare(alignedoutbuf, alignedoutbuf2);
test_load_lxvw4x(alignedinbuf, alignedoutbuf, 1);
test_load_lxvw4x(&inbuf[1], alignedoutbuf2, 1);
do_compare(alignedoutbuf, alignedoutbuf2);
fprintf(stderr, "\n");
test_load_lxvd2x(alignedinbuf, alignedoutbuf, 0);
test_load_lxvd2x(&inbuf[1], alignedoutbuf2, 0);
do_compare(alignedoutbuf, alignedoutbuf2);
test_load_lxvd2x(alignedinbuf, alignedoutbuf, 1);
test_load_lxvd2x(&inbuf[1], alignedoutbuf2, 1);
do_compare(alignedoutbuf, alignedoutbuf2);
fprintf(stderr, "\n");
test_load_lxsdx(alignedinbuf, alignedoutbuf, 0);
test_load_lxsdx(&inbuf[1], alignedoutbuf2, 0);
do_compare(alignedoutbuf, alignedoutbuf2);
test_load_lxsdx(alignedinbuf, alignedoutbuf, 1);
test_load_lxsdx(&inbuf[1], alignedoutbuf2, 1);
do_compare(alignedoutbuf, alignedoutbuf2);
fprintf(stderr, "\n");
test_load_lxvdsx(alignedinbuf, alignedoutbuf, 0);
test_load_lxvdsx(&inbuf[1], alignedoutbuf2, 0);
do_compare(alignedoutbuf, alignedoutbuf2);
test_load_lxvdsx(alignedinbuf, alignedoutbuf, 1);
test_load_lxvdsx(&inbuf[1], alignedoutbuf2, 1);
do_compare(alignedoutbuf, alignedoutbuf2);
fprintf(stderr, "\n");
fprintf(stderr, "%d tests failed\n", fails);
return fails;
}
More information about the Linuxppc-dev
mailing list