[v3 5/7] powerpc/memcpy_mcsafe: return remaining bytes
Santosh Sivaraj
santosh at fossix.org
Sat Jul 6 07:26:45 AEST 2019
memcpy_mcsafe currently returns -EFAULT on a machine check exception. Change
it to return the number of remaining bytes that need to be copied, so that a
machine check safe copy_to_user can maintain the same behavior as copy_to_user.
Signed-off-by: Santosh Sivaraj <santosh at fossix.org>
---
arch/powerpc/lib/memcpy_mcsafe_64.S | 142 ++++++++++++++++------------
1 file changed, 83 insertions(+), 59 deletions(-)
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
index 50f865db0338..4d8a3d315992 100644
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ b/arch/powerpc/lib/memcpy_mcsafe_64.S
@@ -30,11 +30,25 @@
ld r14,STK_REG(R14)(r1)
addi r1,r1,STACKFRAMESIZE
.Ldo_err1:
- li r3,-EFAULT
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+100: EX_TABLE(100b, .Ldone)
+46:
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
blr
_GLOBAL(memcpy_mcsafe)
+ mr r7,r5
cmpldi r5,16
blt .Lshort_copy
@@ -49,18 +63,21 @@ err1; lbz r0,0(r4)
addi r4,r4,1
err1; stb r0,0(r3)
addi r3,r3,1
+ subi r7,r7,1
1: bf cr7*4+2,2f
err1; lhz r0,0(r4)
addi r4,r4,2
err1; sth r0,0(r3)
addi r3,r3,2
+ subi r7,r7,2
2: bf cr7*4+1,3f
err1; lwz r0,0(r4)
addi r4,r4,4
err1; stw r0,0(r3)
addi r3,r3,4
+ subi r7,r7,4
3: sub r5,r5,r6
cmpldi r5,128
@@ -87,43 +104,69 @@ err1; stw r0,0(r3)
4:
err2; ld r0,0(r4)
err2; ld r6,8(r4)
-err2; ld r7,16(r4)
-err2; ld r8,24(r4)
-err2; ld r9,32(r4)
-err2; ld r10,40(r4)
-err2; ld r11,48(r4)
-err2; ld r12,56(r4)
-err2; ld r14,64(r4)
-err2; ld r15,72(r4)
-err2; ld r16,80(r4)
-err2; ld r17,88(r4)
-err2; ld r18,96(r4)
-err2; ld r19,104(r4)
-err2; ld r20,112(r4)
-err2; ld r21,120(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
addi r4,r4,128
err2; std r0,0(r3)
err2; std r6,8(r3)
-err2; std r7,16(r3)
-err2; std r8,24(r3)
-err2; std r9,32(r3)
-err2; std r10,40(r3)
-err2; std r11,48(r3)
-err2; std r12,56(r3)
-err2; std r14,64(r3)
-err2; std r15,72(r3)
-err2; std r16,80(r3)
-err2; std r17,88(r3)
-err2; std r18,96(r3)
-err2; std r19,104(r3)
-err2; std r20,112(r3)
-err2; std r21,120(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
addi r3,r3,128
+ subi r7,r7,128
bdnz 4b
clrldi r5,r5,(64-7)
- ld r14,STK_REG(R14)(r1)
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
ld r17,STK_REG(R17)(r1)
@@ -134,42 +177,19 @@ err2; std r21,120(r3)
ld r22,STK_REG(R22)(r1)
addi r1,r1,STACKFRAMESIZE
- /* Up to 127B to go */
-5: srdi r6,r5,4
- mtocrf 0x01,r6
-
-6: bf cr7*4+1,7f
-err1; ld r0,0(r4)
-err1; ld r6,8(r4)
-err1; ld r7,16(r4)
-err1; ld r8,24(r4)
-err1; ld r9,32(r4)
-err1; ld r10,40(r4)
-err1; ld r11,48(r4)
-err1; ld r12,56(r4)
- addi r4,r4,64
-err1; std r0,0(r3)
-err1; std r6,8(r3)
-err1; std r7,16(r3)
-err1; std r8,24(r3)
-err1; std r9,32(r3)
-err1; std r10,40(r3)
-err1; std r11,48(r3)
-err1; std r12,56(r3)
- addi r3,r3,64
-
/* Up to 63B to go */
-7: bf cr7*4+2,8f
+ bf cr7*4+2,8f
err1; ld r0,0(r4)
err1; ld r6,8(r4)
-err1; ld r7,16(r4)
-err1; ld r8,24(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
addi r4,r4,32
err1; std r0,0(r3)
err1; std r6,8(r3)
-err1; std r7,16(r3)
-err1; std r8,24(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
addi r3,r3,32
+ subi r7,r7,32
/* Up to 31B to go */
8: bf cr7*4+3,9f
@@ -179,6 +199,7 @@ err1; ld r6,8(r4)
err1; std r0,0(r3)
err1; std r6,8(r3)
addi r3,r3,16
+ subi r7,r7,16
9: clrldi r5,r5,(64-4)
@@ -192,18 +213,21 @@ err1; lwz r6,4(r4)
err1; stw r0,0(r3)
err1; stw r6,4(r3)
addi r3,r3,8
+ subi r7,r7,8
12: bf cr7*4+1,13f
err1; lwz r0,0(r4)
addi r4,r4,4
err1; stw r0,0(r3)
addi r3,r3,4
+ subi r7,r7,4
13: bf cr7*4+2,14f
err1; lhz r0,0(r4)
addi r4,r4,2
err1; sth r0,0(r3)
addi r3,r3,2
+ subi r7,r7,2
14: bf cr7*4+3,15f
err1; lbz r0,0(r4)
--
2.20.1
More information about the Linuxppc-dev
mailing list