[PATCH] crypto: vmx: Improved AES/XTS performance of 6-way unrolling for ppc.

Michael Ellerman mpe at ellerman.id.au
Wed Aug 30 14:37:25 AEST 2023


Danny Tsen <dtsen at linux.ibm.com> writes:
> Improve AES/XTS performance of 6-way unrolling for PowerPC by up
> to 17% with tcrypt.  This is done by using one instruction,
> vpermxor, to replace xor and vsldoi.
>
> This patch has been tested with the kernel crypto module tcrypt.ko and
> has passed the selftest.  The patch is also tested with
> CONFIG_CRYPTO_MANAGER_EXTRA_TESTS enabled.
>
> Signed-off-by: Danny Tsen <dtsen at linux.ibm.com>
> ---
>  drivers/crypto/vmx/aesp8-ppc.pl | 141 +++++++++++++++++++++-----------
>  1 file changed, 92 insertions(+), 49 deletions(-)

That's CRYPTOGAMS code, and is so far largely unchanged from the
original. I see you've sent the same change to OpenSSL, but it's not
merged yet. Please document that in the change log; we want to keep the
code in sync as much as possible, and to document any divergences.

cheers

> diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
> index 50a0a18f35da..f729589d792e 100644
> --- a/drivers/crypto/vmx/aesp8-ppc.pl
> +++ b/drivers/crypto/vmx/aesp8-ppc.pl
> @@ -132,11 +132,12 @@ rcon:
>  .long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
>  .long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
>  .long	0,0,0,0						?asis
> +.long	0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
>  Lconsts:
>  	mflr	r0
>  	bcl	20,31,\$+4
>  	mflr	$ptr	 #vvvvv "distance between . and rcon
> -	addi	$ptr,$ptr,-0x48
> +	addi	$ptr,$ptr,-0x58
>  	mtlr	r0
>  	blr
>  	.long	0
> @@ -2495,6 +2496,17 @@ _aesp8_xts_encrypt6x:
>  	li		$x70,0x70
>  	mtspr		256,r0
>  
> +	xxlor		2, 32+$eighty7, 32+$eighty7
> +	vsldoi		$eighty7,$tmp,$eighty7,1        # 0x010101..87
> +	xxlor		1, 32+$eighty7, 32+$eighty7
> +
> +	# Load XOR Lconsts.
> +	mr		$x70, r6
> +	bl		Lconsts
> +	lxvw4x		0, $x40, r6		# load XOR contents
> +	mr		r6, $x70
> +	li		$x70,0x70
> +
>  	subi		$rounds,$rounds,3	# -4 in total
>  
>  	lvx		$rndkey0,$x00,$key1	# load key schedule
> @@ -2537,69 +2549,77 @@ Load_xts_enc_key:
>  	?vperm		v31,v31,$twk5,$keyperm
>  	lvx		v25,$x10,$key_		# pre-load round[2]
>  
> +	# Switch to use the following codes with 0x010101..87 to generate tweak.
> +	#     eighty7 = 0x010101..87
> +	# vsrab         tmp, tweak, seven       # next tweak value, right shift 7 bits
> +	# vand          tmp, tmp, eighty7       # last byte with carry
> +	# vaddubm       tweak, tweak, tweak     # left shift 1 bit (x2)
> +	# xxlor         vsx, 0, 0
> +	# vpermxor      tweak, tweak, tmp, vsx
> +
>  	 vperm		$in0,$inout,$inptail,$inpperm
>  	 subi		$inp,$inp,31		# undo "caller"
>  	vxor		$twk0,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out0,$in0,$twk0
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in1, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in1
>  
>  	 lvx_u		$in1,$x10,$inp
>  	vxor		$twk1,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in1,$in1,$in1,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out1,$in1,$twk1
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in2, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in2
>  
>  	 lvx_u		$in2,$x20,$inp
>  	 andi.		$taillen,$len,15
>  	vxor		$twk2,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in2,$in2,$in2,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out2,$in2,$twk2
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in3, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in3
>  
>  	 lvx_u		$in3,$x30,$inp
>  	 sub		$len,$len,$taillen
>  	vxor		$twk3,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in3,$in3,$in3,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out3,$in3,$twk3
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in4, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in4
>  
>  	 lvx_u		$in4,$x40,$inp
>  	 subi		$len,$len,0x60
>  	vxor		$twk4,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in4,$in4,$in4,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out4,$in4,$twk4
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in5, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in5
>  
>  	 lvx_u		$in5,$x50,$inp
>  	 addi		$inp,$inp,0x60
>  	vxor		$twk5,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in5,$in5,$in5,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out5,$in5,$twk5
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in0, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in0
>  
>  	vxor		v31,v31,$rndkey0
>  	mtctr		$rounds
> @@ -2625,6 +2645,8 @@ Loop_xts_enc6x:
>  	lvx		v25,$x10,$key_		# round[4]
>  	bdnz		Loop_xts_enc6x
>  
> +	xxlor		32+$eighty7, 1, 1	# 0x010101..87
> +
>  	subic		$len,$len,96		# $len-=96
>  	 vxor		$in0,$twk0,v31		# xor with last round key
>  	vcipher		$out0,$out0,v24
> @@ -2634,7 +2656,6 @@ Loop_xts_enc6x:
>  	 vaddubm	$tweak,$tweak,$tweak
>  	vcipher		$out2,$out2,v24
>  	vcipher		$out3,$out3,v24
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vcipher		$out4,$out4,v24
>  	vcipher		$out5,$out5,v24
>  
> @@ -2642,7 +2663,8 @@ Loop_xts_enc6x:
>  	 vand		$tmp,$tmp,$eighty7
>  	vcipher		$out0,$out0,v25
>  	vcipher		$out1,$out1,v25
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in1, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in1
>  	vcipher		$out2,$out2,v25
>  	vcipher		$out3,$out3,v25
>  	 vxor		$in1,$twk1,v31
> @@ -2653,13 +2675,13 @@ Loop_xts_enc6x:
>  
>  	and		r0,r0,$len
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vcipher		$out0,$out0,v26
>  	vcipher		$out1,$out1,v26
>  	 vand		$tmp,$tmp,$eighty7
>  	vcipher		$out2,$out2,v26
>  	vcipher		$out3,$out3,v26
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in2, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in2
>  	vcipher		$out4,$out4,v26
>  	vcipher		$out5,$out5,v26
>  
> @@ -2673,7 +2695,6 @@ Loop_xts_enc6x:
>  	 vaddubm	$tweak,$tweak,$tweak
>  	vcipher		$out0,$out0,v27
>  	vcipher		$out1,$out1,v27
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vcipher		$out2,$out2,v27
>  	vcipher		$out3,$out3,v27
>  	 vand		$tmp,$tmp,$eighty7
> @@ -2681,7 +2702,8 @@ Loop_xts_enc6x:
>  	vcipher		$out5,$out5,v27
>  
>  	addi		$key_,$sp,$FRAME+15	# rewind $key_
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in3, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in3
>  	vcipher		$out0,$out0,v28
>  	vcipher		$out1,$out1,v28
>  	 vxor		$in3,$twk3,v31
> @@ -2690,7 +2712,6 @@ Loop_xts_enc6x:
>  	vcipher		$out2,$out2,v28
>  	vcipher		$out3,$out3,v28
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vcipher		$out4,$out4,v28
>  	vcipher		$out5,$out5,v28
>  	lvx		v24,$x00,$key_		# re-pre-load round[1]
> @@ -2698,7 +2719,8 @@ Loop_xts_enc6x:
>  
>  	vcipher		$out0,$out0,v29
>  	vcipher		$out1,$out1,v29
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in4, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in4
>  	vcipher		$out2,$out2,v29
>  	vcipher		$out3,$out3,v29
>  	 vxor		$in4,$twk4,v31
> @@ -2708,14 +2730,14 @@ Loop_xts_enc6x:
>  	vcipher		$out5,$out5,v29
>  	lvx		v25,$x10,$key_		# re-pre-load round[2]
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  
>  	vcipher		$out0,$out0,v30
>  	vcipher		$out1,$out1,v30
>  	 vand		$tmp,$tmp,$eighty7
>  	vcipher		$out2,$out2,v30
>  	vcipher		$out3,$out3,v30
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in5, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in5
>  	vcipher		$out4,$out4,v30
>  	vcipher		$out5,$out5,v30
>  	 vxor		$in5,$twk5,v31
> @@ -2725,7 +2747,6 @@ Loop_xts_enc6x:
>  	vcipherlast	$out0,$out0,$in0
>  	 lvx_u		$in0,$x00,$inp		# load next input block
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vcipherlast	$out1,$out1,$in1
>  	 lvx_u		$in1,$x10,$inp
>  	vcipherlast	$out2,$out2,$in2
> @@ -2738,7 +2759,10 @@ Loop_xts_enc6x:
>  	vcipherlast	$out4,$out4,$in4
>  	 le?vperm	$in2,$in2,$in2,$leperm
>  	 lvx_u		$in4,$x40,$inp
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		10, 32+$in0, 32+$in0
> +	 xxlor		32+$in0, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in0
> +	 xxlor		32+$in0, 10, 10
>  	vcipherlast	$tmp,$out5,$in5		# last block might be needed
>  						# in stealing mode
>  	 le?vperm	$in3,$in3,$in3,$leperm
> @@ -2771,6 +2795,8 @@ Loop_xts_enc6x:
>  	mtctr		$rounds
>  	beq		Loop_xts_enc6x		# did $len-=96 borrow?
>  
> +	xxlor		32+$eighty7, 2, 2	# 0x010101..87
> +
>  	addic.		$len,$len,0x60
>  	beq		Lxts_enc6x_zero
>  	cmpwi		$len,0x20
> @@ -3147,6 +3173,17 @@ _aesp8_xts_decrypt6x:
>  	li		$x70,0x70
>  	mtspr		256,r0
>  
> +	xxlor		2, 32+$eighty7, 32+$eighty7
> +	vsldoi		$eighty7,$tmp,$eighty7,1        # 0x010101..87
> +	xxlor		1, 32+$eighty7, 32+$eighty7
> +
> +	# Load XOR Lconsts.
> +	mr		$x70, r6
> +	bl		Lconsts
> +	lxvw4x		0, $x40, r6		# load XOR contents
> +	mr		r6, $x70
> +	li		$x70,0x70
> +
>  	subi		$rounds,$rounds,3	# -4 in total
>  
>  	lvx		$rndkey0,$x00,$key1	# load key schedule
> @@ -3194,64 +3231,64 @@ Load_xts_dec_key:
>  	vxor		$twk0,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out0,$in0,$twk0
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in1, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in1
>  
>  	 lvx_u		$in1,$x10,$inp
>  	vxor		$twk1,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in1,$in1,$in1,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out1,$in1,$twk1
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in2, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in2
>  
>  	 lvx_u		$in2,$x20,$inp
>  	 andi.		$taillen,$len,15
>  	vxor		$twk2,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in2,$in2,$in2,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out2,$in2,$twk2
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in3, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in3
>  
>  	 lvx_u		$in3,$x30,$inp
>  	 sub		$len,$len,$taillen
>  	vxor		$twk3,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in3,$in3,$in3,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out3,$in3,$twk3
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in4, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in4
>  
>  	 lvx_u		$in4,$x40,$inp
>  	 subi		$len,$len,0x60
>  	vxor		$twk4,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in4,$in4,$in4,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out4,$in4,$twk4
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in5, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in5
>  
>  	 lvx_u		$in5,$x50,$inp
>  	 addi		$inp,$inp,0x60
>  	vxor		$twk5,$tweak,$rndkey0
>  	vsrab		$tmp,$tweak,$seven	# next tweak value
>  	vaddubm		$tweak,$tweak,$tweak
> -	vsldoi		$tmp,$tmp,$tmp,15
>  	 le?vperm	$in5,$in5,$in5,$leperm
>  	vand		$tmp,$tmp,$eighty7
>  	 vxor		$out5,$in5,$twk5
> -	vxor		$tweak,$tweak,$tmp
> +	xxlor		32+$in0, 0, 0
> +	vpermxor	$tweak, $tweak, $tmp, $in0
>  
>  	vxor		v31,v31,$rndkey0
>  	mtctr		$rounds
> @@ -3277,6 +3314,8 @@ Loop_xts_dec6x:
>  	lvx		v25,$x10,$key_		# round[4]
>  	bdnz		Loop_xts_dec6x
>  
> +	xxlor		32+$eighty7, 1, 1	# 0x010101..87
> +
>  	subic		$len,$len,96		# $len-=96
>  	 vxor		$in0,$twk0,v31		# xor with last round key
>  	vncipher	$out0,$out0,v24
> @@ -3286,7 +3325,6 @@ Loop_xts_dec6x:
>  	 vaddubm	$tweak,$tweak,$tweak
>  	vncipher	$out2,$out2,v24
>  	vncipher	$out3,$out3,v24
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vncipher	$out4,$out4,v24
>  	vncipher	$out5,$out5,v24
>  
> @@ -3294,7 +3332,8 @@ Loop_xts_dec6x:
>  	 vand		$tmp,$tmp,$eighty7
>  	vncipher	$out0,$out0,v25
>  	vncipher	$out1,$out1,v25
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in1, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in1
>  	vncipher	$out2,$out2,v25
>  	vncipher	$out3,$out3,v25
>  	 vxor		$in1,$twk1,v31
> @@ -3305,13 +3344,13 @@ Loop_xts_dec6x:
>  
>  	and		r0,r0,$len
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vncipher	$out0,$out0,v26
>  	vncipher	$out1,$out1,v26
>  	 vand		$tmp,$tmp,$eighty7
>  	vncipher	$out2,$out2,v26
>  	vncipher	$out3,$out3,v26
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in2, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in2
>  	vncipher	$out4,$out4,v26
>  	vncipher	$out5,$out5,v26
>  
> @@ -3325,7 +3364,6 @@ Loop_xts_dec6x:
>  	 vaddubm	$tweak,$tweak,$tweak
>  	vncipher	$out0,$out0,v27
>  	vncipher	$out1,$out1,v27
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vncipher	$out2,$out2,v27
>  	vncipher	$out3,$out3,v27
>  	 vand		$tmp,$tmp,$eighty7
> @@ -3333,7 +3371,8 @@ Loop_xts_dec6x:
>  	vncipher	$out5,$out5,v27
>  
>  	addi		$key_,$sp,$FRAME+15	# rewind $key_
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in3, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in3
>  	vncipher	$out0,$out0,v28
>  	vncipher	$out1,$out1,v28
>  	 vxor		$in3,$twk3,v31
> @@ -3342,7 +3381,6 @@ Loop_xts_dec6x:
>  	vncipher	$out2,$out2,v28
>  	vncipher	$out3,$out3,v28
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vncipher	$out4,$out4,v28
>  	vncipher	$out5,$out5,v28
>  	lvx		v24,$x00,$key_		# re-pre-load round[1]
> @@ -3350,7 +3388,8 @@ Loop_xts_dec6x:
>  
>  	vncipher	$out0,$out0,v29
>  	vncipher	$out1,$out1,v29
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in4, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in4
>  	vncipher	$out2,$out2,v29
>  	vncipher	$out3,$out3,v29
>  	 vxor		$in4,$twk4,v31
> @@ -3360,14 +3399,14 @@ Loop_xts_dec6x:
>  	vncipher	$out5,$out5,v29
>  	lvx		v25,$x10,$key_		# re-pre-load round[2]
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  
>  	vncipher	$out0,$out0,v30
>  	vncipher	$out1,$out1,v30
>  	 vand		$tmp,$tmp,$eighty7
>  	vncipher	$out2,$out2,v30
>  	vncipher	$out3,$out3,v30
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		32+$in5, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in5
>  	vncipher	$out4,$out4,v30
>  	vncipher	$out5,$out5,v30
>  	 vxor		$in5,$twk5,v31
> @@ -3377,7 +3416,6 @@ Loop_xts_dec6x:
>  	vncipherlast	$out0,$out0,$in0
>  	 lvx_u		$in0,$x00,$inp		# load next input block
>  	 vaddubm	$tweak,$tweak,$tweak
> -	 vsldoi		$tmp,$tmp,$tmp,15
>  	vncipherlast	$out1,$out1,$in1
>  	 lvx_u		$in1,$x10,$inp
>  	vncipherlast	$out2,$out2,$in2
> @@ -3390,7 +3428,10 @@ Loop_xts_dec6x:
>  	vncipherlast	$out4,$out4,$in4
>  	 le?vperm	$in2,$in2,$in2,$leperm
>  	 lvx_u		$in4,$x40,$inp
> -	 vxor		$tweak,$tweak,$tmp
> +	 xxlor		10, 32+$in0, 32+$in0
> +	 xxlor		32+$in0, 0, 0
> +	 vpermxor	$tweak, $tweak, $tmp, $in0
> +	 xxlor		32+$in0, 10, 10
>  	vncipherlast	$out5,$out5,$in5
>  	 le?vperm	$in3,$in3,$in3,$leperm
>  	 lvx_u		$in5,$x50,$inp
> @@ -3421,6 +3462,8 @@ Loop_xts_dec6x:
>  	mtctr		$rounds
>  	beq		Loop_xts_dec6x		# did $len-=96 borrow?
>  
> +	xxlor		32+$eighty7, 2, 2	# 0x010101..87
> +
>  	addic.		$len,$len,0x60
>  	beq		Lxts_dec6x_zero
>  	cmpwi		$len,0x20
> -- 
> 2.31.1


More information about the Linuxppc-dev mailing list