[PATCH 3/4] crypto: powerpc - Add CRC-T10DIF acceleration

Daniel Axtens dja at axtens.net
Wed Mar 15 23:37:36 AEDT 2017


T10DIF is a CRC16 used heavily in NVMe.

It turns out we can accelerate it with a CRC32 library and a few
little tricks.

Provide the accelerator based the refactored CRC32 code.

Cc: Anton Blanchard <anton at samba.org>
Thanks-to: Hong Bo Peng <penghb at cn.ibm.com>
Signed-off-by: Daniel Axtens <dja at axtens.net>
---
 arch/powerpc/crypto/Makefile                |   2 +
 arch/powerpc/crypto/crct10dif-vpmsum_asm.S  | 850 ++++++++++++++++++++++++++++
 arch/powerpc/crypto/crct10dif-vpmsum_glue.c | 125 ++++
 crypto/Kconfig                              |   9 +
 4 files changed, 986 insertions(+)
 create mode 100644 arch/powerpc/crypto/crct10dif-vpmsum_asm.S
 create mode 100644 arch/powerpc/crypto/crct10dif-vpmsum_glue.c

diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 87f40454bad3..e66aaf19764d 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
 obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
 obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
 obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -17,3 +18,4 @@ sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
 sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
 sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
 crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o
+crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
new file mode 100644
index 000000000000..5e3d81a0af1b
--- /dev/null
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S
@@ -0,0 +1,850 @@
+/*
+ * Calculate a CRC T10DIF  with vpmsum acceleration
+ *
+ * Constants generated by crc32-vpmsum, available at
+ * https://github.com/antonblanchard/crc32-vpmsum
+ *
+ * crc32-vpmsum is
+ * Copyright (C) 2015 Anton Blanchard <anton at au.ibm.com>, IBM
+ * and is available under the GPL v2 or later.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+	.section	.rodata
+.balign 16
+
+.byteswap_constant:
+	/* byte reverse permute constant */
+	.octa 0x0F0E0D0C0B0A09080706050403020100
+
+.constants:
+
+	/* Reduce 262144 kbits to 1024 bits */
+	/* x^261184 mod p(x), x^261120 mod p(x) */
+	.octa 0x0000000056d300000000000052550000
+
+	/* x^260160 mod p(x), x^260096 mod p(x) */
+	.octa 0x00000000ee67000000000000a1e40000
+
+	/* x^259136 mod p(x), x^259072 mod p(x) */
+	.octa 0x0000000060830000000000004ad10000
+
+	/* x^258112 mod p(x), x^258048 mod p(x) */
+	.octa 0x000000008cfe0000000000009ab40000
+
+	/* x^257088 mod p(x), x^257024 mod p(x) */
+	.octa 0x000000003e93000000000000fdb50000
+
+	/* x^256064 mod p(x), x^256000 mod p(x) */
+	.octa 0x000000003c2000000000000045480000
+
+	/* x^255040 mod p(x), x^254976 mod p(x) */
+	.octa 0x00000000b1fc0000000000008d690000
+
+	/* x^254016 mod p(x), x^253952 mod p(x) */
+	.octa 0x00000000f82b00000000000024ad0000
+
+	/* x^252992 mod p(x), x^252928 mod p(x) */
+	.octa 0x0000000044420000000000009f1a0000
+
+	/* x^251968 mod p(x), x^251904 mod p(x) */
+	.octa 0x00000000e88c00000000000066ec0000
+
+	/* x^250944 mod p(x), x^250880 mod p(x) */
+	.octa 0x00000000385c000000000000c87d0000
+
+	/* x^249920 mod p(x), x^249856 mod p(x) */
+	.octa 0x000000003227000000000000c8ff0000
+
+	/* x^248896 mod p(x), x^248832 mod p(x) */
+	.octa 0x00000000a9a900000000000033440000
+
+	/* x^247872 mod p(x), x^247808 mod p(x) */
+	.octa 0x00000000abaa00000000000066eb0000
+
+	/* x^246848 mod p(x), x^246784 mod p(x) */
+	.octa 0x000000001ac3000000000000c4ef0000
+
+	/* x^245824 mod p(x), x^245760 mod p(x) */
+	.octa 0x0000000063f000000000000056f30000
+
+	/* x^244800 mod p(x), x^244736 mod p(x) */
+	.octa 0x0000000032cc00000000000002050000
+
+	/* x^243776 mod p(x), x^243712 mod p(x) */
+	.octa 0x00000000f8b5000000000000568e0000
+
+	/* x^242752 mod p(x), x^242688 mod p(x) */
+	.octa 0x000000008db100000000000064290000
+
+	/* x^241728 mod p(x), x^241664 mod p(x) */
+	.octa 0x0000000059ca0000000000006b660000
+
+	/* x^240704 mod p(x), x^240640 mod p(x) */
+	.octa 0x000000005f5c00000000000018f80000
+
+	/* x^239680 mod p(x), x^239616 mod p(x) */
+	.octa 0x0000000061af000000000000b6090000
+
+	/* x^238656 mod p(x), x^238592 mod p(x) */
+	.octa 0x00000000e29e000000000000099a0000
+
+	/* x^237632 mod p(x), x^237568 mod p(x) */
+	.octa 0x000000000975000000000000a8360000
+
+	/* x^236608 mod p(x), x^236544 mod p(x) */
+	.octa 0x0000000043900000000000004f570000
+
+	/* x^235584 mod p(x), x^235520 mod p(x) */
+	.octa 0x00000000f9cd000000000000134c0000
+
+	/* x^234560 mod p(x), x^234496 mod p(x) */
+	.octa 0x000000007c29000000000000ec380000
+
+	/* x^233536 mod p(x), x^233472 mod p(x) */
+	.octa 0x000000004c6a000000000000b0d10000
+
+	/* x^232512 mod p(x), x^232448 mod p(x) */
+	.octa 0x00000000e7290000000000007d3e0000
+
+	/* x^231488 mod p(x), x^231424 mod p(x) */
+	.octa 0x00000000f1ab000000000000f0b20000
+
+	/* x^230464 mod p(x), x^230400 mod p(x) */
+	.octa 0x0000000039db0000000000009c270000
+
+	/* x^229440 mod p(x), x^229376 mod p(x) */
+	.octa 0x000000005e2800000000000092890000
+
+	/* x^228416 mod p(x), x^228352 mod p(x) */
+	.octa 0x00000000d44e000000000000d5ee0000
+
+	/* x^227392 mod p(x), x^227328 mod p(x) */
+	.octa 0x00000000cd0a00000000000041f50000
+
+	/* x^226368 mod p(x), x^226304 mod p(x) */
+	.octa 0x00000000c5b400000000000010520000
+
+	/* x^225344 mod p(x), x^225280 mod p(x) */
+	.octa 0x00000000fd2100000000000042170000
+
+	/* x^224320 mod p(x), x^224256 mod p(x) */
+	.octa 0x000000002f2500000000000095c20000
+
+	/* x^223296 mod p(x), x^223232 mod p(x) */
+	.octa 0x000000001b0100000000000001ce0000
+
+	/* x^222272 mod p(x), x^222208 mod p(x) */
+	.octa 0x000000000d430000000000002aca0000
+
+	/* x^221248 mod p(x), x^221184 mod p(x) */
+	.octa 0x0000000030a6000000000000385e0000
+
+	/* x^220224 mod p(x), x^220160 mod p(x) */
+	.octa 0x00000000e37b0000000000006f7a0000
+
+	/* x^219200 mod p(x), x^219136 mod p(x) */
+	.octa 0x00000000873600000000000024320000
+
+	/* x^218176 mod p(x), x^218112 mod p(x) */
+	.octa 0x00000000e9fb000000000000bd9c0000
+
+	/* x^217152 mod p(x), x^217088 mod p(x) */
+	.octa 0x000000003b9500000000000054bc0000
+
+	/* x^216128 mod p(x), x^216064 mod p(x) */
+	.octa 0x00000000133e000000000000a4660000
+
+	/* x^215104 mod p(x), x^215040 mod p(x) */
+	.octa 0x00000000784500000000000079930000
+
+	/* x^214080 mod p(x), x^214016 mod p(x) */
+	.octa 0x00000000b9800000000000001bb80000
+
+	/* x^213056 mod p(x), x^212992 mod p(x) */
+	.octa 0x00000000687600000000000024400000
+
+	/* x^212032 mod p(x), x^211968 mod p(x) */
+	.octa 0x00000000aff300000000000029e10000
+
+	/* x^211008 mod p(x), x^210944 mod p(x) */
+	.octa 0x0000000024b50000000000005ded0000
+
+	/* x^209984 mod p(x), x^209920 mod p(x) */
+	.octa 0x0000000017e8000000000000b12e0000
+
+	/* x^208960 mod p(x), x^208896 mod p(x) */
+	.octa 0x00000000128400000000000026d20000
+
+	/* x^207936 mod p(x), x^207872 mod p(x) */
+	.octa 0x000000002115000000000000a32a0000
+
+	/* x^206912 mod p(x), x^206848 mod p(x) */
+	.octa 0x000000009595000000000000a1210000
+
+	/* x^205888 mod p(x), x^205824 mod p(x) */
+	.octa 0x00000000281e000000000000ee8b0000
+
+	/* x^204864 mod p(x), x^204800 mod p(x) */
+	.octa 0x0000000006010000000000003d0d0000
+
+	/* x^203840 mod p(x), x^203776 mod p(x) */
+	.octa 0x00000000e2b600000000000034e90000
+
+	/* x^202816 mod p(x), x^202752 mod p(x) */
+	.octa 0x000000001bd40000000000004cdb0000
+
+	/* x^201792 mod p(x), x^201728 mod p(x) */
+	.octa 0x00000000df2800000000000030e90000
+
+	/* x^200768 mod p(x), x^200704 mod p(x) */
+	.octa 0x0000000049c200000000000042590000
+
+	/* x^199744 mod p(x), x^199680 mod p(x) */
+	.octa 0x000000009b97000000000000df950000
+
+	/* x^198720 mod p(x), x^198656 mod p(x) */
+	.octa 0x000000006184000000000000da7b0000
+
+	/* x^197696 mod p(x), x^197632 mod p(x) */
+	.octa 0x00000000461700000000000012510000
+
+	/* x^196672 mod p(x), x^196608 mod p(x) */
+	.octa 0x000000009b40000000000000f37e0000
+
+	/* x^195648 mod p(x), x^195584 mod p(x) */
+	.octa 0x00000000eeb2000000000000ecf10000
+
+	/* x^194624 mod p(x), x^194560 mod p(x) */
+	.octa 0x00000000b2e800000000000050f20000
+
+	/* x^193600 mod p(x), x^193536 mod p(x) */
+	.octa 0x00000000f59a000000000000e0b30000
+
+	/* x^192576 mod p(x), x^192512 mod p(x) */
+	.octa 0x00000000467f0000000000004d5a0000
+
+	/* x^191552 mod p(x), x^191488 mod p(x) */
+	.octa 0x00000000da92000000000000bb010000
+
+	/* x^190528 mod p(x), x^190464 mod p(x) */
+	.octa 0x000000001e1000000000000022a40000
+
+	/* x^189504 mod p(x), x^189440 mod p(x) */
+	.octa 0x0000000058fe000000000000836f0000
+
+	/* x^188480 mod p(x), x^188416 mod p(x) */
+	.octa 0x00000000b9ce000000000000d78d0000
+
+	/* x^187456 mod p(x), x^187392 mod p(x) */
+	.octa 0x0000000022210000000000004f8d0000
+
+	/* x^186432 mod p(x), x^186368 mod p(x) */
+	.octa 0x00000000744600000000000033760000
+
+	/* x^185408 mod p(x), x^185344 mod p(x) */
+	.octa 0x000000001c2e000000000000a1e50000
+
+	/* x^184384 mod p(x), x^184320 mod p(x) */
+	.octa 0x00000000dcc8000000000000a1a40000
+
+	/* x^183360 mod p(x), x^183296 mod p(x) */
+	.octa 0x00000000910f00000000000019a20000
+
+	/* x^182336 mod p(x), x^182272 mod p(x) */
+	.octa 0x0000000055d5000000000000f6ae0000
+
+	/* x^181312 mod p(x), x^181248 mod p(x) */
+	.octa 0x00000000c8ba000000000000a7ac0000
+
+	/* x^180288 mod p(x), x^180224 mod p(x) */
+	.octa 0x0000000031f8000000000000eea20000
+
+	/* x^179264 mod p(x), x^179200 mod p(x) */
+	.octa 0x000000001966000000000000c4d90000
+
+	/* x^178240 mod p(x), x^178176 mod p(x) */
+	.octa 0x00000000b9810000000000002b470000
+
+	/* x^177216 mod p(x), x^177152 mod p(x) */
+	.octa 0x000000008303000000000000f7cf0000
+
+	/* x^176192 mod p(x), x^176128 mod p(x) */
+	.octa 0x000000002ce500000000000035b30000
+
+	/* x^175168 mod p(x), x^175104 mod p(x) */
+	.octa 0x000000002fae0000000000000c7c0000
+
+	/* x^174144 mod p(x), x^174080 mod p(x) */
+	.octa 0x00000000f50c0000000000009edf0000
+
+	/* x^173120 mod p(x), x^173056 mod p(x) */
+	.octa 0x00000000714f00000000000004cd0000
+
+	/* x^172096 mod p(x), x^172032 mod p(x) */
+	.octa 0x00000000c161000000000000541b0000
+
+	/* x^171072 mod p(x), x^171008 mod p(x) */
+	.octa 0x0000000021c8000000000000e2700000
+
+	/* x^170048 mod p(x), x^169984 mod p(x) */
+	.octa 0x00000000b93d00000000000009a60000
+
+	/* x^169024 mod p(x), x^168960 mod p(x) */
+	.octa 0x00000000fbcf000000000000761c0000
+
+	/* x^168000 mod p(x), x^167936 mod p(x) */
+	.octa 0x0000000026350000000000009db30000
+
+	/* x^166976 mod p(x), x^166912 mod p(x) */
+	.octa 0x00000000b64f0000000000003e9f0000
+
+	/* x^165952 mod p(x), x^165888 mod p(x) */
+	.octa 0x00000000bd0e00000000000078590000
+
+	/* x^164928 mod p(x), x^164864 mod p(x) */
+	.octa 0x00000000d9360000000000008bc80000
+
+	/* x^163904 mod p(x), x^163840 mod p(x) */
+	.octa 0x000000002f140000000000008c9f0000
+
+	/* x^162880 mod p(x), x^162816 mod p(x) */
+	.octa 0x000000006a270000000000006af70000
+
+	/* x^161856 mod p(x), x^161792 mod p(x) */
+	.octa 0x000000006685000000000000e5210000
+
+	/* x^160832 mod p(x), x^160768 mod p(x) */
+	.octa 0x0000000062da00000000000008290000
+
+	/* x^159808 mod p(x), x^159744 mod p(x) */
+	.octa 0x00000000bb4b000000000000e4d00000
+
+	/* x^158784 mod p(x), x^158720 mod p(x) */
+	.octa 0x00000000d2490000000000004ae10000
+
+	/* x^157760 mod p(x), x^157696 mod p(x) */
+	.octa 0x00000000c85b00000000000000e70000
+
+	/* x^156736 mod p(x), x^156672 mod p(x) */
+	.octa 0x00000000c37a00000000000015650000
+
+	/* x^155712 mod p(x), x^155648 mod p(x) */
+	.octa 0x0000000018530000000000001c2f0000
+
+	/* x^154688 mod p(x), x^154624 mod p(x) */
+	.octa 0x00000000b46600000000000037bd0000
+
+	/* x^153664 mod p(x), x^153600 mod p(x) */
+	.octa 0x00000000439b00000000000012190000
+
+	/* x^152640 mod p(x), x^152576 mod p(x) */
+	.octa 0x00000000b1260000000000005ece0000
+
+	/* x^151616 mod p(x), x^151552 mod p(x) */
+	.octa 0x00000000d8110000000000002a5e0000
+
+	/* x^150592 mod p(x), x^150528 mod p(x) */
+	.octa 0x00000000099f00000000000052330000
+
+	/* x^149568 mod p(x), x^149504 mod p(x) */
+	.octa 0x00000000f9f9000000000000f9120000
+
+	/* x^148544 mod p(x), x^148480 mod p(x) */
+	.octa 0x000000005cc00000000000000ddc0000
+
+	/* x^147520 mod p(x), x^147456 mod p(x) */
+	.octa 0x00000000343b00000000000012200000
+
+	/* x^146496 mod p(x), x^146432 mod p(x) */
+	.octa 0x000000009222000000000000d12b0000
+
+	/* x^145472 mod p(x), x^145408 mod p(x) */
+	.octa 0x00000000d781000000000000eb2d0000
+
+	/* x^144448 mod p(x), x^144384 mod p(x) */
+	.octa 0x000000000bf400000000000058970000
+
+	/* x^143424 mod p(x), x^143360 mod p(x) */
+	.octa 0x00000000094200000000000013690000
+
+	/* x^142400 mod p(x), x^142336 mod p(x) */
+	.octa 0x00000000d55100000000000051950000
+
+	/* x^141376 mod p(x), x^141312 mod p(x) */
+	.octa 0x000000008f11000000000000954b0000
+
+	/* x^140352 mod p(x), x^140288 mod p(x) */
+	.octa 0x00000000140f000000000000b29e0000
+
+	/* x^139328 mod p(x), x^139264 mod p(x) */
+	.octa 0x00000000c6db000000000000db5d0000
+
+	/* x^138304 mod p(x), x^138240 mod p(x) */
+	.octa 0x00000000715b000000000000dfaf0000
+
+	/* x^137280 mod p(x), x^137216 mod p(x) */
+	.octa 0x000000000dea000000000000e3b60000
+
+	/* x^136256 mod p(x), x^136192 mod p(x) */
+	.octa 0x000000006f94000000000000ddaf0000
+
+	/* x^135232 mod p(x), x^135168 mod p(x) */
+	.octa 0x0000000024e1000000000000e4f70000
+
+	/* x^134208 mod p(x), x^134144 mod p(x) */
+	.octa 0x000000008810000000000000aa110000
+
+	/* x^133184 mod p(x), x^133120 mod p(x) */
+	.octa 0x0000000030c2000000000000a8e60000
+
+	/* x^132160 mod p(x), x^132096 mod p(x) */
+	.octa 0x00000000e6d0000000000000ccf30000
+
+	/* x^131136 mod p(x), x^131072 mod p(x) */
+	.octa 0x000000004da000000000000079bf0000
+
+	/* x^130112 mod p(x), x^130048 mod p(x) */
+	.octa 0x000000007759000000000000b3a30000
+
+	/* x^129088 mod p(x), x^129024 mod p(x) */
+	.octa 0x00000000597400000000000028790000
+
+	/* x^128064 mod p(x), x^128000 mod p(x) */
+	.octa 0x000000007acd000000000000b5820000
+
+	/* x^127040 mod p(x), x^126976 mod p(x) */
+	.octa 0x00000000e6e400000000000026ad0000
+
+	/* x^126016 mod p(x), x^125952 mod p(x) */
+	.octa 0x000000006d49000000000000985b0000
+
+	/* x^124992 mod p(x), x^124928 mod p(x) */
+	.octa 0x000000000f0800000000000011520000
+
+	/* x^123968 mod p(x), x^123904 mod p(x) */
+	.octa 0x000000002c7f000000000000846c0000
+
+	/* x^122944 mod p(x), x^122880 mod p(x) */
+	.octa 0x000000005ce7000000000000ae1d0000
+
+	/* x^121920 mod p(x), x^121856 mod p(x) */
+	.octa 0x00000000d4cb000000000000e21d0000
+
+	/* x^120896 mod p(x), x^120832 mod p(x) */
+	.octa 0x000000003a2300000000000019bb0000
+
+	/* x^119872 mod p(x), x^119808 mod p(x) */
+	.octa 0x000000000e1700000000000095290000
+
+	/* x^118848 mod p(x), x^118784 mod p(x) */
+	.octa 0x000000006e6400000000000050d20000
+
+	/* x^117824 mod p(x), x^117760 mod p(x) */
+	.octa 0x000000008d5c0000000000000cd10000
+
+	/* x^116800 mod p(x), x^116736 mod p(x) */
+	.octa 0x00000000ef310000000000007b570000
+
+	/* x^115776 mod p(x), x^115712 mod p(x) */
+	.octa 0x00000000645d00000000000053d60000
+
+	/* x^114752 mod p(x), x^114688 mod p(x) */
+	.octa 0x0000000018fc00000000000077510000
+
+	/* x^113728 mod p(x), x^113664 mod p(x) */
+	.octa 0x000000000cb3000000000000a7b70000
+
+	/* x^112704 mod p(x), x^112640 mod p(x) */
+	.octa 0x00000000991b000000000000d0780000
+
+	/* x^111680 mod p(x), x^111616 mod p(x) */
+	.octa 0x00000000845a000000000000be3c0000
+
+	/* x^110656 mod p(x), x^110592 mod p(x) */
+	.octa 0x00000000d3a9000000000000df020000
+
+	/* x^109632 mod p(x), x^109568 mod p(x) */
+	.octa 0x0000000017d7000000000000063e0000
+
+	/* x^108608 mod p(x), x^108544 mod p(x) */
+	.octa 0x000000007a860000000000008ab40000
+
+	/* x^107584 mod p(x), x^107520 mod p(x) */
+	.octa 0x00000000fd7c000000000000c7bd0000
+
+	/* x^106560 mod p(x), x^106496 mod p(x) */
+	.octa 0x00000000a56b000000000000efd60000
+
+	/* x^105536 mod p(x), x^105472 mod p(x) */
+	.octa 0x0000000010e400000000000071380000
+
+	/* x^104512 mod p(x), x^104448 mod p(x) */
+	.octa 0x00000000994500000000000004d30000
+
+	/* x^103488 mod p(x), x^103424 mod p(x) */
+	.octa 0x00000000b83c0000000000003b0e0000
+
+	/* x^102464 mod p(x), x^102400 mod p(x) */
+	.octa 0x00000000d6c10000000000008b020000
+
+	/* x^101440 mod p(x), x^101376 mod p(x) */
+	.octa 0x000000009efc000000000000da940000
+
+	/* x^100416 mod p(x), x^100352 mod p(x) */
+	.octa 0x000000005e87000000000000f9f70000
+
+	/* x^99392 mod p(x), x^99328 mod p(x) */
+	.octa 0x000000006c9b00000000000045e40000
+
+	/* x^98368 mod p(x), x^98304 mod p(x) */
+	.octa 0x00000000178a00000000000083940000
+
+	/* x^97344 mod p(x), x^97280 mod p(x) */
+	.octa 0x00000000f0c8000000000000f0a00000
+
+	/* x^96320 mod p(x), x^96256 mod p(x) */
+	.octa 0x00000000f699000000000000b74b0000
+
+	/* x^95296 mod p(x), x^95232 mod p(x) */
+	.octa 0x00000000316d000000000000c1cf0000
+
+	/* x^94272 mod p(x), x^94208 mod p(x) */
+	.octa 0x00000000987e00000000000072680000
+
+	/* x^93248 mod p(x), x^93184 mod p(x) */
+	.octa 0x00000000acff000000000000e0ab0000
+
+	/* x^92224 mod p(x), x^92160 mod p(x) */
+	.octa 0x00000000a1f6000000000000c5a80000
+
+	/* x^91200 mod p(x), x^91136 mod p(x) */
+	.octa 0x0000000061bd000000000000cf690000
+
+	/* x^90176 mod p(x), x^90112 mod p(x) */
+	.octa 0x00000000c9f2000000000000cbcc0000
+
+	/* x^89152 mod p(x), x^89088 mod p(x) */
+	.octa 0x000000005a33000000000000de050000
+
+	/* x^88128 mod p(x), x^88064 mod p(x) */
+	.octa 0x00000000e416000000000000ccd70000
+
+	/* x^87104 mod p(x), x^87040 mod p(x) */
+	.octa 0x0000000058930000000000002f670000
+
+	/* x^86080 mod p(x), x^86016 mod p(x) */
+	.octa 0x00000000a9d3000000000000152f0000
+
+	/* x^85056 mod p(x), x^84992 mod p(x) */
+	.octa 0x00000000c114000000000000ecc20000
+
+	/* x^84032 mod p(x), x^83968 mod p(x) */
+	.octa 0x00000000b9270000000000007c890000
+
+	/* x^83008 mod p(x), x^82944 mod p(x) */
+	.octa 0x000000002e6000000000000006ee0000
+
+	/* x^81984 mod p(x), x^81920 mod p(x) */
+	.octa 0x00000000dfc600000000000009100000
+
+	/* x^80960 mod p(x), x^80896 mod p(x) */
+	.octa 0x000000004911000000000000ad4e0000
+
+	/* x^79936 mod p(x), x^79872 mod p(x) */
+	.octa 0x00000000ae1b000000000000b04d0000
+
+	/* x^78912 mod p(x), x^78848 mod p(x) */
+	.octa 0x0000000005fa000000000000e9900000
+
+	/* x^77888 mod p(x), x^77824 mod p(x) */
+	.octa 0x0000000004a1000000000000cc6f0000
+
+	/* x^76864 mod p(x), x^76800 mod p(x) */
+	.octa 0x00000000af73000000000000ed110000
+
+	/* x^75840 mod p(x), x^75776 mod p(x) */
+	.octa 0x0000000082530000000000008f7e0000
+
+	/* x^74816 mod p(x), x^74752 mod p(x) */
+	.octa 0x00000000cfdc000000000000594f0000
+
+	/* x^73792 mod p(x), x^73728 mod p(x) */
+	.octa 0x00000000a6b6000000000000a8750000
+
+	/* x^72768 mod p(x), x^72704 mod p(x) */
+	.octa 0x00000000fd76000000000000aa0c0000
+
+	/* x^71744 mod p(x), x^71680 mod p(x) */
+	.octa 0x0000000006f500000000000071db0000
+
+	/* x^70720 mod p(x), x^70656 mod p(x) */
+	.octa 0x0000000037ca000000000000ab0c0000
+
+	/* x^69696 mod p(x), x^69632 mod p(x) */
+	.octa 0x00000000d7ab000000000000b7a00000
+
+	/* x^68672 mod p(x), x^68608 mod p(x) */
+	.octa 0x00000000440800000000000090d30000
+
+	/* x^67648 mod p(x), x^67584 mod p(x) */
+	.octa 0x00000000186100000000000054730000
+
+	/* x^66624 mod p(x), x^66560 mod p(x) */
+	.octa 0x000000007368000000000000a3a20000
+
+	/* x^65600 mod p(x), x^65536 mod p(x) */
+	.octa 0x0000000026d0000000000000f9040000
+
+	/* x^64576 mod p(x), x^64512 mod p(x) */
+	.octa 0x00000000fe770000000000009c0a0000
+
+	/* x^63552 mod p(x), x^63488 mod p(x) */
+	.octa 0x000000002cba000000000000d1e70000
+
+	/* x^62528 mod p(x), x^62464 mod p(x) */
+	.octa 0x00000000f8bd0000000000005ac10000
+
+	/* x^61504 mod p(x), x^61440 mod p(x) */
+	.octa 0x000000007372000000000000d68d0000
+
+	/* x^60480 mod p(x), x^60416 mod p(x) */
+	.octa 0x00000000f37f00000000000089f60000
+
+	/* x^59456 mod p(x), x^59392 mod p(x) */
+	.octa 0x00000000078400000000000008a90000
+
+	/* x^58432 mod p(x), x^58368 mod p(x) */
+	.octa 0x00000000d3e400000000000042360000
+
+	/* x^57408 mod p(x), x^57344 mod p(x) */
+	.octa 0x00000000eba800000000000092d50000
+
+	/* x^56384 mod p(x), x^56320 mod p(x) */
+	.octa 0x00000000afbe000000000000b4d50000
+
+	/* x^55360 mod p(x), x^55296 mod p(x) */
+	.octa 0x00000000d8ca000000000000c9060000
+
+	/* x^54336 mod p(x), x^54272 mod p(x) */
+	.octa 0x00000000c2d00000000000008f4f0000
+
+	/* x^53312 mod p(x), x^53248 mod p(x) */
+	.octa 0x00000000373200000000000028690000
+
+	/* x^52288 mod p(x), x^52224 mod p(x) */
+	.octa 0x0000000046ae000000000000c3b30000
+
+	/* x^51264 mod p(x), x^51200 mod p(x) */
+	.octa 0x00000000b243000000000000f8700000
+
+	/* x^50240 mod p(x), x^50176 mod p(x) */
+	.octa 0x00000000f7f500000000000029eb0000
+
+	/* x^49216 mod p(x), x^49152 mod p(x) */
+	.octa 0x000000000c7e000000000000fe730000
+
+	/* x^48192 mod p(x), x^48128 mod p(x) */
+	.octa 0x00000000c38200000000000096000000
+
+	/* x^47168 mod p(x), x^47104 mod p(x) */
+	.octa 0x000000008956000000000000683c0000
+
+	/* x^46144 mod p(x), x^46080 mod p(x) */
+	.octa 0x00000000422d0000000000005f1e0000
+
+	/* x^45120 mod p(x), x^45056 mod p(x) */
+	.octa 0x00000000ac0f0000000000006f810000
+
+	/* x^44096 mod p(x), x^44032 mod p(x) */
+	.octa 0x00000000ce30000000000000031f0000
+
+	/* x^43072 mod p(x), x^43008 mod p(x) */
+	.octa 0x000000003d43000000000000455a0000
+
+	/* x^42048 mod p(x), x^41984 mod p(x) */
+	.octa 0x000000007ebe000000000000a6050000
+
+	/* x^41024 mod p(x), x^40960 mod p(x) */
+	.octa 0x00000000976e00000000000077eb0000
+
+	/* x^40000 mod p(x), x^39936 mod p(x) */
+	.octa 0x000000000872000000000000389c0000
+
+	/* x^38976 mod p(x), x^38912 mod p(x) */
+	.octa 0x000000008979000000000000c7b20000
+
+	/* x^37952 mod p(x), x^37888 mod p(x) */
+	.octa 0x000000005c1e0000000000001d870000
+
+	/* x^36928 mod p(x), x^36864 mod p(x) */
+	.octa 0x00000000aebb00000000000045810000
+
+	/* x^35904 mod p(x), x^35840 mod p(x) */
+	.octa 0x000000004f7e0000000000006d4a0000
+
+	/* x^34880 mod p(x), x^34816 mod p(x) */
+	.octa 0x00000000ea98000000000000b9200000
+
+	/* x^33856 mod p(x), x^33792 mod p(x) */
+	.octa 0x00000000f39600000000000022f20000
+
+	/* x^32832 mod p(x), x^32768 mod p(x) */
+	.octa 0x000000000bc500000000000041ca0000
+
+	/* x^31808 mod p(x), x^31744 mod p(x) */
+	.octa 0x00000000786400000000000078500000
+
+	/* x^30784 mod p(x), x^30720 mod p(x) */
+	.octa 0x00000000be970000000000009e7e0000
+
+	/* x^29760 mod p(x), x^29696 mod p(x) */
+	.octa 0x00000000dd6d000000000000a53c0000
+
+	/* x^28736 mod p(x), x^28672 mod p(x) */
+	.octa 0x000000004c3f00000000000039340000
+
+	/* x^27712 mod p(x), x^27648 mod p(x) */
+	.octa 0x0000000093a4000000000000b58e0000
+
+	/* x^26688 mod p(x), x^26624 mod p(x) */
+	.octa 0x0000000050fb00000000000062d40000
+
+	/* x^25664 mod p(x), x^25600 mod p(x) */
+	.octa 0x00000000f505000000000000a26f0000
+
+	/* x^24640 mod p(x), x^24576 mod p(x) */
+	.octa 0x0000000064f900000000000065e60000
+
+	/* x^23616 mod p(x), x^23552 mod p(x) */
+	.octa 0x00000000e8c2000000000000aad90000
+
+	/* x^22592 mod p(x), x^22528 mod p(x) */
+	.octa 0x00000000720b000000000000a3b00000
+
+	/* x^21568 mod p(x), x^21504 mod p(x) */
+	.octa 0x00000000e992000000000000d2680000
+
+	/* x^20544 mod p(x), x^20480 mod p(x) */
+	.octa 0x000000009132000000000000cf4c0000
+
+	/* x^19520 mod p(x), x^19456 mod p(x) */
+	.octa 0x00000000608a00000000000076610000
+
+	/* x^18496 mod p(x), x^18432 mod p(x) */
+	.octa 0x000000009948000000000000fb9f0000
+
+	/* x^17472 mod p(x), x^17408 mod p(x) */
+	.octa 0x00000000173000000000000003770000
+
+	/* x^16448 mod p(x), x^16384 mod p(x) */
+	.octa 0x000000006fe300000000000004880000
+
+	/* x^15424 mod p(x), x^15360 mod p(x) */
+	.octa 0x00000000e15300000000000056a70000
+
+	/* x^14400 mod p(x), x^14336 mod p(x) */
+	.octa 0x0000000092d60000000000009dfd0000
+
+	/* x^13376 mod p(x), x^13312 mod p(x) */
+	.octa 0x0000000002fd00000000000074c80000
+
+	/* x^12352 mod p(x), x^12288 mod p(x) */
+	.octa 0x00000000c78b000000000000a3ec0000
+
+	/* x^11328 mod p(x), x^11264 mod p(x) */
+	.octa 0x000000009262000000000000b3530000
+
+	/* x^10304 mod p(x), x^10240 mod p(x) */
+	.octa 0x0000000084f200000000000047bf0000
+
+	/* x^9280 mod p(x), x^9216 mod p(x) */
+	.octa 0x0000000067ee000000000000e97c0000
+
+	/* x^8256 mod p(x), x^8192 mod p(x) */
+	.octa 0x00000000535b00000000000091e10000
+
+	/* x^7232 mod p(x), x^7168 mod p(x) */
+	.octa 0x000000007ebb00000000000055060000
+
+	/* x^6208 mod p(x), x^6144 mod p(x) */
+	.octa 0x00000000c6a1000000000000fd360000
+
+	/* x^5184 mod p(x), x^5120 mod p(x) */
+	.octa 0x000000001be500000000000055860000
+
+	/* x^4160 mod p(x), x^4096 mod p(x) */
+	.octa 0x00000000ae0e0000000000005bd00000
+
+	/* x^3136 mod p(x), x^3072 mod p(x) */
+	.octa 0x0000000022040000000000008db20000
+
+	/* x^2112 mod p(x), x^2048 mod p(x) */
+	.octa 0x00000000c9eb000000000000efe20000
+
+	/* x^1088 mod p(x), x^1024 mod p(x) */
+	.octa 0x0000000039b400000000000051d10000
+
+.short_constants:
+
+	/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */
+	/* x^2048 mod p(x), x^2016 mod p(x), x^1984 mod p(x), x^1952 mod p(x) */
+	.octa 0xefe20000dccf00009440000033590000
+
+	/* x^1920 mod p(x), x^1888 mod p(x), x^1856 mod p(x), x^1824 mod p(x) */
+	.octa 0xee6300002f3f000062180000e0ed0000
+
+	/* x^1792 mod p(x), x^1760 mod p(x), x^1728 mod p(x), x^1696 mod p(x) */
+	.octa 0xcf5f000017ef0000ccbe000023d30000
+
+	/* x^1664 mod p(x), x^1632 mod p(x), x^1600 mod p(x), x^1568 mod p(x) */
+	.octa 0x6d0c0000a30e00000920000042630000
+
+	/* x^1536 mod p(x), x^1504 mod p(x), x^1472 mod p(x), x^1440 mod p(x) */
+	.octa 0x21d30000932b0000a7a00000efcc0000
+
+	/* x^1408 mod p(x), x^1376 mod p(x), x^1344 mod p(x), x^1312 mod p(x) */
+	.octa 0x10be00000b310000666f00000d1c0000
+
+	/* x^1280 mod p(x), x^1248 mod p(x), x^1216 mod p(x), x^1184 mod p(x) */
+	.octa 0x1f240000ce9e0000caad0000589e0000
+
+	/* x^1152 mod p(x), x^1120 mod p(x), x^1088 mod p(x), x^1056 mod p(x) */
+	.octa 0x29610000d02b000039b400007cf50000
+
+	/* x^1024 mod p(x), x^992 mod p(x), x^960 mod p(x), x^928 mod p(x) */
+	.octa 0x51d100009d9d00003c0e0000bfd60000
+
+	/* x^896 mod p(x), x^864 mod p(x), x^832 mod p(x), x^800 mod p(x) */
+	.octa 0xda390000ceae000013830000713c0000
+
+	/* x^768 mod p(x), x^736 mod p(x), x^704 mod p(x), x^672 mod p(x) */
+	.octa 0xb67800001e16000085c0000080a60000
+
+	/* x^640 mod p(x), x^608 mod p(x), x^576 mod p(x), x^544 mod p(x) */
+	.octa 0x0db40000f7f90000371d0000e6580000
+
+	/* x^512 mod p(x), x^480 mod p(x), x^448 mod p(x), x^416 mod p(x) */
+	.octa 0x87e70000044c0000aadb0000a4970000
+
+	/* x^384 mod p(x), x^352 mod p(x), x^320 mod p(x), x^288 mod p(x) */
+	.octa 0x1f990000ad180000d8b30000e7b50000
+
+	/* x^256 mod p(x), x^224 mod p(x), x^192 mod p(x), x^160 mod p(x) */
+	.octa 0xbe6c00006ee300004c1a000006df0000
+
+	/* x^128 mod p(x), x^96 mod p(x), x^64 mod p(x), x^32 mod p(x) */
+	.octa 0xfb0b00002d560000136800008bb70000
+
+
+.barrett_constants:
+	/* Barrett constant m - (4^32)/n */
+	.octa 0x000000000000000000000001f65a57f8	/* x^64 div p(x) */
+	/* Barrett constant n */
+	.octa 0x0000000000000000000000018bb70000
+
+#define CRC_FUNCTION_NAME __crct10dif_vpmsum
+#include "crc32-vpmsum_core.S"
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
new file mode 100644
index 000000000000..3ab7ba159c20
--- /dev/null
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
@@ -0,0 +1,125 @@
+/*
+ * Calculate a CRC T10-DIF with vpmsum acceleration
+ *
+ * Copyright 2017, Daniel Axtens, IBM Corporation.
+ * [based on crc32c-vpmsum_glue.c]
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/crc-t10dif.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/switch_to.h>
+
+#define VMX_ALIGN		16
+#define VMX_ALIGN_MASK		(VMX_ALIGN-1)
+
+#define VECTOR_BREAKPOINT	64
+
+u32 __crct10dif_vpmsum(u32 crc, unsigned char const *p, size_t len);
+
+static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len)
+{
+	unsigned int prealign;
+	unsigned int tail;
+	u32 crc = crci;
+
+	if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt())
+		return crc_t10dif_generic(crc, p, len);
+
+	if ((unsigned long)p & VMX_ALIGN_MASK) {
+		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
+		crc = crc_t10dif_generic(crc, p, prealign);
+		len -= prealign;
+		p += prealign;
+	}
+
+	if (len & ~VMX_ALIGN_MASK) {
+		crc <<= 16;
+		pagefault_disable();
+		enable_kernel_altivec();
+		crc = __crct10dif_vpmsum(crc, p, len & ~VMX_ALIGN_MASK);
+		pagefault_enable();
+		crc >>= 16;
+	}
+
+	tail = len & VMX_ALIGN_MASK;
+	if (tail) {
+		p += len & ~VMX_ALIGN_MASK;
+		crc = crc_t10dif_generic(crc, p, tail);
+	}
+
+	return crc & 0xffff;
+}
+
+static int crct10dif_vpmsum_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+	return 0;
+}
+
+static int crct10dif_vpmsum_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = crct10dif_vpmsum(*crc, data, length);
+	
+	return 0;
+}
+
+
+static int crct10dif_vpmsum_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crcp = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crcp;
+	return 0;
+}
+
+static struct shash_alg alg = {
+	.init		= crct10dif_vpmsum_init,
+	.update		= crct10dif_vpmsum_update,
+	.final		= crct10dif_vpmsum_final,
+	.descsize	= CRC_T10DIF_DIGEST_SIZE,
+	.digestsize	= CRC_T10DIF_DIGEST_SIZE,
+	.base		= {
+		.cra_name		= "crct10dif",
+		.cra_driver_name	= "crct10dif-vpmsum",
+		.cra_priority		= 200,
+		.cra_blocksize		= CRC_T10DIF_BLOCK_SIZE,
+		.cra_module		= THIS_MODULE,
+	}
+};
+
+static int __init crct10dif_vpmsum_mod_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return crypto_register_shash(&alg);
+}
+
+static void __exit crct10dif_vpmsum_mod_fini(void)
+{
+	crypto_unregister_shash(&alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crct10dif_vpmsum_mod_init);
+module_exit(crct10dif_vpmsum_mod_fini);
+
+MODULE_AUTHOR("Daniel Axtens <dja at axtens.net>");
+MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_CRYPTO("crct10dif");
+MODULE_ALIAS_CRYPTO("crct10dif-vpmsum");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index f37e9cca50e1..9cf63dd84364 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -513,6 +513,15 @@ config CRYPTO_CRCT10DIF_PCLMUL
 	  'crct10dif-plcmul' module, which is faster when computing the
 	  crct10dif checksum as compared with the generic table implementation.
 
+config CRYPTO_CRCT10DIF_VPMSUM
+	tristate "CRC32T10DIF powerpc64 hardware acceleration"
+	depends on PPC64 && ALTIVEC && CRC_T10DIF
+	select CRYPTO_HASH
+	help
+	  CRC10T10DIF algorithm implemented using vector polynomial
+	  multiply-sum (vpmsum) instructions, introduced in POWER8. Enable on
+	  POWER8 and newer processors for improved performance.
+
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
-- 
2.9.3



More information about the Linuxppc-dev mailing list