[PATCH 03/11] crypto: add software 842 decompression

Wed Apr 8 03:34:22 AEST 2015

Add an 842-format software decompression function.  Update the MAINTAINERS
842 section to include the new files.

This decompression function can decompress any standard-format 842
compressed data.  The 842 compressed format is explained in the header
comments.  This general-use decompression function is required by later
patches that update the crypto 842 driver to fall back to software 842
decompression if the NX-842 hardware fails and/or returns an error.

Signed-off-by: Dan Streetman <ddstreet at ieee.org>
---
 MAINTAINERS              |   2 +
 include/linux/sw842.h    |   7 +
 lib/842/842_decompress.c | 413 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/842/Makefile         |   1 +
 lib/Kconfig              |   3 +
 lib/Makefile             |   1 +
 6 files changed, 427 insertions(+)
 create mode 100644 include/linux/sw842.h
 create mode 100644 lib/842/842_decompress.c
 create mode 100644 lib/842/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index efbcb50..3dc973a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4836,6 +4836,8 @@ M:	Dan Streetman <ddstreet at us.ibm.com>
 S:	Supported
 F:	drivers/crypto/nx/nx-842.c
 F:	include/linux/nx842.h
+F:	include/linux/sw842.h
+F:	lib/842/
 
 IBM Power Linux RAID adapter
 M:	Brian King <brking at us.ibm.com>
diff --git a/include/linux/sw842.h b/include/linux/sw842.h
new file mode 100644
index 0000000..aa8d86e
--- /dev/null
+++ b/include/linux/sw842.h
@@ -0,0 +1,7 @@
+#ifndef __SW842_H__
+#define __SW842_H__
+/* Decompress 842 data; *destlen is the dst size on entry, bytes written on return */
+int sw842_decompress(const unsigned char *src, int srclen,
+			unsigned char *dst, int *destlen);
+
+#endif /* __SW842_H__ */
diff --git a/lib/842/842_decompress.c b/lib/842/842_decompress.c
new file mode 100644
index 0000000..9fc0ffc
--- /dev/null
+++ b/lib/842/842_decompress.c
@@ -0,0 +1,413 @@
+/*
+ * 842 Decompressor
+ *
+ * Copyright (C) 2015 Dan Streetman, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The 842 compressed format is made up of multiple blocks, each of
+ * which has the format:
+ *
+ * <template>[arg1][arg2][arg3][arg4]
+ *
+ * where there are between 0 and 4 template args, depending on the specific
+ * template operation.  For normal operations, each arg is either a specific
+ * number of data bytes to add to the output stream, or an index pointing
+ * to a previously-written number of data bytes to copy to the output stream.
+ *
+ * The template code is a 5-bit value.  This code indicates what to
+ * do with the following data.  Template codes from 0 to 0x19 should
+ * use the template table, the static "ops" table in the code below.
+ * For each template (table row), there are between 1 and 4 actions;
+ * each action corresponds to an arg following the template code
+ * bits.  Each action is either a "data" type action, or a "index"
+ * type action, and each action results in 2, 4, or 8 bytes being
+ * written to the output stream.  Each template (i.e. all actions in
+ * the table row) will add up to 8 bytes being written to the output
+ * stream.  Any row with less than 4 actions is padded with noop
+ * actions, indicated by N0 (for which there is no corresponding arg
+ * in the compressed data stream).
+ *
+ * "Data" actions, indicated in the table by D2, D4, and D8, mean that
+ * the corresponding arg, which is 2, 4, or 8 bytes, respectively, in the
+ * compressed data stream, should be copied directly to the output stream.
+ *
+ * "Index" actions, indicated in the table by I2, I4, and I8, mean
+ * the corresponding arg is an index parameter that points to,
+ * respectively, a 2, 4, or 8 byte value already in the output
+ * stream, that should be copied to the end of the output stream.
+ * Essentially, the index points to a position in a ring buffer that
+ * contains the last N bytes of output stream data.  The number of bits
+ * for each index's arg are: 8 bits for I2, 9 bits for I4, and 8 bits for
+ * I8.  Since each index points to a 2, 4, or 8 byte section, this means
+ * that I2 can reference 512 bytes ((2^8 bits = 256) * 2 bytes), I4 can
+ * reference 2048 bytes ((2^9 = 512) * 4 bytes), and I8 can reference
+ * 2048 bytes ((2^8 = 256) * 8 bytes).  Think of it as a dedicated ring
+ * buffer for each of I2, I4, and I8 that are updated for each byte
+ * written to the output stream.  In this implementation, the output stream
+ * is directly used for each index; there is no additional memory required.
+ * Note that the index is into a ring buffer, not a sliding window;
+ * for example, if there have been 516 bytes written to the output stream,
+ * an I2 index of 0 would index to byte 512 in the output stream, while
+ * an I2 index of 16 would index to byte 32 in the output stream.
+ *
+ * There are also 3 special template codes; 0x1b for "repeat", 0x1c for
+ * "zeros", and 0x1e for "end".  The "repeat" operation is followed by
+ * a 6 bit arg N indicating how many times to repeat.  The last 8
+ * bytes written to the output stream are written again to the output
+ * stream, N + 1 times.  The "zeros" operation, which has no arg bits,
+ * writes 8 zeros to the output stream.  The "end" operation, which also
+ * has no arg bits, signals the end of the compressed data.  There may
+ * be some number of padding (don't care, but usually 0) bits after
+ * the "end" operation bits, to fill the stream length to a specific
+ * byte multiple (usually a multiple of 8, 16, or 32 bytes).
+ *
+ * After all actions for each operation code are processed, another
+ * template code is in the next 5 bits.  The decompression ends
+ * once the "end" template code is detected.
+ */
+
+#ifndef STATIC
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+
+#include <linux/sw842.h>
+
+/* special template codes, handled outside of the ops[] table */
+#define OP_REPEAT	(0x1B)
+#define OP_ZEROS	(0x1C)
+#define OP_END		(0x1E)
+
+/* bit widths: the template code itself, then each kind of arg */
+#define OP_BITS		(5)
+#define REPEAT_BITS	(6)
+#define I2_BITS		(8)
+#define I4_BITS		(9)
+#define I8_BITS		(8)
+
+/* rolling fifo sizes in bytes: (2^index bits) entries * entry size */
+#define I2_FIFO_SIZE	(512)
+#define I4_FIFO_SIZE	(2048)
+#define I8_FIFO_SIZE	(2048)
+
+/* Arbitrary values packed into each ops[] entry: action | byte amount */
+#define OP_ACTION	(0x30)
+#define OP_ACTION_NOOP	(0x00)
+#define OP_ACTION_DATA	(0x10)
+#define OP_ACTION_INDEX	(0x20)
+#define OP_AMOUNT	(0x0f)
+#define OP_AMOUNT_0	(0x00)
+#define OP_AMOUNT_2	(0x02)
+#define OP_AMOUNT_4	(0x04)
+#define OP_AMOUNT_8	(0x08)
+
+#define D2		(OP_ACTION_DATA  | OP_AMOUNT_2)
+#define D4		(OP_ACTION_DATA  | OP_AMOUNT_4)
+#define D8		(OP_ACTION_DATA  | OP_AMOUNT_8)
+#define I2		(OP_ACTION_INDEX | OP_AMOUNT_2)
+#define I4		(OP_ACTION_INDEX | OP_AMOUNT_4)
+#define I8		(OP_ACTION_INDEX | OP_AMOUNT_8)
+#define N0		(OP_ACTION_NOOP  | OP_AMOUNT_0)
+
+#define OPS_MAX		(0x19)
+/* Template table, indexed by template code; read-only, each row totals 8 bytes */
+static const u8 ops[OPS_MAX + 1][4] = {
+	{ D8, N0, N0, N0 },
+	{ D4, D2, I2, N0 },
+	{ D4, I2, D2, N0 },
+	{ D4, I2, I2, N0 },
+	{ D4, I4, N0, N0 },
+	{ D2, I2, D4, N0 },
+	{ D2, I2, D2, I2 },
+	{ D2, I2, I2, D2 },
+	{ D2, I2, I2, I2 },
+	{ D2, I2, I4, N0 },
+	{ I2, D2, D4, N0 },
+	{ I2, D4, I2, N0 },
+	{ I2, D2, I2, D2 },
+	{ I2, D2, I2, I2 },
+	{ I2, D2, I4, N0 },
+	{ I2, I2, D4, N0 },
+	{ I2, I2, D2, I2 },
+	{ I2, I2, I2, D2 },
+	{ I2, I2, I2, I2 },
+	{ I2, I2, I4, N0 },
+	{ I4, D4, N0, N0 },
+	{ I4, D2, I2, N0 },
+	{ I4, I2, D2, N0 },
+	{ I4, I2, I2, N0 },
+	{ I4, I4, N0, N0 },
+	{ I8, N0, N0, N0 }
+};
+
+struct sw842_param {
+	u8 *in;		/* next input byte to read */
+	int bit;	/* bit offset into *in, 0 (MSB) to 7 */
+	int ilen;	/* input bytes remaining, counting *in */
+	u8 *out;	/* next output byte to write */
+	u8 *ostart;	/* start of the output buffer */
+	int olen;	/* output bytes still available */
+};
+
+/**
+ * Get the next specified bits, up to 57 bits
+ *
+ * This also advances the input byte and bit positions and reduces the
+ * remaining input length.  At most 57 bits can be returned: in the worst
+ * case the starting bit offset is 7, and 7 + 57 = 64 bits is all that
+ * fits in the u64 accumulator.
+ *
+ * The value is assembled one byte at a time, so only bytes covered by the
+ * length check are read, and no alignment of the input is required.
+ *
+ * Returns: the value of the requested bits, or negative errno on failure
+ * (-EINVAL for a bad @n or bit position, -EOVERFLOW if the input is short)
+ */
+static s64 next_bits(struct sw842_param *p, int n)
+{
+	u64 v = 0;
+	u8 *in = p->in;
+	int b = p->bit, bits = b + n, nbytes, i;
+
+	if (b < 0 || b > 7 || n < 0 || n > 57) {
+		WARN(1, "b %d n %d\n", b, n);
+		return -EINVAL;
+	}
+
+	nbytes = DIV_ROUND_UP(bits, 8);
+	if (nbytes > p->ilen)
+		return -EOVERFLOW;
+
+	/* big-endian accumulate, then drop the unused trailing bits */
+	for (i = 0; i < nbytes; i++)
+		v = (v << 8) | in[i];
+	v >>= nbytes * 8 - bits;
+
+	p->bit += n;
+
+	p->in += p->bit / 8;
+	p->ilen -= p->bit / 8;
+	p->bit %= 8;
+
+	/* 64-bit mask: "1 << n" is undefined for n >= 31 (n is 32 for D4) */
+	return (s64)(v & ((1ULL << n) - 1));
+}
+
+/* Copy @n (2 or 4) literal bytes from the input bit stream to the output */
+static int __do_data(struct sw842_param *p, int n)
+{
+	s64 v;
+	int i;
+
+	/* reject bad sizes and a full output before consuming any input */
+	if ((n != 2 && n != 4) || n > p->olen)
+		return -EINVAL;
+
+	v = next_bits(p, n * 8);
+	if (v < 0)
+		return -EINVAL;
+
+	/* store MSB first, bytewise: p->out has no alignment guarantee */
+	for (i = 0; i < n; i++)
+		p->out[i] = (u8)(v >> (8 * (n - 1 - i)));
+	p->out += n;
+	p->olen -= n;
+
+	return 0;
+}
+
+/*
+ * Copy @n literal bytes from the compressed stream to the output.
+ *
+ * @n must be 2, 4 or 8.  An 8-byte copy is done as two 4-byte chunks
+ * because next_bits() can't do a full 64 bits.
+ *
+ * Returns: 0 on success, -EINVAL on a bad @n or if any chunk fails
+ */
+static int do_data(struct sw842_param *p, int n)
+{
+	int chunk, left;
+
+	if (n != 2 && n != 4 && n != 8)
+		return -EINVAL;
+
+	chunk = (n == 2) ? 2 : 4;
+
+	/* one pass for 2 or 4 bytes, two passes for 8 */
+	for (left = n; left > 0; left -= chunk)
+		if (__do_data(p, chunk))
+			return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Append @size bytes of earlier output, selected by a @bits wide index into
+ * a ring buffer of @fsize bytes.  Returns 0, -ENOSPC if the output is full,
+ * or -EINVAL if the index can't be read or points past the data written.
+ */
+static int __do_index(struct sw842_param *p, int size, int bits, int fsize)
+{
+	s64 index;
+	u64 offset;
+	int sec, total = (int)(p->out - p->ostart);
+
+	/* the memcpy() below must not run past the end of the output */
+	if (size > p->olen)
+		return -ENOSPC;
+
+	index = next_bits(p, bits);
+	if (index < 0)
+		return -EINVAL;
+	offset = index * size;
+
+	if (total > fsize) {
+		/* start of the ring section currently being filled */
+		sec = (total / fsize) * fsize;
+		/* at/past the fill position: use the previous section */
+		if (offset >= total % fsize)
+			sec -= fsize;
+		offset += sec;
+	}
+	if (offset + size > total)
+		return -EINVAL;
+
+	memcpy(p->out, &p->ostart[offset], size);
+	p->out += size;
+	p->olen -= size;
+	return 0;
+}
+
+/* Run one index action, copying @n (2, 4 or 8) bytes of earlier output
+ * with the matching index width and fifo size; -EINVAL for other @n
+ */
+static int do_index(struct sw842_param *p, int n)
+{
+	if (n == 2)
+		return __do_index(p, 2, I2_BITS, I2_FIFO_SIZE);
+	if (n == 4)
+		return __do_index(p, 4, I4_BITS, I4_FIFO_SIZE);
+	if (n == 8)
+		return __do_index(p, 8, I8_BITS, I8_FIFO_SIZE);
+	return -EINVAL;
+}
+
+/*
+ * Run the actions of template @o; each ops[] row writes 8 bytes in total.
+ * Returns 0, -ENOSPC if under 8 output bytes remain, else -EINVAL.
+ */
+static int do_op(struct sw842_param *p, int o)
+{
+	int i, err;
+	u8 op, n;
+
+	if (o < 0 || o > OPS_MAX)
+		return -EINVAL;
+	if (p->olen < 8)
+		return -ENOSPC;
+
+	for (i = 0; i < 4; i++) {
+		op = ops[o][i];
+		n = op & OP_AMOUNT;
+
+		if ((op & OP_ACTION) == OP_ACTION_DATA)
+			err = do_data(p, n);
+		else if ((op & OP_ACTION) == OP_ACTION_INDEX)
+			err = do_index(p, n);
+		else /* OP_ACTION_NOOP pads the row; other values are invalid */
+			err = (op & OP_ACTION) != OP_ACTION_NOOP;
+		if (err)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * sw842_decompress
+ *
+ * Decompress the 842-compressed buffer of length @len at @in
+ * to the output buffer @out, which has room for *@olen bytes.
+ *
+ * The compressed buffer must be only a single 842-compressed buffer,
+ * with the standard format described in the comments at the top of
+ * this file.  Processing will stop when the 842 "END" template is
+ * detected, not the end of the buffer.
+ *
+ * Returns: 0 on success, negative errno on failure.  The @olen
+ * parameter will contain the number of output bytes written on
+ * success, or 0 on error.
+ */
+int sw842_decompress(const unsigned char *in, int len,
+		     unsigned char *out, int *olen)
+{
+	struct sw842_param p;
+	int op, rep, ret, total = *olen;
+
+	p.in = (unsigned char *)in;
+	p.bit = 0;
+	p.ilen = len;
+	p.out = out;
+	p.ostart = out;
+	p.olen = *olen;
+
+	*olen = 0;
+
+	while ((op = (int)next_bits(&p, OP_BITS)) != OP_END) {
+		if (op < 0)
+			return op;
+
+		if (op == OP_REPEAT) {
+			rep = (int)next_bits(&p, REPEAT_BITS);
+			if (rep < 0)
+				return rep;
+
+			/* need 8 complete previous bytes to repeat */
+			if (p.out - out < 8)
+				return -EINVAL;
+
+			rep++; /* copy rep + 1 */
+			if (rep * 8 > p.olen)
+				return -ENOSPC;
+
+			while (rep-- > 0) {
+				memcpy(p.out, p.out - 8, 8);
+				p.out += 8;
+				p.olen -= 8;
+			}
+		} else if (op == OP_ZEROS) {
+			if (8 > p.olen)
+				return -ENOSPC;
+
+			memset(p.out, 0, 8);
+			p.out += 8;
+			p.olen -= 8;
+		} else { /* use template */
+			/* pass the helper's error code through unchanged */
+			ret = do_op(&p, op);
+			if (ret)
+				return ret;
+		}
+	}
+
+	*olen = total - p.olen;
+
+	return 0;
+}
+#ifndef STATIC /* same guard as the kernel #includes at the top of this file */
+EXPORT_SYMBOL_GPL(sw842_decompress);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Software 842 Decompressor");
+MODULE_AUTHOR("Dan Streetman <ddstreet at ieee.org>");
+
+#endif /* !STATIC */
diff --git a/lib/842/Makefile b/lib/842/Makefile
new file mode 100644
index 0000000..8071c3f
--- /dev/null
+++ b/lib/842/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_842_DECOMPRESS) += 842_decompress.o
diff --git a/lib/Kconfig b/lib/Kconfig
index 87da53b..78aee1f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -213,6 +213,9 @@ config RANDOM32_SELFTEST
 #
 # compression support is select'ed if needed
 #
+config 842_DECOMPRESS
+	tristate
+
 config ZLIB_INFLATE
 	tristate
 
diff --git a/lib/Makefile b/lib/Makefile
index 58f74d2..88b144e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -78,6 +78,7 @@ obj-$(CONFIG_LIBCRC32C)	+= libcrc32c.o
 obj-$(CONFIG_CRC8)	+= crc8.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
+obj-$(CONFIG_842_DECOMPRESS) += 842/
 obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
 obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
 obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
-- 
2.1.0