[PATCH v3 1/4] erofs-utils: add a built-in DEFLATE compressor

Gao Xiang hsiangkao at linux.alibaba.com
Mon Jul 10 21:02:48 AEST 2023


As Apple documentation written "If you require interoperability with
non-Apple devices, use COMPRESSION_ZLIB. [1]", DEFLATE is a popular
generic-purpose compression algorithm for a quite long time (many
advanced formats like zlib, gzip, zip, png are all based on that),
which is made of LZ77 as wells as Huffman code, fully documented as
RFC1951 [2] and quite easy to understand, implement.

There are several hardware on-market DEFLATE accelerators as well,
such as (s390) DFLTCC, (Intel) IAA/QAT, (HiSilicon) ZIP accelerator,
etc.  Therefore, it's useful to support DEFLATE compression in order
to use these for async I/Os and get benefits from these.

Since there is _no fixed-sized output DEFLATE compression appoach_
available in public (fitblk is somewhat ineffective) and the original
zlib is quite slowly developping, let's work out one for our use cases.
Fortunately, it's only less than 1.5kLOC with lazy matching to just
match the full zlib abilities.  Besides, near-optimal block splitting
(based on price function) doesn't support since it's no rush to us.

In the future, there might be more built-in optimizers landed to
fulfill our needs even further (especially for other popular algorithms
without native fixed-sized output support).  In addition, I'd be quite
happy to see more popular encoders to support native fixed-sized output
compression too.

[1] https://developer.apple.com/documentation/compression/compression_algorithm
[2] https://datatracker.ietf.org/doc/html/rfc1951
Signed-off-by: Gao Xiang <hsiangkao at linux.alibaba.com>
---
 lib/Makefile.am    |    2 +
 lib/kite_deflate.c | 1270 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1272 insertions(+)
 create mode 100644 lib/kite_deflate.c

diff --git a/lib/Makefile.am b/lib/Makefile.am
index e243c1c..b729098 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -43,3 +43,5 @@ if ENABLE_LIBLZMA
 liberofs_la_CFLAGS += ${liblzma_CFLAGS}
 liberofs_la_SOURCES += compressor_liblzma.c
 endif
+
+liberofs_la_SOURCES += kite_deflate.c
diff --git a/lib/kite_deflate.c b/lib/kite_deflate.c
new file mode 100644
index 0000000..f5bb2fd
--- /dev/null
+++ b/lib/kite_deflate.c
@@ -0,0 +1,1270 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+/*
+ * erofs-utils/lib/kite_deflate.c
+ *
+ * Copyright (C) 2023, Alibaba Cloud
+ * Copyright (C) 2023, Gao Xiang <xiang at kernel.org>
+ */
+#include "erofs/defs.h"
+#include "erofs/print.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+
+unsigned long erofs_memcmp2(const u8 *s1, const u8 *s2,
+			    unsigned long sz);
+
+#ifdef TEST
+#define kite_dbg(x, ...)	fprintf(stderr, x "\n", ##__VA_ARGS__)
+#else
+#define kite_dbg(x, ...)
+#endif
+
+#define kHistorySize32		(1U << 15)
+
+#define kNumLenSymbols32	256
+#define kNumLenSymbolsMax	kNumLenSymbols32
+
+#define kSymbolEndOfBlock	256
+#define kSymbolMatch		(kSymbolEndOfBlock + 1)
+#define kNumLenSlots		29
+#define kMainTableSize		(kSymbolMatch + kNumLenSlots)
+
+#define kFixedLenTableSize	(kSymbolMatch + 31)
+#define FixedDistTableSize	32
+
+#define kMainTableSize		(kSymbolMatch + kNumLenSlots)
+#define kDistTableSize32	30
+
+#define kNumLitLenCodesMin	257
+#define kNumDistCodesMin	1
+
+#define kNumLensCodesMin	4
+#define kLensTableSize		19
+
+#define kMatchMinLen		3
+#define kMatchMaxLen32		kNumLenSymbols32 + kMatchMinLen - 1
+
+static u32 kstaticHuff_mainCodes[kFixedLenTableSize];
+static const u8 kstaticHuff_litLenLevels[kFixedLenTableSize] = {
+	[0   ... 143] = 8, [144 ... 255] = 9,
+	[256 ... 279] = 7, [280 ... 287] = 8,
+};
+static u32 kstaticHuff_distCodes[kFixedLenTableSize];
+
+const u8 kLenStart32[kNumLenSlots] =
+	{0,1,2,3,4,5,6,7,8,10,12,14,16,20,24,28,32,40,48,56,64,80,96,112,128,160,192,224, 255};
+
+const u8 kLenExtraBits32[kNumLenSlots] =
+	{0,0,0,0,0,0,0,0,1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4,  4,  5,
+	 5,  5,  5, 0};
+
+/* First normalized distance for each code (0 = distance of 1) */
+const u32 kDistStart[kDistTableSize32] =
+	{0,1,2,3,4,6,8,12,16,24,32,48,64,96,128,192,256,384,512,768,
+	 1024,1536,2048,3072,4096,6144,8192,12288,16384,24576};
+
+/* extra bits for each distance code */
+const u8 kDistExtraBits[kDistTableSize32] =
+	{0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+const u8 kCodeLengthAlphabetOrder[kLensTableSize] =
+	{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+const u8 kLevelExtraBits[3] = {2, 3, 7};
+
+const unsigned int kTableDirectLevels = 16;
+const unsigned int kBitLensRepNumber_3_6 = kTableDirectLevels;
+const unsigned int kBitLens0Number_3_10 = kBitLensRepNumber_3_6 + 1;
+const unsigned int kBitLens0Number_11_138 = kBitLens0Number_3_10 + 1;
+
+#define kStored			0
+#define kFixedHuffman		1
+#define kDynamicHuffman		2
+
+struct kite_deflate_symbol {
+	u16 len, dist;
+};
+
+struct kite_deflate_table {
+	u32  mainCodes[kMainTableSize];
+	u8   litLenLevels[kMainTableSize];
+	u32  distCodes[kDistTableSize32];
+	u8   distLevels[kDistTableSize32];
+	u32  levelCodes[kLensTableSize];
+	u8   levelLens[kLensTableSize];
+
+	u8   numdistlens, numblcodes;
+	u16  numlitlens;
+};
+
+struct kite_deflate {
+	struct kite_deflate_table  *tab;
+	const u8   *in;
+	u8         *out;
+
+	u32  inlen, outlen;
+	u32  pos_in, pos_out;
+	u32  inflightbits;
+	u8   bitpos;
+	u8   numHuffBits;
+	u32  symbols;
+
+	u32  costbits, startpos;
+	u8   encode_mode;
+	bool freq_changed, lastblock;
+
+	/* Previous match for lazy matching */
+	bool prev_valid;
+	u16 prev_longest;
+
+	u32  mainFreqs[kMainTableSize];
+	u32  distFreqs[kDistTableSize32];
+	struct kite_deflate_table tables[2];
+
+	/* don't reset the following fields */
+	struct kite_matchfinder *mf;
+	struct kite_deflate_symbol *sym;
+	u32 max_symbols;
+	bool lazy_search;
+};
+
+#define ZLIB_DISTANCE_TOO_FAR	4096
+
+static u8 g_LenSlots[kNumLenSymbolsMax];
+
+#define kNumLogBits 9		// do not change it
+static u8 g_FastPos[1 << kNumLogBits];
+
+static void writebits(struct kite_deflate *s, unsigned int v, u8 bits)
+{
+	unsigned int rem = sizeof(s->inflightbits) * 8 - s->bitpos;
+
+	s->inflightbits |= (v << s->bitpos) & (!rem - 1);
+	if (bits > rem) {
+		u8 *out = s->out + s->pos_out;
+
+		out[0] = s->inflightbits & 0xff;
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		out[2] = (s->inflightbits >> 16) & 0xff;
+		out[3] = (s->inflightbits >> 24) & 0xff;
+		s->pos_out += 4;
+		DBG_BUGON(s->pos_out > s->outlen);
+		s->inflightbits = v >> rem;
+		s->bitpos = bits - rem;
+		return;
+	}
+	s->bitpos += bits;
+}
+
+static void flushbits(struct kite_deflate *s)
+{
+	u8 *out = s->out + s->pos_out;
+
+	if (!s->bitpos)
+		return;
+	out[0] = s->inflightbits & 0xff;
+	if (s->bitpos >= 8) {
+		out[1] = (s->inflightbits >> 8) & 0xff;
+		if (s->bitpos >= 16) {
+			out[2] = (s->inflightbits >> 16) & 0xff;
+			if (s->bitpos >= 24)
+				out[3] = (s->inflightbits >> 24) & 0xff;
+		}
+	}
+	s->pos_out += round_up(s->bitpos, 8) >> 3;
+	DBG_BUGON(s->pos_out > s->outlen);
+	s->bitpos = 0;
+	s->inflightbits = 0;
+}
+
+#define kMaxLen 16
+
+static void deflate_genhuffcodes(const u8 *lens, u32 *p, unsigned int nr_codes,
+				 const u32 *bl_count)
+{
+	u32 nextCodes[kMaxLen + 1];	/* next code value for each bit length */
+	unsigned int code = 0;		/* running code value */
+	unsigned int bits, k;
+
+	for (bits = 1; bits <= kMaxLen; ++bits) {
+		code = (code + bl_count[bits - 1]) << 1;
+		nextCodes[bits] = code;
+	}
+
+	DBG_BUGON(code + bl_count[kMaxLen] != 1 << kMaxLen);
+
+	for (k = 0; k < nr_codes; ++k)
+		p[k] = nextCodes[lens[k]]++;
+}
+
+static u32 deflate_reversebits_one(u32 code, u8 bits)
+{
+	unsigned int x = code;
+
+	x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
+	x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
+	x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
+
+	return (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - bits);
+}
+
+static void Huffman_ReverseBits(u32 *codes, const u8 *lens, unsigned int n)
+{
+	while (n) {
+		u32 code = *codes;
+
+		*codes++ = deflate_reversebits_one(code, *lens++);
+		--n;
+	}
+}
+
+static void kite_deflate_init_once(void)
+{
+	static const u32 static_bl_count[kMaxLen + 1] = {
+		[7] = 279 - 256 + 1,
+		[8] = (143 + 1) + (287 - 280 + 1),
+		[9] = 255 - 144 + 1,
+	};
+	unsigned int i, c, j, k;
+
+	if (kstaticHuff_distCodes[31])
+		return;
+	deflate_genhuffcodes(kstaticHuff_litLenLevels, kstaticHuff_mainCodes,
+			     kFixedLenTableSize, static_bl_count);
+	Huffman_ReverseBits(kstaticHuff_mainCodes, kstaticHuff_litLenLevels,
+			    kFixedLenTableSize);
+
+	for (i = 0; i < ARRAY_SIZE(kstaticHuff_distCodes); ++i)
+		kstaticHuff_distCodes[i] = deflate_reversebits_one(i, 5);
+
+	for (i = 0; i < kNumLenSlots; i++) {
+		c = kLenStart32[i];
+		j = 1 << kLenExtraBits32[i];
+
+		for (k = 0; k < j; k++, c++)
+			g_LenSlots[c] = (u8)i;
+	}
+
+	c = 0;
+	for (i = 0; i < /*kFastSlots*/ kNumLogBits * 2; i++) {
+		k = 1 << kDistExtraBits[i];
+		for (j = 0; j < k; j++)
+			g_FastPos[c++] = i;
+	}
+}
+
+static void kite_deflate_scanlens(unsigned int numlens, u8 *lens, u32 *freqs)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			freqs[curlen] += count;
+		} else if (curlen != 0) {
+			if (curlen != prevlen)
+				freqs[curlen]++;
+			freqs[kBitLensRepNumber_3_6]++;
+		} else if (count <= 10) {
+			freqs[kBitLens0Number_3_10]++;
+		} else {
+			freqs[kBitLens0Number_11_138]++;
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_sendtree(struct kite_deflate *s, const u8 *lens,
+				  unsigned int numlens)
+{
+	int n;				/* iterates over all tree elements */
+	int prevlen = -1;		/* last emitted length */
+	int curlen;			/* length of current code */
+	int nextlen = lens[0];		/* length of next code */
+	int count = 0;			/* repeat count of the current code */
+	int max_count = 7;		/* max repeat count */
+	int min_count = 4;		/* min repeat count */
+	const u8 *bl_lens = s->tab->levelLens;
+	const u32 *bl_codes = s->tab->levelCodes;
+
+	if (!nextlen)
+		max_count = 138, min_count = 3;
+
+	for (n = 0; n < numlens; n++) {
+		curlen = nextlen;
+		nextlen = n + 1 < numlens ? lens[n + 1] : -1;
+		++count;
+
+		if (count < max_count && curlen == nextlen)
+			continue;
+		if (count < min_count) {
+			do {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+			} while (--count);
+		} else if (curlen) {
+			if (curlen != prevlen) {
+				writebits(s, bl_codes[curlen], bl_lens[curlen]);
+				count--;
+			}
+			writebits(s, bl_codes[kBitLensRepNumber_3_6],
+				  bl_lens[kBitLensRepNumber_3_6]);
+			writebits(s, count - 3, 2);
+		} else if (count <= 10) {
+			writebits(s, bl_codes[kBitLens0Number_3_10],
+				  bl_lens[kBitLens0Number_3_10]);
+			writebits(s, count - 3, 3);
+		} else {
+			writebits(s, bl_codes[kBitLens0Number_11_138],
+				  bl_lens[kBitLens0Number_11_138]);
+			writebits(s, count - 11, 7);
+		}
+
+		count = 0;
+		prevlen = curlen;
+		if (!nextlen)
+			max_count = 138, min_count = 3;
+		else if (curlen == nextlen)
+			max_count = 6, min_count = 3;
+		else
+			max_count = 7, min_count = 4;
+	}
+}
+
+static void kite_deflate_setfixedtrees(struct kite_deflate *s)
+{
+	writebits(s, (kFixedHuffman << 1) + s->lastblock, 3);
+}
+
+static void kite_deflate_sendtrees(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int i;
+
+	writebits(s, (kDynamicHuffman << 1) + s->lastblock, 3);
+	writebits(s, t->numlitlens - kNumLitLenCodesMin, 5);
+	writebits(s, t->numdistlens - kNumDistCodesMin,  5);
+	writebits(s, t->numblcodes - kNumLensCodesMin,   4);
+
+	for (i = 0; i < t->numblcodes; i++)
+		writebits(s, t->levelLens[kCodeLengthAlphabetOrder[i]], 3);
+
+	Huffman_ReverseBits(t->levelCodes, t->levelLens, kLensTableSize);
+	kite_deflate_sendtree(s, t->litLenLevels, t->numlitlens);
+	kite_deflate_sendtree(s, t->distLevels, t->numdistlens);
+}
+
+static inline unsigned int deflateDistSlot(unsigned int pos)
+{
+	const unsigned int zz = (kNumLogBits - 1) &
+		((((1U << kNumLogBits) - 1) - pos) >> (31 - 3));
+
+	return g_FastPos[pos >> zz] + (zz * 2);
+}
+
+static void kite_deflate_writeblock(struct kite_deflate *s, bool fixed)
+{
+	int i;
+	u32 *mainCodes, *distCodes;
+	const u8 *litLenLevels, *distLevels;
+
+	if (!fixed) {
+		struct kite_deflate_table *t = s->tab;
+
+		mainCodes = t->mainCodes; distCodes = t->distCodes;
+		litLenLevels = t->litLenLevels;	distLevels = t->distLevels;
+
+		Huffman_ReverseBits(mainCodes, litLenLevels, kMainTableSize);
+		Huffman_ReverseBits(distCodes, distLevels, kDistTableSize32);
+	} else {
+		mainCodes = kstaticHuff_mainCodes;
+		distCodes = kstaticHuff_distCodes;
+
+		litLenLevels = kstaticHuff_litLenLevels;
+	}
+
+	for (i = 0; i < s->symbols; ++i) {
+		struct kite_deflate_symbol *sym = &s->sym[i];
+
+		if (sym->len < kMatchMinLen) {		/* literal */
+			writebits(s, mainCodes[sym->dist],
+				  litLenLevels[sym->dist]);
+		} else {
+			unsigned int lenSlot, distSlot;
+			unsigned int lc = sym->len - kMatchMinLen;
+
+			lenSlot = g_LenSlots[lc];
+			writebits(s, mainCodes[kSymbolMatch + lenSlot],
+				  litLenLevels[kSymbolMatch + lenSlot]);
+			writebits(s, lc - kLenStart32[lenSlot],
+				  kLenExtraBits32[lenSlot]);
+
+			distSlot = deflateDistSlot(sym->dist - 1);
+			writebits(s, distCodes[distSlot],
+				  fixed ? 5 : distLevels[distSlot]);
+			writebits(s, sym->dist - 1 - kDistStart[distSlot],
+				  kDistExtraBits[distSlot]);
+		}
+	}
+	writebits(s, mainCodes[kSymbolEndOfBlock],
+		  litLenLevels[kSymbolEndOfBlock]);
+}
+
+static u32 Huffman_GetPrice(const u32 *freqs, const u8 *lens, u32 num)
+{
+	u32 price = 0;
+
+	while (num) {
+		price += (*lens++) * (*freqs++);
+		--num;
+	}
+	return price;
+}
+
+static u32 Huffman_GetPriceEx(const u32 *freqs, const u8 *lens, u32 num,
+			      const u8 *extraBits, u32 extraBase)
+{
+	return Huffman_GetPrice(freqs, lens, num) +
+		Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
+}
+
+/* Adapted from C/HuffEnc.c (7zip) for now */
+#define HeapSortDown(p, k, size, temp) \
+  { for (;;) { \
+    size_t s = (k << 1); \
+    if (s > size) break; \
+    if (s < size && p[s + 1] > p[s]) s++; \
+    if (temp >= p[s]) break; \
+    p[k] = p[s]; k = s; \
+  } p[k] = temp; }
+
+static void HeapSort(u32 *p, size_t size)
+{
+  if (size <= 1)
+    return;
+  p--;
+  {
+    size_t i = size / 2;
+    do
+    {
+      u32 temp = p[i];
+      size_t k = i;
+      HeapSortDown(p, k, size, temp)
+    }
+    while (--i != 0);
+  }
+  /*
+  do
+  {
+    size_t k = 1;
+    UInt32 temp = p[size];
+    p[size--] = p[1];
+    HeapSortDown(p, k, size, temp)
+  }
+  while (size > 1);
+  */
+  while (size > 3)
+  {
+    u32 temp = p[size];
+    size_t k = (p[3] > p[2]) ? 3 : 2;
+    p[size--] = p[1];
+    p[1] = p[k];
+    HeapSortDown(p, k, size, temp)
+  }
+  {
+    u32 temp = p[size];
+    p[size] = p[1];
+    if (size > 2 && p[2] < temp)
+    {
+      p[1] = p[2];
+      p[2] = temp;
+    }
+    else
+      p[1] = temp;
+  }
+}
+
+#define NUM_BITS 10
+#define MASK (((unsigned)1 << NUM_BITS) - 1)
+
+static void Huffman_Generate(const u32 *freqs, u32 *p, u8 *lens,
+			     unsigned int numSymbols, unsigned int maxLen)
+{
+	u32 num, i;
+
+	num = 0;
+	/* if (maxLen > 10) maxLen = 10; */
+
+	for (i = 0; i < numSymbols; i++) {
+		u32 freq = freqs[i];
+
+		if (!freq)
+			lens[i] = 0;
+		else
+			p[num++] = i | (freq << NUM_BITS);
+	}
+	HeapSort(p, num);
+
+	if (num < 2) {
+		unsigned int minCode = 0, maxCode = 1;
+
+		if (num == 1) {
+			maxCode = (unsigned int)p[0] & MASK;
+			if (!maxCode)
+				maxCode++;
+		}
+		p[minCode] = 0;
+		p[maxCode] = 1;
+		lens[minCode] = lens[maxCode] = 1;
+		return;
+	}
+
+	{
+		u32 b, e, i;
+
+		i = b = e = 0;
+		do {
+			u32 n, m, freq;
+
+			n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq = (p[n] & ~MASK);
+			p[n] = (p[n] & MASK) | (e << NUM_BITS);
+			m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
+			freq += (p[m] & ~MASK);
+			p[m] = (p[m] & MASK) | (e << NUM_BITS);
+			p[e] = (p[e] & MASK) | freq;
+			e++;
+		} while (num - e > 1);
+
+		{
+			u32 lenCounters[kMaxLen + 1];
+
+			for (i = 0; i <= kMaxLen; i++)
+				lenCounters[i] = 0;
+
+			p[--e] &= MASK;
+			lenCounters[1] = 2;
+			while (e > 0) {
+				u32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
+
+				p[e] = (p[e] & MASK) | (len << NUM_BITS);
+				if (len >= maxLen)
+					for (len = maxLen - 1; lenCounters[len] == 0; len--);
+				lenCounters[len]--;
+				lenCounters[(size_t)len + 1] += 2;
+			}
+
+			{
+				u32 len;
+
+				i = 0;
+				for (len = maxLen; len != 0; len--) {
+					u32 k;
+					for (k = lenCounters[len]; k != 0; k--)
+						lens[p[i++] & MASK] = (u8)len;
+				}
+			}
+			deflate_genhuffcodes(lens, p, numSymbols, lenCounters);
+		}
+	}
+}
+
+static void kite_deflate_fixdynblock(struct kite_deflate *s)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int numlitlens, numdistlens, numblcodes;
+	u32 levelFreqs[kLensTableSize] = {0};
+	u32 opt_mainlen;
+
+	if (!s->freq_changed)
+		return;
+
+	/* in order to match zlib */
+	s->numHuffBits = kMaxLen;
+//	s->numHuffBits = (s->symbols > 18000 ? 12 :
+//		(s->symbols > 7000 ? 11 : (s->symbols > 2000 ? 10 : 9)));
+
+	Huffman_Generate(s->mainFreqs, t->mainCodes, t->litLenLevels,
+			 kMainTableSize, s->numHuffBits);
+	Huffman_Generate(s->distFreqs, t->distCodes, t->distLevels,
+			 kDistTableSize32, s->numHuffBits);
+
+	/* code lengths for the literal/length alphabet */
+	numlitlens = kMainTableSize;
+	while (numlitlens > kNumLitLenCodesMin &&
+	       !t->litLenLevels[numlitlens - 1])
+		--numlitlens;
+
+	/* code lengths for the distance alphabet */
+	numdistlens = kDistTableSize32;
+	while (numdistlens > kNumDistCodesMin &&
+	       !t->distLevels[numdistlens - 1])
+		--numdistlens;
+
+	kite_deflate_scanlens(numlitlens, t->litLenLevels, levelFreqs);
+	kite_deflate_scanlens(numdistlens, t->distLevels, levelFreqs);
+	Huffman_Generate(levelFreqs, t->levelCodes, t->levelLens,
+			 kLensTableSize, 7);
+	numblcodes = kLensTableSize;
+	while (numblcodes > kNumLensCodesMin &&
+	       !t->levelLens[kCodeLengthAlphabetOrder[numblcodes - 1]])
+		--numblcodes;
+
+	t->numlitlens = numlitlens;
+	t->numdistlens = numdistlens;
+	t->numblcodes = numblcodes;
+
+	opt_mainlen = Huffman_GetPriceEx(s->mainFreqs, t->litLenLevels,
+			kMainTableSize, kLenExtraBits32, kSymbolMatch) +
+		Huffman_GetPriceEx(s->distFreqs, t->distLevels,
+			kDistTableSize32, kDistExtraBits, 0);
+	s->costbits = 3 + 5 + 5 + 4 + 3 * numblcodes +
+		Huffman_GetPriceEx(levelFreqs, t->levelLens,
+			kLensTableSize, kLevelExtraBits, kTableDirectLevels) +
+		opt_mainlen;
+	s->freq_changed = false;
+}
+
+
+/*
+ * an array used used by the LZ-based encoder to hold the length-distance pairs
+ * found by LZ matchfinder.
+ */
+struct kite_match {
+	unsigned int len;
+	unsigned int dist;
+};
+
+struct kite_matchfinder {
+	/* pointer to buffer with data to be compressed */
+	const u8 *buffer;
+
+	/* indicate the first byte that doesn't contain valid input data */
+	const u8 *end;
+
+	/* LZ matchfinder hash chain representation */
+	u32 *hash, *chain;
+
+	u32 base;
+
+	/* indicate the next byte to run through the match finder */
+	u32 offset;
+
+	u32 cyclic_pos;
+
+	/* maximum length of a match that the matchfinder will try to find. */
+	u16 nice_len;
+
+	/* the total sliding window size */
+	u16 wsiz;
+
+	/* how many rounds a matchfinder searches on a hash chain for */
+	u16 depth;
+
+	/* do not perform lazy search no less than this match length */
+	u16 max_lazy;
+
+	/* reduce lazy search no less than this match length */
+	u8  good_len;
+
+	/* current match for lazy matching */
+	struct kite_match *matches;
+	struct kite_match matches_matrix[2][4];
+};
+
+/*
+ * This mysterious table is just the CRC of each possible byte. It can be
+ * computed using the standard bit-at-a-time methods. The polynomial can
+ * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12.
+ * Add the implicit x^16, and you have the standard CRC-CCITT.
+ */
+u16 const crc_ccitt_table[256] __attribute__((__aligned__(128))) = {
+	0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+	0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+	0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+	0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+	0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+	0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+	0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+	0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+	0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+	0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+	0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+	0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+	0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+	0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+	0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+	0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+	0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+	0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+	0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+	0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+	0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+	0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+	0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+	0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+	0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+	0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+	0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+	0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+	0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+	0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+	0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+	0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+int kite_mf_getmatches_hc3(struct kite_matchfinder *mf, u16 depth, u16 bestlen)
+{
+	const u8 *cur = mf->buffer + mf->offset;
+	const u8 *qbase = mf->buffer - mf->base;
+	u32 curMatch;
+	unsigned int v, hv, i, k, p, wsiz;
+
+	if (mf->end - cur < bestlen + 1)
+		return 0;
+
+	v = get_unaligned((u16 *)cur);
+	hv = v ^ crc_ccitt_table[cur[2]];
+	curMatch = mf->hash[hv];
+	p = mf->base + mf->offset;
+	mf->hash[hv] = p;
+	mf->chain[mf->cyclic_pos] = curMatch;
+	wsiz = mf->wsiz;
+	k = 1;
+
+	if (depth) {
+		unsigned int wpos = wsiz + mf->cyclic_pos;
+
+		hv = min_t(unsigned int, mf->nice_len, mf->end - cur);
+		DBG_BUGON(hv > kMatchMaxLen32);
+		do {
+			unsigned int diff = p - curMatch;
+			const u8 *q;
+
+			if (diff >= wsiz)
+				break;
+
+			q = qbase + curMatch;
+			curMatch = mf->chain[(wpos - diff) & (wsiz - 1)];
+			if (v == get_unaligned((u16 *)q) && (bestlen < 3 || (
+			    get_unaligned((u16 *)(cur + bestlen - 1)) ==
+			    get_unaligned((u16 *)(q + bestlen - 1)) &&
+			    !memcmp(cur + 3, q + 3, bestlen - 3)))) {
+				DBG_BUGON(cur[2] != q[2]);
+				i = erofs_memcmp2(cur + bestlen + 1,
+					q + bestlen + 1, hv - bestlen - 1);
+				bestlen += 1 + i;
+
+				k -= (k >= ARRAY_SIZE(mf->matches_matrix[0]));
+				mf->matches[k++] = (struct kite_match) {
+					.len = bestlen,
+					.dist = diff,
+				};
+				if (bestlen >= hv)
+					break;
+			}
+		} while (--depth);
+	}
+	mf->offset++;
+	mf->cyclic_pos = (mf->cyclic_pos + 1) & (wsiz - 1);
+	return k - 1;
+}
+
+/* let's align with zlib */
+static const struct kite_matchfinder_cfg {
+	u16  good_length;	/* reduce lazy search above this match length */
+	u16  max_lazy;	/* do not perform lazy search above this match length */
+	u16  nice_length;	/* quit search above this match length */
+	u16  depth;
+	bool lazy_search;
+} kite_mfcfg[10] = {
+/*      good lazy nice depth */
+/* 0 */ {0,    0,  0,    0, false},	/* store only [unsupported] */
+/* 1 */ {4,    4,  8,    4, false},	/* maximum speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, false},
+/* 3 */ {4,    6, 32,   32, false},
+
+/* 4 */ {4,    4,  16,   16, true},	/* lazy matches */
+/* 5 */ {8,   16,  32,   32, true},
+/* 6 */ {8,   16, 128,  128, true},
+/* 7 */ {8,   32, 128,  256, true},
+/* 8 */ {32, 128, 258, 1024, true},
+/* 9 */ {32, 258, 258, 4096, true},	/* maximum compression */
+};
+
+static int kite_mf_init(struct kite_matchfinder *mf, int wsiz, int level)
+{
+	const struct kite_matchfinder_cfg *cfg;
+
+	if (!level || level >= ARRAY_SIZE(kite_mfcfg))
+		return -EINVAL;
+	cfg = &kite_mfcfg[level];
+
+	if (wsiz > kHistorySize32 || (1 << ilog2(wsiz)) != wsiz)
+		return -EINVAL;
+
+	mf->hash = calloc(0x10000, sizeof(mf->hash[0]));
+	if (!mf->hash)
+		return -ENOMEM;
+
+	mf->chain = malloc(sizeof(mf->chain[0]) * wsiz);
+	if (!mf->chain) {
+		free(mf->hash);
+		mf->hash = NULL;
+		return -ENOMEM;
+	}
+	mf->wsiz = wsiz;
+
+	mf->good_len = cfg->good_length;
+	mf->nice_len = cfg->nice_length;
+	mf->depth = cfg->depth;
+	mf->max_lazy = cfg->max_lazy;
+	return cfg->lazy_search;
+}
+
+static void kite_mf_reset(struct kite_matchfinder *mf,
+			  const void *buffer, const void *end)
+{
+	mf->buffer = buffer;
+	mf->end = end;
+
+	/*
+	 * Set the initial value as max_distance + 1.  This would avoid hash
+	 * zero initialization.
+	 */
+	mf->base += mf->offset + kHistorySize32 + 1;
+
+	mf->offset = 0;
+	mf->cyclic_pos = 0;
+
+	mf->matches = mf->matches_matrix[0];
+	mf->matches_matrix[0][0].len =
+		mf->matches_matrix[1][0].len = kMatchMinLen - 1;
+}
+
+static bool deflate_count_code(struct kite_deflate *s, bool literal,
+			       unsigned int lenSlot, unsigned int distSlot)
+{
+	struct kite_deflate_table *t = s->tab;
+	unsigned int lenbase = (literal ? 0 : kSymbolMatch);
+	u64 rem = (s->outlen - s->pos_out) * 8 - s->bitpos;
+	bool recalc = false;
+	unsigned int bits;
+
+	s->freq_changed = true;
+	++s->mainFreqs[lenbase + lenSlot];
+	if (!literal)
+		++s->distFreqs[distSlot];
+
+	if (s->encode_mode == 1) {
+		if (literal) {
+			bits = kstaticHuff_litLenLevels[lenSlot];
+			goto out;
+		}
+		bits = kstaticHuff_litLenLevels[kSymbolMatch + lenSlot] +
+			kLenExtraBits32[lenSlot] + 5 + kDistExtraBits[distSlot];
+		goto out;
+	}
+
+	/* XXX: more ideas to be done later */
+	recalc |= (!literal && !t->distLevels[distSlot]);
+	recalc |= !t->litLenLevels[lenbase + lenSlot];
+	if (recalc) {
+		kite_dbg("recalc %c lS %u dS %u", literal ? 'l' : 'm',
+			 lenSlot, distSlot);
+		s->tab = s->tables + (s->tab == s->tables);
+		kite_deflate_fixdynblock(s);
+		bits = 0;
+		goto out;
+	}
+
+	if (literal) {
+		bits = t->litLenLevels[lenSlot];
+		goto out;
+	}
+
+	bits = t->distLevels[distSlot] + kDistExtraBits[distSlot] +
+		t->litLenLevels[kSymbolMatch + lenSlot] +
+		kLenExtraBits32[lenSlot];
+out:
+	if (rem < s->costbits + bits) {
+		--s->mainFreqs[lenbase + lenSlot];
+		if (!literal)
+			--s->distFreqs[distSlot];
+		if (recalc)
+			s->tab = s->tables + (s->tab == s->tables);
+		return false;
+	}
+	s->costbits += bits;
+	return true;
+}
+
+static bool kite_deflate_tally(struct kite_deflate *s,
+			       struct kite_match *match)
+{
+	struct kite_deflate_symbol *sym = s->sym + s->symbols;
+	u32 fixedcost = ~0;
+	bool hassp;
+
+	*sym = (struct kite_deflate_symbol) {
+		.len = match->len,
+		.dist = match->dist,
+	};
+
+retry:
+	if (sym->len < kMatchMinLen) {
+		hassp = deflate_count_code(s, true, sym->dist, 0);
+	} else {
+		unsigned int lc = sym->len - kMatchMinLen;
+		unsigned int lenSlot = g_LenSlots[lc];
+		unsigned int distSlot = deflateDistSlot(sym->dist - 1);
+
+		hassp = deflate_count_code(s, false, lenSlot, distSlot);
+	}
+
+	if (!hassp) {
+		if (s->encode_mode == 1) {
+			fixedcost = s->costbits;
+			s->encode_mode = 2;
+			goto retry;
+		}
+		s->lastblock = true;
+		if (fixedcost <= s->costbits)
+			s->encode_mode = 1;
+		return true;
+	}
+	++s->symbols;
+	return false;
+}
+
+static void kite_deflate_writestore(struct kite_deflate *s)
+{
+	bool fb = !s->startpos && !s->bitpos;
+	unsigned int totalsiz = s->pos_in - s->prev_valid - s->startpos;
+
+	do {
+		unsigned int len = min_t(unsigned int, totalsiz, 65535);
+
+		totalsiz -= len;
+		writebits(s, (fb << 3) | (kStored << 1) |
+			  (s->lastblock && !totalsiz), 3 + fb);
+		flushbits(s);
+		writebits(s, len, 16);
+		writebits(s, len ^ 0xffff, 16);
+		flushbits(s);
+		memcpy(s->out + s->pos_out, s->in + s->startpos, len);
+		s->pos_out += len;
+		s->startpos += len;
+	} while (totalsiz);
+}
+
+static void kite_deflate_endblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		u32 fixedcost = s->costbits;
+		unsigned int storelen, storeblocks, storecost;
+
+		kite_deflate_fixdynblock(s);
+		if (fixedcost > s->costbits)
+			s->encode_mode = 2;
+		else
+			s->costbits = fixedcost;
+
+		storelen = s->pos_in - s->prev_valid - s->startpos;
+		storeblocks = max(DIV_ROUND_UP(storelen, 65535), 1U);
+		storecost = (8 - s->bitpos) + storeblocks - 1 +
+			storeblocks * 32 + storelen * 8;
+		if (s->costbits > storecost) {
+			s->costbits = storecost;
+			s->encode_mode = 0;
+		}
+	}
+
+	s->lastblock |= (s->costbits + s->bitpos >=
+			(s->outlen - s->pos_out) * 8);
+}
+
+static void kite_deflate_startblock(struct kite_deflate *s)
+{
+	memset(s->mainFreqs, 0, sizeof(s->mainFreqs));
+	memset(s->distFreqs, 0, sizeof(s->distFreqs));
+	memset(s->tables, 0, sizeof(s->tables[0]));
+	s->symbols = 0;
+	s->mainFreqs[kSymbolEndOfBlock]++;
+	s->encode_mode = 1;
+	s->tab = s->tables;
+	s->costbits = 3 + kstaticHuff_litLenLevels[kSymbolEndOfBlock];
+}
+
+static bool kite_deflate_commitblock(struct kite_deflate *s)
+{
+	if (s->encode_mode == 1) {
+		kite_deflate_setfixedtrees(s);
+		kite_deflate_writeblock(s, true);
+	} else if (s->encode_mode == 2) {
+		kite_deflate_sendtrees(s);
+		kite_deflate_writeblock(s, false);
+	} else {
+		kite_deflate_writestore(s);
+	}
+	s->startpos = s->pos_in - s->prev_valid;
+	return s->lastblock;
+}
+
+static bool kite_deflate_fast(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		int matches = kite_mf_getmatches_hc3(mf, mf->depth,
+				kMatchMinLen - 1);
+
+		if (matches) {
+			unsigned int len = mf->matches[matches].len;
+			unsigned int dist = mf->matches[matches].dist;
+
+			if (len == kMatchMinLen && dist > ZLIB_DISTANCE_TOO_FAR)
+				goto nomatch;
+
+			kite_dbg("%u matches found: longest [%u,%u] of distance %u",
+				 matches, s->pos_in, s->pos_in + len - 1, dist);
+
+			if (kite_deflate_tally(s, mf->matches + matches))
+				break;
+			s->pos_in += len;
+			/* skip the rest bytes */
+			while (--len)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		} else {
+nomatch:
+			mf->matches[0].dist = s->in[s->pos_in];
+			if (isprint(s->in[s->pos_in]))
+				kite_dbg("literal %c pos_in %u", s->in[s->pos_in], s->pos_in);
+			else
+				kite_dbg("literal %x pos_in %u", s->in[s->pos_in], s->pos_in);
+
+			if (kite_deflate_tally(s, mf->matches))
+				break;
+			++s->pos_in;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen || s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+	return kite_deflate_commitblock(s);
+}
+
+static bool kite_deflate_slow(struct kite_deflate *s)
+{
+	struct kite_matchfinder *mf = s->mf;
+	bool flush = false;
+
+	kite_deflate_startblock(s);
+	while (1) {
+		struct kite_match *prev_matches = mf->matches;
+		unsigned int len = kMatchMinLen - 1;
+		int matches;
+		unsigned int len0;
+
+		mf->matches = mf->matches_matrix[
+				mf->matches == mf->matches_matrix[0]];
+		mf->matches[0].dist = s->in[s->pos_in];
+
+		len0 = prev_matches[s->prev_longest].len;
+		if (len0 < mf->max_lazy) {
+			matches = kite_mf_getmatches_hc3(mf, mf->depth >>
+				(len0 >= mf->good_len), len0);
+			if (matches) {
+				len = mf->matches[matches].len;
+				if (len == kMatchMinLen &&
+				    mf->matches[matches].dist > ZLIB_DISTANCE_TOO_FAR) {
+					matches = 0;
+					len = kMatchMinLen - 1;
+				}
+			}
+		} else {
+			matches = 0;
+			(void)kite_mf_getmatches_hc3(mf, 0, 0);
+		}
+
+		if (len < len0) {
+			if (kite_deflate_tally(s,
+					prev_matches + s->prev_longest))
+				break;
+
+			s->pos_in += --len0;
+			/* skip the rest bytes */
+			while (--len0)
+				(void)kite_mf_getmatches_hc3(mf, 0, 0);
+			s->prev_valid = false;
+			s->prev_longest = 0;
+		} else {
+			if (!s->prev_valid)
+				s->prev_valid = true;
+			else if (kite_deflate_tally(s, prev_matches))
+				break;
+			++s->pos_in;
+			s->prev_longest = matches;
+		}
+
+		s->lastblock |= (s->pos_in >= s->inlen);
+		if (s->pos_in >= s->inlen) {
+			flush = true;
+			break;
+		}
+		if (s->symbols >= s->max_symbols) {
+			kite_deflate_endblock(s);
+			break;
+		}
+	}
+
+	if (flush && s->prev_valid) {
+		(void)kite_deflate_tally(s, mf->matches + s->prev_longest);
+		s->prev_valid = false;
+	}
+	return kite_deflate_commitblock(s);
+}
+
+void kite_deflate_end(struct kite_deflate *s)
+{
+	if (s->mf) {
+		if (s->mf->hash)
+			free(s->mf->hash);
+		if (s->mf->chain)
+			free(s->mf->chain);
+		free(s->mf);
+	}
+	if (s->sym)
+		free(s->sym);
+	free(s);
+}
+
+struct kite_deflate *kite_deflate_init(int level, unsigned int dict_size)
+{
+	struct kite_deflate *s;
+	int err;
+
+	kite_deflate_init_once();
+	s = calloc(1, sizeof(*s));
+	if (!s)
+		return ERR_PTR(-ENOMEM);
+
+	s->max_symbols = 16384;
+	s->sym = malloc(sizeof(s->sym[0]) * s->max_symbols);
+	if (!s->sym) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	s->mf = malloc(sizeof(*s->mf));
+	if (!s->mf) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	if (!dict_size)
+		dict_size = kHistorySize32;
+
+	err = kite_mf_init(s->mf, dict_size, level);
+	if (err < 0)
+		goto err_out;
+
+	s->lazy_search = err;
+	return s;
+err_out:
+	if (s->mf)
+		free(s->mf);
+	if (s->sym)
+		free(s->sym);
+	free(s);
+	return ERR_PTR(err);
+}
+
+int kite_deflate_destsize(struct kite_deflate *s, const u8 *in, u8 *out,
+			   unsigned int *srcsize, unsigned int target_dstsize)
+{
+	memset(s, 0, offsetof(struct kite_deflate, mainFreqs));
+	s->in = in;
+	s->inlen = *srcsize;
+	s->out = out;
+	s->outlen = target_dstsize;
+	kite_mf_reset(s->mf, in, in + s->inlen);
+
+	if (s->lazy_search)
+		while (!kite_deflate_slow(s));
+	else
+		while (!kite_deflate_fast(s));
+	flushbits(s);
+
+	*srcsize = s->startpos;
+	return s->pos_out;
+}
+
+#if TEST
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+int main(int argc, char *argv[])
+{
+	int fd;
+	u64 filelength;
+	u8 out[1048576], *buf;
+	int dstsize = 4096;
+	unsigned int srcsize, outsize;
+	struct kite_deflate *s;
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		return -errno;
+	if (argc > 2)
+		dstsize = atoi(argv[2]);
+	filelength = lseek(fd, 0, SEEK_END);
+
+	s = kite_deflate_init(9, 0);
+	if (IS_ERR(s))
+		return PTR_ERR(s);
+
+	filelength = lseek(fd, 0, SEEK_END);
+	buf = mmap(NULL, filelength, PROT_READ, MAP_SHARED, fd, 0);
+	if (buf == MAP_FAILED)
+		return -errno;
+	close(fd);
+
+	srcsize = filelength;
+	outsize = kite_deflate_destsize(s, buf, out, &srcsize, dstsize);
+	fd = open("out.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+	write(fd, out, outsize);
+	close(fd);
+	kite_deflate_end(s);
+	return 0;
+}
+#endif
-- 
2.24.4



More information about the Linux-erofs mailing list