[WIP] [PATCH v0.0-20200229 10/11] ez: lzma: add fixed-sized output compression

Gao Xiang hsiangkao at aol.com
Sat Feb 29 15:50:16 AEDT 2020


After this patch, the compressed output can get as close
to destsize as possible without exceeding it.

Signed-off-by: Gao Xiang <hsiangkao at aol.com>
---
 lzma/lzma_encoder.c | 133 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/lzma/lzma_encoder.c b/lzma/lzma_encoder.c
index b213504..98cde22 100644
--- a/lzma/lzma_encoder.c
+++ b/lzma/lzma_encoder.c
@@ -10,7 +10,7 @@
  */
 #include <stdlib.h>
 #include <ez/bitops.h>
-#include "rc_encoder.h"
+#include "rc_encoder_ckpt.h"
 #include "lzma_common.h"
 #include "mf.h"
 
@@ -72,12 +72,23 @@ struct lzma_length_encoder {
 	probability high[kLenNumHighSymbols];
 };
 
+struct lzma_encoder_destsize {	/* state for the fixed-sized output mode */
+	struct lzma_rc_ckpt cp;	/* rc checkpoint restored on -ENOSPC */
+
+	uint8_t *op;		/* output position saved together with cp */
+	uint32_t capacity;	/* remaining output budget, in bytes */
+
+	uint32_t esz;		/* bytes of ending[] in use, 0 if none */
+	uint8_t ending[LZMA_REQUIRED_INPUT_MAX + 5]; /* encoded EOPM + flush */
+};
+
 struct lzma_encoder {
 	struct lzma_mf mf;
 	struct lzma_rc_encoder rc;
 
 	uint8_t *op, *oend;
 	bool finish;
+	bool need_eopm;
 
 	unsigned int state;
 
@@ -109,6 +120,8 @@ struct lzma_encoder {
 		struct lzma_match matches[kMatchMaxLen];
 		unsigned int matches_count;
 	} fast;
+
+	struct lzma_encoder_destsize *dstsize;
 };
 
 #define change_pair(smalldist, bigdist) (((bigdist) >> 7) > (smalldist))
@@ -449,6 +462,46 @@ static void rep_match(struct lzma_encoder *lzma, const uint32_t pos_state,
 	}
 }
 
+struct lzma_endstate {	/* model copies used by encode_eopm_stateless() */
+	struct lzma_length_encoder lenEnc;	/* copy of lzma->lenEnc */
+
+	probability simpleMatch[2];	/* [0] isMatch copy, [1] isRep copy */
+	probability posSlot[kNumPosSlotBits]; /* posSlotEncoder[0] copies */
+	probability posAlign[kNumAlignBits];	/* posAlignEncoder copies */
+};
+
+static void encode_eopm_stateless(struct lzma_encoder *lzma,
+				  struct lzma_endstate *endstate)
+{	/* feed EOPM symbols to lzma->rc using model copies in *endstate */
+	const uint32_t pos_state =
+		(lzma->mf.cur - lzma->mf.lookahead) & lzma->pbMask;
+	const unsigned int state = lzma->state;
+	unsigned int i;
+	/* copy the models first: rc only gets pointers into *endstate */
+	endstate->simpleMatch[0] = lzma->isMatch[state][pos_state];
+	endstate->simpleMatch[1] = lzma->isRep[state];
+	endstate->lenEnc = lzma->lenEnc;
+	/* isMatch copy <- 1, isRep copy <- 0, then length kMatchMinLen */
+	rc_bit(&lzma->rc, endstate->simpleMatch, 1);
+	rc_bit(&lzma->rc, endstate->simpleMatch + 1, 0);
+	length(&lzma->rc, &endstate->lenEnc, pos_state, kMatchMinLen);
+	/* kNumPosSlotBits one-bits; each uses a copy of posSlotEncoder[0][] */
+	for (i = 0; i < kNumPosSlotBits; ++i) {
+		endstate->posSlot[i] =
+			lzma->posSlotEncoder[0][(1 << (i + 1)) - 1];
+		rc_bit(&lzma->rc, endstate->posSlot + i, 1);
+	}
+	/* 30 - kNumAlignBits direct bits, all set */
+	rc_direct(&lzma->rc, (1 << (30 - kNumAlignBits)) - 1,
+		  30 - kNumAlignBits);
+	/* kNumAlignBits one-bits; each uses a copy of posAlignEncoder[] */
+	for (i = 0; i < kNumAlignBits; ++i) {
+		endstate->posAlign[i] =
+			lzma->posAlignEncoder[(1 << (i + 1)) - 1];
+		rc_bit(&lzma->rc, endstate->posAlign + i, 1);
+	}
+}
+
 static void encode_eopm(struct lzma_encoder *lzma)
 {
 	const uint32_t pos_state =
@@ -460,8 +513,86 @@ static void encode_eopm(struct lzma_encoder *lzma)
 	match(lzma, pos_state, UINT32_MAX, kMatchMinLen);
 }
 
+static int __flush_symbol_destsize(struct lzma_encoder *lzma)
+{	/* encode pending symbols while keeping within dstsize->capacity */
+	uint8_t *op2;
+	unsigned int symbols_size;
+	unsigned int esz = 0;
+	/* returns 0 on success or -ENOSPC; fewer than 5 bytes left always fails */
+	if (lzma->dstsize->capacity < 5)
+		return -ENOSPC;
+	/* NOTE(review): checkpoint is taken only when rc.pos is 0 - confirm */
+	if (!lzma->rc.pos) {
+		rc_write_checkpoint(&lzma->rc, &lzma->dstsize->cp);
+		lzma->dstsize->op = lzma->op;
+	}
+	/* NOTE(review): a failure here returns without restoring the checkpoint */
+	if (rc_encode(&lzma->rc, &lzma->op, lzma->oend))
+		return -ENOSPC;
+	/* bytes written since the output position saved with the checkpoint */
+	op2 = lzma->op;
+	symbols_size = op2 - lzma->dstsize->op;
+	if (lzma->dstsize->capacity < symbols_size + 5)
+		goto err_enospc;
+	/* without a pending EOPM there is no ending to size; esz stays 0 */
+	if (!lzma->need_eopm)
+		goto out;
+	/* close to the limit: trial-encode the ending to get its exact size */
+	if (lzma->dstsize->capacity < symbols_size +
+	    LZMA_REQUIRED_INPUT_MAX + 5) {
+		struct lzma_rc_ckpt cp2;
+		struct lzma_endstate endstate;
+		uint8_t ending[sizeof(lzma->dstsize->ending)];
+		uint8_t *ep;
+		/* cp2 lets the trial EOPM + flush be undone below */
+		rc_write_checkpoint(&lzma->rc, &cp2);
+		encode_eopm_stateless(lzma, &endstate);
+		rc_flush(&lzma->rc);
+		/* the ending goes to a local buffer, not to lzma->op */
+		ep = ending;
+		if (rc_encode(&lzma->rc, &ep, ending + sizeof(ending)))
+			DBG_BUGON(1);
+		/* exact size of the encoded ending */
+		esz = ep - ending;
+		/* symbols plus ending exceed the budget: drop these symbols */
+		if (lzma->dstsize->capacity < symbols_size + esz)
+			goto err_enospc;
+		rc_restore_checkpoint(&lzma->rc, &cp2);
+		/* whole buffer is copied; only the first esz bytes were written */
+		memcpy(lzma->dstsize->ending, ending, sizeof(ending));
+		lzma->dstsize->esz = esz;	/* stored again at out: */
+	}
+	/* NOTE(review): symbols_size is relative to dstsize->op, see rc.pos */
+out:
+	lzma->dstsize->capacity -= symbols_size;
+	lzma->dstsize->esz = esz;
+	return 0;
+	/* roll rc and the output position back to the checkpoint */
+err_enospc:
+	rc_restore_checkpoint(&lzma->rc, &lzma->dstsize->cp);
+	lzma->op = lzma->dstsize->op;
+	lzma->dstsize->capacity = 0;	/* no budget left after a failure */
+	return -ENOSPC;
+}
+
 static int flush_symbol(struct lzma_encoder *lzma)
 {
+	if (lzma->rc.count && lzma->dstsize) {	/* destsize mode, rc.count != 0 */
+		const unsigned int safemargin =
+			5 + (LZMA_REQUIRED_INPUT_MAX << !!lzma->need_eopm);
+		uint8_t *op;
+		bool ret;
+		/* margin is 5 + MAX, or 5 + 2 * MAX when need_eopm is set */
+		if (lzma->dstsize->capacity < safemargin)
+			return __flush_symbol_destsize(lzma);
+		/* above the margin: encode directly, then charge bytes written */
+		op = lzma->op;
+		ret = rc_encode(&lzma->rc, &lzma->op, lzma->oend);
+
+		lzma->dstsize->capacity -= lzma->op - op;
+		return ret ? -ENOSPC : 0;
+	}
+
 	return rc_encode(&lzma->rc, &lzma->op, lzma->oend) ? -ENOSPC : 0;
 }
 
-- 
2.20.1



More information about the Linux-erofs mailing list