[WIP] [PATCH v0.0-20200229 08/11] ez: lzma: add LZMA encoder
Gao Xiang
hsiangkao at aol.com
Sat Feb 29 15:50:14 AEDT 2020
It adds an improved fast optimizer which implements
lazy matching and a LZMA symbol encoder.
Signed-off-by: Gao Xiang <hsiangkao at aol.com>
---
lzma/lzma_encoder.c | 628 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 628 insertions(+)
create mode 100644 lzma/lzma_encoder.c
diff --git a/lzma/lzma_encoder.c b/lzma/lzma_encoder.c
new file mode 100644
index 0000000..b213504
--- /dev/null
+++ b/lzma/lzma_encoder.c
@@ -0,0 +1,628 @@
+// SPDX-License-Identifier: Apache-2.0
+/*
+ * ez/lzma/lzma_encoder.c
+ *
+ * Copyright (C) 2019-2020 Gao Xiang <hsiangkao at aol.com>
+ *
+ * Authors: Igor Pavlov <http://7-zip.org/>
+ * Lasse Collin <lasse.collin at tukaani.org>
+ * Gao Xiang <hsiangkao at aol.com>
+ */
+#include <stdlib.h>
+#include <ez/bitops.h>
+#include "rc_encoder.h"
+#include "lzma_common.h"
+#include "mf.h"
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumStates 12
+#define LZMA_PB_MAX 4
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kNumLenToPosStates 4
+
+#define is_literal_state(state) ((state) < 7)
+
+/* note that here dist is an zero-based distance */
+static unsigned int get_pos_slot2(unsigned int dist)
+{
+ const unsigned int zz = fls(dist) - 1;
+
+ return (zz + zz) + ((dist >> (zz - 1)) & 1);
+}
+
+static unsigned int get_pos_slot(unsigned int dist)
+{
+ return dist <= 4 ? dist : get_pos_slot2(dist);
+}
+
+/* aka. GetLenToPosState in LZMA */
+static inline unsigned int get_len_state(unsigned int len)
+{
+ if (len < kNumLenToPosStates - 1 + kMatchMinLen)
+ return len - kMatchMinLen;
+
+ return kNumLenToPosStates - 1;
+}
+
+struct lzma_properties {
+ uint32_t lc; /* 0 <= lc <= 8, default = 3 */
+ uint32_t lp; /* 0 <= lp <= 4, default = 0 */
+ uint32_t pb; /* 0 <= pb <= 4, default = 2 */
+
+ struct lzma_mf_properties mf;
+};
+
+struct lzma_length_encoder {
+ probability low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
+ probability high[kLenNumHighSymbols];
+};
+
+struct lzma_encoder {
+ struct lzma_mf mf;
+ struct lzma_rc_encoder rc;
+
+ uint8_t *op, *oend;
+ bool finish;
+
+ unsigned int state;
+
+ /* the four most recent match distances */
+ uint32_t reps[LZMA_NUM_REPS];
+
+ unsigned int pbMask, lpMask;
+
+ unsigned int lc, lp;
+
+ /* the following names came from lzma-specification.txt */
+ probability isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+ probability isRep[kNumStates];
+ probability isRepG0[kNumStates];
+ probability isRepG1[kNumStates];
+ probability isRepG2[kNumStates];
+ probability isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+ probability posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+ probability posEncoders[kNumFullDistances];
+ probability posAlignEncoder[1 << kNumAlignBits];
+
+ probability *literal;
+
+ struct lzma_length_encoder lenEnc;
+ struct lzma_length_encoder repLenEnc;
+
+ struct {
+ struct lzma_match matches[kMatchMaxLen];
+ unsigned int matches_count;
+ } fast;
+};
+
+#define change_pair(smalldist, bigdist) (((bigdist) >> 7) > (smalldist))
+
+static int lzma_get_optimum_fast(struct lzma_encoder *lzma,
+ uint32_t *back_res, uint32_t *len_res)
+{
+ struct lzma_mf *const mf = &lzma->mf;
+ const uint32_t nice_len = mf->nice_len;
+
+ unsigned int matches_count, i;
+ unsigned int longest_match_length, longest_match_back;
+ unsigned int best_replen, best_rep;
+ const uint8_t *ip, *ilimit, *ista;
+ uint32_t len;
+ int ret;
+
+ if (!mf->lookahead) {
+ ret = lzma_mf_find(mf, lzma->fast.matches, lzma->finish);
+
+ if (ret < 0)
+ return ret;
+
+ matches_count = ret;
+ } else {
+ matches_count = lzma->fast.matches_count;
+ }
+
+ ip = mf->buffer + mf->cur - mf->lookahead;
+
+ /* no valid match found by matchfinder */
+ if (!matches_count ||
+ /* not enough input left to encode a match */
+ mf->iend - ip <= 2)
+ goto out_literal;
+
+ ilimit = (mf->iend <= ip + kMatchMaxLen ?
+ mf->iend : ip + kMatchMaxLen);
+
+ best_replen = 0;
+
+ /* look for all valid repeat matches */
+ for (i = 0; i < LZMA_NUM_REPS; ++i) {
+ const uint8_t *const repp = ip - lzma->reps[i];
+
+ /* the first two bytes (MATCH_LEN_MIN == 2) do not match */
+ if (get_unaligned16(ip) != get_unaligned16(repp))
+ continue;
+
+ len = ez_memcmp(ip + 2, repp + 2, ilimit) - ip;
+ /* a repeated match at least nice_len, return it immediately */
+ if (len >= nice_len) {
+ *back_res = i;
+ *len_res = len;
+ lzma_mf_skip(mf, len - 1);
+ return 0;
+ }
+
+ if (len > best_replen) {
+ best_rep = i;
+ best_replen = len;
+ }
+ }
+
+ /*
+ * although we didn't find a long enough repeated match,
+ * the normal match is long enough to use directly.
+ */
+ longest_match_length = lzma->fast.matches[matches_count - 1].len;
+ longest_match_back = lzma->fast.matches[matches_count - 1].dist;
+ if (longest_match_length >= nice_len) {
+ /* it's encoded as 0-based match distances */
+ *back_res = LZMA_NUM_REPS + longest_match_back - 1;
+ *len_res = longest_match_length;
+ lzma_mf_skip(mf, longest_match_length - 1);
+ return 0;
+ }
+
+ while (matches_count > 1) {
+ const struct lzma_match *const victim =
+ &lzma->fast.matches[matches_count - 2];
+
+ /* only (longest_match_length - 1) would be considered */
+ if (longest_match_length > victim->len + 1)
+ break;
+
+ if (!change_pair(victim->dist, longest_match_back))
+ break;
+
+ --matches_count;
+ longest_match_length = victim->len;
+ longest_match_back = victim->dist;
+ }
+
+ if (longest_match_length > best_replen + 1) {
+ best_replen = 0;
+
+ if (longest_match_length < 3 &&
+ longest_match_back > 0x80)
+ goto out_literal;
+ } else {
+ longest_match_length = best_replen;
+ longest_match_back = 0;
+ }
+
+ ista = ip;
+
+ while (1) {
+ const struct lzma_match *victim;
+
+ ret = lzma_mf_find(mf, lzma->fast.matches, lzma->finish);
+
+ if (ret < 0) {
+ lzma->fast.matches_count = 0;
+ break;
+ }
+
+ lzma->fast.matches_count = ret;
+ if (!ret)
+ break;
+
+ victim = &lzma->fast.matches[lzma->fast.matches_count - 1];
+
+ /* both sides have eliminated '+ nlits' */
+ if (victim->len + 1 < longest_match_length)
+ break;
+
+ if (!best_replen) {
+ /* victim->len (should) >= longest_match_length - 1 */
+ const uint8_t *ip1 = ip + 1;
+ const uint32_t rl = max(2U, longest_match_length - 1);
+
+ /* TODO: lazy match for this */
+ for (i = 0; i < LZMA_NUM_REPS; ++i) {
+ if (!memcmp(ip1, ip1 - lzma->reps[i], rl)) {
+ *len_res = 0;
+ return ip1 - ista;
+ }
+ }
+
+ len = UINT32_MAX;
+ } else {
+ len = 0;
+ }
+
+ for (i = 0; i < LZMA_NUM_REPS; ++i) {
+ if (lzma->reps[i] == victim->dist) {
+ len = victim->len;
+ break;
+ }
+ }
+
+ /* if the previous match is a rep, this should be longer */
+ if (len <= best_replen)
+ break;
+
+ /* if it's not a rep */
+ if (len == UINT32_MAX) {
+ if (victim->len + 1 == longest_match_length &&
+ !change_pair(victim->dist, longest_match_back))
+ break;
+
+ if (victim->len == longest_match_length &&
+ get_pos_slot(victim->dist - 1) >=
+ get_pos_slot(longest_match_back))
+ break;
+ len = 0;
+ }
+ longest_match_length = victim->len;
+ longest_match_back = victim->dist;
+ best_replen = len;
+ best_rep = i;
+ ++ip;
+ }
+
+ /* it's encoded as 0-based match distances */
+ if (best_replen)
+ *back_res = best_rep;
+ else
+ *back_res = LZMA_NUM_REPS + longest_match_back - 1;
+
+ *len_res = longest_match_length;
+ lzma_mf_skip(mf, longest_match_length - 2 + (ret < 0));
+ return ip - ista;
+
+out_literal:
+ *len_res = 0;
+ return 1;
+}
+
+static void literal_matched(struct lzma_rc_encoder *rc, probability *probs,
+ uint32_t match_byte, uint32_t symbol)
+{
+ uint32_t offset = 0x100;
+
+ symbol += 0x100;
+ do {
+ const unsigned int bit = (symbol >> 7) & 1;
+ const unsigned int match_bit = (match_byte <<= 1) & offset;
+
+ rc_bit(rc, &probs[offset + match_bit + (symbol >> 8)], bit);
+ symbol <<= 1;
+ offset &= ~(match_byte ^ symbol);
+ } while (symbol < 0x10000);
+}
+
+static void literal(struct lzma_encoder *lzma, uint32_t position)
+{
+ static const unsigned char kLiteralNextStates[] = {
+ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5
+ };
+ struct lzma_mf *mf = &lzma->mf;
+ const uint8_t *ptr = &mf->buffer[mf->cur - mf->lookahead];
+ const unsigned int state = lzma->state;
+
+ probability *probs = lzma->literal +
+ 3 * ((((position << 8) + ptr[-1]) & lzma->lpMask) << lzma->lc);
+
+ if (is_literal_state(state)) {
+ /*
+ * Previous LZMA-symbol was a literal. Encode a normal
+ * literal without a match byte.
+ */
+ rc_bittree(&lzma->rc, probs, 8, *ptr);
+ } else {
+ /*
+ * Previous LZMA-symbol was a match. Use the byte + 1
+ * of the last match as a "match byte". That is, compare
+ * the bits of the current literal and the match byte.
+ */
+ const uint8_t match_byte = *(ptr - lzma->reps[0]);
+
+ literal_matched(&lzma->rc, probs, match_byte, *ptr);
+ }
+
+ lzma->state = kLiteralNextStates[state];
+}
+
+/* LenEnc_Encode */
+static void length(struct lzma_rc_encoder *rc,
+ struct lzma_length_encoder *lc,
+ const uint32_t pos_state, const uint32_t len)
+{
+ uint32_t sym = len - kMatchMinLen;
+ probability *probs = lc->low;
+
+ if (sym >= kLenNumLowSymbols) {
+ rc_bit(rc, probs, 1);
+ probs += kLenNumLowSymbols;
+ if (sym >= kLenNumLowSymbols * 2 /* + kLenNumMidSymbols */) {
+ rc_bit(rc, probs, 1);
+ rc_bittree(rc, lc->high, kLenNumHighBits,
+ sym - kLenNumLowSymbols * 2);
+ return;
+ }
+ sym -= kLenNumLowSymbols;
+ }
+ rc_bit(rc, probs, 0);
+ rc_bittree(rc, probs + (pos_state << (kLenNumLowBits + 1)),
+ kLenNumLowBits, sym);
+}
+
+/* Match */
+static void match(struct lzma_encoder *lzma, const uint32_t pos_state,
+ const uint32_t dist, const uint32_t len)
+{
+ const uint32_t posSlot = get_pos_slot(dist);
+ const uint32_t lenState = get_len_state(len);
+
+ lzma->state = (is_literal_state(lzma->state) ? 7 : 10);
+ length(&lzma->rc, &lzma->lenEnc, pos_state, len);
+
+ /* - unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); */
+ rc_bittree(&lzma->rc, lzma->posSlotEncoder[lenState],
+ kNumPosSlotBits, posSlot);
+
+ if (dist >= kStartPosModelIndex) {
+ const uint32_t footer_bits = (posSlot >> 1) - 1;
+ const uint32_t base = (2 | (posSlot & 1)) << footer_bits;
+
+ if (dist < kNumFullDistances) {
+ rc_bittree_reverse(&lzma->rc,
+ lzma->posEncoders + base,
+ footer_bits, dist);
+ } else {
+ const uint32_t dist_reduced = dist - base;
+
+ rc_direct(&lzma->rc, dist_reduced >> kNumAlignBits,
+ footer_bits - kNumAlignBits);
+
+#if 0
+ rc_bittree_reverse(&lzma->rc, lzma->posAlignEncoder,
+ kNumAlignBits,
+ dist_reduced & kAlignMask);
+#endif
+ rc_bittree_reverse(&lzma->rc, lzma->posAlignEncoder,
+ kNumAlignBits, dist);
+ }
+ }
+ lzma->reps[3] = lzma->reps[2];
+ lzma->reps[2] = lzma->reps[1];
+ lzma->reps[1] = lzma->reps[0];
+ lzma->reps[0] = dist + 1;
+}
+
+static void rep_match(struct lzma_encoder *lzma, const uint32_t pos_state,
+ const uint32_t rep, const uint32_t len)
+{
+ const unsigned int state = lzma->state;
+
+ if (rep == 0) {
+ rc_bit(&lzma->rc, &lzma->isRepG0[state], 0);
+ rc_bit(&lzma->rc, &lzma->isRep0Long[state][pos_state],
+ len != 1);
+ } else {
+ const uint32_t distance = lzma->reps[rep];
+
+ rc_bit(&lzma->rc, &lzma->isRepG0[state], 1);
+ if (rep == 1) {
+ rc_bit(&lzma->rc, &lzma->isRepG1[state], 0);
+ } else {
+ rc_bit(&lzma->rc, &lzma->isRepG1[state], 1);
+ rc_bit(&lzma->rc, &lzma->isRepG2[state], rep - 2);
+
+ if (rep == 3)
+ lzma->reps[3] = lzma->reps[2];
+ lzma->reps[2] = lzma->reps[1];
+ }
+ lzma->reps[1] = lzma->reps[0];
+ lzma->reps[0] = distance;
+ }
+
+ if (len == 1) {
+ lzma->state = is_literal_state(state) ? 9 : 11;
+ } else {
+ length(&lzma->rc, &lzma->repLenEnc, pos_state, len);
+ lzma->state = is_literal_state(state) ? 8 : 11;
+ }
+}
+
+static void encode_eopm(struct lzma_encoder *lzma)
+{
+ const uint32_t pos_state =
+ (lzma->mf.cur - lzma->mf.lookahead) & lzma->pbMask;
+ const unsigned int state = lzma->state;
+
+ rc_bit(&lzma->rc, &lzma->isMatch[state][pos_state], 1);
+ rc_bit(&lzma->rc, &lzma->isRep[state], 0);
+ match(lzma, pos_state, UINT32_MAX, kMatchMinLen);
+}
+
+static int flush_symbol(struct lzma_encoder *lzma)
+{
+ return rc_encode(&lzma->rc, &lzma->op, lzma->oend) ? -ENOSPC : 0;
+}
+
+static int encode_symbol(struct lzma_encoder *lzma, uint32_t back,
+ uint32_t len, uint32_t *position)
+{
+ int err = flush_symbol(lzma);
+
+ if (!err) {
+ const uint32_t pos_state = *position & lzma->pbMask;
+ const unsigned int state = lzma->state;
+ struct lzma_mf *const mf = &lzma->mf;
+
+ if (back == MARK_LIT) {
+ /* literal i.e. 8-bit byte */
+ rc_bit(&lzma->rc, &lzma->isMatch[state][pos_state], 0);
+ literal(lzma, *position);
+ len = 1;
+ } else {
+ rc_bit(&lzma->rc, &lzma->isMatch[state][pos_state], 1);
+
+ if (back < LZMA_NUM_REPS) {
+ /* repeated match */
+ rc_bit(&lzma->rc, &lzma->isRep[state], 1);
+ rep_match(lzma, pos_state, back, len);
+ } else {
+ /* normal match */
+ rc_bit(&lzma->rc, &lzma->isRep[state], 0);
+ match(lzma, pos_state,
+ back - LZMA_NUM_REPS, len);
+ }
+ }
+
+ /* len bytes has been consumed by encoder */
+ DBG_BUGON(mf->lookahead < len);
+ mf->lookahead -= len;
+ *position += len;
+ }
+ return err;
+}
+
+/* encode sequence (literal, match) */
+static int encode_sequence(struct lzma_encoder *lzma, unsigned int nliterals,
+ uint32_t back, uint32_t len, uint32_t *position)
+{
+ while (nliterals) {
+ int err = encode_symbol(lzma, MARK_LIT, 0, position);
+
+ if (err)
+ return err;
+ --nliterals;
+ }
+ if (!len) /* no match */
+ return 0;
+ return encode_symbol(lzma, back, len, position);
+}
+
+static int __lzma_encode(struct lzma_encoder *lzma)
+{
+ uint32_t pos32 = lzma->mf.cur - lzma->mf.lookahead;
+ int err;
+
+ do {
+ uint32_t back, len;
+ int nlits;
+
+ nlits = lzma_get_optimum_fast(lzma, &back, &len);
+
+ if (nlits < 0) {
+ err = nlits;
+ break;
+ }
+
+ err = encode_sequence(lzma, nlits, back, len, &pos32);
+ } while (!err);
+ return err;
+}
+
+static void lzma_length_encoder_reset(struct lzma_length_encoder *lc)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(lc->low); i++)
+ lc->low[i] = kProbInitValue;
+
+ for (i = 0; i < ARRAY_SIZE(lc->high); i++)
+ lc->high[i] = kProbInitValue;
+}
+
+static int lzma_encoder_reset(struct lzma_encoder *lzma,
+ const struct lzma_properties *props)
+{
+ unsigned int i, j, oldlclp, lclp;
+
+ lzma_mf_reset(&lzma->mf, &props->mf);
+ rc_reset(&lzma->rc);
+
+ /* refer to "The main loop of decoder" of lzma specification */
+ lzma->state = 0;
+ lzma->reps[0] = lzma->reps[1] = lzma->reps[2] =
+ lzma->reps[3] = 1;
+
+ /* reset all LZMA probability matrices */
+ for (i = 0; i < kNumStates; ++i) {
+ for (j = 0; j < LZMA_NUM_PB_STATES_MAX; ++j) {
+ lzma->isMatch[i][j] = kProbInitValue;
+ lzma->isRep0Long[i][j] = kProbInitValue;
+ }
+ lzma->isRep[i] = kProbInitValue;
+ lzma->isRepG0[i] = kProbInitValue;
+ lzma->isRepG1[i] = kProbInitValue;
+ lzma->isRepG2[i] = kProbInitValue;
+ }
+
+ for (i = 0; i < kNumLenToPosStates; ++i)
+ for (j = 0; j < (1 << kNumPosSlotBits); j++)
+ lzma->posSlotEncoder[i][j] = kProbInitValue;
+
+ for (i = 0; i < ARRAY_SIZE(lzma->posEncoders); i++)
+ lzma->posEncoders[i] = kProbInitValue;
+
+ for (i = 0; i < ARRAY_SIZE(lzma->posAlignEncoder); i++)
+ lzma->posAlignEncoder[i] = kProbInitValue;
+
+ /* set up LZMA literal probabilities */
+ oldlclp = lzma->lc + lzma->lp;
+ lclp = props->lc + props->lp;
+ lzma->lc = props->lc;
+ lzma->lp = props->lp;
+
+ if (lzma->literal && lclp != oldlclp) {
+ free(lzma->literal);
+ lzma->literal = NULL;
+ }
+
+ if (!lzma->literal) {
+ lzma->literal = malloc((0x300 << lclp) * sizeof(probability));
+ if (!lzma->literal)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < (0x300 << lclp); i++)
+ lzma->literal[i] = kProbInitValue;
+
+ lzma->pbMask = (1 << props->pb) - 1;
+ lzma->lpMask = (0x100 << props->lp) - (0x100 >> props->lc);
+
+ lzma_length_encoder_reset(&lzma->lenEnc);
+ lzma_length_encoder_reset(&lzma->repLenEnc);
+ return 0;
+}
+
+void lzma_default_properties(struct lzma_properties *p, int level)
+{
+ if (level < 0)
+ level = 5;
+
+ p->lc = 3;
+ p->lp = 0;
+ p->pb = 2;
+ p->mf.nice_len = (level < 7 ? 32 : 64); /* LZMA SDK numFastBytes */
+ p->mf.depth = (16 + (p->mf.nice_len >> 1)) >> 1;
+}
+
--
2.20.1
More information about the Linux-erofs
mailing list