/* * LZMAEncoderNormal * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <http://7-zip.org/> * * This file has been put into the public domain. * You can do whatever you want with this file. */ package org.tukaani.xz.lzma; import org.tukaani.xz.lz.LZEncoder; import org.tukaani.xz.lz.Matches; import org.tukaani.xz.rangecoder.RangeEncoder; final class LZMAEncoderNormal extends LZMAEncoder { private static final int OPTS = 4096; private static int EXTRA_SIZE_BEFORE = OPTS; private static int EXTRA_SIZE_AFTER = OPTS; private final Optimum[] opts = new Optimum[OPTS]; private int optCur = 0; private int optEnd = 0; private Matches matches; // These are fields solely to avoid allocating the objects again and // again on each function call. private final int[] repLens = new int[REPS]; private final State nextState = new State(); static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { return LZEncoder.getMemoryUsage(dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) + OPTS * 64 / 1024; } LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, int dictSize, int extraSizeBefore, int niceLen, int mf, int depthLimit) { super(rc, LZEncoder.getInstance(dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), EXTRA_SIZE_AFTER, niceLen, MATCH_LEN_MAX, mf, depthLimit), lc, lp, pb, dictSize, niceLen); for (int i = 0; i < OPTS; ++i) opts[i] = new Optimum(); } public void reset() { optCur = 0; optEnd = 0; super.reset(); } /** * Converts the opts array from backward indexes to forward indexes. * Then it will be simple to get the next symbol from the array * in later calls to <code>getNextSymbol()</code>. */ private int convertOpts() { optEnd = optCur; int optPrev = opts[optCur].optPrev; do { Optimum opt = opts[optCur]; if (opt.prev1IsLiteral) { opts[optPrev].optPrev = optCur; opts[optPrev].backPrev = -1; optCur = optPrev--; if (opt.hasPrev2) { opts[optPrev].optPrev = optPrev + 1; opts[optPrev].backPrev = opt.backPrev2; optCur = optPrev; optPrev = opt.optPrev2; } } int temp = opts[optPrev].optPrev; opts[optPrev].optPrev = optCur; optCur = optPrev; optPrev = temp; } while (optCur > 0); optCur = opts[0].optPrev; back = opts[optCur].backPrev; return optCur; } int getNextSymbol() { // If there are pending symbols from an earlier call to this // function, return those symbols first. if (optCur < optEnd) { int len = opts[optCur].optPrev - optCur; optCur = opts[optCur].optPrev; back = opts[optCur].backPrev; return len; } assert optCur == optEnd; optCur = 0; optEnd = 0; back = -1; if (readAhead == -1) matches = getMatches(); // Get the number of bytes available in the dictionary, but // not more than the maximum match length. If there aren't // enough bytes remaining to encode a match at all, return // immediately to encode this byte as a literal. int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); if (avail < MATCH_LEN_MIN) return 1; // Get the lengths of repeated matches. int repBest = 0; for (int rep = 0; rep < REPS; ++rep) { repLens[rep] = lz.getMatchLen(reps[rep], avail); if (repLens[rep] < MATCH_LEN_MIN) { repLens[rep] = 0; continue; } if (repLens[rep] > repLens[repBest]) repBest = rep; } // Return if the best repeated match is at least niceLen bytes long. if (repLens[repBest] >= niceLen) { back = repBest; skip(repLens[repBest] - 1); return repLens[repBest]; } // Initialize mainLen and mainDist to the longest match found // by the match finder. int mainLen = 0; int mainDist = 0; if (matches.count > 0) { mainLen = matches.len[matches.count - 1]; mainDist = matches.dist[matches.count - 1]; // Return if it is at least niceLen bytes long. if (mainLen >= niceLen) { back = mainDist + REPS; skip(mainLen - 1); return mainLen; } } int curByte = lz.getByte(0); int matchByte = lz.getByte(reps[0] + 1); // If the match finder found no matches and this byte cannot be // encoded as a repeated match (short or long), we must be return // to have the byte encoded as a literal. if (mainLen < MATCH_LEN_MIN && curByte != matchByte && repLens[repBest] < MATCH_LEN_MIN) return 1; int pos = lz.getPos(); int posState = pos & posMask; // Calculate the price of encoding the current byte as a literal. { int prevByte = lz.getByte(1); int literalPrice = literalEncoder.getPrice(curByte, matchByte, prevByte, pos, state); opts[1].set1(literalPrice, 0, -1); } int anyMatchPrice = getAnyMatchPrice(state, posState); int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); // If it is possible to encode this byte as a short rep, see if // it is cheaper than encoding it as a literal. if (matchByte == curByte) { int shortRepPrice = getShortRepPrice(anyRepPrice, state, posState); if (shortRepPrice < opts[1].price) opts[1].set1(shortRepPrice, 0, 0); } // Return if there is neither normal nor long repeated match. Use // a short match instead of a literal if is is possible and cheaper. optEnd = Math.max(mainLen, repLens[repBest]); if (optEnd < MATCH_LEN_MIN) { assert optEnd == 0 : optEnd; back = opts[1].backPrev; return 1; } // Update the lookup tables for distances and lengths before using // those price calculation functions. (The price function above // don't need these tables.) updatePrices(); // Initialize the state and reps of this position in opts[]. // updateOptStateAndReps() will need these to get the new // state and reps for the next byte. opts[0].state.set(state); System.arraycopy(reps, 0, opts[0].reps, 0, REPS); // Initialize the prices for latter opts that will be used below. for (int i = optEnd; i >= MATCH_LEN_MIN; --i) opts[i].reset(); // Calculate the prices of repeated matches of all lengths. for (int rep = 0; rep < REPS; ++rep) { int repLen = repLens[rep]; if (repLen < MATCH_LEN_MIN) continue; int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState); do { int price = longRepPrice + repLenEncoder.getPrice(repLen, posState); if (price < opts[repLen].price) opts[repLen].set1(price, 0, rep); } while (--repLen >= MATCH_LEN_MIN); } // Calculate the prices of normal matches that are longer than rep0. { int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); if (len <= mainLen) { int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, state); // Set i to the index of the shortest match that is // at least len bytes long. int i = 0; while (len > matches.len[i]) ++i; while (true) { int dist = matches.dist[i]; int price = getMatchAndLenPrice(normalMatchPrice, dist, len, posState); if (price < opts[len].price) opts[len].set1(price, 0, dist + REPS); if (len == matches.len[i]) if (++i == matches.count) break; ++len; } } } avail = Math.min(lz.getAvail(), OPTS - 1); // Get matches for later bytes and optimize the use of LZMA symbols // by calculating the prices and picking the cheapest symbol // combinations. while (++optCur < optEnd) { matches = getMatches(); if (matches.count > 0 && matches.len[matches.count - 1] >= niceLen) break; --avail; ++pos; posState = pos & posMask; updateOptStateAndReps(); anyMatchPrice = opts[optCur].price + getAnyMatchPrice(opts[optCur].state, posState); anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); calc1BytePrices(pos, posState, avail, anyRepPrice); if (avail >= MATCH_LEN_MIN) { int startLen = calcLongRepPrices(pos, posState, avail, anyRepPrice); if (matches.count > 0) calcNormalMatchPrices(pos, posState, avail, anyMatchPrice, startLen); } } return convertOpts(); } /** * Updates the state and reps for the current byte in the opts array. */ private void updateOptStateAndReps() { int optPrev = opts[optCur].optPrev; assert optPrev < optCur; if (opts[optCur].prev1IsLiteral) { --optPrev; if (opts[optCur].hasPrev2) { opts[optCur].state.set(opts[opts[optCur].optPrev2].state); if (opts[optCur].backPrev2 < REPS) opts[optCur].state.updateLongRep(); else opts[optCur].state.updateMatch(); } else { opts[optCur].state.set(opts[optPrev].state); } opts[optCur].state.updateLiteral(); } else { opts[optCur].state.set(opts[optPrev].state); } if (optPrev == optCur - 1) { // Must be either a short rep or a literal. assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; if (opts[optCur].backPrev == 0) opts[optCur].state.updateShortRep(); else opts[optCur].state.updateLiteral(); System.arraycopy(opts[optPrev].reps, 0, opts[optCur].reps, 0, REPS); } else { int back; if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { optPrev = opts[optCur].optPrev2; back = opts[optCur].backPrev2; opts[optCur].state.updateLongRep(); } else { back = opts[optCur].backPrev; if (back < REPS) opts[optCur].state.updateLongRep(); else opts[optCur].state.updateMatch(); } if (back < REPS) { opts[optCur].reps[0] = opts[optPrev].reps[back]; int rep; for (rep = 1; rep <= back; ++rep) opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; for (; rep < REPS; ++rep) opts[optCur].reps[rep] = opts[optPrev].reps[rep]; } else { opts[optCur].reps[0] = back - REPS; System.arraycopy(opts[optPrev].reps, 0, opts[optCur].reps, 1, REPS - 1); } } } /** * Calculates prices of a literal, a short rep, and literal + rep0. */ private void calc1BytePrices(int pos, int posState, int avail, int anyRepPrice) { // This will be set to true if using a literal or a short rep. boolean nextIsByte = false; int curByte = lz.getByte(0); int matchByte = lz.getByte(opts[optCur].reps[0] + 1); // Try a literal. int literalPrice = opts[optCur].price + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), pos, opts[optCur].state); if (literalPrice < opts[optCur + 1].price) { opts[optCur + 1].set1(literalPrice, optCur, -1); nextIsByte = true; } // Try a short rep. if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur || opts[optCur + 1].backPrev != 0)) { int shortRepPrice = getShortRepPrice(anyRepPrice, opts[optCur].state, posState); if (shortRepPrice <= opts[optCur + 1].price) { opts[optCur + 1].set1(shortRepPrice, optCur, 0); nextIsByte = true; } } // If neither a literal nor a short rep was the cheapest choice, // try literal + long rep0. if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { int lenLimit = Math.min(niceLen, avail - 1); int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); if (len >= MATCH_LEN_MIN) { nextState.set(opts[optCur].state); nextState.updateLiteral(); int nextPosState = (pos + 1) & posMask; int price = literalPrice + getLongRepAndLenPrice(0, len, nextState, nextPosState); int i = optCur + 1 + len; while (optEnd < i) opts[++optEnd].reset(); if (price < opts[i].price) opts[i].set2(price, optCur, 0); } } } /** * Calculates prices of long rep and long rep + literal + rep0. */ private int calcLongRepPrices(int pos, int posState, int avail, int anyRepPrice) { int startLen = MATCH_LEN_MIN; int lenLimit = Math.min(avail, niceLen); for (int rep = 0; rep < REPS; ++rep) { int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); if (len < MATCH_LEN_MIN) continue; while (optEnd < optCur + len) opts[++optEnd].reset(); int longRepPrice = getLongRepPrice(anyRepPrice, rep, opts[optCur].state, posState); for (int i = len; i >= MATCH_LEN_MIN; --i) { int price = longRepPrice + repLenEncoder.getPrice(i, posState); if (price < opts[optCur + i].price) opts[optCur + i].set1(price, optCur, rep); } if (rep == 0) startLen = len + 1; int len2Limit = Math.min(niceLen, avail - len - 1); int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], len2Limit); if (len2 >= MATCH_LEN_MIN) { // Rep int price = longRepPrice + repLenEncoder.getPrice(len, posState); nextState.set(opts[optCur].state); nextState.updateLongRep(); // Literal int curByte = lz.getByte(len, 0); int matchByte = lz.getByte(0); // lz.getByte(len, len) int prevByte = lz.getByte(len, 1); price += literalEncoder.getPrice(curByte, matchByte, prevByte, pos + len, nextState); nextState.updateLiteral(); // Rep0 int nextPosState = (pos + len + 1) & posMask; price += getLongRepAndLenPrice(0, len2, nextState, nextPosState); int i = optCur + len + 1 + len2; while (optEnd < i) opts[++optEnd].reset(); if (price < opts[i].price) opts[i].set3(price, optCur, rep, len, 0); } } return startLen; } /** * Calculates prices of a normal match and normal match + literal + rep0. */ private void calcNormalMatchPrices(int pos, int posState, int avail, int anyMatchPrice, int startLen) { // If the longest match is so long that it would not fit into // the opts array, shorten the matches. if (matches.len[matches.count - 1] > avail) { matches.count = 0; while (matches.len[matches.count] < avail) ++matches.count; matches.len[matches.count++] = avail; } if (matches.len[matches.count - 1] < startLen) return; while (optEnd < optCur + matches.len[matches.count - 1]) opts[++optEnd].reset(); int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, opts[optCur].state); int match = 0; while (startLen > matches.len[match]) ++match; for (int len = startLen; ; ++len) { int dist = matches.dist[match]; // Calculate the price of a match of len bytes from the nearest // possible distance. int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, dist, len, posState); if (matchAndLenPrice < opts[optCur + len].price) opts[optCur + len].set1(matchAndLenPrice, optCur, dist + REPS); if (len != matches.len[match]) continue; // Try match + literal + rep0. First get the length of the rep0. int len2Limit = Math.min(niceLen, avail - len - 1); int len2 = lz.getMatchLen(len + 1, dist, len2Limit); if (len2 >= MATCH_LEN_MIN) { nextState.set(opts[optCur].state); nextState.updateMatch(); // Literal int curByte = lz.getByte(len, 0); int matchByte = lz.getByte(0); // lz.getByte(len, len) int prevByte = lz.getByte(len, 1); int price = matchAndLenPrice + literalEncoder.getPrice(curByte, matchByte, prevByte, pos + len, nextState); nextState.updateLiteral(); // Rep0 int nextPosState = (pos + len + 1) & posMask; price += getLongRepAndLenPrice(0, len2, nextState, nextPosState); int i = optCur + len + 1 + len2; while (optEnd < i) opts[++optEnd].reset(); if (price < opts[i].price) opts[i].set3(price, optCur, dist + REPS, len, 0); } if (++match == matches.count) break; } } }