JICPCompressor1.java example

Explorer
jade_agents-master
- src

/**
 * Byte Array Compressor
 *
 * The (de-)compression algorithm is quite simple and memory efficient.
 *
 * The compression is done using a dictionary of words. Words are detected
 * by predefined separators (see isSep()). The dictionary of the words is
 * the (de-)compression array itself.
 *
 * @author Steffen Rusitschka, Siemens AG, CT IC 6
 * @author Dmitri Toropov, Siemens AG, CT IC 6
 *
 */

package jade.imtp.leap.JICP;

import java.io.*;


/**
 * Dictionary + run-length byte array compressor.
 *
 * <p>The stream is produced in two layers:
 * <ol>
 *   <li>Word layer: the input is split into words at separator bytes (see
 *       {@link #isSep(int)}). A word longer than two bytes that was seen
 *       before (including its terminating separator) is replaced by
 *       {@code WORD_MAGIC, index}. A literal {@code WORD_MAGIC} byte is
 *       escaped as {@code WORD_MAGIC, 255}.</li>
 *   <li>RLE layer: runs of equal values coming out of the word layer are
 *       encoded as {@code RLE_MAGIC, count[, value]}.</li>
 * </ol>
 *
 * <p>The dictionary only stores offsets into the uncompressed data, so the
 * decompressor rebuilds it on the fly from its own output.
 *
 * <p>Instances carry per-run state and are created internally by the static
 * entry points; a single instance must not be reused for several runs.
 */
public class JICPCompressor1 {

    /** Marker introducing a dictionary reference or an escaped literal. */
    private static final int WORD_MAGIC      = 200;
    /** Marker introducing a run-length encoded sequence. */
    private static final int RLE_MAGIC       = 201;
    /** Capacity of the word dictionary; indexes are 0..MAX_WORDS-1. */
    private static final int MAX_WORDS       = 254;
    /** Second byte of the escape sequence for a literal WORD_MAGIC byte. */
    private static final int WORD_ESCAPE     = 255;
    /** Shortest word (in bytes) that is entered into the dictionary. */
    private static final int MIN_WORD_LENGTH = 3;
    /** Longest run a single RLE sequence can describe (count is one byte). */
    private static final int RLE_MAX_RUN     = 255;

    /** Out-of-band value: "no pending value" for RLE and "end of data" for getValue(). */
    private static final int RLE_FLUSH       = 0x100;

    // RLE state shared by rleWrite() (pending run) and rleRead() (remaining run)
    private int    rleOldValue   = RLE_FLUSH;
    private int    rleOccurrence = 0;

    /** Uncompressed data: the input while compressing, the growing output while decompressing. */
    private byte[] ba;

    /**
     * Tells whether a value terminates a word.
     *
     * @param value byte value (any int; everything that is not an ASCII
     *              letter, digit, '-' or '_' counts as separator, including
     *              negative values and RLE_FLUSH)
     * @return true if the value is a separator
     */
    private static boolean isSep(int value) {
        boolean wordChar = (value >= 'A' && value <= 'Z')
                        || (value >= 'a' && value <= 'z')
                        || (value >= '0' && value <= '9')
                        || value == '-'
                        || value == '_';
        return !wordChar;
    }

    /**
     * Run-length encoding write. Values are buffered until the run ends;
     * passing RLE_FLUSH forces the pending run to be written out.
     *
     * @param baos  destination of the encoded bytes
     * @param value value 0..255 to append, or RLE_FLUSH to flush
     */
    private void rleWrite(ByteArrayOutputStream baos, int value) {
        if (rleOldValue == RLE_FLUSH) {
            // nothing pending yet: just start a new run
            rleOldValue   = value;
            rleOccurrence = 1;
            return;
        }

        if (value != rleOldValue || rleOccurrence == RLE_MAX_RUN) {
            if (rleOldValue == RLE_MAGIC || rleOccurrence > 2) {
                // A literal RLE_MAGIC must always be written in encoded form.
                // For counts of 1 or 2 the value byte is omitted: the reader
                // then knows the run consists of RLE_MAGIC itself.
                baos.write(RLE_MAGIC);
                baos.write(rleOccurrence);
                if (rleOccurrence > 2) {
                    baos.write(rleOldValue);
                }
            } else {
                // short runs are cheaper written verbatim
                for (int i = 0; i < rleOccurrence; ++i) {
                    baos.write(rleOldValue);
                }
            }
            rleOccurrence = 0;
        }

        rleOccurrence++;
        rleOldValue = value;
    }

    /**
     * Run-length encoding read.
     *
     * @param bais source of the encoded bytes
     * @return the next decoded value 0..255, or -1 at the end of the stream
     * @throws IllegalArgumentException if the stream ends inside an RLE
     *         sequence or contains a zero run length
     */
    private int rleRead(ByteArrayInputStream bais) {
        if (rleOccurrence == 0) {
            rleOldValue = bais.read();
            if (rleOldValue == RLE_MAGIC) {
                rleOccurrence = bais.read();
                if (rleOccurrence <= 0) {
                    // without this check the counter would go negative and
                    // the reader would return RLE_MAGIC forever
                    throw new IllegalArgumentException(
                        "corrupt compressed data: invalid run length " + rleOccurrence);
                }
                if (rleOccurrence > 2) {
                    rleOldValue = bais.read();
                    if (rleOldValue < 0) {
                        throw new IllegalArgumentException(
                            "corrupt compressed data: truncated run-length sequence");
                    }
                }
            } else {
                rleOccurrence = 1;
            }
        }
        rleOccurrence--;
        return rleOldValue;
    }

    /**
     * Reads one unsigned byte of the uncompressed data.
     *
     * @param index position inside {@code ba}
     * @return the value 0..255, or RLE_FLUSH if index is at or past the end
     */
    private int getValue(int index) {
        return index >= ba.length ? RLE_FLUSH : (ba[index] & 255);
    }

    /**
     * Stores one byte of uncompressed data, growing {@code ba} if needed.
     *
     * @param index position to write to
     * @param value value whose low 8 bits are stored
     */
    private void setValue(int index, int value) {
        if (index >= ba.length) {
            // grow by 25% (same as index * 5 / 4 + 1, but overflows later)
            byte[] newba = new byte[index + index / 4 + 1];
            System.arraycopy(ba, 0, newba, 0, ba.length);
            ba = newba;
        }
        ba[index] = (byte) value;
    }

    /**
     * Writes a literal value through the RLE layer, escaping WORD_MAGIC as
     * (WORD_MAGIC, 255) so the decompressor does not mistake it for a
     * dictionary reference.
     *
     * @param baos  destination of the encoded bytes
     * @param value value 0..255, or RLE_FLUSH at the end of the data
     */
    private void writeLiteral(ByteArrayOutputStream baos, int value) {
        rleWrite(baos, value);
        if (value == WORD_MAGIC) {
            rleWrite(baos, WORD_ESCAPE);
        }
    }

    /**
     * Looks up the word starting at beginIndex in the dictionary. Two words
     * are equal when all their bytes up to and including the terminating
     * separator are equal.
     *
     * @param wordIndexes dictionary of word start offsets into {@code ba}
     * @param wordCount   number of valid dictionary entries
     * @param beginIndex  start offset of the word to look up
     * @return the dictionary index of the word, or -1 if it is not known
     */
    private int findWord(int[] wordIndexes, int wordCount, int beginIndex) {
        for (int wi = 0; wi < wordCount; ++wi) {
            int existingWordIndex = wordIndexes[wi];

            for (int j = 0; ; ++j) {
                int ch1 = getValue(existingWordIndex + j);
                int ch2 = getValue(beginIndex + j);

                if (ch1 != ch2) {
                    break; // words are different
                }
                if (isSep(ch1)) {
                    return wi; // same bytes up to the same separator
                }
            }
        }
        return -1;
    }

    /**
     * Compresses a byte array.
     *
     * <p>Algorithm: whenever a separator is reached, the word in front of it
     * is looked up in the dictionary (if it is longer than two bytes). A
     * known word is written as WORD_MAGIC followed by its dictionary index
     * and the separator. An unknown word is added to the dictionary (its
     * start offset is stored, overwriting the oldest entries once MAX_WORDS
     * is reached) and written literally. A literal WORD_MAGIC byte is
     * written as (WORD_MAGIC, 255), which is why the dictionary is limited
     * to 254 entries (MAX_WORDS).
     *
     * @param ba the data to compress, may be null
     * @return the compressed data, or null if ba is null
     */
    public static byte[] compress(byte[] ba) {
        if (ba == null) {
            return null;
        }
        return new JICPCompressor1().compressHelper(ba);
    }

    private byte[] compressHelper(byte[] uba) {
        int[]                 wordIndexes   = new int[MAX_WORDS];
        int                   beginIndex    = 0; // start of the current word
        int                   wordIndex     = 0; // next dictionary slot to fill
        int                   lastWordIndex = 0; // number of valid dictionary slots
        ByteArrayOutputStream baos          = new ByteArrayOutputStream();

        ba = uba; // set ba, so it is accessible through getValue().

        // i == ba.length yields RLE_FLUSH, which acts as the final separator
        // and flushes the RLE layer.
        for (int i = 0; i <= ba.length; ++i) {
            int sep = getValue(i);
            if (!isSep(sep)) {
                continue;
            }

            boolean foundWord = false;

            if ((i - beginIndex) >= MIN_WORD_LENGTH) {
                int wi = findWord(wordIndexes, lastWordIndex, beginIndex);
                if (wi >= 0) {
                    foundWord = true;
                    rleWrite(baos, WORD_MAGIC);
                    rleWrite(baos, wi);
                    // the separator is a literal and must be escaped like
                    // one, otherwise a WORD_MAGIC separator would be read
                    // back as the start of another reference
                    writeLiteral(baos, sep);
                } else {
                    if (wordIndex == MAX_WORDS) {
                        wordIndex = 0;
                    }
                    wordIndexes[wordIndex++] = beginIndex;
                    if (wordIndex > lastWordIndex) {
                        lastWordIndex = wordIndex;
                    }
                }
            }

            if (!foundWord) {
                // word (possibly empty) plus its separator, verbatim
                for (int j = beginIndex; j <= i; ++j) {
                    writeLiteral(baos, getValue(j));
                }
            }

            beginIndex = i + 1;
        }

        return baos.toByteArray();
    }

    /**
     * Decompresses a byte array produced by {@link #compress(byte[])}.
     *
     * <p>Algorithm: the dictionary is rebuilt while decoding. Every literal
     * separator that ends a word longer than two bytes registers that word,
     * exactly as the compressor did. WORD_MAGIC followed by 255 decodes to a
     * literal WORD_MAGIC byte (which is itself a separator). WORD_MAGIC
     * followed by any other byte is a dictionary index; the referenced word
     * is copied from the already decoded data up to its separator.
     *
     * @param cba the compressed data, may be null
     * @return the decompressed data, or null if cba is null
     * @throws IllegalArgumentException if cba is not a valid compressed stream
     */
    public static byte[] decompress(byte[] cba) {
        if (cba == null) {
            return null;
        }
        return new JICPCompressor1().decompressHelper(cba);
    }

    private byte[] decompressHelper(byte[] cba) {
        int[]                wordIndexes  = new int[MAX_WORDS];
        int                  wordIndex    = 0; // next dictionary slot to fill
        int                  wordCount    = 0; // number of valid dictionary slots
        int                  currentIndex = 0; // write position in ba
        int                  beginIndex   = 0; // start of the current word
        ByteArrayInputStream bais         = new ByteArrayInputStream(cba);
        int                  ch;

        // initial guess, setValue() grows the buffer on demand
        ba = new byte[cba.length + cba.length / 2];

        while ((ch = rleRead(bais)) != -1) {
            if (ch == WORD_MAGIC) {
                int wi = rleRead(bais);
                if (wi != WORD_ESCAPE) {
                    // Only registered entries are terminated by a separator
                    // that lies before currentIndex; anything else could make
                    // the copy loop below run over its own output forever.
                    if (wi < 0 || wi >= wordCount) {
                        throw new IllegalArgumentException(
                            "corrupt compressed data: invalid word reference " + wi);
                    }
                    int refWordIndex = wordIndexes[wi];
                    while (!isSep(ba[refWordIndex])) {
                        setValue(currentIndex++, ba[refWordIndex++]);
                    }
                    // the separator of the referenced word follows as the
                    // next literal; the next word starts right after it
                    beginIndex = currentIndex + 1;
                    continue;
                }
                // escaped literal WORD_MAGIC: handled below like any other
                // separator, mirroring what the compressor does
            }

            if (isSep(ch)) {
                if ((currentIndex - beginIndex) >= MIN_WORD_LENGTH) {
                    if (wordIndex == MAX_WORDS) {
                        wordIndex = 0;
                    }
                    wordIndexes[wordIndex++] = beginIndex;
                    if (wordIndex > wordCount) {
                        wordCount = wordIndex;
                    }
                }
                beginIndex = currentIndex + 1;
            }

            setValue(currentIndex++, ch);
        }

        byte[] newba = new byte[currentIndex];
        System.arraycopy(ba, 0, newba, 0, currentIndex);
        return newba;
    }
}