package net.mcforkage.ant;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import net.mcforkage.ant.compression.BitOutputStream;
import net.mcforkage.ant.compression.FrequencyTable;
import net.mcforkage.ant.compression.HuffmanNode;
import net.mcforkage.ant.compression.HuffmanTable;
import net.mcforkage.ant.compression.HuffmanTreeVisitor;
import net.mcforkage.ant.compression.HuffmanNode.Leaf;
import net.mcforkage.ant.compression.HuffmanNode.Node;
import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Task;
// Hacked out in a few hours, so not well documented.
// This basically Huffman-codes the various components of the patch2 file.
//
// The indices Huffman table is itself written using Huffman-coded (!) differences between consecutive values.
// This is because the indices are large numbers, most of which only occur once, but which are close to each other.
// This saves some ridiculous amount of the output file size (about 400KiB out of 1400KiB).
//
// 108.6% file size compared to 7-Zip Ultra preset. 111.0% file size compared to 7-Zip highest settings.
// An obvious improvement would be to switch Huffman coding for arithmetic or range coding.
//
// TODO: Why we are doing this instead of just using LZMA?
public class CompressDiff2Task extends Task {
private File infile, outfile;
public void setInput(File f) {infile = f;}
public void setOutput(File f) {outfile = f;}
public static void main(String[] args) throws Exception {
CompressDiff2Task t = new CompressDiff2Task();
t.infile = new File("../../build/bytecode.patch2");
t.outfile = new File("../../build/bytecode.patch2z");
t.execute();
}
private static class InputLine {
String literalString;
int index, length;
boolean isLiteral;
public InputLine(String literal) {
this.literalString = literal;
this.isLiteral = true;
}
public InputLine(int index, int length) {
this.index = index;
this.length = length;
}
}
@Override
public void execute() throws BuildException {
if(infile == null) throw new BuildException("Input file not set");
if(outfile == null) throw new BuildException("Output file not set");
FrequencyTable<String> literalFreq = new FrequencyTable<>();
FrequencyTable<Integer> indexFreq = new FrequencyTable<>();
FrequencyTable<Integer> lengthFreq = new FrequencyTable<>();
FrequencyTable<Character> charFreq = new FrequencyTable<>();
final int LITERAL_INDEX = -1;
final int EOF_INDEX = -2;
indexFreq.add(EOF_INDEX);
List<InputLine> patchLines = new ArrayList<>();
try (BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(infile), StandardCharsets.UTF_8))) {
String line;
while((line = in.readLine()) != null) {
if(line.startsWith("write ")) {
String literal = line.substring(6);
patchLines.add(new InputLine(literal));
literalFreq.add(literal);
indexFreq.add(LITERAL_INDEX);
if(literalFreq.counts.get(literal) == 1) {
for(int k = 0; k < literal.length(); k++)
charFreq.add(literal.charAt(k));
}
charFreq.add('\uFFFE');
} else if(line.startsWith("copy ")) {
String[] parts = line.split(" ");
int index = Integer.parseInt(parts[1]);
int length = Integer.parseInt(parts[2]);
indexFreq.add(index);
lengthFreq.add(length);
patchLines.add(new InputLine(index, length));
}
}
} catch(IOException e) {
throw new BuildException(e);
}
HuffmanTable<String> literalTable = HuffmanTable.build(literalFreq);
HuffmanTable<Integer> indexTable = HuffmanTable.build(indexFreq);
HuffmanTable<Integer> lengthTable = HuffmanTable.build(lengthFreq);
HuffmanTable<Character> charTable = HuffmanTable.build(charFreq);
try (BitOutputStream out = new BitOutputStream(new BufferedOutputStream(new FileOutputStream(outfile)))) {
charTable.writeTable(out);
writeStringHuffmanTable(literalTable.root, out, charTable);
writeDiffedHuffmanTable(indexTable.root, out, 1);
lengthTable.writeTable(out);
for(InputLine line : patchLines) {
if(line.isLiteral) {
indexTable.write(LITERAL_INDEX, out);
literalTable.write(line.literalString, out);
} else {
indexTable.write(line.index, out);
lengthTable.write(line.length, out);
}
}
indexTable.write(EOF_INDEX, out);
} catch(IOException e) {
throw new BuildException(e);
}
}
private void writeStringHuffmanTable(HuffmanNode<String> t, BitOutputStream out, HuffmanTable<Character> charTable) throws IOException {
if(t instanceof HuffmanNode.Node<?>) {
out.write(true);
writeStringHuffmanTable(((HuffmanNode.Node<String>)t).c0, out, charTable);
writeStringHuffmanTable(((HuffmanNode.Node<String>)t).c1, out, charTable);
return;
}
String val = ((HuffmanNode.Leaf<String>)t).value;
out.write(false);
for(int k = 0; k < val.length(); k++)
charTable.write(val.charAt(k), out);
charTable.write('\uFFFE', out);
}
private void writeDiffedHuffmanTable(HuffmanNode<Integer> t, final BitOutputStream out, int levels) throws IOException {
if(levels == 0) {
t.writeTree(out);
return;
}
final FrequencyTable<Integer> diffFreq = new FrequencyTable<>();
final int[] lastVal = {-2};
t.accept(new HuffmanTreeVisitor<Integer>() {
@Override
public void visit(Leaf<Integer> n) {
int diff = n.value - lastVal[0];
diffFreq.add(diff);
lastVal[0] = n.value;
}
@Override
public void visit(Node<Integer> n) {
n.c0.accept(this);
n.c1.accept(this);
}
});
final HuffmanTable<Integer> difftable = HuffmanTable.build(diffFreq);
writeDiffedHuffmanTable(difftable.root, out, levels - 1);
lastVal[0] = -2;
t.accept(new HuffmanTreeVisitor<Integer>() {
@Override
public void visit(Leaf<Integer> n) {
int diff = n.value - lastVal[0];
try {
out.write(false);
difftable.write(diff, out);
} catch(IOException e) {
throw new RuntimeException(e);
}
lastVal[0] = n.value;
}
@Override
public void visit(Node<Integer> n) {
try {
out.write(true);
} catch(IOException e) {
throw new RuntimeException(e);
}
n.c0.accept(this);
n.c1.accept(this);
}
});
}
}