/*
* Copyright 2004-2014 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.dev.fs;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import java.util.Random;
import org.h2.mvstore.Cursor;
import org.h2.mvstore.DataUtils;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.h2.store.fs.FileUtils;
import org.h2.util.New;
/**
* An archive tool to compress directories, using the MVStore backend.
*/
public class ArchiveToolStore {
private static final int[] RANDOM = new int[256];
private static final int MB = 1000 * 1000;
private long lastTime;
private long start;
private int bucket;
private String fileName;
static {
Random r = new Random(1);
for (int i = 0; i < RANDOM.length; i++) {
RANDOM[i] = r.nextInt();
}
}
/**
* Run the tool.
*
* @param args the command line arguments
*/
public static void main(String... args) throws Exception {
ArchiveToolStore app = new ArchiveToolStore();
String arg = args.length != 3 ? null : args[0];
if ("-compress".equals(arg)) {
app.fileName = args[1];
app.compress(args[2]);
} else if ("-extract".equals(arg)) {
app.fileName = args[1];
app.expand(args[2]);
} else {
System.out.println("Command line options:");
System.out.println("-compress <file> <sourceDir>");
System.out.println("-extract <file> <targetDir>");
}
}
private void compress(String sourceDir) throws Exception {
start();
long tempSize = 8 * 1024 * 1024;
String tempFileName = fileName + ".temp";
ArrayList<String> fileNames = New.arrayList();
System.out.println("Reading the file list");
long totalSize = addFiles(sourceDir, fileNames);
System.out.println("Compressing " + totalSize / MB + " MB");
FileUtils.delete(tempFileName);
FileUtils.delete(fileName);
MVStore storeTemp = new MVStore.Builder().
fileName(tempFileName).
autoCommitDisabled().
open();
final MVStore store = new MVStore.Builder().
fileName(fileName).
pageSplitSize(2 * 1024 * 1024).
compressHigh().
autoCommitDisabled().
open();
MVMap<String, int[]> filesTemp = storeTemp.openMap("files");
long currentSize = 0;
int segmentId = 1;
int segmentLength = 0;
ByteBuffer buff = ByteBuffer.allocate(1024 * 1024);
for (String s : fileNames) {
String name = s.substring(sourceDir.length() + 1);
if (FileUtils.isDirectory(s)) {
// directory
filesTemp.put(name, new int[1]);
continue;
}
buff.clear();
buff.flip();
ArrayList<Integer> posList = new ArrayList<Integer>();
FileChannel fc = FileUtils.open(s, "r");
try {
boolean eof = false;
while (true) {
while (!eof && buff.remaining() < 512 * 1024) {
int remaining = buff.remaining();
buff.compact();
buff.position(remaining);
int l = fc.read(buff);
if (l < 0) {
eof = true;
}
buff.flip();
}
if (buff.remaining() == 0) {
break;
}
int c = getChunkLength(buff.array(), buff.position(),
buff.limit()) - buff.position();
byte[] bytes = new byte[c];
System.arraycopy(buff.array(), buff.position(), bytes, 0, c);
buff.position(buff.position() + c);
int[] key = getKey(bucket, bytes);
key[3] = segmentId;
while (true) {
MVMap<int[], byte[]> data = storeTemp.
openMap("data" + segmentId);
byte[] old = data.get(key);
if (old == null) {
// new
data.put(key, bytes);
break;
}
if (Arrays.equals(old, bytes)) {
// duplicate
break;
}
// same checksum: change checksum
key[2]++;
}
for (int i = 0; i < key.length; i++) {
posList.add(key[i]);
}
segmentLength += c;
currentSize += c;
if (segmentLength > tempSize) {
storeTemp.commit();
segmentId++;
segmentLength = 0;
}
printProgress(0, 50, currentSize, totalSize);
}
} finally {
fc.close();
}
int[] posArray = new int[posList.size()];
for (int i = 0; i < posList.size(); i++) {
posArray[i] = posList.get(i);
}
filesTemp.put(name, posArray);
}
storeTemp.commit();
ArrayList<Cursor<int[], byte[]>> list = New.arrayList();
totalSize = 0;
for (int i = 1; i <= segmentId; i++) {
MVMap<int[], byte[]> data = storeTemp.openMap("data" + i);
totalSize += data.sizeAsLong();
Cursor<int[], byte[]> c = data.cursor(null);
if (c.hasNext()) {
c.next();
list.add(c);
}
}
segmentId = 1;
segmentLength = 0;
currentSize = 0;
MVMap<int[], byte[]> data = store.openMap("data" + segmentId);
MVMap<int[], Boolean> keepSegment = storeTemp.openMap("keep");
while (list.size() > 0) {
Collections.sort(list, new Comparator<Cursor<int[], byte[]>>() {
@Override
public int compare(Cursor<int[], byte[]> o1,
Cursor<int[], byte[]> o2) {
int[] k1 = o1.getKey();
int[] k2 = o2.getKey();
int comp = 0;
for (int i = 0; i < k1.length - 1; i++) {
long x1 = k1[i];
long x2 = k2[i];
if (x1 > x2) {
comp = 1;
break;
} else if (x1 < x2) {
comp = -1;
break;
}
}
return comp;
}
});
Cursor<int[], byte[]> top = list.get(0);
int[] key = top.getKey();
byte[] bytes = top.getValue();
int[] k2 = Arrays.copyOf(key, key.length);
k2[key.length - 1] = 0;
// TODO this lookup can be avoided
// if we remember the last entry with k[..] = 0
byte[] old = data.get(k2);
if (old == null) {
if (segmentLength > tempSize) {
// switch only for new entries
// where segmentId is 0,
// so that entries with the same
// key but different segmentId
// are in the same segment
store.commit();
segmentLength = 0;
segmentId++;
data = store.openMap("data" + segmentId);
}
key = k2;
// new entry
data.put(key, bytes);
segmentLength += bytes.length;
} else if (Arrays.equals(old, bytes)) {
// duplicate
} else {
// almost a duplicate:
// keep segment id
keepSegment.put(key, Boolean.TRUE);
data.put(key, bytes);
segmentLength += bytes.length;
}
if (!top.hasNext()) {
list.remove(0);
} else {
top.next();
}
currentSize++;
printProgress(50, 100, currentSize, totalSize);
}
MVMap<String, int[]> files = store.openMap("files");
for (Entry<String, int[]> e : filesTemp.entrySet()) {
String k = e.getKey();
int[] ids = e.getValue();
if (ids.length == 1) {
// directory
files.put(k, ids);
continue;
}
int[] newIds = Arrays.copyOf(ids, ids.length);
for (int i = 0; i < ids.length; i += 4) {
int[] id = new int[4];
id[0] = ids[i];
id[1] = ids[i + 1];
id[2] = ids[i + 2];
id[3] = ids[i + 3];
if (!keepSegment.containsKey(id)) {
newIds[i + 3] = 0;
}
}
files.put(k, newIds);
}
store.commit();
storeTemp.close();
FileUtils.delete(tempFileName);
store.close();
System.out.println();
System.out.println("Compressed to " +
FileUtils.size(fileName) / MB + " MB");
printDone();
}
private void start() {
this.start = System.nanoTime();
this.lastTime = start;
}
private void printProgress(int low, int high, long current, long total) {
long now = System.nanoTime();
if (now - lastTime > TimeUnit.SECONDS.toNanos(5)) {
System.out.print((low + (high - low) * current / total) + "% ");
lastTime = now;
}
}
private void printDone() {
System.out.println("Done in " +
TimeUnit.NANOSECONDS.toSeconds(System.nanoTime() - start) +
" seconds");
}
private static long addFiles(String dir, ArrayList<String> list) {
long size = 0;
for (String s : FileUtils.newDirectoryStream(dir)) {
if (FileUtils.isDirectory(s)) {
size += addFiles(s, list);
} else {
size += FileUtils.size(s);
}
list.add(s);
}
return size;
}
private void expand(String targetDir) throws Exception {
start();
long tempSize = 8 * 1024 * 1024;
String tempFileName = fileName + ".temp";
FileUtils.createDirectories(targetDir);
MVStore store = new MVStore.Builder().
fileName(fileName).open();
MVMap<String, int[]> files = store.openMap("files");
System.out.println("Extracting " + files.size() + " files");
MVStore storeTemp = null;
FileUtils.delete(tempFileName);
long totalSize = 0;
int lastSegment = 0;
for (int i = 1;; i++) {
if (!store.hasMap("data" + i)) {
lastSegment = i - 1;
break;
}
}
storeTemp = new MVStore.Builder().
fileName(tempFileName).
autoCommitDisabled().
open();
MVMap<Integer, String> fileNames = storeTemp.openMap("fileNames");
MVMap<String, int[]> filesTemp = storeTemp.openMap("files");
int fileId = 0;
for (Entry<String, int[]> e : files.entrySet()) {
fileNames.put(fileId++, e.getKey());
filesTemp.put(e.getKey(), e.getValue());
totalSize += e.getValue().length / 4;
}
storeTemp.commit();
files = filesTemp;
long currentSize = 0;
int chunkSize = 0;
for (int s = 1; s <= lastSegment; s++) {
MVMap<int[], byte[]> segmentData = store.openMap("data" + s);
// key: fileId, blockId; value: data
MVMap<int[], byte[]> fileData = storeTemp.openMap("fileData" + s);
fileId = 0;
for (Entry<String, int[]> e : files.entrySet()) {
int[] keys = e.getValue();
if (keys.length == 1) {
fileId++;
continue;
}
for (int i = 0; i < keys.length; i += 4) {
int[] dk = new int[4];
dk[0] = keys[i];
dk[1] = keys[i + 1];
dk[2] = keys[i + 2];
dk[3] = keys[i + 3];
byte[] bytes = segmentData.get(dk);
if (bytes != null) {
int[] k = new int[] { fileId, i / 4 };
fileData.put(k, bytes);
chunkSize += bytes.length;
if (chunkSize > tempSize) {
storeTemp.commit();
chunkSize = 0;
}
currentSize++;
printProgress(0, 50, currentSize, totalSize);
}
}
fileId++;
}
storeTemp.commit();
}
ArrayList<Cursor<int[], byte[]>> list = New.arrayList();
totalSize = 0;
currentSize = 0;
for (int i = 1; i <= lastSegment; i++) {
MVMap<int[], byte[]> fileData = storeTemp.openMap("fileData" + i);
totalSize += fileData.sizeAsLong();
Cursor<int[], byte[]> c = fileData.cursor(null);
if (c.hasNext()) {
c.next();
list.add(c);
}
}
String lastFileName = null;
OutputStream file = null;
int[] lastKey = null;
while (list.size() > 0) {
Collections.sort(list, new Comparator<Cursor<int[], byte[]>>() {
@Override
public int compare(Cursor<int[], byte[]> o1,
Cursor<int[], byte[]> o2) {
int[] k1 = o1.getKey();
int[] k2 = o2.getKey();
int comp = 0;
for (int i = 0; i < k1.length; i++) {
long x1 = k1[i];
long x2 = k2[i];
if (x1 > x2) {
comp = 1;
break;
} else if (x1 < x2) {
comp = -1;
break;
}
}
return comp;
}
});
Cursor<int[], byte[]> top = list.get(0);
int[] key = top.getKey();
byte[] bytes = top.getValue();
String f = targetDir + "/" + fileNames.get(key[0]);
if (!f.equals(lastFileName)) {
if (file != null) {
file.close();
}
String p = FileUtils.getParent(f);
if (p != null) {
FileUtils.createDirectories(p);
}
file = new BufferedOutputStream(new FileOutputStream(f));
lastFileName = f;
} else {
if (key[0] != lastKey[0] || key[1] != lastKey[1] + 1) {
System.out.println("missing entry after " + Arrays.toString(lastKey));
}
}
lastKey = key;
file.write(bytes);
if (!top.hasNext()) {
list.remove(0);
} else {
top.next();
}
currentSize++;
printProgress(50, 100, currentSize, totalSize);
}
for (Entry<String, int[]> e : files.entrySet()) {
String f = targetDir + "/" + e.getKey();
int[] keys = e.getValue();
if (keys.length == 1) {
FileUtils.createDirectories(f);
} else if (keys.length == 0) {
// empty file
String p = FileUtils.getParent(f);
if (p != null) {
FileUtils.createDirectories(p);
}
new FileOutputStream(f).close();
}
}
if (file != null) {
file.close();
}
store.close();
storeTemp.close();
FileUtils.delete(tempFileName);
System.out.println();
printDone();
}
private int getChunkLength(byte[] data, int start, int maxPos) {
int minLen = 4 * 1024;
int mask = 4 * 1024 - 1;
int factor = 31;
int hash = 0, mul = 1, offset = 8;
int min = Integer.MAX_VALUE;
int max = Integer.MIN_VALUE;
int i = start;
int[] rand = RANDOM;
for (int j = 0; i < maxPos; i++, j++) {
hash = hash * factor + rand[data[i] & 255];
if (j >= offset) {
hash -= mul * rand[data[i - offset] & 255];
} else {
mul *= factor;
}
if (hash < min) {
min = hash;
}
if (hash > max) {
max = hash;
}
if (j > minLen) {
if (j > minLen * 4) {
break;
}
if ((hash & mask) == 1) {
break;
}
}
}
bucket = min;
return i;
}
private static int[] getKey(int bucket, byte[] buff) {
int[] key = new int[4];
int[] counts = new int[8];
int len = buff.length;
for (int i = 0; i < len; i++) {
int x = buff[i] & 0xff;
counts[x >> 5]++;
}
int cs = 0;
for (int i = 0; i < 8; i++) {
cs *= 2;
if (counts[i] > (len / 32)) {
cs += 1;
}
}
key[0] = cs;
key[1] = bucket;
key[2] = DataUtils.getFletcher32(buff, buff.length);
return key;
}
}