package org.deeplearning4j.examples.utilities;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import java.io.*;
/**
* Created by Alex on 27/01/2017.
*/
public class DataUtilities {

    /** Copy-buffer size in bytes for streaming entry contents to disk. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Extracts a {@code .tar.gz} archive into the given output directory.
     * <p>
     * Directory entries are created with {@link File#mkdirs()}; file entries are streamed
     * to disk in {@value #BUFFER_SIZE}-byte chunks. Missing parent directories of file
     * entries are created on demand, so archives that omit explicit directory entries
     * still extract correctly. Entries whose resolved path would escape the output
     * directory (the "zip slip" attack, e.g. names containing {@code ../}) are rejected.
     * Prints a progress dot every 1000 extracted files and a summary line at the end.
     *
     * @param filePath   path to the {@code .tar.gz} archive to read
     * @param outputPath directory to extract into (created as needed for entries)
     * @throws IOException if the archive cannot be read, an entry cannot be written,
     *                     or an entry's path escapes the output directory
     */
    public static void extractTarGz(String filePath, String outputPath) throws IOException {
        int fileCount = 0;
        int dirCount = 0;
        File outputDir = new File(outputPath);
        // Canonical output root, used to validate every entry against path traversal.
        String canonicalOutput = outputDir.getCanonicalPath();
        System.out.print("Extracting files");
        try (TarArchiveInputStream tais = new TarArchiveInputStream(
                new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(filePath))))) {
            TarArchiveEntry entry;
            // Read the tar entries using the getNextEntry method
            while ((entry = (TarArchiveEntry) tais.getNextEntry()) != null) {
                // File(parent, child) handles outputPath with or without a trailing
                // separator, unlike the raw string concatenation it replaces.
                File destFile = new File(outputDir, entry.getName());
                // Zip-slip guard: the canonical destination must stay inside outputDir.
                String canonicalDest = destFile.getCanonicalPath();
                if (!canonicalDest.equals(canonicalOutput)
                        && !canonicalDest.startsWith(canonicalOutput + File.separator)) {
                    throw new IOException("Archive entry escapes output directory: " + entry.getName());
                }
                if (entry.isDirectory()) {
                    destFile.mkdirs();
                    dirCount++;
                } else {
                    // Some archives omit directory entries; ensure the parent exists.
                    File parent = destFile.getParentFile();
                    if (parent != null) {
                        parent.mkdirs();
                    }
                    byte[] data = new byte[BUFFER_SIZE];
                    // try-with-resources: the stream is closed (and buffered bytes
                    // flushed) even if a read or write throws mid-copy.
                    try (BufferedOutputStream dest =
                            new BufferedOutputStream(new FileOutputStream(destFile), BUFFER_SIZE)) {
                        int count;
                        while ((count = tais.read(data, 0, BUFFER_SIZE)) != -1) {
                            dest.write(data, 0, count);
                        }
                    }
                    fileCount++;
                    // Progress dot once per 1000 extracted files (not on directory
                    // entries, where fileCount hasn't changed).
                    if (fileCount % 1000 == 0) System.out.print(".");
                }
            }
        }
        System.out.println("\n" + fileCount + " files and " + dirCount + " directories extracted to: " + outputPath);
    }
}