package org.deeplearning4j.examples.utilities;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

import java.io.*;

/**
 * Helper for unpacking gzip-compressed tar archives.
 *
 * Created by Alex on 27/01/2017.
 */
public class DataUtilities {

    /** Size, in bytes, of the copy buffer and of the buffered output stream. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Extracts a gzip-compressed tar archive ({@code .tar.gz}) into a directory.
     *
     * <p>Directory entries are created (including missing parents); every other entry is
     * written as a file, overwriting an existing file of the same name. Progress is printed
     * to {@code System.out}: one dot per 1000 extracted files, followed by a summary line.
     *
     * <p>Entry names are resolved relative to {@code outputPath}. An entry whose resolved
     * location falls outside that directory (for example a name containing {@code ../})
     * is rejected with an {@link IOException} instead of being written.
     *
     * @param filePath   path of the {@code .tar.gz} archive to read
     * @param outputPath directory to extract into; a trailing separator is optional, and an
     *                   empty string means the current working directory
     * @throws IOException if the archive cannot be read, an entry would be written outside
     *                     {@code outputPath}, or a directory or file cannot be created
     */
    public static void extractTarGz(String filePath, String outputPath) throws IOException {
        // new File("") used as a parent resolves children against a system default directory
        // rather than the working directory, so map the empty path to "." explicitly.
        File outputDir = new File(outputPath.isEmpty() ? "." : outputPath);
        String canonicalRoot = outputDir.getCanonicalPath();
        // Compare against "root + separator" so that a sibling such as "/data-evil" is not
        // mistaken for a child of "/data".
        String rootPrefix = canonicalRoot.endsWith(File.separator)
                ? canonicalRoot
                : canonicalRoot + File.separator;

        int fileCount = 0;
        int dirCount = 0;
        byte[] buffer = new byte[BUFFER_SIZE];

        System.out.print("Extracting files");
        try (TarArchiveInputStream tais = new TarArchiveInputStream(
                new GzipCompressorInputStream(
                        new BufferedInputStream(
                                new FileInputStream(filePath))))) {
            TarArchiveEntry entry;
            while ((entry = (TarArchiveEntry) tais.getNextEntry()) != null) {
                File target = resolveInside(outputDir, canonicalRoot, rootPrefix, entry.getName());

                if (entry.isDirectory()) {
                    ensureDirectory(target);
                    dirCount++;
                } else {
                    // Archives are not required to list a directory before the files in it.
                    File parent = target.getParentFile();
                    if (parent != null) {
                        ensureDirectory(parent);
                    }

                    // try-with-resources closes the file even if the copy fails midway.
                    try (BufferedOutputStream dest =
                                 new BufferedOutputStream(new FileOutputStream(target), BUFFER_SIZE)) {
                        int count;
                        // tais.read(...) returns -1 at the end of the current entry.
                        while ((count = tais.read(buffer, 0, BUFFER_SIZE)) != -1) {
                            dest.write(buffer, 0, count);
                        }
                    }

                    fileCount++;
                    // Report progress only when a file was actually extracted; checking after
                    // directory entries would emit spurious dots.
                    if (fileCount % 1000 == 0) {
                        System.out.print(".");
                    }
                }
            }
        }

        System.out.println("\n" + fileCount + " files and " + dirCount
                + " directories extracted to: " + outputPath);
    }

    /**
     * Resolves an archive entry name against the output directory and verifies that the
     * result stays inside it.
     */
    private static File resolveInside(File outputDir, String canonicalRoot, String rootPrefix,
                                      String entryName) throws IOException {
        File target = new File(outputDir, entryName);
        String canonicalTarget = target.getCanonicalPath();
        boolean inside = canonicalTarget.equals(canonicalRoot)
                || canonicalTarget.startsWith(rootPrefix);
        if (!inside) {
            throw new IOException("Archive entry is outside of the target directory: " + entryName);
        }
        return target;
    }

    /** Creates the directory (and any missing parents), failing if it still does not exist. */
    private static void ensureDirectory(File dir) throws IOException {
        // mkdirs() returns false when the directory already exists, so re-check afterwards.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Could not create directory: " + dir);
        }
    }
}