package hk.hku.cecid.edi.sfrm.archive;

import hk.hku.cecid.edi.sfrm.util.PathHelper;
import hk.hku.cecid.piazza.commons.io.FileSystem;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;

import org.apache.tools.tar.TarOutputStream;
import org.apache.tools.tar.TarEntry;
import org.apache.tools.tar.TarInputStream;

/**
 * Archiver that reads and writes TAR archives using the Apache Ant
 * tar classes ({@link TarOutputStream}, {@link TarInputStream},
 * {@link TarEntry}).
 *
 * @author Patrick Yip
 */
public class ArchiverTar extends ArchiverNULL {

	/** Block size, in bytes, used when estimating the size of a TAR archive. */
	public static int TAR_ENTRY_SIZE = 512;

	/**
	 * Upper bound of bytes requested per <code>transferTo</code> call.
	 * The original author's note states that <code>transferTo</code> can only
	 * transfer up to <code>Integer.MAX_VALUE - 1</code> bytes at once.
	 */
	private static final long MAX_TRANSFER_CHUNK = Integer.MAX_VALUE - 1;

	/**
	 * Write an (empty) directory entry for <code>srcDir</code> to the tar stream.
	 *
	 * @param srcDir
	 * 			The directory to record in the archive.
	 * @param dirpath
	 * 			The base archiving directory; the entry name is the path of
	 * 			<code>srcDir</code> relative to it.
	 * @param outs
	 * 			The tar stream to write to.
	 * @throws IOException
	 * 			if the entry can not be written.
	 * @return always true.
	 */
	private boolean writeDirEntry(File srcDir, String dirpath, TarOutputStream outs)
			throws IOException {
		String absolutePath = srcDir.getAbsolutePath();
		String entryName = absolutePath.substring(dirpath.length() + 1)
				.replace('\\', '/');
		// A trailing slash marks the entry as a directory.
		if (!entryName.endsWith("/")) {
			entryName += "/";
		}
		outs.putNextEntry(new TarEntry(entryName));
		outs.closeEntry();
		return true;
	}

	/**
	 * Write the file <code>srcFile</code> as one entry of the tar stream.
	 *
	 * @param srcFile
	 * 			The file to be added to the archive.
	 * @param dirpath
	 * 			The base archiving directory; the entry name is the path of
	 * 			<code>srcFile</code> relative to it.
	 * @param outs
	 * 			The tar stream receiving the entry header.
	 * @param tarChannel
	 * 			A channel wrapping <code>outs</code>, receiving the file content.
	 * @param includeItself
	 * 			Not used by this method.
	 * @throws IOException
	 * 			if the file can not be read or the entry can not be written.
	 * @return always true.
	 */
	private boolean writeEntries(File srcFile, String dirpath, TarOutputStream outs,
			WritableByteChannel tarChannel, boolean includeItself) throws IOException {
		String absolutePath = srcFile.getAbsolutePath();
		String entryName = absolutePath.substring(dirpath.length() + 1)
				.replace('\\', '/');

		// Create the tar entry from the file, then rename it so that the
		// entry is stored relative to the source directory.
		TarEntry tarEntry = new TarEntry(srcFile);
		tarEntry.setModTime(srcFile.lastModified());
		tarEntry.setName(entryName);

		// I/O piping.
		outs.putNextEntry(tarEntry);
		FileInputStream in = new FileInputStream(srcFile);
		try {
			FileChannel fc = in.getChannel();
			long size = fc.size();
			long position = 0;
			while (position < size) {
				long wanted = Math.min(size - position, MAX_TRANSFER_CHUNK);
				// transferTo may move fewer bytes than requested, so advance
				// by the count it actually reports.
				long moved = fc.transferTo(position, wanted, tarChannel);
				if (moved <= 0) {
					throw new IOException("Unable to transfer the content of file: "
							+ absolutePath + " at position " + position);
				}
				position += moved;
			}
			outs.closeEntry();
		} finally {
			// Closing the stream also closes its channel.
			in.close();
		}
		return true;
	}

	/**
	 * Compress every source in <code>src</code> into a single TAR archive
	 * <code>dest</code>. Both files and directories returned by
	 * <code>listAllToArchive</code> are recorded in the archive.
	 *
	 * @param src
	 * 			The sources of the file(s) to be archived.
	 * @param dest
	 * 			The destination of the archived file.
	 * @param includeItself
	 * 			the source directory includes in the archive if it is
	 * 			true, vice versa.
	 * @throws IOException
	 * 			if any kind of I/O errors.
	 * @return true if the operations run successfully.
	 */
	public boolean compress(List<File> src, File dest, boolean includeItself)
			throws IOException {
		FileOutputStream fos = new FileOutputStream(dest);
		try {
			TarOutputStream outs = new TarOutputStream(fos);
			WritableByteChannel tarChannel = Channels.newChannel(outs);
			outs.setLongFileMode(TarOutputStream.LONGFILE_GNU);

			for (File baseSrc : src) {
				super.compress(baseSrc, dest, includeItself);
				Iterator<?> allFiles = this.listAllToArchive(baseSrc);
				String dirpath = this.getBaseArchivingDirectory(baseSrc, includeItself);
				while (allFiles.hasNext()) {
					File srcFile = (File) allFiles.next();
					if (srcFile.isFile()) {
						writeEntries(srcFile, dirpath, outs, tarChannel, includeItself);
					} else if (srcFile.isDirectory()) {
						writeDirEntry(srcFile, dirpath, outs);
					}
				}
			}
			// Finish the archive; a failure here must reach the caller.
			outs.close();
		} finally {
			// Release the file handle even when archiving failed half way.
			fos.close();
		}
		return true;
	}

	/**
	 * Compress the <code>src</code> to <code>dest</code>
	 * in the archive form.<br><br>
	 *
	 * If the <code>src</code> is a file, then the resulting
	 * archive contains only that file.<br><br>
	 *
	 * If the <code>src</code> is a directory, then the resulting
	 * archive contains all files (recursively) in the <code>
	 * src</code>.
	 *
	 * The <code>src</code> file sets will be archived to TAR
	 * format, which comes from Apache Ant Tools Tar.<br><br>
	 *
	 * For more details,
	 * read <a href="http://www.jajakarta.org/ant/ant-1.6.1/docs/mix/manual/api/">Apache Ant Tool Tar</a>
	 *
	 * @param src
	 * 			The source of the file(s) to be archived.
	 * @param dest
	 * 			The destination of the archived file.
	 * @param includeItself
	 * 			the source directory includes in the archive if it is
	 * 			true, vice versa.
	 * @since
	 * 			1.0.2
	 * @throws IOException
	 * 			if any kind of I/O errors.
	 * @return true if the operations run successfully.
	 * @see hk.hku.cecid.edi.sfrm.archive.Archiver#compress(File, File, boolean)
	 */
	public boolean compress(File src, File dest, boolean includeItself)
			throws IOException {
		super.compress(src, dest, includeItself);
		FileOutputStream fos = new FileOutputStream(dest);
		try {
			TarOutputStream outs = new TarOutputStream(fos);
			WritableByteChannel tarChannel = Channels.newChannel(outs);
			Iterator<?> allFiles = this.listFilesToArchive(src);
			String dirpath = this.getBaseArchivingDirectory(src, includeItself);
			outs.setLongFileMode(TarOutputStream.LONGFILE_GNU);

			while (allFiles.hasNext()) {
				File srcFile = (File) allFiles.next();
				writeEntries(srcFile, dirpath, outs, tarChannel, includeItself);
			}
			// Finish the archive; a failure here must reach the caller.
			outs.close();
		} finally {
			// Release the file handle even when archiving failed half way.
			fos.close();
		}
		return true;
	}

	/* (non-Javadoc)
	 * @see hk.hku.cecid.piazza.commons.io.Archiver#compress(hk.hku.cecid.piazza.commons.io.FileSystem, java.io.File)
	 */
	public boolean compress(FileSystem src, File dest) throws IOException {
		return compress(src.getRoot(), dest, true);
	}

	/**
	 * Resolve a tar entry name against the extraction directory and make
	 * sure the result does not escape it (e.g. through ".." segments).
	 *
	 * @param dest
	 * 			The extraction directory.
	 * @param entryName
	 * 			The entry name, already using the platform separator.
	 * @throws IOException
	 * 			if the entry would be extracted outside <code>dest</code>.
	 * @return the file under <code>dest</code> the entry maps to.
	 */
	private static File resolveInsideDestination(File dest, String entryName)
			throws IOException {
		File target = new File(dest, entryName);
		String root = dest.getCanonicalPath();
		String resolved = target.getCanonicalPath();
		String rootPrefix = root.endsWith(File.separator) ? root : root + File.separator;
		if (!resolved.equals(root) && !resolved.startsWith(rootPrefix)) {
			throw new IOException(
				"Tar entry is outside of the destination directory :" + entryName);
		}
		return target;
	}

	/**
	 * Create the directory <code>dir</code> (and its parents) if missing.
	 *
	 * @throws IOException
	 * 			if the directory does not exist and can not be created.
	 */
	private static void ensureDirectory(File dir) throws IOException {
		if (!dir.exists() && !dir.mkdirs()) {
			throw new IOException(
				"Error making directory path :" + dir.getPath());
		}
	}

	/**
	 * Extract the <code>archive</code> to the <code>dest</code>
	 * directory.<br><br>
	 *
	 * Entries whose name resolves outside of <code>dest</code> are rejected
	 * with an <code>IOException</code>.
	 *
	 * @param archive
	 * 			The archive to be extracted.
	 * @param dest
	 * 			The destination directory to extract to.
	 * @since
	 * 			1.0.2
	 * @throws IOException
	 * 			Any kind of I/O errors, or if the archive contains no entry.
	 * @throws IllegalArgumentException
	 * 			If the <code>dest</code> is not a directory.
	 * @return true if the operations run successfully.
	 */
	public boolean extract(File archive, File dest) throws IOException {
		super.extract(archive, dest);
		int count = 0;
		TarInputStream tis = new TarInputStream(
			new BufferedInputStream(new FileInputStream(archive)));
		try {
			// FIXME: TUNE PLACE
			byte[] rdbuf = new byte[32 * 1024];
			TarEntry entry;
			while ((entry = tis.getNextEntry()) != null) {
				count++;
				String name = entry.getName().replace('/', File.separatorChar);
				File destFile = resolveInsideDestination(dest, name);
				if (entry.isDirectory()) {
					ensureDirectory(destFile);
					continue;
				}
				ensureDirectory(destFile.getParentFile());
				FileOutputStream out = new FileOutputStream(destFile);
				try {
					int numRead;
					while ((numRead = tis.read(rdbuf)) != -1) {
						out.write(rdbuf, 0, numRead);
					}
				} finally {
					out.close();
				}
			}
		} finally {
			// Closes the wrapped buffered / file streams as well.
			tis.close();
		}
		// NO FILE EXTRACTED, throw IOException
		if (count == 0) {
			throw new IOException("At least one file should be a TAR.");
		}
		return true;
	}

	/**
	 * Extract the <code>archive</code> to the root of the <code>dest</code>
	 * file system.<br>
	 * <br>
	 *
	 * @param archive
	 *            The archive to be extracted.
	 * @param dest
	 *            The destination file system to extract to.
	 * @since 1.0.2
	 * @throws IOException
	 *             Any kind of I/O errors.
	 * @return true if the operations run successfully.
	 */
	public boolean extract(File archive, FileSystem dest) throws IOException {
		return extract(archive, dest.getRoot());
	}

	/**
	 * Guess how big the archive would be without actually creating it.
	 * The algorithm of guessing the tar size is as follows:<br>
	 * For each file, one header block of TAR_ENTRY_SIZE bytes is counted,
	 * and the content is counted in blocks of TAR_ENTRY_SIZE bytes. If the
	 * last block of data is not full, the rest is padded with empty bytes.
	 * Such that the guessed size per file is
	 * (ceil(file_length/TAR_ENTRY_SIZE)+1)*TAR_ENTRY_SIZE. More details of tar file format can be
	 * found from <a href="http://en.wikipedia.org/wiki/Tarball">this</a>.
	 *
	 * @param src
	 * 			The source of the file(s) to be archived.
	 * @return guessed file size in bytes
	 * @since
	 * 			1.0.3
	 * @throws NullPointerException
	 * 			if the <code>src</code> is null.
	 * @throws IOException
	 * 			if one of the file in the folders
	 * 			does not exist in some reason.
	 */
	public long guessCompressedSize(File src) throws IOException {
		Iterator<?> allFiles = listFilesToArchive(src);
		long size = 0;
		while (allFiles.hasNext()) {
			// The file is opened (instead of calling File.length()) so that a
			// file which can not be opened is reported as an IOException.
			RandomAccessFile file = new RandomAccessFile((File) allFiles.next(), "r");
			try {
				long length = file.length();
				// Integer ceiling division keeps the arithmetic exact.
				long dataBlocks = (length + TAR_ENTRY_SIZE - 1) / TAR_ENTRY_SIZE;
				size += (dataBlocks + 1) * TAR_ENTRY_SIZE;
			} finally {
				file.close();
			}
		}
		return size;
	}

	/* (non-Javadoc)
	 * @see hk.hku.cecid.piazza.commons.io.Archiver#guessCompressedSize(hk.hku.cecid.piazza.commons.io.FileSystem)
	 */
	public long guessCompressedSize(FileSystem src) throws IOException {
		return guessCompressedSize(src.getRoot());
	}

	/* (non-Javadoc)
	 * @see hk.hku.cecid.piazza.commons.io.Archiver#isSupportArchive(java.io.File)
	 */
	public boolean isSupportArchive(File archive) {
		// Literal first, so a null extension yields false rather than an NPE.
		return "TAR".equalsIgnoreCase(
			PathHelper.getExtension(archive.getAbsolutePath()));
	}

	/**
	 * List the files inside the <code>archive</code>.<br>
	 *
	 * This operation is quite slow and pending to optimize.
	 *
	 * @param archive
	 * 			The archive to be listed.
	 * @since
	 * 			1.0.2
	 * @throws IOException
	 * 			if the archive can not be read.
	 * @return
	 * 			A list of java.io.File object that represents
	 * 			each entry in the archive.
	 */
	public List listAsFile(File archive) throws IOException {
		List<File> list = new ArrayList<File>();
		TarInputStream tarInStream = new TarInputStream(new FileInputStream(archive));
		try {
			TarEntry entry;
			while ((entry = tarInStream.getNextEntry()) != null) {
				File file = entry.getFile();
				// An entry may carry no File object; fall back to its name
				// so that the list never contains null.
				list.add(file != null ? file : new File(entry.getName()));
			}
		} finally {
			tarInStream.close();
		}
		return list;
	}

	/**
	 * List the files inside the <code>archive</code>.
	 *
	 * @param archive
	 * 			The archive to be listed.
	 * @since
	 * 			1.0.2
	 * @throws IOException
	 * 			if the archive can not be read.
	 * @return
	 * 			A list of String objects that represents
	 * 			the filename of each entry in the
	 * 			archive.
	 */
	public List listAsFilename(File archive) throws IOException {
		List<String> list = new ArrayList<String>();
		TarInputStream tarInStream = new TarInputStream(new FileInputStream(archive));
		try {
			TarEntry entry;
			while ((entry = tarInStream.getNextEntry()) != null) {
				list.add(entry.getName());
			}
		} finally {
			tarInStream.close();
		}
		return list;
	}
}