package com.alimama.mdrill.utils; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStream; import java.io.OutputStream; import java.util.Enumeration; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.alimama.mdrill.utils.zip.ZipEntry; import com.alimama.mdrill.utils.zip.ZipFile; import com.alimama.mdrill.utils.zip.ZipOutputStream; /** * <p> * ZIP工具包 * </p> * <p> * 依赖:ant-1.7.1.jar * </p> * * @author IceWee * @date 2012-5-26 * @version 1.0 */ public class ZipUtils { /** * 使用GBK编码可以避免压缩中文文件名乱码 */ private static final String CHINESE_CHARSET = "GBK"; /** * 文件读取缓冲区大小 */ private static final int CACHE_SIZE = 10240; /** * <p> * 压缩文件 * </p> * * @param sourceFolder 压缩文件夹 * @param zipFilePath 压缩文件输出路径 * @throws Exception */ public static void zip(FileSystem fs,String sourceFolder1, FileSystem fs2,String zipFilePath2) throws Exception { OutputStream out = fs2.create(new Path(zipFilePath2),true); BufferedOutputStream bos = new BufferedOutputStream(out); ZipOutputStream zos = new ZipOutputStream(bos); // 解决中文文件名乱码 zos.setEncoding(CHINESE_CHARSET); Path basePath = null; Path src=new Path(sourceFolder1); FileStatus f=fs.getFileStatus(src); if (f.isDir()) { basePath =f.getPath(); } else { basePath = f.getPath().getParent(); } zipFile(fs,f, basePath, zos); zos.closeEntry(); zos.close(); bos.close(); out.close(); } /** * <p> * 递归压缩文件 * </p> * * @param parentFile * @param basePath * @param zos * @throws Exception */ private static void zipFile(FileSystem fs,FileStatus parentFile, Path basePath, ZipOutputStream zos) throws Exception { FileStatus[] files = new FileStatus[0]; if (parentFile.isDir()) { files = fs.listStatus(parentFile.getPath()); } else { files = new FileStatus[1]; files[0] = parentFile; } String pathName; InputStream is; BufferedInputStream bis; byte[] cache = new byte[CACHE_SIZE]; for (FileStatus file : files) { if (file.isDir()) { pathName = file.getPath().toString().substring(basePath.toString().length() + 1) + "/"; zos.putNextEntry(new ZipEntry(pathName)); zipFile(fs,file, basePath, zos); } else { pathName = file.getPath().toString().substring(basePath.toString().length() + 1); is = fs.open(file.getPath()); bis = new BufferedInputStream(is); zos.putNextEntry(new ZipEntry(pathName)); int nRead = 0; while ((nRead = bis.read(cache, 0, CACHE_SIZE)) != -1) { zos.write(cache, 0, nRead); } bis.close(); is.close(); } } } /** * <p> * 解压压缩包 * </p> * * @param zipFilePath 压缩文件路径 * @param destDir 压缩包释放目录 * @throws Exception */ public static void unZip(FileSystem fs,String zipFilePath, FileSystem fs2,String destDir) throws Exception { FSDataInputStream in=fs.open(new Path(zipFilePath)); long length = fs.getFileStatus(new Path(zipFilePath)).getLen(); ZipFile zipFile = new ZipFile(in,length, CHINESE_CHARSET,true); Enumeration<?> emu = zipFile.getEntries(); BufferedInputStream bis; FSDataOutputStream fos; BufferedOutputStream bos; Path file, parentFile; ZipEntry entry; byte[] cache = new byte[CACHE_SIZE]; while (emu.hasMoreElements()) { entry = (ZipEntry) emu.nextElement(); if (entry.isDirectory()) { fs2.mkdirs(new Path(destDir , entry.getName())); continue; } bis = new BufferedInputStream(zipFile.getInputStream(entry)); file = new Path(destDir , entry.getName()); parentFile = file.getParent(); if (parentFile != null && (!fs2.exists(parentFile))) { fs2.mkdirs(parentFile); } fos =fs2.create(file,true); bos = new BufferedOutputStream(fos, CACHE_SIZE); int nRead = 0; while ((nRead = bis.read(cache, 0, CACHE_SIZE)) != -1) { fos.write(cache, 0, nRead); } bos.flush(); bos.close(); fos.close(); bis.close(); } zipFile.close(); } }