package com.github.zangxiaoqiang.common.hadoop;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.util.LineReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static convenience helpers around a single shared Hadoop {@link FileSystem}.
 *
 * <p>The shared file system and codec factory are created by the static
 * initializer (see {@link #init()}), which keeps retrying until both objects
 * have been created. The file system can later be swapped with
 * {@link #setFileSystem(FileSystem)}.
 */
public class HadoopUtil {
    private static final Logger log = LoggerFactory.getLogger(HadoopUtil.class);

    /** Pause between two connection attempts in {@link #init()}, in milliseconds. */
    private static final long RETRY_INTERVAL_MS = 1000L;

    private static FileSystem fs;
    private static CompressionCodecFactory codecFactory;

    /**
     * Replaces the shared file system used by every helper in this class.
     * The codec factory is left untouched.
     *
     * @param fileSystem the file system to use from now on
     */
    public static void setFileSystem(FileSystem fileSystem) {
        fs = fileSystem;
    }

    static {
        init();
    }

    /**
     * (Re-)creates the shared file system and codec factory from a default
     * {@link Configuration}, retrying once per second until it succeeds.
     *
     * <p>An interrupt received while waiting does not abort the retry loop
     * (callers rely on the file system being available afterwards), but the
     * thread's interrupt status is restored before this method returns so the
     * interruption is not lost.
     */
    public static void init() {
        Configuration conf = new Configuration();
        boolean interrupted = false;
        try {
            while (!init(conf)) {
                log.error("Could not init HDFS by time:{}", System.currentTimeMillis());
                try {
                    Thread.sleep(RETRY_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // Remember the interrupt and re-assert it after the loop; re-asserting
                    // it right here would make every following sleep() fail immediately.
                    interrupted = true;
                    log.error(e.getMessage(), e);
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Performs one attempt at creating the shared file system and codec factory.
     *
     * @param conf the Hadoop configuration to connect with
     * @return {@code true} when both objects were created, {@code false} when
     *         the attempt failed (the failure is logged, not propagated)
     */
    private static boolean init(Configuration conf) {
        try {
            fs = FileSystem.newInstance(conf);
            codecFactory = new CompressionCodecFactory(conf);
            return fs != null && codecFactory != null;
        } catch (Exception e) {
            // Boundary catch: any failure here simply means "try again later".
            log.error("fail to connect to hadoop, sleep 1s", e);
        }
        return false;
    }

    /**
     * @return the shared file system currently in use
     */
    public static FileSystem getFileSystem() {
        return fs;
    }

    /**
     * @param path the path to check
     * @return {@code true} if the path exists on the shared file system
     * @throws IOException if the file system call fails
     */
    public static boolean existPath(Path path) throws IOException {
        return getFileSystem().exists(path);
    }

    /**
     * @param path the path to inspect
     * @return the modification time reported by the path's {@link FileStatus}
     * @throws IOException if the status cannot be read
     */
    public static long getLastModifiedTime(String path) throws IOException {
        FileStatus status = getFileSystem().getFileStatus(new Path(path));
        return status.getModificationTime();
    }

    /**
     * String overload of {@link #openFile(Path)}.
     *
     * @param path the path to open
     * @return an open stream, or {@code null} if the path does not exist
     * @throws IOException if the file system call fails
     */
    public static InputStream openFile(String path) throws IOException {
        return openFile(new Path(path));
    }

    /**
     * Opens the given path for reading. The caller owns the returned stream
     * and must close it.
     *
     * @param path the path to open
     * @return an open stream, or {@code null} if the path does not exist
     * @throws IOException if the file system call fails
     */
    public static InputStream openFile(Path path) throws IOException {
        FileSystem fileSystem = getFileSystem();
        if (!fileSystem.exists(path)) {
            return null;
        }
        return fileSystem.open(path);
    }

    /**
     * @param path the path to check
     * @return {@code true} if the path exists and is not a regular file
     * @throws IOException if the file system call fails
     */
    public static boolean isDir(String path) throws IOException {
        Path p = new Path(path);
        FileSystem fileSystem = getFileSystem();
        return fileSystem.exists(p) && !fileSystem.isFile(p);
    }

    /**
     * Lists the names (last path component) of the direct children of a
     * directory. No filtering is applied, so the result contains the names of
     * both files and sub-directories.
     *
     * @param parentDir the directory to list
     * @return the child names, or {@code null} if the path does not exist or
     *         is a regular file
     * @throws IOException if the file system call fails
     */
    public static List<String> getSubDirs(Path parentDir) throws IOException {
        if (!existPath(parentDir)) {
            return null;
        }
        FileSystem fileSystem = getFileSystem();
        if (fileSystem.isFile(parentDir)) {
            return null;
        }
        List<String> childNames = new ArrayList<String>();
        for (FileStatus child : fileSystem.listStatus(parentDir)) {
            childNames.add(child.getPath().getName());
        }
        return childNames;
    }

    /**
     * String overload of {@link #getSubDirs(Path)}.
     *
     * @param parentDir the directory to list
     * @return the child names, or {@code null} if the path does not exist or
     *         is a regular file
     * @throws IOException if the file system call fails
     */
    public static List<String> getSubDirs(String parentDir) throws IOException {
        return getSubDirs(new Path(parentDir));
    }

    /**
     * Lists every file and folder directly under a directory.
     *
     * @param parentDir the directory to list
     * @return the status of each child, or {@code null} if {@code parentDir}
     *         is {@code null}, does not exist, or is not a directory
     * @throws IOException if the file system call fails
     */
    public static FileStatus[] listAll(Path parentDir) throws IOException {
        if (parentDir == null || !existPath(parentDir)) {
            return null;
        }
        FileSystem fileSystem = getFileSystem();
        FileStatus parentStatus = fileSystem.getFileStatus(parentDir);
        if (!parentStatus.isDirectory()) {
            return null;
        }
        return fileSystem.listStatus(parentDir);
    }

    /**
     * @param path the path to look up
     * @return the codec the shared {@link CompressionCodecFactory} resolves
     *         for the path; {@link #getLineReader(Path)} treats {@code null}
     *         as "not compressed"
     */
    public static CompressionCodec getCodec(Path path) {
        return codecFactory.getCodec(path);
    }

    /**
     * Creates a {@link LineReader} that yields the uncompressed lines of the
     * given path. When a codec is resolved for the path the raw stream is
     * wrapped in the codec's decompressing stream; otherwise it is read as is.
     * The original author notes that Hive currently only supports '\n' as the
     * line terminator.
     *
     * @param path the file to read
     * @return a line reader over the (decompressed) file content; the caller
     *         must close it
     * @throws FileNotFoundException if the path does not exist
     * @throws IOException if the file cannot be opened or the codec stream
     *         cannot be created
     */
    public static LineReader getLineReader(Path path) throws IOException {
        InputStream inputStream = openFile(path);
        if (inputStream == null) {
            // openFile signals a missing path with null; fail here with a clear
            // message instead of a NullPointerException on first read.
            throw new FileNotFoundException("Path does not exist: " + path);
        }
        boolean handedOver = false;
        try {
            CompressionCodec codec = getCodec(path);
            LineReader reader = new LineReader(
                    codec == null ? inputStream : codec.createInputStream(inputStream));
            handedOver = true;
            return reader;
        } finally {
            if (!handedOver) {
                // Wrapping failed, so nobody else will ever close the raw stream.
                closeAfterFailure(inputStream, path);
            }
        }
    }

    /**
     * Closes a stream on an error path, logging (rather than throwing) a close
     * failure so that it does not replace the exception already in flight.
     */
    private static void closeAfterFailure(InputStream in, Path path) {
        try {
            in.close();
        } catch (IOException e) {
            log.warn("Failed to close stream for " + path, e);
        }
    }

    /**
     * Computes the total size in bytes of a file, or of every file below a
     * directory (recursively).
     *
     * @param path the file or directory; {@code null} or empty yields 0
     * @return the accumulated length in bytes
     * @throws IOException if the status of the path or of a descendant cannot
     *         be read
     */
    public static long getFileTotalSize(String path) throws IOException {
        if (path == null || path.isEmpty()) {
            return 0;
        }
        FileSystem fileSystem = getFileSystem();
        FileStatus status = fileSystem.getFileStatus(new Path(path));
        return getFileTotalSize(fileSystem, status);
    }

    /**
     * Sums {@link #getFileTotalSize(String)} over several paths.
     *
     * @param paths the files or directories; a {@code null} list yields 0,
     *        matching how a {@code null} single path is treated
     * @return the accumulated length in bytes
     * @throws IOException if any path cannot be inspected
     */
    public static long getFileTotalSize(List<String> paths) throws IOException {
        if (paths == null) {
            return 0;
        }
        long allSize = 0;
        for (String path : paths) {
            allSize += getFileTotalSize(path);
        }
        return allSize;
    }

    /**
     * Recursively accumulates the length of a file or directory tree.
     *
     * @param fs the file system used to list directories
     * @param stauts the status of the file or directory to measure
     * @return the file's own length, or the sum over all descendants for a
     *         directory
     * @throws IOException if a directory cannot be listed
     */
    public static long getFileTotalSize(FileSystem fs, FileStatus stauts) throws IOException {
        if (!stauts.isDirectory()) {
            return stauts.getLen();
        }
        long totalSize = 0;
        for (FileStatus child : fs.listStatus(stauts.getPath())) {
            totalSize += getFileTotalSize(fs, child);
        }
        return totalSize;
    }

    /**
     * Writes text to a path: appends when the path already exists, creates it
     * otherwise. The text is encoded as UTF-8 so that non-ASCII characters are
     * preserved, and the stream is always closed, even when the write fails.
     *
     * @param uri the target path
     * @param value the text to write
     * @throws IOException if the stream cannot be opened, written or closed
     */
    public static void write(String uri, String value) throws IOException {
        Path path = new Path(uri);
        FileSystem fileSystem = getFileSystem();
        FSDataOutputStream out = fileSystem.exists(path)
                ? fileSystem.append(path)
                : fileSystem.create(path);
        try {
            // Not writeBytes(): it keeps only the low byte of every char.
            out.write(value.getBytes("UTF-8"));
        } finally {
            out.close();
        }
    }

    /**
     * Deletes a path without recursion.
     *
     * @param filePath the path to delete
     * @return {@code true} if the path does not exist, otherwise the result
     *         of the file system's delete call
     * @throws IOException if the file system call fails
     */
    public static boolean deleteFile(String filePath) throws IOException {
        return deleteIfExists(filePath, false);
    }

    /**
     * Deletes a path recursively.
     *
     * @param filePath the path to delete
     * @return {@code true} if the path does not exist, otherwise the result
     *         of the file system's delete call
     * @throws IOException if the file system call fails
     */
    public static boolean deleteFolder(String filePath) throws IOException {
        return deleteIfExists(filePath, true);
    }

    /** Shared implementation of {@link #deleteFile} and {@link #deleteFolder}. */
    private static boolean deleteIfExists(String filePath, boolean recursive) throws IOException {
        Path path = new Path(filePath);
        FileSystem fileSystem = getFileSystem();
        if (!fileSystem.exists(path)) {
            return true;
        }
        return fileSystem.delete(path, recursive);
    }
}