package org.openstack.atlas.logs.itest; import java.util.Arrays; import java.util.logging.Level; import java.util.logging.Logger; import org.openstack.atlas.logs.hadoop.sequencefiles.SequenceFileReaderException; import org.openstack.atlas.util.staticutils.StaticStringUtils; import org.openstack.atlas.util.staticutils.StaticFileUtils; import org.openstack.atlas.util.staticutils.StaticDateTimeUtils; import org.openstack.atlas.config.HadoopLogsConfigs; import com.hadoop.compression.lzo.LzoIndex; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.lang.reflect.Method; import java.net.URL; import java.net.URLClassLoader; import java.util.ArrayList; import java.util.List; import com.hadoop.compression.lzo.LzopCodec; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataOutputStream; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.io.compress.CompressionInputStream; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.openstack.atlas.config.LbLogsConfiguration; import org.openstack.atlas.logs.hadoop.jobs.HadoopJob; import org.openstack.atlas.logs.hadoop.jobs.HadoopLogSplitterJob; import org.openstack.atlas.logs.hadoop.writables.LogMapperOutputValue; import org.openstack.atlas.logs.hadoop.writables.LogReducerOutputValue; import org.openstack.atlas.util.debug.Debug; import org.joda.time.DateTime; import org.openstack.atlas.logs.hadoop.util.HdfsUtils; import org.openstack.atlas.logs.hadoop.util.LogChopper; import org.openstack.atlas.util.debug.SillyTimer; public class HdfsCli { private static final double GIGBYTES_DOUBLE = 1000.0 * 1000.0 * 1000.0; private static final double MILLISECOND_COEF = 0.001; private static final Pattern zipPattern = Pattern.compile(".*\\.zip$"); private static final int LARGEBUFFERSIZE = 8 * 1024 * 1024; private static final int PAGESIZE = 4096; private static final int HDFSBUFFSIZE = 512 * 1024; private static final int ONEMEG = 1024 * 1024; private static final int BUFFER_SIZE = 256 * 1024; private static final String LB_LOGS_SPLIT = "lb_logs_split"; private static List<String> jarFiles = new ArrayList<String>(); private static URLClassLoader jobClassLoader = null; private static String jobJarName = ""; private static SillyTimer timer = new SillyTimer(); public static void main(String[] argv) throws IOException, InterruptedException { System.out.printf("JAVA_LIBRARY_PATH=%s\n", System.getProperty("java.library.path")); timer.start(); String cmdLine; String[] args; if (argv.length >= 1) { System.out.printf("Using confFile %s\n", argv[0]); HadoopLogsConfigs.resetConfigs(argv[0]); } else { System.out.printf("Using confFile %s\n", LbLogsConfiguration.defaultConfigurationLocation); } HdfsUtils hdfsUtils = HadoopLogsConfigs.getHdfsUtils(); String user = HadoopLogsConfigs.getHdfsUserName(); Configuration conf =
HadoopLogsConfigs.getHadoopConfiguration(); HadoopLogsConfigs.markJobsJarAsAlreadyCopied(); URI defaultHdfsUri = FileSystem.getDefaultUri(conf); FileSystem fs = hdfsUtils.getFileSystem(); System.setProperty(CommonItestStatic.HDUNAME, user); FileSystem lfs = hdfsUtils.getLocalFileSystem(); BufferedReader stdin = StaticFileUtils.inputStreamToBufferedReader(System.in, BUFFER_SIZE); System.out.printf("\n"); List<WastedBytesBlock> wastedBlocks = new ArrayList<WastedBytesBlock>(); while (true) { try { System.out.printf("lbaas_hadoop_client %s> ", fs.getWorkingDirectory().toUri().toString()); cmdLine = stdin.readLine(); if (cmdLine == null) { break; // EOF } args = CommonItestStatic.stripBlankArgs(cmdLine); if (args.length < 1) { System.out.printf("Usage is help\n"); continue; } String cmd = args[0]; if (cmd.equals("help")) { System.out.printf("\n"); System.out.printf("Usage is\n"); System.out.printf("help\n"); System.out.printf("cat <path>\n"); System.out.printf("classInfo <classPath>\n"); System.out.printf("cd <path> #Change remote directory\n"); System.out.printf("cdin [dateKey] #Change to the input directory\n"); System.out.printf("cdout [dateKey] #Change to the output directory\n"); System.out.printf("chmod <octPerms> <path>\n"); System.out.printf("chown <user> <group> <path>\n"); System.out.printf("chuser <userName>\n"); System.out.printf("compressLzo <srcPath> <dstFile> [buffSize] #Compress lzo file\n"); System.out.printf("countLines <zeusFile> <nTicks> [buffSize]\n"); System.out.printf("countzipbytes <ndays> #Count the number of bytes in all the zips\n"); System.out.printf("countlzobytes <ndays> #Count the number of bytes in all the lzos\n"); System.out.printf("cpfl <srcPath local> <dstPath remote> [reps] [blocksize] #Copy from local\n"); System.out.printf("cpld <srcDir> <dstDir> args [reps] [blocksize]\n"); System.out.printf("cpLocal <localSrc> <localDst> [buffsize] #Non-hadoop file copy\n"); System.out.printf("cptl <srcPath remote> <dstPath local> #Copy to Local\n"); System.out.printf("cpjj #Copy the jobs jar\n"); System.out.printf("cpjjf #Mark the job jar as already copied\n"); System.out.printf("cpjju #Mark the job jar as not yet copied\n"); System.out.printf("diffConfig <confA.xml> <confB.xml> #Compare the differences between the configs\n"); System.out.printf("du #Show the number of bytes used on HDFS\n"); System.out.printf("dumpConfig <outFile.xml> <confIn.xml..> #Dump config to outfile\n"); System.out.printf("exit\n"); System.out.printf("findCp <className> #Find class path via reflection\n"); System.out.printf("gc\n"); System.out.printf("getlzo <DownloadDir> <dateHour> #Download the Lzo for the given hour\n"); System.out.printf("getzip <DownloadDir> <h=hourKey> <l=LoadbalancerId> #Download the zip files from Hdfs for the specified hour and loadbalancer\n"); System.out.printf("homedir\n"); System.out.printf("indexLzo <FileName>\n"); System.out.printf("joinPath <path1> ...<pathN> #Test joining the paths together, skipping double slashes.\n"); System.out.printf("lineIndex <fileName> #Index the line numbers in the file\n"); System.out.printf("lslzo [hourKey] #List the lzos in the input directory\n"); System.out.printf("ls [path] #List hdfs files\n"); System.out.printf("lsin [hourKey] #List the hour keys in the input directory; useful because ls prints long form\n"); System.out.printf("lsout #List the hour keys in the output directory; useful because ls prints long form\n"); System.out.printf("lsr [path] #List hdfs files recursively\n"); System.out.printf("lszip [l=lid] [h=hour] [m=missing] #List all zip files in the HDFS output directory for the given hour and/or lid\n");
System.out.printf("dlzip (<hourkey>|<startHour> <endHour>) [l=lid] [a=accountId] #Download all zip files to the local cache directory for the given keys\n"); System.out.printf("ullzo <file> #Upload the lzo file to hdfs\n"); System.out.printf("mem\n"); System.out.printf("mkdir <path>\n"); System.out.printf("printReducers <hdfsDir> #Display the contents of the reducer output\n"); System.out.printf("pwd #Print remote current directory\n"); System.out.printf("rebasePath <srcBase> <srcPath> <dstPath> #Show what the rebasePath method in StaticFileUtils would do\n"); System.out.printf("recompressIndex <srcFile> <hdfsDstFile> #Recompress and index lzo file and upload to hdfs\n"); System.out.printf("rmdir <path>\n"); System.out.printf("rmin <daysAgo> #Remove the input directories for entries that are older than daysAgo\n"); System.out.printf("rmout <daysAgo> #Remove the output directories for entries that are older than daysAgo\n"); System.out.printf("rm <path>\n"); System.out.printf("runJob <jobDriverClass>\n"); System.out.printf("runSplit <hourKey> #Run the HadoopLogSplitterJob for the specified hourKey\n"); System.out.printf("runMain <class> args0..N\n"); System.out.printf("uploadLzo <lzoFile> #Upload the lzo file\n"); System.out.printf("startTimer #Start timer\n"); System.out.printf("stopTimer #Stop timer\n"); System.out.printf("readTimer #Read timer\n"); System.out.printf("resetTimer #Reset the timer\n"); System.out.printf("scanLines <logFile> <nLines> <nTicks>\n"); System.out.printf("scanLinesLzo <logFile> <nLines> <nTicks>\n"); System.out.printf("scanhdfszips <yyyymmddhh> <yyyymmddhh> [scanparts=<true|false>] #Scan the hadoop output directories and count how many zips were found between the two dates\n"); System.out.printf("setJobJar <jobJar> #Set Jar file to classLoader\n"); System.out.printf("setReplCount <FilePath> <nReps> #Set the replication count for this file\n"); System.out.printf("spon #Enable speculative execution\n"); System.out.printf("spoff #Disable speculative execution\n"); System.out.printf("showCl <className> #Show class loader info via reflection\n"); System.out.printf("showConfig #Show hadoop configs\n"); System.out.printf("showCrc <fileName> #Show crc value that would be reported by Zip\n"); System.out.printf("wb <size> #Waste n bytes to experiment with the garbage collector\n"); System.out.printf("fb #Free all bytes wasted so far\n"); System.out.printf("wbs #List the number of bytes in the wasted byte buffer\n"); System.out.printf("whoami\n"); } else if (cmd.equals("resetTimer")) { System.out.printf("Timer reset\n"); timer.reset(); } else if (cmd.equals("startTimer")) { System.out.printf("Starting timer\n"); timer.start(); } else if (cmd.equals("stopTimer")) { System.out.printf("Stopping timer\n"); timer.stop(); } else if (cmd.equals("readTimer")) { System.out.printf("Timer now reads %f seconds\n", timer.readSeconds()); } else if (cmd.equals("cpjjf")) { System.out.printf("Marking the jobs jar as already copied\n"); HadoopLogsConfigs.markJobsJarAsAlreadyCopied(); } else if (cmd.equals("cpjju")) { System.out.printf("Marking the jobs jar as not yet copied\n"); HadoopLogsConfigs.markJobsJarAsUnCopied(); } else if (cmd.equals("spoff")) { System.out.printf("Attempting to disable speculative execution\n"); Configuration editConf; editConf = HadoopLogsConfigs.getHadoopConfiguration(); editConf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
editConf.setBoolean("mapred.map.tasks.speculative.execution", false); HadoopLogsConfigs.setHadoopConfiguration(editConf); } else if (cmd.equals("spon")) { System.out.printf("Attempting to enable speculative execution\n"); Configuration editConf; editConf = HadoopLogsConfigs.getHadoopConfiguration(); editConf.setBoolean("mapred.reduce.tasks.speculative.execution", true); editConf.setBoolean("mapred.map.tasks.speculative.execution", true); HadoopLogsConfigs.setHadoopConfiguration(editConf); } else if (cmd.equals("cpjj")) { System.out.printf("Attempting to Copy jobs jar\n"); HadoopLogsConfigs.copyJobsJar(); System.out.printf("JobJar copied.\n"); } else if (cmd.equals("wbs")) { long totalWastedBytes = 0L; for (WastedBytesBlock wastedBlock : wastedBlocks) { totalWastedBytes += wastedBlock.size(); } System.out.printf("Total wasted bytes: %d\n", totalWastedBytes); } else if (cmd.equals("wb") && args.length >= 2) { int size = Integer.parseInt(args[1]); double startTime = Debug.getEpochSeconds(); wastedBlocks.add(new WastedBytesBlock(size)); double stopTime = Debug.getEpochSeconds(); double delta = stopTime - startTime; double rate = (double) size / delta; String fmt = "Took %f seconds to wast %d bytes at a rate of %s bytes persecond\n"; System.out.printf(fmt, delta, size, Debug.humanReadableBytes(rate)); } else if (cmd.equals("fb")) { wastedBlocks = new ArrayList<WastedBytesBlock>(); } else if (cmd.equals("classInfo") && args.length >= 2) { String className = args[1]; System.out.printf("Looking up classinfo for %s\n", className); String classInfo = Debug.classLoaderInfo(className); System.out.printf("Class Info:\n%s\n", classInfo); } else if (cmd.equals("lsin")) { String inputDir = HadoopLogsConfigs.getMapreduceInputPrefix(); String hourKey = (args.length >= 2) ? args[1] : null; String fileDisplay = listHourKeyFiles(hdfsUtils, inputDir, hourKey); System.out.printf("%s\n", fileDisplay); } else if (cmd.equals("lsout")) { List<String> pathComps = new ArrayList<String>(); pathComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); pathComps.add(LB_LOGS_SPLIT); String outputDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps)); String hourKey = (args.length >= 2) ? 
args[1] : null; String fileDisplay = listHourKeyFiles(hdfsUtils, outputDir, hourKey); System.out.printf("%s\n", fileDisplay); } else if (cmd.equals("rmout") && args.length > 1) { int nFiles = 0; int daysAgo = Integer.parseInt(args[1]); List<Long> hourKeysListL = new ArrayList<Long>(); List<String> dirComps = new ArrayList<String>(); String lbLogSplitDir = StaticFileUtils.mergePathString(HadoopLogsConfigs.getMapreduceOutputPrefix(), LB_LOGS_SPLIT); FileStatus[] dateDirsStats = hdfsUtils.getFileSystem().listStatus(new Path(lbLogSplitDir)); DateTime now = StaticDateTimeUtils.nowDateTime(true); Long daysAgoLong = StaticDateTimeUtils.dateTimeToHourLong( StaticDateTimeUtils.nowDateTime( true).minusDays(daysAgo)); for (FileStatus fileStatus : dateDirsStats) { Long hourLong; String pathStr; try { pathStr = pathTailString(fileStatus); hourLong = Long.parseLong(pathStr); if (hourLong < daysAgoLong) { hourKeysListL.add(hourLong); nFiles++; } } catch (Exception ex) { continue; } } Collections.sort(hourKeysListL); System.out.printf("Attempting to delete hours "); for (Long hourKey : hourKeysListL) { System.out.printf("%s ", hourKey); System.out.flush(); } System.out.printf(" Delete above Files(Y/N) %f days of data\n", nFiles / 24.0); if (CommonItestStatic.inputStream(stdin, "Y")) { System.out.printf("Deleting\n"); for (Long hourLong : hourKeysListL) { dirComps.clear(); dirComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); dirComps.add(LB_LOGS_SPLIT); dirComps.add(hourLong.toString()); String pathStr = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(dirComps)); System.out.printf("Delete %s = ", pathStr); System.out.flush(); boolean resp = fs.delete(new Path(pathStr), true); System.out.printf("%s\n", resp); } } else { System.out.printf("bailing out\n"); continue; } } else if (cmd.equals("countzipbytes") && args.length > 1) { List<String> zipDirComps = new ArrayList<String>(); ZipBytesCounter totalCounts = new ZipBytesCounter(); Map<String, ZipBytesCounter> counterMap = new HashMap<String, ZipBytesCounter>(); int daysAgo = Integer.parseInt(args[1]); List<Long> hourKeysListL = new ArrayList<Long>(); String lbLogSplitDir = StaticFileUtils.mergePathString(HadoopLogsConfigs.getMapreduceOutputPrefix(), LB_LOGS_SPLIT); FileStatus[] dateDirsStats = hdfsUtils.getFileSystem().listStatus(new Path(lbLogSplitDir)); DateTime now = StaticDateTimeUtils.nowDateTime(true); Long daysAgoLong = StaticDateTimeUtils.dateTimeToHourLong( StaticDateTimeUtils.nowDateTime( true).minusDays(daysAgo)); System.out.printf("Scanning zipfiles for hours no less then %d days continue (%d) files (Y/N)\n", daysAgo, daysAgoLong); if (CommonItestStatic.inputStream(stdin, "Y")) { System.out.printf("Scanning directories\n"); } else { System.out.printf("not scanning\n"); continue; } for (FileStatus fileStatus : dateDirsStats) { Long hourLong; String pathStr; try { pathStr = pathTailString(fileStatus); hourLong = Long.parseLong(pathStr); if (hourLong >= daysAgoLong) { hourKeysListL.add(hourLong); } } catch (Exception ex) { continue; } } Collections.sort(hourKeysListL); for (Long hourKey : hourKeysListL) { counterMap.clear(); System.out.printf("scanning zips for hour %d: ", hourKey); zipDirComps.clear(); zipDirComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); zipDirComps.add(LB_LOGS_SPLIT); zipDirComps.add(hourKey.toString()); String partsDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(zipDirComps)); zipDirComps.add("zips"); String zipDirStr = 
StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(zipDirComps)); List<LogReducerOutputValue> zipInfo = hdfsUtils.getZipFileInfoList(partsDir); FileStatus[] zipStats = hdfsUtils.getFileSystem().listStatus(new Path(zipDirStr)); for (LogReducerOutputValue rout : zipInfo) { String logFile = rout.getLogFile(); if (!counterMap.containsKey(logFile)) { ZipBytesCounter zipCounter = new ZipBytesCounter(); zipCounter.setZipCount(1); counterMap.put(logFile, zipCounter); } ZipBytesCounter zipCounter = counterMap.get(logFile); zipCounter.setnLines(rout.getnLines()); zipCounter.setUncompressedBytes(rout.getFileSize()); Debug.nop(); } for (FileStatus zipStat : zipStats) { String logFile = zipStat.getPath().toUri().getPath(); if (!counterMap.containsKey(logFile)) { ZipBytesCounter zipCounter = new ZipBytesCounter(); zipCounter.setZipCount(1); counterMap.put(logFile, zipCounter); } ZipBytesCounter zipCounter = counterMap.get(logFile); zipCounter.setZipBytes(zipStat.getLen()); double ratio = (double) zipCounter.getZipBytes() / (double) zipCounter.getUncompressedBytes(); String zipCounterStr = "ratio:" + ratio + ":" + logFile + ":" + zipCounter.toString(); Debug.nop(); } ZipBytesCounter hourCount = ZipBytesCounter.countZips(counterMap); System.out.printf("%s\n", hourCount.toString()); totalCounts.incnLines(hourCount.getnLines()); totalCounts.incZipCount(hourCount.getZipCount()); totalCounts.incZipBytes(hourCount.getZipBytes()); totalCounts.incUncompressedBytes(hourCount.getUncompressedBytes()); Debug.nop(); } System.out.printf("Total zip bytes are %s\n", totalCounts.toString()); String fmt = "totalbytes in gigs is zipBytes = %f gigs uncompressedBytes = %f gigs\n"; double zipBytesGigs = totalCounts.getZipBytes() / GIGBYTES_DOUBLE; double zipUncompressedGigs = totalCounts.getUncompressedBytes() / GIGBYTES_DOUBLE; System.out.printf(fmt, zipBytesGigs, zipUncompressedGigs); } else if (cmd.equals("scanhdfszips")) { Map<String, String> kw = CommonItestStatic.argMapper(args); args = CommonItestStatic.stripKwArgs(args); List<Long> hourKeysListL = new ArrayList<Long>(); String lbLogSplitDir = StaticFileUtils.mergePathString(HadoopLogsConfigs.getMapreduceOutputPrefix(), LB_LOGS_SPLIT); FileStatus[] dateDirsStats = hdfsUtils.getFileSystem().listStatus(new Path(lbLogSplitDir)); for (FileStatus fileStatus : dateDirsStats) { Long hourLong; String pathStr; try { pathStr = pathTailString(fileStatus); hourLong = Long.parseLong(pathStr); } catch (Exception ex) { continue; } hourKeysListL.add(hourLong); } Collections.sort(hourKeysListL); DateTime startDt; if (args.length >= 2) { startDt = StaticDateTimeUtils.hourKeyToDateTime(args[1], false); } else { startDt = StaticDateTimeUtils.hourKeyToDateTime(hourKeysListL.get(0), false); } DateTime endDt; if (args.length >= 3) { endDt = StaticDateTimeUtils.hourKeyToDateTime(args[2], false); } else { endDt = StaticDateTimeUtils.hourKeyToDateTime(hourKeysListL.get(hourKeysListL.size() - 1), false); } DateTime curDt = new DateTime(startDt); String fmt = "Scanning for zips in date range (%d,%d)\n"; System.out.printf(fmt, StaticDateTimeUtils.dateTimeToHourLong(startDt), StaticDateTimeUtils.dateTimeToHourLong(endDt)); System.out.printf("Press Enter to continue\n"); stdin.readLine(); hourKeysListL = new ArrayList<Long>(); Map<String, HdfsZipDirScan> zipDirMap = new HashMap<String, HdfsZipDirScan>(); boolean scanParts = false; if (kw.containsKey("scanparts") && kw.get("scanparts").equalsIgnoreCase("true")) { scanParts = true; } while (true) { if (curDt.isAfter(endDt)) { break; } Long 
hourKeyL = StaticDateTimeUtils.dateTimeToHourLong(curDt); hourKeysListL.add(hourKeyL); curDt = curDt.plusHours(1); } Collections.sort(hourKeysListL); System.out.printf("Scanning directories:\n"); System.out.flush(); for (Long hourKeyL : hourKeysListL) { System.out.printf(" %d", hourKeyL); System.out.flush(); String key = hourKeyL.toString(); HdfsZipDirScan val = scanHdfsZipDirs(hdfsUtils, key, scanParts); zipDirMap.put(key, val); } System.out.printf("\n"); for (Long hourKey : hourKeysListL) { String key = hourKey.toString(); HdfsZipDirScan val = zipDirMap.get(key); System.out.printf("%s ", val.displayString()); if (scanParts) { Set<String> missingSet = new HashSet<String>(val.getPartZipsFound()); missingSet.removeAll(val.getZipsFound()); System.out.printf("found %d files in partitions but missing %d files", val.getPartZipsFound().size(), missingSet.size()); } if (!val.isDateDirFound() || !val.isZipDirFound()) { System.out.printf(" ******************\n"); } else { System.out.printf("\n"); } } } else if (cmd.equals("ullzo") && args.length >= 2) { String localLzoFilePath = StaticFileUtils.expandUser(args[1]); String localLzoFile = StaticFileUtils.pathTail(localLzoFilePath); Matcher m = HdfsUtils.hdfsLzoPatternPre.matcher(localLzoFile); if (!m.find()) { System.out.printf("%s doesn't look like a properly named lzo file\n", localLzoFilePath); continue; } String hourKey = m.group(1); // upload the lzo file List<String> hdfsLzoPathComps = new ArrayList<String>(); hdfsLzoPathComps.add(HadoopLogsConfigs.getMapreduceInputPrefix()); hdfsLzoPathComps.add(hourKey); hdfsLzoPathComps.add("0-" + hourKey + "-access_log.aggregated.lzo"); String hdfsLzoPath = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(hdfsLzoPathComps)); String hdfsLzoIdxPath = hdfsLzoPath + ".index"; // Verify the user wants to upload this file System.out.printf("Are you sure you want to upload %s to %s with index %s (Y/N)\n", localLzoFilePath, hdfsLzoPath, hdfsLzoIdxPath); if (CommonItestStatic.inputStream(stdin, "Y")) { System.out.printf("Uploading lzo\n"); } else { System.out.printf("Not uploading lzo\n"); continue; } Configuration codecConf = new Configuration(); codecConf.set("io.compression.codecs", "org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec"); codecConf.set("io.compression.codec.lzo.class", "com.hadoop.compression.lzo.LzoCodec"); LzopCodec codec = new LzopCodec(); codec.setConf(codecConf); System.out.printf("Uploading lzo %s to %s with idx file %s\n", localLzoFile, hdfsLzoPath, hdfsLzoIdxPath); InputStream lzoIs = StaticFileUtils.openInputFile(localLzoFilePath, BUFFER_SIZE); OutputStream lzoOs = hdfsUtils.openHdfsOutputFile(hdfsLzoPath, false, false); FSDataOutputStream lzoIdx = hdfsUtils.openHdfsOutputFile(hdfsLzoIdxPath, false, false); CompressionInputStream cis = codec.createInputStream(lzoIs); CompressionOutputStream cos = codec.createIndexedOutputStream(lzoOs, lzoIdx); StaticFileUtils.copyStreams(cis, cos, null, BUFFER_SIZE); cos.flush(); cos.finish(); StaticFileUtils.close(cis); StaticFileUtils.close(cos); StaticFileUtils.close(lzoIs); StaticFileUtils.close(lzoOs); StaticFileUtils.close(lzoIdx); } else if (cmd.equals("runSplit") && args.length >= 2) { HadoopLogsConfigs.copyJobsJar(); String hourKey = args[1]; // Setup Inputfile based on hourKey List<String> hdfsLzoPathComps = new ArrayList<String>(); hdfsLzoPathComps.add(HadoopLogsConfigs.getMapreduceInputPrefix());
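// The input file is expected to follow the naming convention <mapreduceInputPrefix>/<hourKey>/0-<hourKey>-access_log.aggregated.lzo, the same layout the ullzo command above uploads to.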
hdfsLzoPathComps.add(hourKey); hdfsLzoPathComps.add("0-" + hourKey + "-access_log.aggregated.lzo"); String hdfsLzoPath = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(hdfsLzoPathComps)); // Setup outputdir List<String> outDirComps = new ArrayList<String>(); outDirComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); outDirComps.add(LB_LOGS_SPLIT); outDirComps.add(hourKey); String outDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(outDirComps)); List<String> logSplitArgs = new ArrayList<String>(); logSplitArgs.add(HadoopLogsConfigs.getHdfsJobsJarPath()); logSplitArgs.add(outDir); logSplitArgs.add(""); logSplitArgs.add(hourKey); logSplitArgs.add(HadoopLogsConfigs.getNumReducers()); logSplitArgs.add(HadoopLogsConfigs.getHdfsUserName()); logSplitArgs.add(hdfsLzoPath); HadoopJob hadoopClient = new HadoopLogSplitterJob(); System.out.printf("Calling HadoopLogSplitterJob with args:\n"); for (int i = 0; i < logSplitArgs.size(); i++) { System.out.printf(" arg[%d] = \"%s\"\n", i, logSplitArgs.get(i)); } hadoopClient.setConfiguration(HadoopLogsConfigs.getHadoopConfiguration()); timer.start(); timer.reset(); int errorCode = hadoopClient.run(logSplitArgs); // Actually runs the Hadoop Job timer.stop(); System.out.printf("Hadoop run response code was %d in %f secs\n", errorCode, timer.readSeconds()); } else if (cmd.equals("getzip") && args.length > 1) { Map<String, String> kw = CommonItestStatic.argMapper(args); String lid = (kw.containsKey("l")) ? kw.get("l") : null; String hourKey = (kw.containsKey("h")) ? kw.get("h") : null; String downloadDir = args[1]; List<FileStatus> zipStatusList = hdfsUtils.listHdfsZipsStatus(hourKey, lid, false); System.out.printf("Attempting to fetch zipfiles\n"); for (FileStatus zipFileStatus : zipStatusList) { System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(zipFileStatus)); } System.out.printf("Are you sure you want to download the above files (Y/N)?"); if (CommonItestStatic.inputStream(stdin, "Y")) { for (FileStatus zipFileStatus : zipStatusList) { String hdfsZipFileStr = zipFileStatus.getPath().toUri().getRawPath(); String dstZipFileStr = StaticFileUtils.joinPath(downloadDir, StaticFileUtils.pathTail(hdfsZipFileStr)); System.out.printf("Downloading %s to %s\n", zipFileStatus.getPath().toUri().toString(), dstZipFileStr); InputStream is = hdfsUtils.openHdfsInputFile(zipFileStatus.getPath(), false); OutputStream os = StaticFileUtils.openOutputFile(dstZipFileStr, BUFFER_SIZE); StaticFileUtils.copyStreams(is, os, System.out, BUFFER_SIZE); is.close(); os.close(); } } } else if (cmd.equals("getlzo") && args.length > 2) { String downloadDir = args[1]; String dateHour = args[2]; System.out.printf("Searching for lzo files matching %s\n", dateHour); List<FileStatus> lzoFileStatusList = hdfsUtils.listHdfsLzoStatus(dateHour); System.out.printf("Attempting to download lzos\n"); for (FileStatus lzoFileStatus : lzoFileStatusList) { System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(lzoFileStatus)); } System.out.printf("Are you sure you want to download the lzo files above? (Y/N)"); if (CommonItestStatic.inputStream(stdin, "Y")) { for (FileStatus lzoFileStatus : lzoFileStatusList) { String srcLzoFileStr = StaticFileUtils.pathTail(lzoFileStatus.getPath().toUri().getRawPath()); Matcher m = HdfsUtils.hdfsLzoPattern.matcher(srcLzoFileStr); if (!m.find()) { System.out.printf("Error srcFile %s didn't match expected LZO file name\n", srcLzoFileStr); continue; } String dstFileName = m.group(1) + "-access_log.aggregated.lzo"; String dstFilePath =
StaticFileUtils.joinPath(downloadDir, dstFileName); System.out.printf("Downloading %s to %s\n", lzoFileStatus.getPath().toUri().toString(), dstFilePath); InputStream is = hdfsUtils.openHdfsInputFile(lzoFileStatus.getPath(), false); OutputStream os = StaticFileUtils.openOutputFile(dstFilePath, BUFFER_SIZE); StaticFileUtils.copyStreams(is, os, System.out, BUFFER_SIZE); is.close(); os.close(); } } } else if (cmd.equals("showConfig")) { System.out.printf("HadoopLogsConfig=%s\n", HadoopLogsConfigs.staticToString()); System.out.printf("Hdfs workingDir = %s\n", fs.getWorkingDirectory().toUri().getRawPath().toString()); System.out.printf("Local workingDir = %s\n", lfs.getWorkingDirectory()); } else if (cmd.equals("recompressIndex") && args.length >= 3) { String srcLzo = StaticFileUtils.expandUser(args[1]); String dstLzo = args[2]; String dstIdx = dstLzo + ".index"; FileInputStream lzoInputStream = new FileInputStream(srcLzo); FSDataOutputStream dstLzoStream = hdfsUtils.openHdfsOutputFile(dstLzo, false, true); FSDataOutputStream dstIdxStream = hdfsUtils.openHdfsOutputFile(dstIdx, false, true); hdfsUtils.recompressAndIndexLzoStream(lzoInputStream, dstLzoStream, dstIdxStream, null); System.out.printf("Recompressed and sent\n"); lzoInputStream.close(); dstLzoStream.close(); dstIdxStream.close(); } else if (cmd.equals("whoami")) { System.out.printf("your supposed to be %s\n", user); } else if (cmd.equals("chuser") && args.length >= 2) { user = args[1]; fs = FileSystem.get(defaultHdfsUri, conf, user); System.setProperty(CommonItestStatic.HDUNAME, user); System.out.printf("Switched to user %s\n", user); } else if (cmd.equals("mem")) { System.out.printf("Memory\n=================================\n%s\n", Debug.showMem()); } else if (cmd.equals("runJob") && args.length >= 2) { Class<? extends HadoopJob> jobDriverClass; String jobDriverClassName = "org.openstack.atlas.logs.hadoop.jobs." + args[1]; if (jobClassLoader == null) { System.out.printf("No jobJar set cannot load class searching class Path\n"); jobDriverClass = (Class<? extends HadoopJob>) Class.forName(jobDriverClassName); } else { jobDriverClass = (Class<? extends HadoopJob>) Class.forName(jobDriverClassName, true, jobClassLoader); } HadoopJob jobDriver = jobDriverClass.newInstance(); jobDriver.setConfiguration(conf); List<String> argsList = new ArrayList<String>(); for (int i = 2; i < args.length; i++) { argsList.add(args[i]); } // Run job double startTime = Debug.getEpochSeconds(); int exitCode = jobDriver.run(argsList); //jobDriver.run(jobArgs); double endTime = Debug.getEpochSeconds(); System.out.printf("took %f seconds running job %s\n", endTime - startTime, jobDriverClassName); System.out.printf("Exit status = %d\n", exitCode); } else if (cmd.equals("runMain") && args.length >= 2) { String className = args[1]; String[] mainArgs = new String[args.length - 2]; System.out.printf("Running %s\n", className); for (int i = 0; i < args.length - 2; i++) { mainArgs[i] = args[i + 2]; } Class mainClass = Class.forName(args[1]); Method mainMethod = mainClass.getDeclaredMethod("main", String[].class); mainMethod.invoke(null, (Object) mainArgs); } else if (cmd.equals("gc")) { System.out.printf("Calling garbage collector\n"); Debug.gc(); } else if (cmd.equals("lszip")) { Map<String, String> kw = CommonItestStatic.argMapper(args); String dateHour = (kw.containsKey("h")) ? kw.get("h") : null; String lid = (kw.containsKey("l")) ? 
kw.get("l") : null; System.out.printf("Scanning for zips on hour[%s] lid[%s]\n", dateHour, lid); boolean onlyMissing = (kw.containsKey("m")) ? true : false; List<FileStatus> zipStatusList = hdfsUtils.listHdfsZipsStatus(dateHour, lid, onlyMissing); for (FileStatus zipStatus : zipStatusList) { System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(zipStatus)); } } else if (cmd.equals("lslzo")) { String hourKey = (args.length >= 2) ? args[1] : null; System.out.printf("Scanning lzos for hour[%s]\n", hourKey); List<FileStatus> lzoFiles = hdfsUtils.listHdfsLzoStatus(hourKey); for (FileStatus lzoFileStatus : lzoFiles) { System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(lzoFileStatus)); } } else if (cmd.equals("ls")) { long total_file_size = 0; long total_repl_size = 0; Path path = (args.length >= 2) ? new Path(args[1]) : fs.getWorkingDirectory(); FileStatus[] fileStatusList = fs.listStatus(path); if (fileStatusList == null) { System.out.printf("Error got null when trying to retrieve file statuses\n"); } for (FileStatus fileStatus : fileStatusList) { total_file_size += fileStatus.getLen(); total_repl_size += fileStatus.getLen() * fileStatus.getReplication(); System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(fileStatus)); } System.out.printf("Total file bytes: %s\n", Debug.humanReadableBytes(total_file_size)); System.out.printf("Total file bytes including replication: %s\n", Debug.humanReadableBytes(total_repl_size)); System.out.printf("Total file count: %d\n", fileStatusList.length); } else if (cmd.equals("rmin") && args.length > 1) { int daysAgo = Integer.parseInt(args[1]); List<Long> hourDirs = new ArrayList<Long>(); int nFiles = 0; FileStatus[] stats = hdfsUtils.getFileSystem().listStatus(new Path(HadoopLogsConfigs.getMapreduceInputPrefix())); Long ninetyDaysAgoLong = StaticDateTimeUtils.dateTimeToHourLong( StaticDateTimeUtils.nowDateTime( true).minusDays(daysAgo)); for (FileStatus stat : stats) { try { String pathStr = pathTailString(stat); long hourLong = Long.parseLong(pathStr); if (hourLong < ninetyDaysAgoLong) { nFiles++; hourDirs.add(hourLong); } } catch (Exception ex) { continue; } } Collections.sort(hourDirs); System.out.printf("attempting to delete input files "); for (Long hourDir : hourDirs) { System.out.printf(" %s ", hourDir); System.out.flush(); } System.out.printf(" Delete above Files(Y/N) %f days worth\n", nFiles / 24.0); if (CommonItestStatic.inputStream(stdin, "Y")) { System.out.printf("Deleting\n"); for (Long hourLong : hourDirs) { String pathStr = StaticFileUtils.joinPath(HadoopLogsConfigs.getMapreduceInputPrefix(), hourLong.toString()); System.out.printf("Delete %s = ", pathStr); System.out.flush(); boolean resp = fs.delete(new Path(pathStr), true); System.out.printf("%s\n", resp); } } else { System.out.printf("bailing out\n"); continue; } } else if (cmd.equals("countlzobytes") && args.length > 1) { long totalLzoBytes = 0L; long dirBytes = 0L; int nFiles = 0; int daysAgo = Integer.parseInt(args[1]); List<Long> hourDirs = new ArrayList<Long>(); FileStatus[] stats = hdfsUtils.getFileSystem().listStatus(new Path(HadoopLogsConfigs.getMapreduceInputPrefix())); Long ninetyDaysAgoLong = StaticDateTimeUtils.dateTimeToHourLong( StaticDateTimeUtils.nowDateTime( true).minusDays(daysAgo)); System.out.printf("Scanning lzofiles for hours no less then %d days continue (%d) files (Y/N)\n", daysAgo, ninetyDaysAgoLong); for (FileStatus stat : stats) { try { String pathStr = pathTailString(stat); long hourLong = Long.parseLong(pathStr); if (hourLong >= ninetyDaysAgoLong) 
{ hourDirs.add(hourLong); System.out.flush(); } } catch (Exception ex) { continue; } } Collections.sort(hourDirs); for (Long hourLong : hourDirs) { String pathStr = StaticFileUtils.joinPath(HadoopLogsConfigs.getMapreduceInputPrefix(), hourLong.toString()); System.out.printf("Scanning hour %d: ", hourLong); FileStatus[] lzoStats = hdfsUtils.listStatuses(pathStr, false); dirBytes = 0L; for (FileStatus fileStat : lzoStats) { String fileName = fileStat.getPath().toUri().toString(); nFiles++; dirBytes += fileStat.getLen(); } System.out.printf("%d\n", dirBytes); totalLzoBytes += dirBytes; } System.out.printf("\n"); System.out.printf("counted %d bytes in %d files\n", totalLzoBytes, nFiles); double gigs = (double) totalLzoBytes / GIGBYTES_DOUBLE; System.out.printf("or %f GigaBytes\n", gigs); } else if (cmd.equals("dlzip") && args.length >= 2) { Map<String, String> kw = CommonItestStatic.argMapper(args); args = CommonItestStatic.stripKwArgs(args); Integer lid = (kw.containsKey("l")) ? Integer.valueOf(kw.get("l")) : null; Integer aid = (kw.containsKey("a")) ? Integer.valueOf(kw.get("a")) : null; List<String> pathComps = new ArrayList<String>(); pathComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); pathComps.add(LB_LOGS_SPLIT); String logSplitDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps)); List<String> hourKeys = new ArrayList<String>(); if (args.length >= 3) { // Add all hours in range of startHour and endHour for zip scan. long startHour = Long.parseLong(args[1]); long endHour = Long.parseLong(args[2]); FileStatus[] stats = hdfsUtils.getFileSystem().listStatus(new Path(logSplitDir)); for (FileStatus stat : stats) { String tail = pathTailString(stat.getPath()); if (!stat.isDir()) { continue; // If its a plain file don't count this one } try { long currHour = Long.parseLong(tail); if (currHour >= startHour && currHour <= endHour) { hourKeys.add(tail); } } catch (NumberFormatException ex) { continue; // This is not an hour } } } else { hourKeys.add(args[1]); // Only scan for this one hour } Collections.sort(hourKeys); List<ZipSrcDstFile> transferFiles = new ArrayList<ZipSrcDstFile>(); for (String hourKey : hourKeys) { String reducerOutputDir = StaticFileUtils.mergePathString(HadoopLogsConfigs.getMapreduceOutputPrefix(), LB_LOGS_SPLIT, hourKey); List<LogReducerOutputValue> reducerOutputList = hdfsUtils.getZipFileInfoList(reducerOutputDir); List<LogReducerOutputValue> filteredZipFileInfo = hdfsUtils.filterZipFileInfoList(reducerOutputList, aid, lid); for (LogReducerOutputValue val : filteredZipFileInfo) { ZipSrcDstFile transferFile = new ZipSrcDstFile(); transferFile.setSrcFile(val.getLogFile()); transferFile.setDstFile(zipFilePath(hourKey, val.getAccountId(), val.getLoadbalancerId())); transferFile.setHourKey(hourKey); transferFile.setAccountId(val.getAccountId()); transferFile.setLoadbalancerId(val.getLoadbalancerId()); transferFiles.add(transferFile); } } Collections.sort(transferFiles, new ZipSrcDstFileComparator()); for (ZipSrcDstFile transferFile : transferFiles) { System.out.printf("%s AccountId=%d LoadbalancerId=%d\n", transferFile.toString(), transferFile.getAccountId(), transferFile.getLoadbalancerId()); } System.out.printf("Are you sure you want to download the above zip files (Y/N)\n"); if (CommonItestStatic.inputStream(stdin, "Y")) { for (ZipSrcDstFile transferFile : transferFiles) { String srcFile = transferFile.getSrcFile(); String dstFile = transferFile.getDstFile(); System.out.printf("Transfering %s -> %s\n", srcFile, dstFile); InputStream is = 
hdfsUtils.openHdfsInputFile(srcFile, false); OutputStream os = hdfsUtils.openHdfsOutputFile(dstFile, true, true); StaticFileUtils.copyStreams(is, os, System.out, BUFFER_SIZE); is.close(); os.close(); } } } else if (cmd.equals("lsr")) { long total_file_size = 0; long total_repl_size = 0; String mntPath = (args.length >= 2) ? args[1] : fs.getWorkingDirectory().toUri().getRawPath(); double startTime = Debug.getEpochSeconds(); List<FileStatus> fileStatusList = hdfsUtils.listFileStatusRecursively(mntPath, false); for (FileStatus fileStatus : fileStatusList) { total_file_size += fileStatus.getLen(); total_repl_size += fileStatus.getLen() * fileStatus.getReplication(); System.out.printf("%s\n", HdfsCliHelpers.displayFileStatus(fileStatus)); } System.out.printf("Total file bytes: %s\n", Debug.humanReadableBytes(total_file_size)); System.out.printf("Total file bytes including replication: %s\n", Debug.humanReadableBytes(total_repl_size)); System.out.printf("Total file count: %d\n", fileStatusList.size()); double endTime = Debug.getEpochSeconds(); double delay = endTime - startTime; System.out.printf("Took %f Seconds to scan\n", delay); } else if (cmd.equals("exit")) { break; } else if (cmd.equals("cd") && args.length >= 2) { Path path = new Path(args[1]); fs.setWorkingDirectory(path); } else if (cmd.equals("cdin")) { List<String> pathComps = new ArrayList<String>(); pathComps.add(HadoopLogsConfigs.getMapreduceInputPrefix()); if (args.length >= 2) { pathComps.add(args[1]); } String pathStr = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps)); System.out.printf("Changing directory to %s\n", pathStr); fs.setWorkingDirectory(new Path(pathStr)); } else if (cmd.equals("cdout")) { List<String> pathComps = new ArrayList<String>(); pathComps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); pathComps.add(LB_LOGS_SPLIT); if (args.length >= 2) { pathComps.add(args[1]); } String pathStr = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps)); System.out.printf("Changing directory to %s\n", pathStr); fs.setWorkingDirectory(new Path(pathStr)); } else if (cmd.equals("pwd")) { System.out.printf("%s\n", fs.getWorkingDirectory().toUri().toString()); } else if (cmd.equals("cat") && args.length >= 2) { String pathStr = args[1]; Path filePath = new Path(pathStr); FSDataInputStream is = fs.open(filePath); StaticFileUtils.copyStreams(is, System.out, null, PAGESIZE); is.close(); } else if (cmd.equals("chmod") && args.length >= 3) { String octMal = args[1]; Path path = new Path(args[2]); short oct = (short) Integer.parseInt(octMal, 8); fs.setPermission(path, new FsPermission(oct)); System.out.printf("Setting permisions on file %s\n", path.toUri().toString()); } else if (cmd.equals("chown") && args.length >= 4) { String fUser = args[1]; String fGroup = args[2]; String fPath = args[3]; fs.setOwner(new Path(fPath), fUser, fGroup); System.out.printf("Setting owner of %s to %s:%s\n", fPath, fUser, fGroup); } else if (cmd.equals("mkdir") && args.length >= 2) { String fPath = args[1]; boolean resp = fs.mkdirs(new Path(fPath)); System.out.printf("mkdir %s = %s\n", fPath, resp); } else if (cmd.equals("rm") && args.length >= 2) { String fPath = args[1]; boolean resp = fs.delete(new Path(fPath), false); System.out.printf("rm %s = %s\n", fPath, resp); } else if (cmd.equals("rmdir") && args.length >= 2) { String fPath = args[1]; boolean resp = fs.delete(new Path(fPath), true); System.out.printf("rmdir %s = %s\n", fPath, resp); } else if (cmd.equals("homedir")) { System.out.printf("%s\n", 
fs.getHomeDirectory().toUri().toString()); } else if (cmd.equals("cpld") && args.length >= 3) { String inDirName = StaticFileUtils.expandUser(args[1]); String outDir = args[2]; short nReplications = (args.length > 3) ? (short) Integer.parseInt(args[3]) : fs.getDefaultReplication(); long blockSize = (args.length > 4) ? Long.parseLong(args[4]) : fs.getDefaultBlockSize(); File inDir = new File(inDirName); File[] files = inDir.listFiles(); for (int i = 0; i < files.length; i++) { File file = files[i]; String inFileName = file.getName(); String fullInPath = inDirName + "/" + inFileName; if (file.isDirectory() || !file.isFile()) { System.out.printf("Skipping %s since its not a file\n", fullInPath); continue; } String fullOutPath = String.format("%s/%s", outDir, inFileName); System.out.printf("Copying %s to %s\n", fullInPath, fullOutPath); long fSize = new File(fullInPath).length(); InputStream is = StaticFileUtils.openDataInputStreamFile(fullInPath); FSDataOutputStream os = fs.create(new Path(fullOutPath), true, HDFSBUFFSIZE, nReplications, blockSize); StaticFileUtils.copyStreams(is, os, System.out, fSize, ONEMEG); System.out.printf("Finished with file\n"); is.close(); os.close(); continue; } } else if (cmd.equals("cpfl") && args.length >= 3) { String outPathStr = args[2]; String inPathStr = args[1]; long fSize = new File(StaticFileUtils.expandUser(inPathStr)).length(); InputStream is = StaticFileUtils.openInputFile(inPathStr); FSDataOutputStream os; short nReplications = (args.length > 3) ? (short) Integer.parseInt(args[3]) : fs.getDefaultReplication(); long blockSize = (args.length > 4) ? Long.parseLong(args[4]) : fs.getDefaultBlockSize(); System.out.printf("Copying with %d replications and blocksize of %d\n", nReplications, blockSize); os = fs.create(new Path(outPathStr), true, HDFSBUFFSIZE, nReplications, blockSize); StaticFileUtils.copyStreams(is, os, System.out, fSize, ONEMEG); System.out.printf("copyed %s -> %s\n", inPathStr, outPathStr); is.close(); os.close(); } else if (cmd.equals("cptl") && args.length >= 3) { FSDataInputStream is = fs.open(new Path(args[1]), HDFSBUFFSIZE); OutputStream os = StaticFileUtils.openOutputFile(args[2]); StaticFileUtils.copyStreams(is, os, System.out, HDFSBUFFSIZE); is.close(); os.close(); } else if (cmd.equals("findCp")) { if (args.length >= 2) { String className = args[1]; String classPath = Debug.findClassPath(className, jobClassLoader); System.out.printf("%s classpath = %s\n", className, classPath); continue; } String classPath = System.getProperties().getProperty("java.class.path"); System.out.printf("classpath = %s\n", classPath); } else if (cmd.equals("setJobJar") && args.length >= 2) { String jarName = StaticFileUtils.expandUser(args[1]); if (jobClassLoader != null) { System.out.printf("jobJar already set to %s\n", jobJarName); continue; } File jarFile = new File(jarName).getAbsoluteFile(); if (!jarFile.canRead()) { System.out.printf("Can't read file %s\n", jarFile.getAbsolutePath()); continue; } URL jarUrl = jarFile.toURI().toURL(); jobClassLoader = new URLClassLoader(new URL[]{jarUrl}, HdfsCli.class.getClassLoader()); System.out.printf("Loaded %s as jobJar\n", jarName); } else if (cmd.equals("showCl") && args.length >= 2) { String className = args[1]; if (jobClassLoader == null) { System.out.printf("jobJar not yet set\n"); } Class classIn = Class.forName(className, true, jobClassLoader); String classLoaderInfo = Debug.classLoaderInfo(className); System.out.printf("%s\n", classLoaderInfo); } else if (cmd.equals("countLines") && args.length >= 3) { 
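// countLines <zeusFile> <nTicks> [buffSize]: counts the lines in a local log file, printing progress every nTicks lines.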
String fileName = args[1]; int nTicks = Integer.valueOf(args[2]); int buffSize = (args.length > 3) ? Integer.valueOf(args[3]) : PAGESIZE * 4; System.out.printf("Counting the lines from file %s with %d ticks\n", fileName, nTicks); double startTime = Debug.getEpochSeconds(); long nLines = HdfsCliHelpers.countLines(fileName, nTicks, buffSize); double endTime = Debug.getEpochSeconds(); System.out.printf("Took %f seconds to count %d lines\n", endTime - startTime, nLines); } else if (cmd.equals("compressLzo") && args.length >= 3) { String srcFileName = args[1]; String dstFileName = args[2]; int buffsize = (args.length >= 4) ? Integer.parseInt(args[3]) : 64 * ONEMEG; InputStream fis = StaticFileUtils.openInputFile(srcFileName); OutputStream fos = StaticFileUtils.openOutputFile(dstFileName); System.out.printf("Attempting to compress %s to file %s\n", srcFileName, dstFileName); LzopCodec codec = new LzopCodec(); codec.setConf(conf); CompressionOutputStream cos = codec.createOutputStream(fos); double startTime = Debug.getEpochSeconds(); StaticFileUtils.copyStreams(fis, cos, System.out, buffsize); double endTime = Debug.getEpochSeconds(); System.out.printf("Compression took %f seconds\n", endTime - startTime); fis.close(); cos.finish(); cos.close(); fos.close(); } else if (cmd.equals("indexLzo") && args.length >= 2) { String srcFileName = args[1]; Path filePath = new Path(StaticFileUtils.expandUser(srcFileName)); System.out.printf("Indexing file %s\n", srcFileName); double startTime = Debug.getEpochSeconds(); LzoIndex.createIndex(lfs, filePath); double endTime = Debug.getEpochSeconds(); System.out.printf("Took %f seconds to index file %s\n", endTime - startTime, srcFileName); } else if (cmd.equals("printReducers") && args.length >= 2) { String sequenceDirectory = args[1]; List<LogReducerOutputValue> zipFileInfoList = hdfsUtils.getZipFileInfoList(sequenceDirectory); int totalEntryCount = zipFileInfoList.size(); int entryNum = 0; for (LogReducerOutputValue zipFileInfo : zipFileInfoList) { System.out.printf("zipFile[%d]=%s\n", entryNum, zipFileInfo.toString()); entryNum++; } System.out.printf("Total entries = %d\n", totalEntryCount); } else if (cmd.equals("scanLines") && args.length >= 4) { String fileName = args[1]; int nLines = Integer.parseInt(args[2]); int nTicks = Integer.parseInt(args[3]); BufferedReader r = new BufferedReader(new FileReader(StaticFileUtils.expandUser(fileName)), HDFSBUFFSIZE); int badLines = 0; int goodLines = 0; int lineCounter = 0; int totalLines = 0; int totalGoodLines = 0; int totalBadLines = 0; LogMapperOutputValue logValue = new LogMapperOutputValue(); double startTime = StaticDateTimeUtils.getEpochSeconds(); for (int i = 0; i < nLines; i++) { String line = r.readLine(); if (line == null) { break; // End of file } try { LogChopper.getLogLineValues(line, logValue); goodLines++; totalGoodLines++; } catch (Exception ex) { badLines++; totalBadLines++; String excMsg = Debug.getEST(ex); } lineCounter++; totalLines++; if (i % nTicks == 0) { double stopTime = StaticDateTimeUtils.getEpochSeconds(); double lps = (double) lineCounter / (stopTime - startTime); System.out.printf("read %d lines goodlines=%d badlines=%d secs = %f linespersecond=%f\n", lineCounter, goodLines, badLines, stopTime - startTime, lps); startTime = stopTime; lineCounter = 0; goodLines = 0; badLines = 0; } } System.out.printf("Good=%d badLines=%d total = %d\n", totalGoodLines, totalBadLines, totalLines); r.close(); } else if (cmd.equals("scanLinesLzo") && args.length >= 4) { String fileName = args[1]; int nLines =
Integer.parseInt(args[2]); int nTicks = Integer.parseInt(args[3]); CompressionInputStream cis = hdfsUtils.openLzoDecompressionStream(fileName); BufferedReader r = new BufferedReader(new InputStreamReader(cis), LARGEBUFFERSIZE); int badLines = 0; int goodLines = 0; int lineCounter = 0; int totalLines = 0; int totalGoodLines = 0; int totalBadLines = 0; LogMapperOutputValue logValue = new LogMapperOutputValue(); double startTime = StaticDateTimeUtils.getEpochSeconds(); for (int i = 0; i < nLines; i++) { String line = r.readLine(); if (line == null) { break; // End of file } try { LogChopper.getLogLineValues(line, logValue); goodLines++; totalGoodLines++; } catch (Exception ex) { badLines++; totalBadLines++; String excMsg = Debug.getEST(ex); } lineCounter++; totalLines++; if (i % nTicks == 0) { double stopTime = StaticDateTimeUtils.getEpochSeconds(); double lps = (double) lineCounter / (stopTime - startTime); System.out.printf("read %d lines goodlines=%d badlines=%d secs = %f linespersecond=%f\n", lineCounter, goodLines, badLines, stopTime - startTime, lps); startTime = stopTime; lineCounter = 0; goodLines = 0; badLines = 0; } } System.out.printf("Good=%d badLines=%d total = %d\n", totalGoodLines, totalBadLines, totalLines); r.close(); } else if (cmd.equals("showCrc") && args.length >= 2) { String fileName = StaticFileUtils.expandUser(args[1]); BufferedInputStream is = new BufferedInputStream(new FileInputStream(fileName), BUFFER_SIZE); long crc = StaticFileUtils.computeCrc(is); System.out.printf("crc(%s)=%d\n", fileName, crc); is.close(); } else if (cmd.equals("du")) { long used = fs.getUsed(); System.out.printf("Used bytes: %s\n", Debug.humanReadableBytes(used)); } else if (cmd.equals("setReplCount") && args.length >= 3) { String fileName = args[1]; Path filePath = new Path(fileName); short replCount = Short.parseShort(args[2]); System.out.printf("Setting replication count for file %s to %d\n", fileName, replCount); fs.setReplication(filePath, replCount); } else if (cmd.equals("dumpConfig") && args.length >= 2) { System.out.printf("Dumping configs\n"); BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(new File(StaticFileUtils.expandUser(args[1]))), HDFSBUFFSIZE); Configuration dumpConf = new Configuration(); for (int i = 2; i < args.length; i++) { dumpConf.addResource(new Path(StaticFileUtils.expandUser(args[i]))); } dumpConf.writeXml(bos); bos.close(); dumpConf.writeXml(System.out); } else if (cmd.equals("lineIndex") && args.length >= 2) { String inFileName = StaticFileUtils.expandUser(args[1]); String outFileName = inFileName + ".idx"; InputStream is = StaticFileUtils.openInputFile(inFileName); DataOutputStream os = StaticFileUtils.openDataOutputStreamFile(outFileName); System.out.printf("Indexing file %s -> %s\n", inFileName, outFileName); HdfsCliHelpers.indexFile(is, os, PAGESIZE * 8); is.close(); os.close(); } else if (cmd.equals("rebasePath") && args.length >= 4) { String srcBase = args[1]; String srcPath = args[2]; String dstPath = args[3]; System.out.printf("Calling StaticFileUtils.rebasePath(%s,%s,%s)=", srcBase, srcPath, dstPath); System.out.flush(); String rebasedPath = StaticFileUtils.rebaseSplitPath(srcBase, srcPath, dstPath); System.out.printf("%s\n", rebasedPath); } else if (cmd.equals("joinPath") && args.length >= 1) { List<String> pathComps = new ArrayList<String>(); for (int i = 1; i < args.length; i++) { pathComps.add(args[i]); } List<String> joinedPathList = StaticFileUtils.joinPath(pathComps); String joinPathString =
StaticFileUtils.splitPathToString(joinedPathList); System.out.printf("joinedPath = %s\n", joinPathString); } else { System.out.printf("Unrecognized command\n"); } } catch (Exception ex) { System.out.printf("Exception: %s\n", Debug.getExtendedStackTrace(ex)); } } System.out.printf("Exiting\n"); } public static String chop(String line) { return line.replace("\r", "").replace("\n", ""); } public static String zipFilePath(String dateHour, int accountId, int loadbalancerId) { List<String> pathComps = new ArrayList<String>(); if (accountId < 0 || loadbalancerId < 0) { pathComps.add(HadoopLogsConfigs.getCacheDir()); pathComps.add("unknown"); pathComps.add("unknown_" + dateHour + ".zip"); } else { pathComps.add(HadoopLogsConfigs.getCacheDir()); pathComps.add(dateHour); pathComps.add(Integer.toString(accountId)); pathComps.add("access_log_" + Integer.toString(loadbalancerId) + "_" + dateHour + ".zip"); } return StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(pathComps)); } public static String listHourKeyFiles(HdfsUtils hdfsUtils, String remoteDir, String hourKeyPrefix) throws IOException { StringBuilder sb = new StringBuilder(); FileStatus[] fileStatusArray = hdfsUtils.listStatuses(remoteDir, false); List<FileStatus> fileStatusList = new ArrayList<FileStatus>(Arrays.asList(fileStatusArray)); Collections.sort(fileStatusList, new FileStatusDateComparator()); for (FileStatus fileStatus : fileStatusList) { String tail = StaticFileUtils.pathTail(fileStatus.getPath().toUri().getRawPath().toString()); if (hourKeyPrefix != null && !tail.startsWith(hourKeyPrefix)) { continue; } sb.append(tail).append(HdfsCliHelpers.displayFileStatus(fileStatus)).append("\n"); } return sb.toString(); } public static HdfsZipDirScan scanHdfsZipDirs(HdfsUtils hdfsUtils, String hourKey, boolean scanParts) { Matcher zipMatch = zipPattern.matcher(""); HdfsZipDirScan scan = new HdfsZipDirScan(); scan.setHourKey(hourKey); List<String> comps = new ArrayList<String>(); comps.add(HadoopLogsConfigs.getMapreduceOutputPrefix()); comps.add(LB_LOGS_SPLIT); comps.add(hourKey); String partsDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(comps)); comps.add("zips"); String zipDir = StaticFileUtils.splitPathToString(StaticFileUtils.joinPath(comps)); List<LogReducerOutputValue> zipInfoList; if (scanParts) { try { zipInfoList = hdfsUtils.getZipFileInfoList(partsDir); scan.setPartionFilesFound(true); } catch (SequenceFileReaderException ex) { zipInfoList = null; } if (zipInfoList != null) { for (LogReducerOutputValue zipInfo : zipInfoList) { scan.getPartZipsFound().add(StaticFileUtils.pathTail(zipInfo.getLogFile())); scan.incPartZipCount(1); scan.incZipByteCount(zipInfo.getFileSize()); } } } FileStatus[] fileStatuses; try { fileStatuses = hdfsUtils.getFileSystem().listStatus(new Path(zipDir)); if (fileStatuses != null) { scan.setDateDirFound(true); scan.setZipDirFound(true); for (FileStatus fileStatus : fileStatuses) { String zipFileName = StaticFileUtils.pathTail(HdfsUtils.rawPath(fileStatus)); zipMatch.reset(zipFileName); if (zipMatch.find()) { scan.incZipCount(1); scan.getZipsFound().add(zipFileName); } } } } catch (IOException ex) { fileStatuses = null; } return scan; } public static double nowDateSecs() { return (double) System.currentTimeMillis() * MILLISECOND_COEF; } public static String pathTailString(Path path) { return StaticFileUtils.pathTail(path.toUri().getRawPath()); } public static String pathTailString(FileStatus fileStatus) { return pathTailString(fileStatus.getPath()); } }
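// Illustrative interactive session (hypothetical working directory, hour key, and loadbalancer id;
// the command names and prompt format come from the help text and printf calls above):
//
//   lbaas_hadoop_client /user/hdfs> lsin 2013021014
//   lbaas_hadoop_client /user/hdfs> runSplit 2013021014
//   lbaas_hadoop_client /user/hdfs> lsout 2013021014
//   lbaas_hadoop_client /user/hdfs> getzip /tmp/zips h=2013021014 l=12345
//   lbaas_hadoop_client /user/hdfs> exit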