package edu.umd.hooka;
import java.io.File;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.OutputStream;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.StringTokenizer;
import java.io.IOException;
import java.util.NoSuchElementException;
import java.util.Collection;
/**
 * Aggregates per-node map/reduce timing information from profiling log files.
 *
 * <p>Each relevant log line is expected to look like
 * {@code <arbitrary prefix tokens ending in ':'> EVENT_TYPE IDENTIFIER TIMESTAMP},
 * where tokens are whitespace-separated, {@code EVENT_TYPE} is one of the
 * event names matching the constants below and {@code TIMESTAMP} parses as a
 * {@code long}. Lines that do not fit this shape are ignored.
 */
public class ProfileLogParser {
    static final int JOB_START = 0;
    static final int MAP_START = 1;
    static final int MAP_FINISH = 2;
    static final int REDUCE_START = 3;
    static final int REDUCE_FINISH = 4;
    static final int JOB_FINISH = 5;
    static final int INVALID = 6;

    /**
     * Timestamps collected for one identifier. A value of 0 means the
     * corresponding event was never seen (fields default to 0).
     */
    private static class NodeData
    {
        long mapStart;
        long mapFinish;
        long reduceStart;
        long reduceFinish;
    }

    /**
     * One parsed log line. {@code itemType} is {@link #INVALID} when the line
     * could not be parsed or carried an unknown event name.
     */
    private static class ItemData
    {
        String identifier;
        int itemType;
        long timestamp;

        /**
         * Parses a single log line.
         *
         * @param data the raw line; tokens are split on whitespace
         */
        ItemData(String data)
        {
            StringTokenizer tokens = new StringTokenizer(data);
            try
            {
                // Skip the prefix: everything up to and including the first
                // token that ends with ':'.
                String currToken = "";
                while (!currToken.endsWith(":"))
                {
                    currToken = tokens.nextToken();
                }
                itemType = typeFromName(tokens.nextToken());
                identifier = tokens.nextToken();
                timestamp = Long.parseLong(tokens.nextToken());
            }
            catch (NoSuchElementException e)
            {
                // Line ran out of tokens: not a profiling record.
                itemType = INVALID;
            }
            catch (NumberFormatException e)
            {
                // Timestamp token was not a valid long.
                itemType = INVALID;
            }
        }
    }

    /**
     * Maps an event name from the log to its integer constant.
     *
     * @param name the event-name token
     * @return the matching constant, or {@link #INVALID} if the name is unknown
     */
    private static int typeFromName(String name)
    {
        if (name.equals("JOB_START")) return JOB_START;
        if (name.equals("MAP_START")) return MAP_START;
        if (name.equals("MAP_FINISH")) return MAP_FINISH;
        if (name.equals("REDUCE_START")) return REDUCE_START;
        if (name.equals("REDUCE_FINISH")) return REDUCE_FINISH;
        if (name.equals("JOB_FINISH")) return JOB_FINISH;
        return INVALID;
    }

    /**
     * Reads every file, collects the map/reduce timestamps per identifier and
     * writes a summary report to {@code output}.
     *
     * <p>{@code JOB_START} / {@code JOB_FINISH} records found in the logs
     * override {@code startTime} / {@code finishTime}. If an input file cannot
     * be opened, diagnostics are printed to {@code System.err} and the method
     * returns without writing to or closing {@code output}. Otherwise
     * {@code output} is closed once the report has been written (or writing
     * has failed).
     *
     * @param theFiles   the log files to read
     * @param startTime  job start time used unless a JOB_START record is found
     * @param finishTime job finish time used unless a JOB_FINISH record is found
     * @param output     stream receiving the textual report
     * @throws IOException if reading a log file or writing the report fails
     */
    public void Parse(Collection<File> theFiles, long startTime, long finishTime, OutputStream output) throws IOException
    {
        HashMap<String, NodeData> dataHashMap = new HashMap<String, NodeData>();
        long jobStartTime = startTime;
        long jobFinishTime = finishTime;

        for (File currFile : theFiles)
        {
            BufferedReader inputReader;
            try
            {
                inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(currFile)));
            }
            catch (FileNotFoundException e)
            {
                System.err.println(e.getMessage());
                e.printStackTrace();
                System.err.println("Input file not found");
                return;
            }

            try
            {
                String currLine;
                while ((currLine = inputReader.readLine()) != null)
                {
                    ItemData currData = new ItemData(currLine);
                    switch (currData.itemType)
                    {
                        case INVALID:
                            // Not a profiling record; skip the line.
                            break;
                        case JOB_START:
                            jobStartTime = currData.timestamp;
                            break;
                        case JOB_FINISH:
                            jobFinishTime = currData.timestamp;
                            break;
                        default:
                            recordTaskEvent(dataHashMap, currData);
                            break;
                    }
                }
            }
            finally
            {
                // Release the file handle even if reading fails part-way.
                inputReader.close();
            }
        }

        writeReport(dataHashMap, jobStartTime, jobFinishTime, output);
    }

    /**
     * Stores a map/reduce start/finish timestamp under the item's identifier,
     * creating the per-identifier entry on first use.
     */
    private static void recordTaskEvent(HashMap<String, NodeData> dataHashMap, ItemData item)
    {
        NodeData node = dataHashMap.get(item.identifier);
        if (node == null)
        {
            node = new NodeData();
            dataHashMap.put(item.identifier, node);
        }
        switch (item.itemType)
        {
            case MAP_START:
                node.mapStart = item.timestamp;
                break;
            case MAP_FINISH:
                node.mapFinish = item.timestamp;
                break;
            case REDUCE_START:
                node.reduceStart = item.timestamp;
                break;
            case REDUCE_FINISH:
                node.reduceFinish = item.timestamp;
                break;
            default:
                break;
        }
    }

    /**
     * Sums the phase durations over all complete entries and writes the
     * report. Entries with any of the four timestamps still at 0 are counted
     * as incomplete and excluded from the totals. Closes {@code output}.
     */
    private static void writeReport(HashMap<String, NodeData> dataHashMap, long jobStartTime, long jobFinishTime, OutputStream output) throws IOException
    {
        int numNodes = 0;
        int failedReports = 0;
        long timeTaken = jobFinishTime - jobStartTime;
        long preMap = 0L;
        long mapTime = 0L;
        long intermediate = 0L;
        long reduceTime = 0L;
        long postReduce = 0L;

        for (NodeData x : dataHashMap.values())
        {
            if ((x.mapStart == 0) || (x.mapFinish == 0) || (x.reduceStart == 0) || (x.reduceFinish == 0))
            {
                failedReports += 1;
            }
            else
            {
                preMap += (x.mapStart - jobStartTime);
                mapTime += (x.mapFinish - x.mapStart);
                intermediate += (x.reduceStart - x.mapFinish);
                reduceTime += (x.reduceFinish - x.reduceStart);
                postReduce += (jobFinishTime - x.reduceFinish);
                numNodes += 1;
            }
        }

        BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(output));
        try
        {
            outputWriter.write(Integer.toString(numNodes) + " total nodes reporting for " + Long.toString(timeTaken) + " milliseconds each\n");
            outputWriter.write("Total node time taken before map operations: " + Long.toString(preMap) + "\n");
            outputWriter.write("Total node time taken for map operations: " + Long.toString(mapTime) + "\n");
            outputWriter.write("Total node time taken between map and reduce operations: " + Long.toString(intermediate) + "\n");
            outputWriter.write("Total node time taken for reduce operations: " + Long.toString(reduceTime) + "\n");
            outputWriter.write("Total node time taken after reduce operations: " + Long.toString(postReduce) + "\n");
            outputWriter.write(Integer.toString(failedReports) + " nodes reporting incomplete data (not counted in above) \n");
        }
        finally
        {
            // Flushes and closes the underlying stream even if a write failed.
            outputWriter.close();
        }
    }

    /**
     * Runs the parser over a hard-coded log directory with hard-coded default
     * job start/finish times and prints the report to standard output.
     */
    public static void main(String[] args)
    {
        String directory = "/home/guest/hadoop/logs/userlogs/thistask";
        long startTime = 1205673885264L;
        long finishTime = 1205673915411L;

        Collection<File> theFiles;
        try
        {
            theFiles = FileListing.getFileListing(new File(directory));
        }
        catch (FileNotFoundException e)
        {
            e.printStackTrace();
            // Without a file listing there is nothing to parse.
            return;
        }

        try
        {
            new ProfileLogParser().Parse(theFiles, startTime, finishTime, System.out);
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}