/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.history;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Counters.Group;
/**
 * HiveHistory. Logs the lifecycle of a Hive session (session, query, and task
 * records, plus counters) to a per-session history file that can be read back
 * with {@link #parseHiveHistory}.
 */
public class HiveHistory {
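  // Typical lifecycle as driven by the Hive Driver (illustrative sketch; the
  // query id and task name below are made-up examples):
  //
  //   HiveHistory hh = new HiveHistory(SessionState.get()); // logs SessionStart
  //   hh.startQuery("select * from src", "q_1");            // logs QueryStart
  //   hh.startTask("q_1", task, "MAPRED");                  // logs TaskStart
  //   hh.progressTask("q_1", task);                         // logs TaskProgress
  //   hh.endTask("q_1", task);                              // logs TaskEnd
  //   hh.endQuery("q_1");                                   // logs QueryEnd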
PrintWriter histStream; // History File stream
String histFileName; // History file name
private static final Log LOG = LogFactory.getLog("hive.ql.exec.HiveHistory");
private LogHelper console;
private Map<String, String> idToTableMap = null;
// Job Hash Map
private final HashMap<String, QueryInfo> queryInfoMap = new HashMap<String, QueryInfo>();
// Task Hash Map
private final HashMap<String, TaskInfo> taskInfoMap = new HashMap<String, TaskInfo>();
private static final String DELIMITER = " ";
  /**
   * RecordTypes. The record types that appear in the history file.
   */
public static enum RecordTypes {
QueryStart,
QueryEnd,
TaskStart,
TaskEnd,
TaskProgress,
SessionStart,
SessionEnd,
Counters
  }
  /**
   * Keys. The keys that may appear in a history record.
   */
public static enum Keys {
SESSION_ID,
QUERY_ID,
TASK_ID,
QUERY_RET_CODE,
QUERY_NUM_TASKS,
QUERY_STRING,
TIME,
TASK_RET_CODE,
TASK_NAME,
TASK_HADOOP_ID,
TASK_HADOOP_PROGRESS,
TASK_COUNTERS,
TASK_NUM_MAPPERS,
TASK_NUM_REDUCERS,
ROWS_INSERTED
  }
private static final String KEY = "(\\w+)";
  private static final String VALUE = "[^\"]+"; // one or more characters other than a double quote
private static final String ROW_COUNT_PATTERN = "TABLE_ID_(\\d+)_ROWCOUNT";
private static final Pattern pattern = Pattern.compile(KEY + "=" + "\""
+ VALUE + "\"");
private static final Pattern rowCountPattern = Pattern.compile(ROW_COUNT_PATTERN);
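  // Illustrative examples: the fragment QUERY_ID="q_1" is matched by `pattern`
  // with group(1) == "QUERY_ID"; rowCountPattern matches counter names such as
  // "TABLE_ID_3_ROWCOUNT" with group(1) == "3".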
  // Temporary buffer for parsed key/value data; shared by parseLine, so
  // parsing is not thread-safe.
  private static Map<String, String> parseBuffer = new HashMap<String, String>();
  /**
   * Listener interface. The parser calls {@link #handle} once for each record
   * read from the history file.
   */
public static interface Listener {
void handle(RecordTypes recType, Map<String, String> values) throws IOException;
}
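  // Example (illustrative): print the type and query id of every record in a
  // history file. The path is hypothetical.
  //
  //   HiveHistory.parseHiveHistory("/tmp/hive/hive_job_log_s1_42.txt",
  //       new HiveHistory.Listener() {
  //         public void handle(RecordTypes recType, Map<String, String> values)
  //             throws IOException {
  //           System.out.println(recType + " " + values.get(Keys.QUERY_ID.name()));
  //         }
  //       });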
  /**
   * Parses the history file and calls the listener for each complete record.
   *
   * @param path path to the history file
   * @param l listener to receive parsed records
   * @throws IOException
   */
public static void parseHiveHistory(String path, Listener l) throws IOException {
FileInputStream fi = new FileInputStream(path);
BufferedReader reader = new BufferedReader(new InputStreamReader(fi));
try {
String line = null;
StringBuilder buf = new StringBuilder();
while ((line = reader.readLine()) != null) {
buf.append(line);
// if it does not end with " then it is line continuation
if (!line.trim().endsWith("\"")) {
continue;
}
parseLine(buf.toString(), l);
buf = new StringBuilder();
}
} finally {
try {
reader.close();
      } catch (IOException ex) {
        // ignore errors while closing the reader
      }
}
}
  /**
   * Parse a single line of history.
   *
   * @param line a complete record line
   * @param l listener to receive the parsed record
   * @throws IOException
   */
private static void parseLine(String line, Listener l) throws IOException {
// extract the record type
int idx = line.indexOf(' ');
String recType = line.substring(0, idx);
String data = line.substring(idx + 1, line.length());
Matcher matcher = pattern.matcher(data);
while (matcher.find()) {
String tuple = matcher.group(0);
      // split only on the first '='; the quoted value may itself contain '='
      String[] parts = tuple.split("=", 2);
      parseBuffer.put(parts[0], parts[1].substring(1, parts[1].length() - 1));
}
l.handle(RecordTypes.valueOf(recType), parseBuffer);
parseBuffer.clear();
}
  /**
   * Info. Base class for the session, query, and task info holders.
   */
public static class Info {
}
/**
* SessionInfo.
*
*/
public static class SessionInfo extends Info {
public String sessionId;
  }
/**
* QueryInfo.
*
*/
public static class QueryInfo extends Info {
public Map<String, String> hm = new HashMap<String, String>();
public Map<String, Long> rowCountMap = new HashMap<String, Long>();
  }
/**
* TaskInfo.
*
*/
public static class TaskInfo extends Info {
public Map<String, String> hm = new HashMap<String, String>();
  }
  /**
   * Construct a HiveHistory object and open the history log file.
   *
   * @param ss the session state for the current session
   */
public HiveHistory(SessionState ss) {
try {
console = new LogHelper(LOG);
String conf_file_loc = ss.getConf().getVar(
HiveConf.ConfVars.HIVEHISTORYFILELOC);
if ((conf_file_loc == null) || conf_file_loc.length() == 0) {
console.printError("No history file location given");
return;
}
// Create directory
File f = new File(conf_file_loc);
if (!f.exists()) {
if (!f.mkdirs()) {
console.printError("Unable to create log directory " + conf_file_loc);
return;
}
}
Random randGen = new Random();
do {
histFileName = conf_file_loc + "/hive_job_log_" + ss.getSessionId() + "_"
+ Math.abs(randGen.nextInt()) + ".txt";
} while (new File(histFileName).exists());
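      // e.g. histFileName = "<conf_file_loc>/hive_job_log_<sessionId>_<n>.txt",
      // where <n> is a random non-negative int (illustrative).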
console.printInfo("Hive history file=" + histFileName);
histStream = new PrintWriter(histFileName);
HashMap<String, String> hm = new HashMap<String, String>();
hm.put(Keys.SESSION_ID.name(), ss.getSessionId());
log(RecordTypes.SessionStart, hm);
} catch (FileNotFoundException e) {
console.printError("FAILED: Failed to open Query Log : " + histFileName
+ " " + e.getMessage(), "\n"
+ org.apache.hadoop.util.StringUtils.stringifyException(e));
}
}
  /**
   * @return the history file name
   */
public String getHistFileName() {
return histFileName;
}
  /**
   * Write a history record to the history file.
   *
   * @param rt record type
   * @param keyValMap key-value pairs making up the record
   */
void log(RecordTypes rt, Map<String, String> keyValMap) {
if (histStream == null) {
return;
}
StringBuilder sb = new StringBuilder();
sb.append(rt.name());
for (Map.Entry<String, String> ent : keyValMap.entrySet()) {
sb.append(DELIMITER);
String key = ent.getKey();
String val = ent.getValue();
if(val != null) {
val = val.replace('\n', ' ');
}
sb.append(key + "=\"" + val + "\"");
}
sb.append(DELIMITER);
sb.append(Keys.TIME.name() + "=\"" + System.currentTimeMillis() + "\"");
histStream.println(sb);
histStream.flush();
}
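  // A record as written by log() looks like (illustrative values):
  //
  //   TaskEnd QUERY_ID="q_1" TASK_ID="Stage-1" TASK_RET_CODE="0" TIME="1234567890"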
  /**
   * Called at the start of a query, from Driver.execute().
   *
   * @param cmd the query string
   * @param id the query id
   */
public void startQuery(String cmd, String id) {
SessionState ss = SessionState.get();
if (ss == null) {
return;
}
QueryInfo ji = new QueryInfo();
ji.hm.put(Keys.QUERY_ID.name(), id);
ji.hm.put(Keys.QUERY_STRING.name(), cmd);
queryInfoMap.put(id, ji);
log(RecordTypes.QueryStart, ji.hm);
}
  /**
   * Used to set the status and other attributes of a query.
   *
   * @param queryId
   * @param propName
   * @param propValue
   */
public void setQueryProperty(String queryId, Keys propName, String propValue) {
QueryInfo ji = queryInfoMap.get(queryId);
if (ji == null) {
return;
}
ji.hm.put(propName.name(), propValue);
}
  /**
   * Used to set task properties.
   *
   * @param queryId
   * @param taskId
   * @param propName
   * @param propValue
   */
public void setTaskProperty(String queryId, String taskId, Keys propName,
String propValue) {
String id = queryId + ":" + taskId;
TaskInfo ti = taskInfoMap.get(id);
if (ti == null) {
return;
}
ti.hm.put(propName.name(), propValue);
}
/**
* Serialize the task counters and set as a task property.
*
* @param queryId
* @param taskId
* @param ctrs
*/
public void setTaskCounters(String queryId, String taskId, Counters ctrs) {
String id = queryId + ":" + taskId;
    QueryInfo ji = queryInfoMap.get(queryId);
    TaskInfo ti = taskInfoMap.get(id);
    // Without the task info, any counters, or the owning query, there is
    // nothing to record.
    if ((ti == null) || (ctrs == null) || (ji == null)) {
      return;
    }
    StringBuilder sb1 = new StringBuilder("");
StringBuilder sb = new StringBuilder("");
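    // Illustrative serialized forms built below (counter names are examples):
    //   TASK_COUNTERS:  "Map-Reduce Framework.Map input records:57,..."
    //   ROWS_INSERTED:  "dest_table~57", derived from counters named TABLE_ID_<n>_ROWCOUNT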
try {
boolean first = true;
for (Group group : ctrs) {
for (Counter counter : group) {
if (first) {
first = false;
} else {
sb.append(',');
}
sb.append(group.getDisplayName());
sb.append('.');
sb.append(counter.getDisplayName());
sb.append(':');
sb.append(counter.getCounter());
String tab = getRowCountTableName(counter.getDisplayName());
if (tab != null) {
if (sb1.length() > 0) {
sb1.append(",");
}
sb1.append(tab);
sb1.append('~');
sb1.append(counter.getCounter());
ji.rowCountMap.put(tab, counter.getCounter());
}
}
}
} catch (Exception e) {
LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
    if (sb1.length() > 0) {
      ti.hm.put(Keys.ROWS_INSERTED.name(), sb1.toString());
      ji.hm.put(Keys.ROWS_INSERTED.name(), sb1.toString());
    }
    if (sb.length() > 0) {
      ti.hm.put(Keys.TASK_COUNTERS.name(), sb.toString());
    }
}
  public void printRowCount(String queryId) {
    QueryInfo ji = queryInfoMap.get(queryId);
    if (ji == null) {
      return;
    }
    for (Map.Entry<String, Long> ent : ji.rowCountMap.entrySet()) {
      console.printInfo(ent.getValue() + " Rows loaded to " + ent.getKey());
    }
  }
  /**
   * Called at the end of a job. A job corresponds to one SQL query.
   *
   * @param queryId
   */
public void endQuery(String queryId) {
QueryInfo ji = queryInfoMap.get(queryId);
if (ji == null) {
return;
}
log(RecordTypes.QueryEnd, ji.hm);
queryInfoMap.remove(queryId);
}
  /**
   * Called at the start of a task, from Driver.run(). A job can have multiple
   * tasks, and a task can contain multiple operators.
   *
   * @param task
   */
public void startTask(String queryId, Task<? extends Serializable> task,
String taskName) {
SessionState ss = SessionState.get();
if (ss == null) {
return;
}
TaskInfo ti = new TaskInfo();
ti.hm.put(Keys.QUERY_ID.name(), ss.getQueryId());
ti.hm.put(Keys.TASK_ID.name(), task.getId());
ti.hm.put(Keys.TASK_NAME.name(), taskName);
String id = queryId + ":" + task.getId();
taskInfoMap.put(id, ti);
log(RecordTypes.TaskStart, ti.hm);
}
/**
* Called at the end of a task.
*
* @param task
*/
public void endTask(String queryId, Task<? extends Serializable> task) {
String id = queryId + ":" + task.getId();
TaskInfo ti = taskInfoMap.get(id);
if (ti == null) {
return;
}
log(RecordTypes.TaskEnd, ti.hm);
taskInfoMap.remove(id);
}
  /**
   * Called to log the progress of a running task.
   *
   * @param task
   */
public void progressTask(String queryId, Task<? extends Serializable> task) {
String id = queryId + ":" + task.getId();
TaskInfo ti = taskInfoMap.get(id);
if (ti == null) {
return;
}
log(RecordTypes.TaskProgress, ti.hm);
}
  // Buffer reused across calls for plan-progress records.
  static Map<String, String> ctrmap = null;

  /**
   * Write out the query plan as a Counters record.
   */
public void logPlanProgress(QueryPlan plan) throws IOException {
if (ctrmap == null) {
ctrmap = new HashMap<String, String>();
}
ctrmap.put("plan", plan.toString());
log(RecordTypes.Counters, ctrmap);
}
  /**
   * Set the id to table name map.
   *
   * @param map
   */
public void setIdToTableMap(Map<String, String> map) {
idToTableMap = map;
}
/**
* Returns table name for the counter name.
*
* @param name
* @return tableName
*/
String getRowCountTableName(String name) {
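    // e.g. a counter named "TABLE_ID_3_ROWCOUNT" yields group(1) == "3", which
    // is looked up in the map registered via setIdToTableMap (illustrative).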
if (idToTableMap == null) {
return null;
}
Matcher m = rowCountPattern.matcher(name);
if (m.find()) {
String tuple = m.group(1);
return idToTableMap.get(tuple);
}
return null;
}
@Override
public void finalize() throws Throwable {
    if (histStream != null) {
histStream.close();
}
super.finalize();
}
}