/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.history;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Counters.Group;

/**
 * HiveHistory. Logs session, query and task events to a history file, and
 * provides a parser for reading such files back.
 */
public class HiveHistory {

  PrintWriter histStream; // History file stream

  String histFileName; // History file name

  private static final Log LOG = LogFactory.getLog("hive.ql.exec.HiveHistory");

  private LogHelper console;

  private Map<String, String> idToTableMap = null;

  // Job hash map
  private final HashMap<String, QueryInfo> queryInfoMap = new HashMap<String, QueryInfo>();

  // Task hash map
  private final HashMap<String, TaskInfo> taskInfoMap = new HashMap<String, TaskInfo>();

  private static final String DELIMITER = " ";

  /**
   * RecordTypes.
   */
  public static enum RecordTypes {
    QueryStart, QueryEnd, TaskStart, TaskEnd, TaskProgress, SessionStart,
    SessionEnd, Counters
  };

  /**
   * Keys.
   */
  public static enum Keys {
    SESSION_ID, QUERY_ID, TASK_ID, QUERY_RET_CODE, QUERY_NUM_TASKS,
    QUERY_STRING, TIME, TASK_RET_CODE, TASK_NAME, TASK_HADOOP_ID,
    TASK_HADOOP_PROGRESS, TASK_COUNTERS, TASK_NUM_MAPPERS, TASK_NUM_REDUCERS,
    ROWS_INSERTED
  };

  private static final String KEY = "(\\w+)";
  private static final String VALUE = "[[^\"]?]+"; // anything but a " in ""
  private static final String ROW_COUNT_PATTERN = "TABLE_ID_(\\d+)_ROWCOUNT";

  private static final Pattern pattern = Pattern.compile(KEY + "=" + "\""
      + VALUE + "\"");
  private static final Pattern rowCountPattern = Pattern
      .compile(ROW_COUNT_PATTERN);

  // temp buffer for parsed data
  private static Map<String, String> parseBuffer = new HashMap<String, String>();

  /**
   * Listener interface. The parser will call the handle function for each
   * record type.
   */
  public static interface Listener {

    void handle(RecordTypes recType, Map<String, String> values)
        throws IOException;
  }
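
  // Illustrative sketch of pairing a Listener with parseHiveHistory to replay
  // a history file. The file path and the record-counting logic below are
  // hypothetical and not part of this class:
  //
  //   final Map<RecordTypes, Integer> counts = new HashMap<RecordTypes, Integer>();
  //   HiveHistory.parseHiveHistory("/tmp/hive_job_log_example.txt",
  //       new HiveHistory.Listener() {
  //         public void handle(RecordTypes recType, Map<String, String> values)
  //             throws IOException {
  //           Integer prev = counts.get(recType);
  //           counts.put(recType, prev == null ? 1 : prev + 1);
  //         }
  //       });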

  /**
   * Parses a history file and calls the callback functions.
   *
   * @param path
   * @param l
   * @throws IOException
   */
  public static void parseHiveHistory(String path, Listener l) throws IOException {
    FileInputStream fi = new FileInputStream(path);
    BufferedReader reader = new BufferedReader(new InputStreamReader(fi));
    try {
      String line = null;
      StringBuilder buf = new StringBuilder();
      while ((line = reader.readLine()) != null) {
        buf.append(line);
        // if the line does not end with " then it is a line continuation
        if (!line.trim().endsWith("\"")) {
          continue;
        }
        parseLine(buf.toString(), l);
        buf = new StringBuilder();
      }
    } finally {
      try {
        reader.close();
      } catch (IOException ex) {
      }
    }
  }

  /**
   * Parses a single line of history.
   *
   * @param line
   * @param l
   * @throws IOException
   */
  private static void parseLine(String line, Listener l) throws IOException {
    // extract the record type
    int idx = line.indexOf(' ');
    String recType = line.substring(0, idx);
    String data = line.substring(idx + 1, line.length());

    Matcher matcher = pattern.matcher(data);

    while (matcher.find()) {
      String tuple = matcher.group(0);
      String[] parts = tuple.split("=");

      parseBuffer.put(parts[0], parts[1].substring(1, parts[1].length() - 1));
    }

    l.handle(RecordTypes.valueOf(recType), parseBuffer);

    parseBuffer.clear();
  }

  /**
   * Info.
   */
  public static class Info {
  }

  /**
   * SessionInfo.
   */
  public static class SessionInfo extends Info {
    public String sessionId;
  };

  /**
   * QueryInfo.
   */
  public static class QueryInfo extends Info {
    public Map<String, String> hm = new HashMap<String, String>();
    public Map<String, Long> rowCountMap = new HashMap<String, Long>();
  };

  /**
   * TaskInfo.
   */
  public static class TaskInfo extends Info {
    public Map<String, String> hm = new HashMap<String, String>();
  };

  /**
   * Construct a HiveHistory object and open the history log file.
   *
   * @param ss
   */
  public HiveHistory(SessionState ss) {
    try {
      console = new LogHelper(LOG);
      String conf_file_loc = ss.getConf().getVar(
          HiveConf.ConfVars.HIVEHISTORYFILELOC);
      if ((conf_file_loc == null) || conf_file_loc.length() == 0) {
        console.printError("No history file location given");
        return;
      }

      // Create directory
      File f = new File(conf_file_loc);
      if (!f.exists()) {
        if (!f.mkdirs()) {
          console.printError("Unable to create log directory " + conf_file_loc);
          return;
        }
      }
      Random randGen = new Random();
      do {
        histFileName = conf_file_loc + "/hive_job_log_" + ss.getSessionId()
            + "_" + Math.abs(randGen.nextInt()) + ".txt";
      } while (new File(histFileName).exists());
      console.printInfo("Hive history file=" + histFileName);
      histStream = new PrintWriter(histFileName);

      HashMap<String, String> hm = new HashMap<String, String>();
      hm.put(Keys.SESSION_ID.name(), ss.getSessionId());
      log(RecordTypes.SessionStart, hm);
    } catch (FileNotFoundException e) {
      console.printError("FAILED: Failed to open Query Log : " + histFileName
          + " " + e.getMessage(), "\n"
          + org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
  }

  /**
   * @return histFileName
   */
  public String getHistFileName() {
    return histFileName;
  }
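
  // Each record written by log() is a single line of the form
  //   RecordType KEY_1="value 1" KEY_2="value 2" ... TIME="<millis>"
  // with embedded newlines in values replaced by spaces. An illustrative
  // (hypothetical) TaskStart record, wrapped here for readability but written
  // as one line:
  //
  //   TaskStart TASK_NAME="org.apache.hadoop.hive.ql.exec.ExecDriver"
  //       QUERY_ID="user_20090101000000" TASK_ID="Stage-1" TIME="1230768000000"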

  /**
   * Write a history record to the history file.
   *
   * @param rt
   * @param keyValMap
   */
  void log(RecordTypes rt, Map<String, String> keyValMap) {
    if (histStream == null) {
      return;
    }

    StringBuilder sb = new StringBuilder();
    sb.append(rt.name());

    for (Map.Entry<String, String> ent : keyValMap.entrySet()) {
      sb.append(DELIMITER);
      String key = ent.getKey();
      String val = ent.getValue();
      if (val != null) {
        // values must stay on one line; the parser treats a trailing " as
        // end-of-record
        val = val.replace('\n', ' ');
      }
      sb.append(key + "=\"" + val + "\"");
    }
    sb.append(DELIMITER);
    sb.append(Keys.TIME.name() + "=\"" + System.currentTimeMillis() + "\"");

    histStream.println(sb);
    histStream.flush();
  }

  /**
   * Called at the start of a job, from Driver.execute().
   */
  public void startQuery(String cmd, String id) {
    SessionState ss = SessionState.get();
    if (ss == null) {
      return;
    }
    QueryInfo ji = new QueryInfo();

    ji.hm.put(Keys.QUERY_ID.name(), id);
    ji.hm.put(Keys.QUERY_STRING.name(), cmd);

    queryInfoMap.put(id, ji);

    log(RecordTypes.QueryStart, ji.hm);
  }

  /**
   * Used to set job status and other attributes of a job.
   *
   * @param queryId
   * @param propName
   * @param propValue
   */
  public void setQueryProperty(String queryId, Keys propName, String propValue) {
    QueryInfo ji = queryInfoMap.get(queryId);
    if (ji == null) {
      return;
    }
    ji.hm.put(propName.name(), propValue);
  }

  /**
   * Used to set task properties.
   *
   * @param taskId
   * @param propName
   * @param propValue
   */
  public void setTaskProperty(String queryId, String taskId, Keys propName,
      String propValue) {
    String id = queryId + ":" + taskId;
    TaskInfo ti = taskInfoMap.get(id);
    if (ti == null) {
      return;
    }
    ti.hm.put(propName.name(), propValue);
  }

  /**
   * Serialize the task counters and set as a task property.
   *
   * @param queryId
   * @param taskId
   * @param ctrs
   */
  public void setTaskCounters(String queryId, String taskId, Counters ctrs) {
    String id = queryId + ":" + taskId;
    QueryInfo ji = queryInfoMap.get(queryId);
    StringBuilder sb1 = new StringBuilder("");
    TaskInfo ti = taskInfoMap.get(id);
    if ((ti == null) || (ctrs == null)) {
      return;
    }
    StringBuilder sb = new StringBuilder("");
    try {
      boolean first = true;
      for (Group group : ctrs) {
        for (Counter counter : group) {
          if (first) {
            first = false;
          } else {
            sb.append(',');
          }
          sb.append(group.getDisplayName());
          sb.append('.');
          sb.append(counter.getDisplayName());
          sb.append(':');
          sb.append(counter.getCounter());
          String tab = getRowCountTableName(counter.getDisplayName());
          if (tab != null) {
            if (sb1.length() > 0) {
              sb1.append(",");
            }
            sb1.append(tab);
            sb1.append('~');
            sb1.append(counter.getCounter());
            ji.rowCountMap.put(tab, counter.getCounter());
          }
        }
      }
    } catch (Exception e) {
      LOG.warn(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    if (sb1.length() > 0) {
      taskInfoMap.get(id).hm.put(Keys.ROWS_INSERTED.name(), sb1.toString());
      queryInfoMap.get(queryId).hm.put(Keys.ROWS_INSERTED.name(), sb1
          .toString());
    }
    if (sb.length() > 0) {
      taskInfoMap.get(id).hm.put(Keys.TASK_COUNTERS.name(), sb.toString());
    }
  }

  public void printRowCount(String queryId) {
    QueryInfo ji = queryInfoMap.get(queryId);

    for (String tab : ji.rowCountMap.keySet()) {
      console.printInfo(ji.rowCountMap.get(tab) + " Rows loaded to " + tab);
    }
  }

  /**
   * Called at the end of a job. A job is a SQL query.
   *
   * @param queryId
   */
  public void endQuery(String queryId) {
    QueryInfo ji = queryInfoMap.get(queryId);
    if (ji == null) {
      return;
    }
    log(RecordTypes.QueryEnd, ji.hm);
    queryInfoMap.remove(queryId);
  }
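
  // Typical call sequence, as a rough sketch (the Hive Driver supplies the
  // real command text, ids, tasks and counters; names below are placeholders):
  //
  //   HiveHistory hh = SessionState.get().getHiveHistory();  // assumed accessor
  //   hh.startQuery(cmd, queryId);
  //   hh.startTask(queryId, task, task.getClass().getName());
  //   hh.setTaskCounters(queryId, task.getId(), counters);
  //   hh.progressTask(queryId, task);
  //   hh.endTask(queryId, task);
  //   hh.setQueryProperty(queryId, Keys.QUERY_RET_CODE, "0");
  //   hh.endQuery(queryId);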

  /**
   * Called at the start of a task, from Driver.run(). A job can have multiple
   * tasks, and a task can have multiple operators.
   *
   * @param task
   */
  public void startTask(String queryId, Task<? extends Serializable> task,
      String taskName) {
    SessionState ss = SessionState.get();
    if (ss == null) {
      return;
    }
    TaskInfo ti = new TaskInfo();

    ti.hm.put(Keys.QUERY_ID.name(), ss.getQueryId());
    ti.hm.put(Keys.TASK_ID.name(), task.getId());
    ti.hm.put(Keys.TASK_NAME.name(), taskName);

    String id = queryId + ":" + task.getId();
    taskInfoMap.put(id, ti);

    log(RecordTypes.TaskStart, ti.hm);
  }

  /**
   * Called at the end of a task.
   *
   * @param task
   */
  public void endTask(String queryId, Task<? extends Serializable> task) {
    String id = queryId + ":" + task.getId();
    TaskInfo ti = taskInfoMap.get(id);

    if (ti == null) {
      return;
    }
    log(RecordTypes.TaskEnd, ti.hm);
    taskInfoMap.remove(id);
  }

  /**
   * Called to log the progress of a task.
   *
   * @param task
   */
  public void progressTask(String queryId, Task<? extends Serializable> task) {
    String id = queryId + ":" + task.getId();
    TaskInfo ti = taskInfoMap.get(id);
    if (ti == null) {
      return;
    }
    log(RecordTypes.TaskProgress, ti.hm);
  }

  /**
   * Write out the query plan as a Counters record.
   */
  static Map<String, String> ctrmap = null;

  public void logPlanProgress(QueryPlan plan) throws IOException {
    if (ctrmap == null) {
      ctrmap = new HashMap<String, String>();
    }
    ctrmap.put("plan", plan.toString());
    log(RecordTypes.Counters, ctrmap);
  }

  /**
   * Set the id to table name map.
   *
   * @param map
   */
  public void setIdToTableMap(Map<String, String> map) {
    idToTableMap = map;
  }

  /**
   * Returns the table name for the counter name.
   *
   * @param name
   * @return tableName
   */
  String getRowCountTableName(String name) {
    if (idToTableMap == null) {
      return null;
    }
    Matcher m = rowCountPattern.matcher(name);

    if (m.find()) {
      String tuple = m.group(1);
      return idToTableMap.get(tuple);
    }
    return null;
  }

  @Override
  public void finalize() throws Throwable {
    if (histStream != null) {
      histStream.close();
    }
    super.finalize();
  }
}