FileNameIndexUtils.java example

Explorer
yarn-comment-master
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.mapreduce.v2.jobhistory;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TypeConverter;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;

public class FileNameIndexUtils {

  static final int JOB_NAME_TRIM_LENGTH = 50;
  
  // Sanitize job history file for predictable parsing
  static final String DELIMITER = "-";
  static final String DELIMITER_ESCAPE = "%2D";
  
  private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class);

  // Job history file names need to be backwards compatible
  // Only append new elements to the end of this list
  private static final int JOB_ID_INDEX = 0;
  private static final int SUBMIT_TIME_INDEX = 1;
  private static final int USER_INDEX = 2;
  private static final int JOB_NAME_INDEX = 3;
  private static final int FINISH_TIME_INDEX = 4;
  private static final int NUM_MAPS_INDEX = 5;
  private static final int NUM_REDUCES_INDEX = 6;
  private static final int JOB_STATUS_INDEX = 7;
  private static final int QUEUE_NAME_INDEX = 8;

  /**
   * Constructs the job history file name from the JobIndexInfo.
   * 
   * @param indexInfo the index info.
   * @return the done job history filename.
   */
  public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException {
    StringBuilder sb = new StringBuilder();
    //JobId
    sb.append(escapeDelimiters(TypeConverter.fromYarn(indexInfo.getJobId()).toString()));
    sb.append(DELIMITER);
    
    //StartTime
    sb.append(indexInfo.getSubmitTime());
    sb.append(DELIMITER);
    
    //UserName
    sb.append(escapeDelimiters(getUserName(indexInfo)));
    sb.append(DELIMITER);
    
    //JobName
    sb.append(escapeDelimiters(trimJobName(getJobName(indexInfo))));
    sb.append(DELIMITER);
    
    //FinishTime
    sb.append(indexInfo.getFinishTime());
    sb.append(DELIMITER);
    
    //NumMaps
    sb.append(indexInfo.getNumMaps());
    sb.append(DELIMITER);
    
    //NumReduces
    sb.append(indexInfo.getNumReduces());
    sb.append(DELIMITER);
    
    //JobStatus
    sb.append(indexInfo.getJobStatus());
    sb.append(DELIMITER);
    
    //QueueName
    sb.append(indexInfo.getQueueName());

    sb.append(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION);
    return encodeJobHistoryFileName(sb.toString());
  }
  
  /**
   * Parses the provided job history file name to construct a
   * JobIndexInfo object which is returned.
   * 
   * @param jhFileName the job history filename.
   * @return a JobIndexInfo object built from the filename.
   */
  public static JobIndexInfo getIndexInfo(String jhFileName) throws IOException {
    String fileName = jhFileName.substring(0, jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION));
    JobIndexInfo indexInfo = new JobIndexInfo();
    
    String[] jobDetails = fileName.split(DELIMITER);
    
    JobID oldJobId = JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX]));
    JobId jobId = TypeConverter.toYarn(oldJobId);
    indexInfo.setJobId(jobId);

    // Do not fail if there are some minor parse errors
    try {
      try {
        indexInfo.setSubmitTime(
            Long.parseLong(decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX])));
      } catch (NumberFormatException e) {
        LOG.warn("Unable to parse submit time from job history file "
            + jhFileName + " : " + e);
      }

      indexInfo.setUser(
          decodeJobHistoryFileName(jobDetails[USER_INDEX]));

      indexInfo.setJobName(
          decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX]));

      try {
        indexInfo.setFinishTime(
            Long.parseLong(decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX])));
      } catch (NumberFormatException e) {
        LOG.warn("Unable to parse finish time from job history file "
            + jhFileName + " : " + e);
      }

      try {
        indexInfo.setNumMaps(
            Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX])));
      } catch (NumberFormatException e) {
        LOG.warn("Unable to parse num maps from job history file "
            + jhFileName + " : " + e);
      }

      try {
        indexInfo.setNumReduces(
            Integer.parseInt(decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX])));
      } catch (NumberFormatException e) {
        LOG.warn("Unable to parse num reduces from job history file "
            + jhFileName + " : " + e);
      }

      indexInfo.setJobStatus(
          decodeJobHistoryFileName(jobDetails[JOB_STATUS_INDEX]));

      indexInfo.setQueueName(
          decodeJobHistoryFileName(jobDetails[QUEUE_NAME_INDEX]));
    } catch (IndexOutOfBoundsException e) {
      LOG.warn("Parsing job history file with partial data encoded into name: "
          + jhFileName);
    }
    
    return indexInfo;
  }

  
  /**
   * Helper function to encode the URL of the filename of the job-history 
   * log file.
   * 
   * @param logFileName file name of the job-history file
   * @return URL encoded filename
   * @throws IOException
   */
  public static String encodeJobHistoryFileName(String logFileName)
  throws IOException {
    String replacementDelimiterEscape = null;

    // Temporarily protect the escape delimiters from encoding
    if (logFileName.contains(DELIMITER_ESCAPE)) {
      replacementDelimiterEscape = nonOccursString(logFileName);

      logFileName = logFileName.replaceAll(DELIMITER_ESCAPE, replacementDelimiterEscape);
    }

    String encodedFileName = null;
    try {
      encodedFileName = URLEncoder.encode(logFileName, "UTF-8");
    } catch (UnsupportedEncodingException uee) {
      IOException ioe = new IOException();
      ioe.initCause(uee);
      ioe.setStackTrace(uee.getStackTrace());
      throw ioe;
    }

    // Restore protected escape delimiters after encoding
    if (replacementDelimiterEscape != null) {
      encodedFileName = encodedFileName.replaceAll(replacementDelimiterEscape, DELIMITER_ESCAPE);
    }

    return encodedFileName;
  }
  
  /**
   * Helper function to decode the URL of the filename of the job-history 
   * log file.
   * 
   * @param logFileName file name of the job-history file
   * @return URL decoded filename
   * @throws IOException
   */
  public static String decodeJobHistoryFileName(String logFileName)
  throws IOException {
    String decodedFileName = null;
    try {
      decodedFileName = URLDecoder.decode(logFileName, "UTF-8");
    } catch (UnsupportedEncodingException uee) {
      IOException ioe = new IOException();
      ioe.initCause(uee);
      ioe.setStackTrace(uee.getStackTrace());
      throw ioe;
    }
    return decodedFileName;
  }
  
  static String nonOccursString(String logFileName) {
    int adHocIndex = 0;

    String unfoundString = "q" + adHocIndex;

    while (logFileName.contains(unfoundString)) {
      unfoundString = "q" + ++adHocIndex;
    }

    return unfoundString + "q";
  }
  
  private static String getUserName(JobIndexInfo indexInfo) {
    return getNonEmptyString(indexInfo.getUser());
  }
  
  private static String getJobName(JobIndexInfo indexInfo) {
    return getNonEmptyString(indexInfo.getJobName());
  }

  //TODO Maybe handle default values for longs and integers here?
  
  private static String getNonEmptyString(String in) {
    if (in == null || in.length() == 0) {
      in = "NA";
    }
    return in;
  }
  
  private static String escapeDelimiters(String escapee) {
    return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE);
  }

  /**
   * Trims the job-name if required
   */
  private static String trimJobName(String jobName) {
    if (jobName.length() > JOB_NAME_TRIM_LENGTH) {
      jobName = jobName.substring(0, JOB_NAME_TRIM_LENGTH);
    }
    return jobName;
  }
}