/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapreduce.v2.jobhistory; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import static java.nio.charset.StandardCharsets.UTF_8; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; public class FileNameIndexUtils { // Sanitize job history file for predictable parsing static final String DELIMITER = "-"; static final String DELIMITER_ESCAPE = "%2D"; private static final Log LOG = LogFactory.getLog(FileNameIndexUtils.class); // Job history file names need to be backwards compatible // Only append new elements to the end of this list private static final int JOB_ID_INDEX = 0; private static final int SUBMIT_TIME_INDEX = 1; private static final int USER_INDEX = 2; private static final int JOB_NAME_INDEX = 3; private static final int FINISH_TIME_INDEX = 4; private static final int NUM_MAPS_INDEX = 5; private static final int NUM_REDUCES_INDEX = 6; private static final int JOB_STATUS_INDEX = 7; private static final int QUEUE_NAME_INDEX = 8; private static final int JOB_START_TIME_INDEX = 9; /** * Constructs the job history file name from the JobIndexInfo. * * @param indexInfo the index info. * @return the done job history filename. */ public static String getDoneFileName(JobIndexInfo indexInfo) throws IOException { return getDoneFileName(indexInfo, JHAdminConfig.DEFAULT_MR_HS_JOBNAME_LIMIT); } public static String getDoneFileName(JobIndexInfo indexInfo, int jobNameLimit) throws IOException { StringBuilder sb = new StringBuilder(); //JobId sb.append(encodeJobHistoryFileName(escapeDelimiters( TypeConverter.fromYarn(indexInfo.getJobId()).toString()))); sb.append(DELIMITER); //SubmitTime sb.append(encodeJobHistoryFileName(String.valueOf( indexInfo.getSubmitTime()))); sb.append(DELIMITER); //UserName sb.append(encodeJobHistoryFileName(escapeDelimiters( getUserName(indexInfo)))); sb.append(DELIMITER); //JobName sb.append(trimURLEncodedString(encodeJobHistoryFileName(escapeDelimiters( getJobName(indexInfo))), jobNameLimit)); sb.append(DELIMITER); //FinishTime sb.append(encodeJobHistoryFileName( String.valueOf(indexInfo.getFinishTime()))); sb.append(DELIMITER); //NumMaps sb.append(encodeJobHistoryFileName( String.valueOf(indexInfo.getNumMaps()))); sb.append(DELIMITER); //NumReduces sb.append(encodeJobHistoryFileName( String.valueOf(indexInfo.getNumReduces()))); sb.append(DELIMITER); //JobStatus sb.append(encodeJobHistoryFileName(indexInfo.getJobStatus())); sb.append(DELIMITER); //QueueName sb.append(escapeDelimiters(encodeJobHistoryFileName( getQueueName(indexInfo)))); sb.append(DELIMITER); //JobStartTime sb.append(encodeJobHistoryFileName( String.valueOf(indexInfo.getJobStartTime()))); sb.append(encodeJobHistoryFileName( JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION)); return sb.toString(); } /** * Parses the provided job history file name to construct a * JobIndexInfo object which is returned. * * @param jhFileName the job history filename. * @return a JobIndexInfo object built from the filename. */ public static JobIndexInfo getIndexInfo(String jhFileName) throws IOException { String fileName = jhFileName.substring(0, jhFileName.indexOf(JobHistoryUtils.JOB_HISTORY_FILE_EXTENSION)); JobIndexInfo indexInfo = new JobIndexInfo(); String[] jobDetails = fileName.split(DELIMITER); JobID oldJobId = JobID.forName(decodeJobHistoryFileName(jobDetails[JOB_ID_INDEX])); JobId jobId = TypeConverter.toYarn(oldJobId); indexInfo.setJobId(jobId); // Do not fail if there are some minor parse errors try { try { indexInfo.setSubmitTime(Long.parseLong( decodeJobHistoryFileName(jobDetails[SUBMIT_TIME_INDEX]))); } catch (NumberFormatException e) { LOG.warn("Unable to parse submit time from job history file " + jhFileName + " : " + e); } indexInfo.setUser( decodeJobHistoryFileName(jobDetails[USER_INDEX])); indexInfo.setJobName( decodeJobHistoryFileName(jobDetails[JOB_NAME_INDEX])); try { indexInfo.setFinishTime(Long.parseLong( decodeJobHistoryFileName(jobDetails[FINISH_TIME_INDEX]))); } catch (NumberFormatException e) { LOG.warn("Unable to parse finish time from job history file " + jhFileName + " : " + e); } try { indexInfo.setNumMaps(Integer.parseInt( decodeJobHistoryFileName(jobDetails[NUM_MAPS_INDEX]))); } catch (NumberFormatException e) { LOG.warn("Unable to parse num maps from job history file " + jhFileName + " : " + e); } try { indexInfo.setNumReduces(Integer.parseInt( decodeJobHistoryFileName(jobDetails[NUM_REDUCES_INDEX]))); } catch (NumberFormatException e) { LOG.warn("Unable to parse num reduces from job history file " + jhFileName + " : " + e); } indexInfo.setJobStatus( decodeJobHistoryFileName(jobDetails[JOB_STATUS_INDEX])); indexInfo.setQueueName( decodeJobHistoryFileName(jobDetails[QUEUE_NAME_INDEX])); try{ if (jobDetails.length <= JOB_START_TIME_INDEX) { indexInfo.setJobStartTime(indexInfo.getSubmitTime()); } else { indexInfo.setJobStartTime(Long.parseLong( decodeJobHistoryFileName(jobDetails[JOB_START_TIME_INDEX]))); } } catch (NumberFormatException e){ LOG.warn("Unable to parse start time from job history file " + jhFileName + " : " + e); } } catch (IndexOutOfBoundsException e) { LOG.warn("Parsing job history file with partial data encoded into name: " + jhFileName); } return indexInfo; } /** * Helper function to encode the URL of the filename of the job-history * log file. * * @param logFileName file name of the job-history file * @return URL encoded filename * @throws IOException */ public static String encodeJobHistoryFileName(String logFileName) throws IOException { String replacementDelimiterEscape = null; // Temporarily protect the escape delimiters from encoding if (logFileName.contains(DELIMITER_ESCAPE)) { replacementDelimiterEscape = nonOccursString(logFileName); logFileName = logFileName.replaceAll( DELIMITER_ESCAPE, replacementDelimiterEscape); } String encodedFileName = null; try { encodedFileName = URLEncoder.encode(logFileName, "UTF-8"); } catch (UnsupportedEncodingException uee) { IOException ioe = new IOException(); ioe.initCause(uee); ioe.setStackTrace(uee.getStackTrace()); throw ioe; } // Restore protected escape delimiters after encoding if (replacementDelimiterEscape != null) { encodedFileName = encodedFileName.replaceAll( replacementDelimiterEscape, DELIMITER_ESCAPE); } return encodedFileName; } /** * Helper function to decode the URL of the filename of the job-history * log file. * * @param logFileName file name of the job-history file * @return URL decoded filename * @throws IOException */ public static String decodeJobHistoryFileName(String logFileName) throws IOException { String decodedFileName = null; try { decodedFileName = URLDecoder.decode(logFileName, "UTF-8"); } catch (UnsupportedEncodingException uee) { IOException ioe = new IOException(); ioe.initCause(uee); ioe.setStackTrace(uee.getStackTrace()); throw ioe; } return decodedFileName; } static String nonOccursString(String logFileName) { int adHocIndex = 0; String unfoundString = "q" + adHocIndex; while (logFileName.contains(unfoundString)) { unfoundString = "q" + ++adHocIndex; } return unfoundString + "q"; } private static String getUserName(JobIndexInfo indexInfo) { return getNonEmptyString(indexInfo.getUser()); } private static String getJobName(JobIndexInfo indexInfo) { return getNonEmptyString(indexInfo.getJobName()); } private static String getQueueName(JobIndexInfo indexInfo) { return getNonEmptyString(indexInfo.getQueueName()); } //TODO Maybe handle default values for longs and integers here? private static String getNonEmptyString(String in) { if (in == null || in.length() == 0) { in = "NA"; } return in; } private static String escapeDelimiters(String escapee) { return escapee.replaceAll(DELIMITER, DELIMITER_ESCAPE); } /** * Trims the url-encoded string if required */ private static String trimURLEncodedString( String encodedString, int limitLength) { assert(limitLength >= 0) : "limitLength should be positive integer"; if (encodedString.length() <= limitLength) { return encodedString; } int index = 0; int increase = 0; byte[] strBytes = encodedString.getBytes(UTF_8); // calculate effective character length based on UTF-8 specification. // The size of a character coded in UTF-8 should be 4-byte at most. // See RFC3629 while (true) { byte b = strBytes[index]; if (b == '%') { byte minuend1 = strBytes[index + 1]; byte subtrahend1 = (byte)(Character.isDigit( minuend1) ? '0' : 'A' - 10); byte minuend2 = strBytes[index + 2]; byte subtrahend2 = (byte)(Character.isDigit( minuend2) ? '0' : 'A' - 10); int initialHex = ((Character.toUpperCase(minuend1) - subtrahend1) << 4) + (Character.toUpperCase(minuend2) - subtrahend2); if (0x00 <= initialHex && initialHex <= 0x7F) { // For 1-byte UTF-8 characters increase = 3; } else if (0xC2 <= initialHex && initialHex <= 0xDF) { // For 2-byte UTF-8 characters increase = 6; } else if (0xE0 <= initialHex && initialHex <= 0xEF) { // For 3-byte UTF-8 characters increase = 9; } else { // For 4-byte UTF-8 characters increase = 12; } } else { increase = 1; } if (index + increase > limitLength) { break; } else { index += increase; } } return encodedString.substring(0, index); } }