/** * (c) Copyright 2012 WibiData, Inc. * * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.kiji.mapreduce.framework; import java.io.Closeable; import java.io.IOException; import java.util.Collections; import java.util.Map; import java.util.NavigableMap; import com.google.common.collect.Maps; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.kiji.annotations.ApiAudience; import org.kiji.annotations.ApiStability; import org.kiji.mapreduce.avro.generated.JobHistoryEntry; import org.kiji.schema.AtomicKijiPutter; import org.kiji.schema.EntityId; import org.kiji.schema.Kiji; import org.kiji.schema.KijiDataRequest; import org.kiji.schema.KijiDataRequestBuilder; import org.kiji.schema.KijiRowData; import org.kiji.schema.KijiRowScanner; import org.kiji.schema.KijiTable; import org.kiji.schema.KijiTableReader; import org.kiji.schema.layout.KijiTableLayout; /** * A class providing an API to install and access the job history kiji table. * * Used in places like KijiMapReduceJob to record information about jobs run through Kiji. */ @ApiAudience.Framework @ApiStability.Evolving public final class JobHistoryKijiTable implements Closeable { /** Every existing job history table has at least this version. */ private static final String PREV_TABLE_LAYOUT_VERSION = "1"; /** The name of the table storing a history of completed jobs. */ private static final String TABLE_NAME = "job_history"; /** The path to the layout for the table in our resources. */ private static final String TABLE_LAYOUT_RESOURCE = "/org/kiji/mapreduce/job-history-layout.json"; /** JSON file for job history table that adds job counters family. */ private static final String TABLE_LAYOUT_V2 = "/org/kiji/mapreduce/job-history-layout-v2-counterfamily.json"; /** Column family where job history information is stored. */ public static final String JOB_HISTORY_FAMILY = "info"; /** Column family for job counters. */ public static final String JOB_HISTORY_COUNTERS_FAMILY = "counters"; /** Column family where extended information is stored. */ public static final String JOB_HISTORY_EXTENDED_INFO_FAMILY = "extendedInfo"; /** Qualifier where job IDs are stored. */ public static final String JOB_HISTORY_ID_QUALIFIER = "jobId"; /** Qualifier where job names are stored. */ public static final String JOB_HISTORY_NAME_QUALIFIER = "jobName"; /** Qualifier where job start times are stored. */ public static final String JOB_HISTORY_START_TIME_QUALIFIER = "startTime"; /** Qualifier where job end times are stored. */ public static final String JOB_HISTORY_END_TIME_QUALIFIER = "endTime"; /** Qualifier where job end statuses are stored. */ public static final String JOB_HISTORY_END_STATUS_QUALIFIER = "jobEndStatus"; /** Qualifier where job counters are stored. */ public static final String JOB_HISTORY_COUNTERS_QUALIFIER = "counters"; /** Qualifier where job configurations are stored. */ public static final String JOB_HISTORY_CONFIGURATION_QUALIFIER = "configuration"; /** Value stored to configuration qualifier if the job did not have a configuration. */ public static final String JOB_HISTORY_NO_CONFIGURATION_VALUE = "No configuration for job."; /** Message to record into the {@value #JOB_HISTORY_END_STATUS_QUALIFIER} on job success. */ public static final String SUCCEEDED = "SUCCEEDED"; /** Message to record into the {@value #JOB_HISTORY_END_STATUS_QUALIFIER} on job failure. */ public static final String FAILED = "FAILED"; /** The HBaseKijiTable managed by the JobHistoryKijiTable. */ private final KijiTable mKijiTable; /** * Opens a JobHistoryKijiTable for a given kiji, installing it if necessary. This method should * be matched with a call to {@link #close}. * * @param kiji The kiji instance to use. * @return An opened JobHistoryKijiTable. * @throws IOException If there is an error opening the table. */ public static JobHistoryKijiTable open(Kiji kiji) throws IOException { return new JobHistoryKijiTable(kiji); } /** * Returns the default name of the job history table. * * @return The name of the job history table as used by the installer. */ public static String getInstallName() { return TABLE_NAME; } /** * Extract the counters from a Job. * * @param job Job from which to get counters. * @return a map from counters to their counts. Keys are group:name. * @throws IOException in case of an error getting the counters. */ private static Map<String, Long> getCounters( final Job job ) throws IOException { final Counters counters = job.getCounters(); final Map<String, Long> countersMap = Maps.newHashMap(); for (String group : counters.getGroupNames()) { for (Counter counter : counters.getGroup(group)) { countersMap.put(String.format("%s:%s", group, counter.getName()), counter.getValue()); } } return countersMap; } /** * Add counters to an outstanding atomic transaction on the given atomic putter. * * @param putter atomic putter with an open transaction. * @param startTime time in milliseconds since the epoch at which the job started. * @param counters map of counters from the job. Keys should be of the form 'group:name'. * @throws IOException in case of an error adding the counters to the transaction. */ private static void writeCounters( final AtomicKijiPutter putter, final long startTime, final Map<String, Long> counters ) throws IOException { for (Map.Entry<String, Long> counterEntry : counters.entrySet()) { putter.put(JOB_HISTORY_COUNTERS_FAMILY, counterEntry.getKey(), startTime, counterEntry.getValue()); } } /** * Add extended information to an outstanding atomic transaction on the given atomic putter. * * @param putter atomic putter with an open transaction. * @param startTime time in milliseconds since the epoch at which the job started. * @param extendedInfo map of additional information about the job. * @throws IOException in case of an error adding the extended info to the transaction. */ private static void writeExtendedInfo( final AtomicKijiPutter putter, final long startTime, final Map<String, String> extendedInfo ) throws IOException { for (Map.Entry<String, String> infoEntry : extendedInfo.entrySet()) { putter.put(JOB_HISTORY_EXTENDED_INFO_FAMILY, infoEntry.getKey(), startTime, infoEntry.getValue()); } } /** * Private constructor that opens a new JobHistoryKijiTable, creating it if necessary. * This method also updates an existing layout to the latest layout for the job * history table. * * @param kiji The kiji instance to retrieve the job history table from. * @throws IOException If there's an error opening the underlying HBaseKijiTable. */ private JobHistoryKijiTable(Kiji kiji) throws IOException { install(kiji); mKijiTable = kiji.openTable(TABLE_NAME); } /** * Writes a job into the JobHistoryKijiTable. * * @param job The job to save. * @param startTime The time the job began, in milliseconds. * @param endTime The time the job ended, in milliseconds * @throws IOException If there is an error writing to the table. */ public void recordJob( final Job job, final long startTime, final long endTime ) throws IOException { recordJob( job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(), job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap()); } /** * Writes details of a job into the JobHistoryKijiTable. * * @param jobId unique identifier for the job. * @param jobName name of the job. * @param startTime time in milliseconds since the epoch at which the job started. * @param endTime time in milliseconds since the epoch at which the job ended. * @param jobSuccess whether the job completed successfully. * @param counters map of counters from the job. Keys should be of the form 'group:name'. * @param conf Configuration of the job. * @param extendedInfo any additional information which should be stored about the job. * @throws IOException in case of an error writing to the table. */ // CSOFF: ParameterNumberCheck public void recordJob( final String jobId, final String jobName, final long startTime, final long endTime, final boolean jobSuccess, final Configuration conf, final Map<String, Long> counters, final Map<String, String> extendedInfo ) throws IOException { // CSON: ParameterNumberCheck final EntityId eid = mKijiTable.getEntityId(jobId); final AtomicKijiPutter putter = mKijiTable.getWriterFactory().openAtomicPutter(); try { putter.begin(eid); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_ID_QUALIFIER, startTime, jobId); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_NAME_QUALIFIER, startTime, jobName); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_START_TIME_QUALIFIER, startTime, startTime); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_END_TIME_QUALIFIER, startTime, endTime); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_END_STATUS_QUALIFIER, startTime, (jobSuccess) ? SUCCEEDED : FAILED); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_COUNTERS_QUALIFIER, startTime, counters.toString()); if (null != conf) { final ByteArrayOutputStream baos = new ByteArrayOutputStream(); conf.writeXml(baos); putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime, baos.toString("UTF-8")); } else { putter.put(JOB_HISTORY_FAMILY, JOB_HISTORY_CONFIGURATION_QUALIFIER, startTime, JOB_HISTORY_NO_CONFIGURATION_VALUE); } writeCounters(putter, startTime, counters); writeExtendedInfo(putter, startTime, extendedInfo); putter.commit(); } finally { putter.close(); } } /** * Install the job history table into a Kiji instance. This should be called only * via open, because we might want to update the layout of the job history table. * * @param kiji The Kiji instance to install this table in. * @throws IOException If there is an error. */ private static void install(Kiji kiji) throws IOException { if (!kiji.getTableNames().contains(TABLE_NAME)) { // Try to install the job history table if necessary. kiji.createTable( KijiTableLayout.createFromEffectiveJsonResource(TABLE_LAYOUT_RESOURCE).getDesc()); } // At this point, we either have an existing table or we just installed a new // one. Check if the table is using the old layout, and update it if it is. if (kiji.getMetaTable().getTableLayout(TABLE_NAME).getDesc().getLayoutId() .equals(PREV_TABLE_LAYOUT_VERSION)) { KijiTableLayout ktl = KijiTableLayout .createFromEffectiveJsonResource(TABLE_LAYOUT_V2); kiji.modifyTableLayout(ktl.getDesc()); } // If there are further updates to the job history layout, they should probably be added here. } /** * Get the saved information for a particular JobID. * * @param jobId The id of the job to retrieve. * @return A KijiRowData containing all the information for the requested Job. * @throws IOException If there is an IO error retrieving the data. */ public JobHistoryEntry getJobDetails(String jobId) throws IOException { KijiDataRequestBuilder builder = KijiDataRequest.builder(); builder.newColumnsDef().addFamily("info") .addFamily("counters") .addFamily("extendedInfo"); final KijiDataRequest request = builder.build(); final KijiRowData data; final KijiTableReader reader = mKijiTable.openTableReader(); try { data = reader.get(mKijiTable.getEntityId(jobId), request); } finally { reader.close(); } // We have to pull out the maps here to get around a pickiness for the Java compiler because // getMostRecentValues returns a generic type, which causes a compile error while passing to // setExtendedInfo below. NavigableMap<String, String> tempExtMap = data.getMostRecentValues("extendedInfo"); NavigableMap<String, Long> tempCounterMap = data.getMostRecentValues("counters"); return JobHistoryEntry.newBuilder() .setJobId(data.getMostRecentValue("info", "jobId").toString()) .setJobName(data.getMostRecentValue("info", "jobName").toString()) .setJobStartTime(data.<Long>getMostRecentValue("info", "startTime")) .setJobEndTime(data.<Long>getMostRecentValue("info", "endTime")) .setJobEndStatus(data.getMostRecentValue("info", "jobEndStatus").toString()) .setJobCounters(data.getMostRecentValue("info", "counters").toString()) .setJobConfiguration(data.getMostRecentValue("info", "configuration").toString()) .setExtendedInfo(tempExtMap) .setCountersFamily(tempCounterMap) .build(); } /** * Get the saved information for all JobIDs. * * @return A KijiRowScanner containing details for all the JobIDs. * @throws IOException If there is an IO error retrieving the data. */ public KijiRowScanner getJobScanner() throws IOException { KijiDataRequest wdr = KijiDataRequest.create("info"); KijiTableReader wtr = mKijiTable.openTableReader(); try { return wtr.getScanner(wdr); } finally { wtr.close(); } } @Override public void close() throws IOException { mKijiTable.release(); } }