/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.pig.tools.pigstats; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.Counters; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.FileBasedOutputSizeReader; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigStatsOutputSizeReader; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; import org.apache.pig.classification.InterfaceAudience; import org.apache.pig.classification.InterfaceStability; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.newplan.Operator; import org.apache.pig.newplan.PlanVisitor; import org.apache.pig.tools.pigstats.PigStats.JobGraph; /** * This class encapsulates the runtime statistics of a MapReduce job. * Job statistics is collected when job is completed. */ @InterfaceAudience.Public @InterfaceStability.Evolving public abstract class JobStats extends Operator { private static final Log LOG = LogFactory.getLog(JobStats.class); public static final String ALIAS = "JobStatistics:alias"; public static final String ALIAS_LOCATION = "JobStatistics:alias_location"; public static final String FEATURE = "JobStatistics:feature"; public static final String SUCCESS_HEADER = null; public static final String FAILURE_HEADER = null; public static enum JobState { UNKNOWN, SUCCESS, FAILED; } protected JobState state = JobState.UNKNOWN; protected ArrayList<OutputStats> outputs; protected ArrayList<InputStats> inputs; protected Configuration conf; protected long hdfsBytesRead = 0; protected long hdfsBytesWritten = 0; private String errorMsg; private Exception exception = null; protected JobStats(String name, JobGraph plan) { super(name, plan); outputs = new ArrayList<OutputStats>(); inputs = new ArrayList<InputStats>(); } public abstract String getJobId(); public void setConf(Configuration conf) { if (conf == null) { return; } this.conf = conf; } public JobState getState() { return state; } public boolean isSuccessful() { return (state == JobState.SUCCESS); } public void setSuccessful(boolean isSuccessful) { this.state = isSuccessful ? JobState.SUCCESS : JobState.FAILED; } public String getErrorMessage() { return errorMsg; } public Exception getException() { return exception; } public List<OutputStats> getOutputs() { return Collections.unmodifiableList(outputs); } public List<InputStats> getInputs() { return Collections.unmodifiableList(inputs); } public String getAlias() { return (String)getAnnotation(ALIAS); } public String getAliasLocation() { return (String)getAnnotation(ALIAS_LOCATION); } public String getFeature() { return (String)getAnnotation(FEATURE); } public long getHdfsBytesRead() { return hdfsBytesRead; } public long getHdfsBytesWritten() { return hdfsBytesWritten; } /** * Returns the total bytes written to user specified HDFS * locations of this job. */ public long getBytesWritten() { long count = 0; for (OutputStats out : outputs) { long n = out.getBytes(); if (n > 0) count += n; } return count; } /** * Returns the total number of records in user specified output * locations of this job. */ public long getRecordWrittern() { long count = 0; for (OutputStats out : outputs) { long rec = out.getNumberRecords(); if (rec > 0) count += rec; } return count; } @Override public abstract void accept(PlanVisitor v) throws FrontendException; @Override public boolean isEqual(Operator operator) { if (!(operator instanceof JobStats)) return false; return name.equalsIgnoreCase(operator.getName()); } public void setErrorMsg(String errorMsg) { this.errorMsg = errorMsg; } public void setBackendException(Exception e) { exception = e; } public abstract String getDisplayString(); /** * Calculate the median value from the given array * @param durations * @return median value */ protected long calculateMedianValue(List<Long> durations) { long median; // figure out the median Collections.sort(durations); int midPoint = durations.size() /2; if ((durations.size() & 1) == 1) { // odd median = durations.get(midPoint); } else { // even median = (durations.get(midPoint-1) + durations.get(midPoint)) / 2; } return median; } public boolean isSampler() { return getFeature().contains(ScriptState.PIG_FEATURE.SAMPLER.name()); } public boolean isIndexer() { return getFeature().contains(ScriptState.PIG_FEATURE.INDEXER.name()); } /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getNumberMaps} instead. */ @Deprecated abstract public int getNumberMaps(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getNumberReduces} instead. */ @Deprecated abstract public int getNumberReduces(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMaxMapTime} instead. */ @Deprecated abstract public long getMaxMapTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMinMapTime} instead. */ @Deprecated abstract public long getMinMapTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getAvgMapTime} instead. */ @Deprecated abstract public long getAvgMapTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMaxReduceTime} instead. */ @Deprecated abstract public long getMaxReduceTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMinReduceTime} instead. */ @Deprecated abstract public long getMinReduceTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getAvgREduceTime} instead. */ @Deprecated abstract public long getAvgREduceTime(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMapInputRecords} instead. */ @Deprecated abstract public long getMapInputRecords(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMapOutputRecords} instead. */ @Deprecated abstract public long getMapOutputRecords(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getReduceInputRecords} instead. */ @Deprecated abstract public long getReduceInputRecords(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getReduceOutputRecords} instead. */ @Deprecated abstract public long getReduceOutputRecords(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getSMMSpillCount} instead. */ @Deprecated abstract public long getSMMSpillCount(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getProactiveSpillCountObjects} instead. */ @Deprecated abstract public long getProactiveSpillCountObjects(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getProactiveSpillCountRecs} instead. */ @Deprecated abstract public long getProactiveSpillCountRecs(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getHadoopCounters} instead. */ @Deprecated abstract public Counters getHadoopCounters(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMultiStoreCounters} instead. */ @Deprecated abstract public Map<String, Long> getMultiStoreCounters(); /** * @deprecated If you are using mapreduce, please cast JobStats to org.apache.pig.tools.pigstats.mapreduce.MRJobStats, * then use {@link org.apache.pig.tools.pigstats.mapreduce.MRJobStats#getMultiInputCounters} instead. */ @Deprecated abstract public Map<String, Long> getMultiInputCounters(); /** * Looks up the output size reader from OUTPUT_SIZE_READER_KEY and invokes * it to get the size of output. If OUTPUT_SIZE_READER_KEY is not set, * defaults to FileBasedOutputSizeReader. * @param sto POStore * @param conf configuration */ public static long getOutputSize(POStore sto, Configuration conf) { PigStatsOutputSizeReader reader = null; String readerNames = conf.get( PigStatsOutputSizeReader.OUTPUT_SIZE_READER_KEY, FileBasedOutputSizeReader.class.getCanonicalName()); for (String className : readerNames.split(",")) { reader = (PigStatsOutputSizeReader) PigContext.instantiateFuncFromSpec(className); if (reader.supports(sto, conf)) { LOG.info("using output size reader: " + className); try { return reader.getOutputSize(sto, conf); } catch (FileNotFoundException e) { LOG.warn("unable to find the output file", e); return -1; } catch (IOException e) { LOG.warn("unable to get byte written of the job", e); return -1; } } } LOG.warn("unable to find an output size reader"); return -1; } }