/*******************************************************************************
*
* Pentaho Big Data
*
* Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.hadoop.mapreduce;
import com.thoughtworks.xstream.XStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Reporter;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.trans.RowProducer;
import org.pentaho.di.trans.Trans;
import org.pentaho.hadoop.mapreduce.converter.spi.ITypeConverter;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.UUID;
import static org.pentaho.hadoop.shim.api.Configuration.STRING_COMBINE_SINGLE_THREADED;
import static org.pentaho.hadoop.shim.api.Configuration.STRING_REDUCE_SINGLE_THREADED;
/**
 * Common configuration and row-injection support for Hadoop (old {@code mapred} API) tasks that run a PDI
 * transformation held in {@link #trans}.
 * <p>
 * {@link #configure(JobConf)} reads the transformation XML, step names, variable space, log level and output
 * classes from the job configuration and then builds the transformation via {@link #createTrans(Configuration)}.
 * Implementing classes must call {@link #setMRType(MROperations)} before {@code configure} is invoked.
 * <p>
 * Note that {@code configure} only resolves output classes for {@link MROperations#Combine} and
 * {@link MROperations#Reduce}; {@link MROperations#Map} is rejected there with an
 * {@link IllegalArgumentException}, although {@link #createTrans(Configuration)} itself supports it.
 *
 * @param <K> type of the output key class read from the job configuration
 * @param <V> type of the output value class read from the job configuration
 */
@SuppressWarnings( "deprecation" )
public class PentahoMapReduceBase<K, V> extends MapReduceBase {

  /**
   * Counter identifiers. They are not referenced inside this class; presumably subclasses report them
   * through the Hadoop {@link Reporter} (verify against callers).
   */
  protected enum Counter {
    INPUT_RECORDS,
    OUTPUT_RECORDS,
    OUT_RECORD_WITH_NULL_KEY,
    OUT_RECORD_WITH_NULL_VALUE
  }

  /** The role the transformation plays; one of these must be set through {@link #setMRType(MROperations)}. */
  public enum MROperations {
    Map, Combine, Reduce
  }

  /** Job properties with this prefix are copied to system properties with the prefix stripped. */
  private static final String ENVIRONMENT_VARIABLE_PREFIX = "java.system.";

  /** Job properties and variables with this prefix are copied to system properties under the same name. */
  private static final String KETTLE_VARIABLE_PREFIX = "KETTLE_";

  /** Value of the {@code transformation-map-xml} job property. */
  protected String transMapXml;

  /** Value of the {@code transformation-combiner-xml} job property. */
  protected String transCombinerXml;

  /** Value of the {@code transformation-reduce-xml} job property. */
  protected String transReduceXml;

  /** Value of the {@code transformation-map-input-stepname} job property. */
  protected String mapInputStepName;

  /** Value of the {@code transformation-combiner-input-stepname} job property. */
  protected String combinerInputStepName;

  /** Value of the {@code transformation-reduce-input-stepname} job property. */
  protected String reduceInputStepName;

  /** Value of the {@code transformation-map-output-stepname} job property. */
  protected String mapOutputStepName;

  /** Value of the {@code transformation-combiner-output-stepname} job property. */
  protected String combinerOutputStepName;

  /** Value of the {@code transformation-reduce-output-stepname} job property. */
  protected String reduceOutputStepName;

  /** Variable space deserialized from the {@code variableSpace} job property, or an empty one if absent. */
  protected VariableSpace variableSpace = null;

  /** Output key class resolved in {@link #configure(JobConf)} according to the operation type. */
  protected Class<K> outClassK;

  /** Output value class resolved in {@link #configure(JobConf)} according to the operation type. */
  protected Class<V> outClassV;

  /** Identifier of this instance; defaults to a random UUID string. */
  protected String id = UUID.randomUUID().toString();

  /** Whether debug messages are printed; read from the {@code debug} job property. */
  protected boolean debug = false;

  /** Log level parsed from the {@code logLevel} job property; stays unset when the property is empty. */
  protected LogLevel logLevel;

  /** The transformation that will be used as a mapper, combiner or reducer. */
  protected Trans trans;

  /** What this object is being used for; set by the implementing class. */
  protected MROperations mrOperation;

  /** Row listener whose recorded exception is exposed by {@link #getException()}; not assigned in this class. */
  protected OutputCollectorRowListener<K, V> rowCollector;

  /** Whether the combiner transformation is flagged single threaded in the job configuration. */
  protected boolean combineSingleThreaded;

  /** Whether the reducer transformation is flagged single threaded in the job configuration. */
  protected boolean reduceSingleThreaded;

  /**
   * Creates an unconfigured instance.
   *
   * @throws KettleException declared for implementing classes; never thrown by this constructor itself
   */
  public PentahoMapReduceBase() throws KettleException {
  }

  /**
   * Reads all Pentaho MapReduce settings from the job configuration, publishes selected entries as system
   * properties, resolves the output key/value classes and creates the transformation.
   *
   * @param job the Hadoop job configuration
   * @throws IllegalStateException    if no operation type was set through {@link #setMRType(MROperations)}
   * @throws IllegalArgumentException if the operation type is not Combine or Reduce, or if the
   *                                  {@code logLevel} property is not a valid {@link LogLevel} name
   * @throws RuntimeException         if the transformation cannot be loaded
   */
  @Override
  public void configure( JobConf job ) {
    super.configure( job );

    // Fail fast with a descriptive message; switching on a null enum below would otherwise surface as a bare
    // NullPointerException.
    if ( mrOperation == null ) {
      throw new IllegalStateException(
        "Map or reduce operation has not been specified. Call setMRType from implementing classes constructor." );
    }

    debug = "true".equalsIgnoreCase( job.get( "debug" ) ); //$NON-NLS-1$

    readTransformationSettings( job );
    variableSpace = loadVariableSpace( job.get( "variableSpace" ) );
    exportSystemProperties( job );

    MRUtil.passInformationToTransformation( variableSpace, job );

    resolveOutputClasses( job );

    if ( debug ) {
      System.out.println( "Job configuration>" );
      System.out.println( "Output key class: " + outClassK.getName() );
      System.out.println( "Output value class: " + outClassV.getName() );
    }

    // set the log level to what the level of the job is
    String stringLogLevel = job.get( "logLevel" );
    if ( Const.isEmpty( stringLogLevel ) ) {
      System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set." );
    } else {
      logLevel = LogLevel.valueOf( stringLogLevel );
      setDebugStatus( "Log level set to " + stringLogLevel );
    }

    createTrans( job );
  }

  /** Copies the transformation XML, step names and single-threaded flags from the job into the fields. */
  private void readTransformationSettings( JobConf job ) {
    transMapXml = job.get( "transformation-map-xml" );
    transCombinerXml = job.get( "transformation-combiner-xml" );
    transReduceXml = job.get( "transformation-reduce-xml" );

    mapInputStepName = job.get( "transformation-map-input-stepname" );
    mapOutputStepName = job.get( "transformation-map-output-stepname" );

    combinerInputStepName = job.get( "transformation-combiner-input-stepname" );
    combinerOutputStepName = job.get( "transformation-combiner-output-stepname" );
    combineSingleThreaded = isCombinerSingleThreaded( job );

    reduceInputStepName = job.get( "transformation-reduce-input-stepname" );
    reduceOutputStepName = job.get( "transformation-reduce-output-stepname" );
    reduceSingleThreaded = isReducerSingleThreaded( job );
  }

  /**
   * Deserializes the variable space from XML and publishes its {@code KETTLE_} variables as system properties;
   * returns an empty variable space when no XML is supplied.
   */
  private VariableSpace loadVariableSpace( String xmlVariableSpace ) {
    if ( Const.isEmpty( xmlVariableSpace ) ) {
      setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration." );
      return new Variables();
    }

    setDebugStatus( "PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: " );

    // deserialize from xml to variable space
    // NOTE(review): XStream can instantiate arbitrary types named in the XML. The XML comes from the job
    // configuration; confirm that source is trusted or restrict the permitted types.
    XStream xStream = new XStream();
    setDebugStatus( "PentahoMapReduceBase: Setting classes variableSpace property.: " );
    VariableSpace space = (VariableSpace) xStream.fromXML( xmlVariableSpace );

    for ( String variableName : space.listVariables() ) {
      if ( variableName.startsWith( KETTLE_VARIABLE_PREFIX ) ) {
        setSystemPropertyIfPresent( variableName, space.getVariable( variableName ) );
      }
    }
    return space;
  }

  /**
   * Publishes job entries as system properties: {@code java.system.}-prefixed entries under their unprefixed
   * name, {@code KETTLE_}-prefixed entries under their full name.
   */
  private void exportSystemProperties( JobConf job ) {
    Iterator<Entry<String, String>> entries = job.iterator();
    while ( entries.hasNext() ) {
      Entry<String, String> entry = entries.next();
      String name = entry.getKey();
      if ( name.startsWith( ENVIRONMENT_VARIABLE_PREFIX ) ) {
        // A key equal to the bare prefix yields an empty name, which the helper skips.
        setSystemPropertyIfPresent( name.substring( ENVIRONMENT_VARIABLE_PREFIX.length() ), entry.getValue() );
      } else if ( name.startsWith( KETTLE_VARIABLE_PREFIX ) ) {
        setSystemPropertyIfPresent( name, entry.getValue() );
      }
    }
  }

  /**
   * Sets a system property unless the name is empty or the value is null; {@link System#setProperty} throws
   * for both of those inputs.
   */
  private static void setSystemPropertyIfPresent( String name, String value ) {
    if ( !Const.isEmpty( name ) && value != null ) {
      System.setProperty( name, value );
    }
  }

  /**
   * Picks the output key/value classes for the current operation: map output classes for a combiner, job
   * output classes for a reducer. Any other operation is rejected.
   */
  @SuppressWarnings( "unchecked" )
  private void resolveOutputClasses( JobConf job ) {
    switch ( mrOperation ) {
      case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
      case Reduce:
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
      default:
        throw new IllegalArgumentException( "Unsupported MapReduce operation: " + mrOperation );
    }
  }

  /**
   * Delegates to {@link MapReduceBase#close()}.
   *
   * @throws IOException if the superclass fails to close
   */
  @Override
  public void close() throws IOException {
    super.close();
  }

  /**
   * Injects a key/value pair using key ordinal 0 and value ordinal 1.
   *
   * @param key             the key to inject
   * @param inConverterK    converter applied to the key, or {@code null} to inject it as is
   * @param inConverterV    converter applied to the value, or {@code null} to inject it as is
   * @param injectorRowMeta row metadata describing the injected row
   * @param rowProducer     receives the assembled row
   * @param value           the value to inject
   * @param reporter        receives the debug status message when debug is enabled
   * @throws Exception if conversion or row injection fails
   * @deprecated Use
   * {@link #injectValue(Object, ITypeConverter, Object, ITypeConverter, RowMetaInterface, RowProducer, Reporter)}
   * instead; its parameters have been arranged to be more uniform.
   */
  @Deprecated
  public void injectValue( Object key, ITypeConverter inConverterK, ITypeConverter inConverterV,
                           RowMeta injectorRowMeta, RowProducer rowProducer, Object value, Reporter reporter )
    throws Exception {
    injectValue( key, inConverterK, value, inConverterV, injectorRowMeta, rowProducer, reporter );
  }

  /**
   * Injects a key/value pair using key ordinal 0 and value ordinal 1.
   *
   * @param key             the key to inject
   * @param inConverterK    converter applied to the key, or {@code null} to inject it as is
   * @param value           the value to inject
   * @param inConverterV    converter applied to the value, or {@code null} to inject it as is
   * @param injectorRowMeta row metadata describing the injected row
   * @param rowProducer     receives the assembled row
   * @param reporter        receives the debug status message when debug is enabled
   * @throws Exception if conversion or row injection fails
   */
  public void injectValue( Object key, ITypeConverter inConverterK,
                           Object value, ITypeConverter inConverterV,
                           RowMetaInterface injectorRowMeta, RowProducer rowProducer, Reporter reporter )
    throws Exception {
    injectValue( key, 0, inConverterK, value, 1, inConverterV, injectorRowMeta, rowProducer, reporter );
  }

  /**
   * Assembles a row of {@code injectorRowMeta.size()} fields, stores the (optionally converted) key and value
   * at the given ordinals and hands the row to the row producer.
   *
   * @param key             the key to inject
   * @param keyOrdinal      index of the key field within the row
   * @param inConverterK    converter applied to the key, or {@code null} to inject it as is
   * @param value           the value to inject
   * @param valueOrdinal    index of the value field within the row
   * @param inConverterV    converter applied to the value, or {@code null} to inject it as is
   * @param injectorRowMeta row metadata describing the injected row
   * @param rowProducer     receives the assembled row
   * @param reporter        receives the debug status message when debug is enabled
   * @throws Exception if conversion or row injection fails
   */
  public void injectValue( Object key, int keyOrdinal, ITypeConverter inConverterK,
                           Object value, int valueOrdinal, ITypeConverter inConverterV,
                           RowMetaInterface injectorRowMeta, RowProducer rowProducer, Reporter reporter )
    throws Exception {
    Object[] row = new Object[ injectorRowMeta.size() ];
    row[ keyOrdinal ] =
      inConverterK != null ? inConverterK.convert( injectorRowMeta.getValueMeta( keyOrdinal ), key ) : key;
    row[ valueOrdinal ] =
      inConverterV != null ? inConverterV.convert( injectorRowMeta.getValueMeta( valueOrdinal ), value ) : value;

    // Checked here as well so the message string is only built when debugging.
    if ( debug ) {
      setDebugStatus( reporter, "Injecting input record [" + row[ keyOrdinal ] + "] - [" + row[ valueOrdinal ] + "]" );
    }

    rowProducer.putRow( injectorRowMeta, row );
  }

  /**
   * Builds {@link #trans} from the transformation XML that matches the current operation type. Combiner and
   * reducer transformations are created with the single-threaded flag read from {@code conf}; the mapper
   * transformation always passes {@code false}.
   *
   * @param conf the Hadoop configuration handed to {@code MRUtil.getTrans}
   * @throws RuntimeException if no operation type has been set or the transformation cannot be loaded
   */
  protected void createTrans( final Configuration conf ) {
    if ( mrOperation == null ) {
      throw new RuntimeException(
        "Map or reduce operation has not been specified. Call setMRType from implementing classes constructor." );
    }

    try {
      switch ( mrOperation ) {
        case Map:
          setDebugStatus( "Creating a transformation for a map." );
          trans = MRUtil.getTrans( conf, transMapXml, false );
          break;
        case Combine:
          setDebugStatus( "Creating a transformation for a combiner." );
          trans = MRUtil.getTrans( conf, transCombinerXml, isCombinerSingleThreaded( conf ) );
          break;
        case Reduce:
          setDebugStatus( "Creating a transformation for a reduce." );
          trans = MRUtil.getTrans( conf, transReduceXml, isReducerSingleThreaded( conf ) );
          break;
        default:
          break;
      }
    } catch ( KettleException ke ) {
      throw new RuntimeException( "Error loading transformation for " + mrOperation, ke ); //$NON-NLS-1$
    }
  }

  /** Returns whether the combiner single-threaded property equals "true", ignoring case. */
  private boolean isCombinerSingleThreaded( final Configuration conf ) {
    return "true".equalsIgnoreCase( conf.get( STRING_COMBINE_SINGLE_THREADED ) );
  }

  /** Returns whether the reducer single-threaded property equals "true", ignoring case. */
  private boolean isReducerSingleThreaded( final Configuration conf ) {
    return "true".equalsIgnoreCase( conf.get( STRING_REDUCE_SINGLE_THREADED ) );
  }

  /**
   * Sets what this object is used for. Must be called before {@link #configure(JobConf)}.
   *
   * @param mrOperation the operation type
   */
  public void setMRType( MROperations mrOperation ) {
    this.mrOperation = mrOperation;
  }

  /** @return the map transformation XML */
  public String getTransMapXml() {
    return transMapXml;
  }

  /** @param transMapXml the map transformation XML */
  public void setTransMapXml( String transMapXml ) {
    this.transMapXml = transMapXml;
  }

  /** @return the combiner transformation XML */
  public String getTransCombinerXml() {
    return transCombinerXml;
  }

  /** @param transCombinerXml the combiner transformation XML */
  public void setTransCombinerXml( String transCombinerXml ) {
    this.transCombinerXml = transCombinerXml;
  }

  /**
   * Historical, misnamed setter for the combiner transformation XML; kept for existing callers and equivalent
   * to {@link #setTransCombinerXml(String)}.
   *
   * @param transCombinerXml the combiner transformation XML
   */
  public void setCombinerMapXml( String transCombinerXml ) {
    setTransCombinerXml( transCombinerXml );
  }

  /** @return the reduce transformation XML */
  public String getTransReduceXml() {
    return transReduceXml;
  }

  /** @param transReduceXml the reduce transformation XML */
  public void setTransReduceXml( String transReduceXml ) {
    this.transReduceXml = transReduceXml;
  }

  /** @return the name of the map input step */
  public String getMapInputStepName() {
    return mapInputStepName;
  }

  /** @param mapInputStepName the name of the map input step */
  public void setMapInputStepName( String mapInputStepName ) {
    this.mapInputStepName = mapInputStepName;
  }

  /** @return the name of the map output step */
  public String getMapOutputStepName() {
    return mapOutputStepName;
  }

  /** @param mapOutputStepName the name of the map output step */
  public void setMapOutputStepName( String mapOutputStepName ) {
    this.mapOutputStepName = mapOutputStepName;
  }

  /** @return the name of the combiner input step */
  public String getCombinerInputStepName() {
    return combinerInputStepName;
  }

  /** @param combinerInputStepName the name of the combiner input step */
  public void setCombinerInputStepName( String combinerInputStepName ) {
    this.combinerInputStepName = combinerInputStepName;
  }

  /** @return the name of the combiner output step */
  public String getCombinerOutputStepName() {
    return combinerOutputStepName;
  }

  /** @param combinerOutputStepName the name of the combiner output step */
  public void setCombinerOutputStepName( String combinerOutputStepName ) {
    this.combinerOutputStepName = combinerOutputStepName;
  }

  /** @return the name of the reduce input step */
  public String getReduceInputStepName() {
    return reduceInputStepName;
  }

  /** @param reduceInputStepName the name of the reduce input step */
  public void setReduceInputStepName( String reduceInputStepName ) {
    this.reduceInputStepName = reduceInputStepName;
  }

  /** @return the name of the reduce output step */
  public String getReduceOutputStepName() {
    return reduceOutputStepName;
  }

  /** @param reduceOutputStepName the name of the reduce output step */
  public void setReduceOutputStepName( String reduceOutputStepName ) {
    this.reduceOutputStepName = reduceOutputStepName;
  }

  /** @return the output key class */
  public Class<?> getOutClassK() {
    return outClassK;
  }

  /** @param outClassK the output key class */
  public void setOutClassK( Class<K> outClassK ) {
    this.outClassK = outClassK;
  }

  /** @return the output value class */
  public Class<?> getOutClassV() {
    return outClassV;
  }

  /** @param outClassV the output value class */
  public void setOutClassV( Class<V> outClassV ) {
    this.outClassV = outClassV;
  }

  /** @return the transformation, or {@code null} if none has been created or set */
  public Trans getTrans() {
    return trans;
  }

  /** @param trans the transformation to use */
  public void setTrans( Trans trans ) {
    this.trans = trans;
  }

  /** @return the identifier of this instance */
  public String getId() {
    return id;
  }

  /** @param id the identifier of this instance */
  public void setId( String id ) {
    this.id = id;
  }

  /** @return the exception recorded by the row collector, or {@code null} if there is no row collector */
  public Exception getException() {
    return rowCollector != null ? rowCollector.getException() : null;
  }

  /**
   * When debug is enabled, prints the message to standard output and, if a reporter is supplied, sets it as
   * the reporter's status.
   *
   * @param reporter the Hadoop reporter; may be {@code null}, in which case only standard output is written
   * @param message  the message to publish
   */
  public void setDebugStatus( Reporter reporter, String message ) {
    if ( debug ) {
      System.out.println( message );
      if ( reporter != null ) {
        reporter.setStatus( message );
      }
    }
  }

  /** Prints the message to standard output when debug is enabled. */
  private void setDebugStatus( String message ) {
    if ( debug ) {
      System.out.println( message );
    }
  }
}