/*******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.mapreduce;

import com.thoughtworks.xstream.XStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Reporter;
import org.pentaho.di.core.Const;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.logging.LogLevel;
import org.pentaho.di.core.row.RowMeta;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.variables.VariableSpace;
import org.pentaho.di.core.variables.Variables;
import org.pentaho.di.trans.RowProducer;
import org.pentaho.di.trans.Trans;
import org.pentaho.hadoop.mapreduce.converter.spi.ITypeConverter;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.UUID;

import static org.pentaho.hadoop.shim.api.Configuration.STRING_COMBINE_SINGLE_THREADED;
import static org.pentaho.hadoop.shim.api.Configuration.STRING_REDUCE_SINGLE_THREADED;

/**
 * Base class for Pentaho MapReduce combiner/reducer implementations that run a PDI (Kettle)
 * transformation inside a Hadoop task. It reads the transformation XML and step names from the
 * {@link JobConf}, restores the PDI job's variable space, and instantiates the transformation
 * for the configured {@link MROperations operation}.
 * <p>
 * Subclasses must call {@link #setMRType(MROperations)} from their constructor so that
 * {@link #configure(JobConf)} knows which output key/value classes to resolve.
 */
@SuppressWarnings( "deprecation" )
public class PentahoMapReduceBase<K, V> extends MapReduceBase {

  /** Hadoop counters incremented as records flow through the wrapped transformation. */
  protected static enum Counter {
    INPUT_RECORDS, OUTPUT_RECORDS, OUT_RECORD_WITH_NULL_KEY, OUT_RECORD_WITH_NULL_VALUE
  }

  // Job-configuration entries with this prefix are exported as JVM system properties
  // with the prefix stripped (e.g. "java.system.foo" -> property "foo").
  private final String ENVIRONMENT_VARIABLE_PREFIX = "java.system.";
  // Kettle variables ("KETTLE_*") are exported as system properties verbatim.
  private final String KETTLE_VARIABLE_PREFIX = "KETTLE_";

  protected String transMapXml;
  protected String transCombinerXml;
  protected String transReduceXml;

  protected String mapInputStepName;
  protected String combinerInputStepName;
  protected String reduceInputStepName;

  protected String mapOutputStepName;
  protected String combinerOutputStepName;
  protected String reduceOutputStepName;

  // The PDI job's variable space, deserialized from the job configuration (or empty if absent).
  protected VariableSpace variableSpace = null;

  protected Class<K> outClassK;
  protected Class<V> outClassV;

  // Unique identifier for this task instance.
  protected String id = UUID.randomUUID().toString();

  protected boolean debug = false;
  protected LogLevel logLevel;

  // The transformation that will be used as a mapper or reducer.
  protected Trans trans;

  /** One of these is what {@link #trans} is to be used as. */
  public static enum MROperations {
    Map, Combine, Reduce
  }

  // We set this to what this object is being used for - map, combine or reduce.
  protected MROperations mrOperation;

  protected OutputCollectorRowListener<K, V> rowCollector;
  protected boolean combineSingleThreaded;
  protected boolean reduceSingleThreaded;

  public PentahoMapReduceBase() throws KettleException {
  }

  /**
   * Reads the transformation XML, step names, variable space and log level from the job
   * configuration, exports PDI/environment variables as system properties, resolves the output
   * key/value classes for the configured operation, and creates the transformation.
   *
   * @param job the Hadoop job configuration for this task
   */
  @SuppressWarnings( "unchecked" )
  @Override
  public void configure( JobConf job ) {
    super.configure( job );

    debug = "true".equalsIgnoreCase( job.get( "debug" ) ); //$NON-NLS-1$

    transMapXml = job.get( "transformation-map-xml" );
    transCombinerXml = job.get( "transformation-combiner-xml" );
    transReduceXml = job.get( "transformation-reduce-xml" );
    mapInputStepName = job.get( "transformation-map-input-stepname" );
    mapOutputStepName = job.get( "transformation-map-output-stepname" );
    combinerInputStepName = job.get( "transformation-combiner-input-stepname" );
    combinerOutputStepName = job.get( "transformation-combiner-output-stepname" );
    combineSingleThreaded = isCombinerSingleThreaded( job );
    reduceInputStepName = job.get( "transformation-reduce-input-stepname" );
    reduceOutputStepName = job.get( "transformation-reduce-output-stepname" );
    reduceSingleThreaded = isReducerSingleThreaded( job );

    String xmlVariableSpace = job.get( "variableSpace" );
    if ( !Const.isEmpty( xmlVariableSpace ) ) {
      setDebugStatus( "PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: " );

      // Deserialize the PDI job's variable space from XML. (new XStream() can never be null,
      // so no null check is needed on the instance.)
      XStream xStream = new XStream();
      setDebugStatus( "PentahoMapReduceBase: Setting classes variableSpace property.: " );
      variableSpace = (VariableSpace) xStream.fromXML( xmlVariableSpace );

      // Propagate Kettle variables to the task JVM as system properties.
      for ( String variableName : variableSpace.listVariables() ) {
        if ( variableName.startsWith( KETTLE_VARIABLE_PREFIX ) ) {
          String variableValue = variableSpace.getVariable( variableName );
          // System.setProperty throws NullPointerException on a null value; skip unset variables.
          if ( variableValue != null ) {
            System.setProperty( variableName, variableValue );
          }
        }
      }
    } else {
      setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration." );
      variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables.
    Iterator<Entry<String, String>> iter = job.iterator();
    while ( iter.hasNext() ) {
      Entry<String, String> entry = iter.next();
      if ( entry.getKey().startsWith( ENVIRONMENT_VARIABLE_PREFIX ) ) {
        System.setProperty( entry.getKey().substring( ENVIRONMENT_VARIABLE_PREFIX.length() ), entry.getValue() );
      } else if ( entry.getKey().startsWith( KETTLE_VARIABLE_PREFIX ) ) {
        System.setProperty( entry.getKey(), entry.getValue() );
      }
    }

    MRUtil.passInformationToTransformation( variableSpace, job );

    // Resolve output key/value classes. Only Combine and Reduce are supported here;
    // the map side is handled by a separate runnable.
    switch ( mrOperation ) {
      case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
      case Reduce:
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
      default:
        throw new IllegalArgumentException( "Unsupported MapReduce operation: " + mrOperation );
    }

    if ( debug ) {
      System.out.println( "Job configuration>" );
      System.out.println( "Output key class: " + outClassK.getName() );
      System.out.println( "Output value class: " + outClassV.getName() );
    }

    // Set the log level to what the level of the job is.
    String stringLogLevel = job.get( "logLevel" );
    if ( !Const.isEmpty( stringLogLevel ) ) {
      logLevel = LogLevel.valueOf( stringLogLevel );
      setDebugStatus( "Log level set to " + stringLogLevel );
    } else {
      System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set." );
    }

    createTrans( job );
  }

  @Override
  public void close() throws IOException {
    super.close();
  }

  /**
   * @deprecated Use the other injectValue method - The parameters have been arranged to be more uniform
   */
  @Deprecated
  public void injectValue( Object key, ITypeConverter inConverterK, ITypeConverter inConverterV,
                           RowMeta injectorRowMeta, RowProducer rowProducer, Object value, Reporter reporter )
    throws Exception {
    injectValue( key, inConverterK, value, inConverterV, injectorRowMeta, rowProducer, reporter );
  }

  /**
   * Injects a key/value pair into the transformation using the default ordinals
   * (key at field 0, value at field 1).
   */
  public void injectValue( Object key, ITypeConverter inConverterK,
                           Object value, ITypeConverter inConverterV,
                           RowMetaInterface injectorRowMeta, RowProducer rowProducer, Reporter reporter )
    throws Exception {
    injectValue( key, 0, inConverterK, value, 1, inConverterV, injectorRowMeta, rowProducer, reporter );
  }

  /**
   * Converts the key and value (when converters are supplied) and injects them as a single row
   * into the transformation via the row producer.
   *
   * @param key             the record key
   * @param keyOrdinal      index of the key field in the injector row
   * @param inConverterK    optional converter for the key; when {@code null} the key is used as-is
   * @param value           the record value
   * @param valueOrdinal    index of the value field in the injector row
   * @param inConverterV    optional converter for the value; when {@code null} the value is used as-is
   * @param injectorRowMeta metadata describing the injector step's row layout
   * @param rowProducer     producer feeding rows into the transformation
   * @param reporter        Hadoop reporter used for debug status messages
   * @throws Exception if conversion fails
   */
  public void injectValue( Object key, int keyOrdinal, ITypeConverter inConverterK,
                           Object value, int valueOrdinal, ITypeConverter inConverterV,
                           RowMetaInterface injectorRowMeta, RowProducer rowProducer, Reporter reporter )
    throws Exception {

    Object[] row = new Object[ injectorRowMeta.size() ];
    row[ keyOrdinal ] = inConverterK != null
      ? inConverterK.convert( injectorRowMeta.getValueMeta( keyOrdinal ), key ) : key;
    row[ valueOrdinal ] = inConverterV != null
      ? inConverterV.convert( injectorRowMeta.getValueMeta( valueOrdinal ), value ) : value;

    if ( debug ) {
      setDebugStatus( reporter, "Injecting input record [" + row[ keyOrdinal ] + "] - [" + row[ valueOrdinal ] + "]" );
    }

    rowProducer.putRow( injectorRowMeta, row );
  }

  /**
   * Creates the transformation for the configured operation from the corresponding XML.
   *
   * @param conf the job configuration used to resolve single-threaded flags and variables
   * @throws RuntimeException if {@link #setMRType(MROperations)} was never called, or if the
   *                          transformation fails to load
   */
  protected void createTrans( final Configuration conf ) {
    if ( mrOperation == null ) {
      throw new RuntimeException(
        "Map or reduce operation has not been specified. Call setMRType from implementing classes constructor." );
    }
    try {
      // Enums compare safely with switch/== rather than equals().
      switch ( mrOperation ) {
        case Map:
          setDebugStatus( "Creating a transformation for a map." );
          trans = MRUtil.getTrans( conf, transMapXml, false );
          break;
        case Combine:
          setDebugStatus( "Creating a transformation for a combiner." );
          trans = MRUtil.getTrans( conf, transCombinerXml, isCombinerSingleThreaded( conf ) );
          break;
        case Reduce:
          setDebugStatus( "Creating a transformation for a reduce." );
          trans = MRUtil.getTrans( conf, transReduceXml, isReducerSingleThreaded( conf ) );
          break;
      }
    } catch ( KettleException ke ) {
      throw new RuntimeException( "Error loading transformation for " + mrOperation, ke ); //$NON-NLS-1$
    }
  }

  private boolean isCombinerSingleThreaded( final Configuration conf ) {
    return "true".equalsIgnoreCase( conf.get( STRING_COMBINE_SINGLE_THREADED ) );
  }

  private boolean isReducerSingleThreaded( final Configuration conf ) {
    return "true".equalsIgnoreCase( conf.get( STRING_REDUCE_SINGLE_THREADED ) );
  }

  /**
   * Sets what this object is being used for. Must be called from an implementing class's
   * constructor, before {@link #configure(JobConf)} runs.
   */
  public void setMRType( MROperations mrOperation ) {
    this.mrOperation = mrOperation;
  }

  public String getTransMapXml() {
    return transMapXml;
  }

  public void setTransMapXml( String transMapXml ) {
    this.transMapXml = transMapXml;
  }

  public String getTransCombinerXml() {
    return transCombinerXml;
  }

  public void setCombinerMapXml( String transCombinerXml ) {
    this.transCombinerXml = transCombinerXml;
  }

  public String getTransReduceXml() {
    return transReduceXml;
  }

  public void setTransReduceXml( String transReduceXml ) {
    this.transReduceXml = transReduceXml;
  }

  public String getMapInputStepName() {
    return mapInputStepName;
  }

  public void setMapInputStepName( String mapInputStepName ) {
    this.mapInputStepName = mapInputStepName;
  }

  public String getMapOutputStepName() {
    return mapOutputStepName;
  }

  public void setMapOutputStepName( String mapOutputStepName ) {
    this.mapOutputStepName = mapOutputStepName;
  }

  public String getCombinerInputStepName() {
    return combinerInputStepName;
  }

  public void setCombinerInputStepName( String combinerInputStepName ) {
    this.combinerInputStepName = combinerInputStepName;
  }

  public String getCombinerOutputStepName() {
    return combinerOutputStepName;
  }

  public void setCombinerOutputStepName( String combinerOutputStepName ) {
    this.combinerOutputStepName = combinerOutputStepName;
  }

  public String getReduceInputStepName() {
    return reduceInputStepName;
  }

  public void setReduceInputStepName( String reduceInputStepName ) {
    this.reduceInputStepName = reduceInputStepName;
  }

  public String getReduceOutputStepName() {
    return reduceOutputStepName;
  }

  public void setReduceOutputStepName( String reduceOutputStepName ) {
    this.reduceOutputStepName = reduceOutputStepName;
  }

  public Class<?> getOutClassK() {
    return outClassK;
  }

  public void setOutClassK( Class<K> outClassK ) {
    this.outClassK = outClassK;
  }

  public Class<?> getOutClassV() {
    return outClassV;
  }

  public void setOutClassV( Class<V> outClassV ) {
    this.outClassV = outClassV;
  }

  public Trans getTrans() {
    return trans;
  }

  public void setTrans( Trans trans ) {
    this.trans = trans;
  }

  public String getId() {
    return id;
  }

  public void setId( String id ) {
    this.id = id;
  }

  /** Returns any exception captured by the output row collector, or {@code null}. */
  public Exception getException() {
    return rowCollector != null ? rowCollector.getException() : null;
  }

  /** Prints the message and reports it as the task status, but only when debug is enabled. */
  public void setDebugStatus( Reporter reporter, String message ) {
    if ( debug ) {
      System.out.println( message );
      reporter.setStatus( message );
    }
  }

  /** Prints the message to stdout, but only when debug is enabled. */
  private void setDebugStatus( String message ) {
    if ( debug ) {
      System.out.println( message );
    }
  }
}