/******************************************************************************* * * Pentaho Big Data * * Copyright (C) 2002-2016 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ package org.pentaho.hadoop.mapreduce; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapred.JobConf; import org.pentaho.di.core.Const; import org.pentaho.di.core.KettleEnvironment; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.logging.LoggingObjectType; import org.pentaho.di.core.logging.SimpleLoggingObject; import org.pentaho.di.core.variables.VariableSpace; import org.pentaho.di.i18n.BaseMessages; import org.pentaho.di.trans.Trans; import org.pentaho.di.trans.TransConfiguration; import org.pentaho.di.trans.TransExecutionConfiguration; import org.pentaho.di.trans.TransMeta; import org.pentaho.di.trans.TransMeta.TransformationType; import java.io.FileOutputStream; import java.io.PrintWriter; import java.io.StringWriter; import java.util.UUID; public class MRUtil { /** * Path to the directory to load plugins from. This must be accessible from all TaskTracker nodes. */ public static final String PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR = "pentaho.kettle.plugins.dir"; /** * Hadoop Configuration for setting KETTLE_HOME. See {@link Const#getKettleDirectory()} for usage. */ public static final String PROPERTY_PENTAHO_KETTLE_HOME = "pentaho.kettle.home"; public static Trans getTrans( final Configuration conf, final String transXml, boolean singleThreaded ) throws KettleException { initKettleEnvironment( conf ); TransConfiguration transConfiguration = TransConfiguration.fromXML( transXml ); TransMeta transMeta = transConfiguration.getTransMeta(); String carteObjectId = UUID.randomUUID().toString(); SimpleLoggingObject servletLoggingObject = new SimpleLoggingObject( "HADOOP_MAPPER", LoggingObjectType.CARTE, null ); //$NON-NLS-1$ servletLoggingObject.setContainerObjectId( carteObjectId ); TransExecutionConfiguration executionConfiguration = transConfiguration.getTransExecutionConfiguration(); servletLoggingObject.setLogLevel( executionConfiguration.getLogLevel() ); if ( singleThreaded ) { // Set the type to single threaded in case the user forgot... // transMeta.setTransformationType( TransformationType.SingleThreaded ); // Disable thread priority management as it will slow things down needlessly. // The single threaded engine doesn't use threads and doesn't need row locking. // transMeta.setUsingThreadPriorityManagment( false ); } else { transMeta.setTransformationType( TransformationType.Normal ); } return new Trans( transMeta, servletLoggingObject ); } /** * Initialize the Kettle environment with settings from the provided configuration * * @param conf * Configuration to configure Kettle environment with */ private static void initKettleEnvironment( Configuration conf ) throws KettleException { if ( !KettleEnvironment.isInitialized() ) { String kettleHome = getKettleHomeProperty( conf ); String pluginDir = getPluginDirProperty( conf ); System.setProperty( "KETTLE_HOME", kettleHome ); System.setProperty( Const.PLUGIN_BASE_FOLDERS_PROP, pluginDir ); System.out.println( BaseMessages.getString( MRUtil.class, "KettleHome.Info", kettleHome ) ); System.out.println( BaseMessages.getString( MRUtil.class, "PluginDirectory.Info", pluginDir ) ); KettleEnvironment.init(); } } public static void passInformationToTransformation( final VariableSpace variableSpace, final JobConf job ) { if ( variableSpace != null && job != null ) { variableSpace.setVariable( "Internal.Hadoop.NumMapTasks", Integer.toString( job.getNumMapTasks() ) ); variableSpace.setVariable( "Internal.Hadoop.NumReduceTasks", Integer.toString( job.getNumReduceTasks() ) ); String taskId = job.get( "mapred.task.id" ); variableSpace.setVariable( "Internal.Hadoop.TaskId", taskId ); // TODO: Verify if the string range holds true for all Hadoop distributions // Extract the node number from the task ID. // The consensus currently is that it's the part after the last underscore. // // Examples: // job_201208090841_9999 // job_201208090841_10000 // String nodeNumber; if ( Const.isEmpty( taskId ) ) { nodeNumber = "0"; } else { int lastUnderscoreIndex = taskId.lastIndexOf( "_" ); if ( lastUnderscoreIndex >= 0 ) { nodeNumber = taskId.substring( lastUnderscoreIndex + 1 ); } else { nodeNumber = "0"; } } // get rid of zeroes. // variableSpace.setVariable( "Internal.Hadoop.NodeNumber", Integer.toString( Integer.valueOf( nodeNumber ) ) ); } } /** * @return the current working directory for this JVM. */ public static String getWorkingDir() { return System.getProperty( "user.dir" ); } /** * Determines the Kettle Home property to use for this invocation from the configuration provided. If it is not set * the current working directory will be used. * * @param conf * Configuration to check for Kettle Home to be set in. * @return The Kettle Home directory to use */ public static String getKettleHomeProperty( Configuration conf ) { String kettleHome = conf.get( PROPERTY_PENTAHO_KETTLE_HOME ); if ( StringUtils.isEmpty( kettleHome ) ) { return getWorkingDir(); } return kettleHome; } /** * Builds a comma-separated list of paths to load Kettle plugins from. To be used as the value for the System property * {@link Const.PLUGIN_BASE_FOLDERS_PROP}. * * @param conf * Configuration to retrieve properties from * @return Comma-separated list of paths to look for Kettle plugins in */ public static String getPluginDirProperty( final Configuration conf ) throws KettleException { // Load plugins from the directory specified in the configuration String kettlePluginDir = conf.get( PROPERTY_PENTAHO_KETTLE_PLUGINS_DIR ); if ( StringUtils.isEmpty( kettlePluginDir ) ) { kettlePluginDir = getDefaultPluginDirs(); } return kettlePluginDir; } /** * Returns a comma-separated list of default paths to load Kettle plugins from. * * @return */ private static final String getDefaultPluginDirs() { return new StringBuilder().append( Const.DEFAULT_PLUGIN_BASE_FOLDERS ).append( "," ).append( getWorkingDir() ) .append( Const.FILE_SEPARATOR ).append( "plugins" ).toString(); } /** * Create a copy of {@code trans} */ public static Trans recreateTrans( Trans trans ) { return new Trans( trans.getTransMeta(), trans.getParent() ); } public static String getStackTrace( Throwable t ) { StringWriter stringWritter = new StringWriter(); PrintWriter printWritter = new PrintWriter( stringWritter, true ); t.printStackTrace( printWritter ); printWritter.flush(); stringWritter.flush(); return stringWritter.toString(); } public static void logMessage( String message ) { logMessage( Thread.currentThread().hashCode(), message ); } public static void logMessage( Throwable t ) { logMessage( Thread.currentThread().hashCode(), getStackTrace( t ) ); } public static void logMessage( String message, Throwable t ) { logMessage( Thread.currentThread().hashCode(), message ); logMessage( Thread.currentThread().hashCode(), getStackTrace( t ) ); } public static void logMessage( int id, String message ) { logMessage( new Integer( id ).toString(), message ); } public static void logMessage( int id, Throwable t ) { logMessage( new Integer( id ).toString(), getStackTrace( t ) ); } public static void logMessage( int id, String message, Throwable t ) { logMessage( new Integer( id ).toString(), message ); logMessage( new Integer( id ).toString(), getStackTrace( t ) ); } public static void logMessage( String id, String message ) { try { FileOutputStream fos = new FileOutputStream( "/tmp/PDIMapReduce.log", true ); //$NON-NLS-1$ if ( id != null ) { fos.write( ( id + ": " ).getBytes() ); //$NON-NLS-1$ } fos.write( message.getBytes() ); fos.write( System.getProperty( "line.separator" ).getBytes() ); //$NON-NLS-1$ fos.close(); } catch ( Throwable t ) { // ignore } } }