/*! ******************************************************************************
*
* Pentaho Big Data
*
* Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
*
*******************************************************************************
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package org.pentaho.hadoop.shim.common;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;

import java.io.IOException;
/**
 * {@link Configuration} implementation backed by a MapReduce v2 {@link Job}. Setters that accept a class dispatch to
 * either the new ({@code org.apache.hadoop.mapreduce}) or the old ({@code org.apache.hadoop.mapred}) API, depending
 * on which interface the supplied class implements.
 *
 * User: Dzmitry Stsiapanau Date: 7/22/14 Time: 11:59 AM
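 *
 * <p>A minimal usage sketch (illustrative only; {@code MyMapper} and {@code MyReducer} are hypothetical new-API
 * classes):</p>
 * <pre>{@code
 * ConfigurationProxyV2 conf = new ConfigurationProxyV2();
 * conf.setJobName( "word-count" );
 * conf.setMapperClass( MyMapper.class );   // new-API class, routed to the Job
 * conf.setReducerClass( MyReducer.class ); // an old-API class would be routed to the JobConf instead
 * conf.setNumReduceTasks( 1 );
 * RunningJob running = conf.submit();
 * }</pre>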
*/
public class ConfigurationProxyV2 implements Configuration {
protected Job job;
public ConfigurationProxyV2() throws IOException {
job = Job.getInstance();
addConfigsForJobConf();
}
@VisibleForTesting
void addConfigsForJobConf() {
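    // Merge the HDFS, Hive, and HBase site configuration files (if present on the classpath) into the job's
    // configuration.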
job.getConfiguration().addResource( "hdfs-site.xml" );
job.getConfiguration().addResource( "hive-site.xml" );
job.getConfiguration().addResource( "hbase-site.xml" );
}
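
  /**
   * @return the job's configuration as an old-API {@link JobConf}; the MRv2 {@link Job} stores its configuration as
   *         a {@code JobConf} internally, so this cast is safe
   */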
public JobConf getJobConf() {
return (JobConf) job.getConfiguration();
}
public Job getJob() {
return job;
}
/**
* Sets the MapReduce job name.
*
* @param jobName Name of job
*/
@Override
public void setJobName( String jobName ) {
getJob().setJobName( jobName );
}
/**
* Sets the property {@code name}'s value to {@code value}.
*
* @param name Name of property
* @param value Value of property
*/
@Override
public void set( String name, String value ) {
getJobConf().set( name, value );
}
/**
* Look up the value of a property.
*
* @param name Name of property
* @return Value of the property named {@code name}
*/
@Override
public String get( String name ) {
return getJobConf().get( name );
}
/**
   * Look up the value of a property, returning a default value if the property is not set.
*
* @param name Name of property
* @param defaultValue Value to return if the property is not set
* @return Value of property named {@code name} or {@code defaultValue} if {@code name} is not set
*/
@Override
public String get( String name, String defaultValue ) {
return getJobConf().get( name, defaultValue );
}
/**
* Set the key class for the map output data.
*
* @param c the map output key class
*/
@Override
public void setMapOutputKeyClass( Class<?> c ) {
getJob().setMapOutputKeyClass( c );
}
/**
* Set the value class for the map output data.
*
* @param c the map output value class
*/
@Override
public void setMapOutputValueClass( Class<?> c ) {
getJob().setMapOutputValueClass( c );
}
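
  /**
   * Sets the mapper class, dispatching on the API it implements: an old-API {@code mapred.Mapper} is set on the
   * {@link JobConf} and forces {@code mapred.mapper.new-api=false}; a new-API {@code mapreduce.Mapper} is set on the
   * {@link Job}. Classes implementing neither interface are silently ignored.
   *
   * @param c the mapper class
   */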
@SuppressWarnings( "unchecked" )
@Override
public void setMapperClass( Class<?> c ) {
if ( org.apache.hadoop.mapred.Mapper.class.isAssignableFrom( c ) ) {
setUseOldMapApi();
getJobConf().setMapperClass( (Class<? extends org.apache.hadoop.mapred.Mapper>) c );
} else if ( org.apache.hadoop.mapreduce.Mapper.class.isAssignableFrom( c ) ) {
getJob().setMapperClass( (Class<? extends org.apache.hadoop.mapreduce.Mapper>) c );
}
}
private void setUseOldMapApi() {
set( "mapred.mapper.new-api", "false" );
}
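
  /**
   * Sets the combiner class using the same old/new API dispatch as {@link #setMapperClass(Class)}; combiners
   * implement the Reducer interface of their respective API.
   *
   * @param c the combiner class
   */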
@SuppressWarnings( "unchecked" )
@Override
public void setCombinerClass( Class<?> c ) {
if ( org.apache.hadoop.mapred.Reducer.class.isAssignableFrom( c ) ) {
setUseOldRedApi();
getJobConf().setCombinerClass( (Class<? extends org.apache.hadoop.mapred.Reducer>) c );
} else if ( org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom( c ) ) {
getJob().setCombinerClass( (Class<? extends org.apache.hadoop.mapreduce.Reducer>) c );
}
}
private void setUseOldRedApi() {
set( "mapred.reducer.new-api", "false" );
}
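
  /**
   * Sets the reducer class, forcing {@code mapred.reducer.new-api=false} when an old-API {@code mapred.Reducer} is
   * supplied.
   *
   * @param c the reducer class
   */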
@SuppressWarnings( "unchecked" )
@Override
public void setReducerClass( Class<?> c ) {
if ( org.apache.hadoop.mapred.Reducer.class.isAssignableFrom( c ) ) {
setUseOldRedApi();
getJobConf().setReducerClass( (Class<? extends org.apache.hadoop.mapred.Reducer>) c );
} else if ( org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom( c ) ) {
getJob().setReducerClass( (Class<? extends org.apache.hadoop.mapreduce.Reducer>) c );
}
}
@Override
public void setOutputKeyClass( Class<?> c ) {
getJob().setOutputKeyClass( c );
}
@Override
public void setOutputValueClass( Class<?> c ) {
getJob().setOutputValueClass( c );
}
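
  /**
   * Sets the map runner class. Only the old-API {@code mapred.MapRunnable} is supported; any other class is
   * silently ignored.
   *
   * @param c the map runner class
   */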
@SuppressWarnings( "unchecked" )
@Override
public void setMapRunnerClass( Class<?> c ) {
if ( org.apache.hadoop.mapred.MapRunnable.class.isAssignableFrom( c ) ) {
getJobConf().setMapRunnerClass( (Class<? extends org.apache.hadoop.mapred.MapRunnable>) c );
}
}
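
  /**
   * Sets the input format, dispatching on the API: an old-API {@code mapred.InputFormat} forces the old map API and
   * is set on the {@link JobConf}, while a new-API {@code mapreduce.InputFormat} is set on the {@link Job}.
   *
   * @param inputFormat the input format class
   */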
@SuppressWarnings( "unchecked" )
@Override
public void setInputFormat( Class<?> inputFormat ) {
if ( org.apache.hadoop.mapred.InputFormat.class.isAssignableFrom( inputFormat ) ) {
setUseOldMapApi();
getJobConf().setInputFormat( (Class<? extends org.apache.hadoop.mapred.InputFormat>) inputFormat );
} else if ( org.apache.hadoop.mapreduce.InputFormat.class.isAssignableFrom( inputFormat ) ) {
getJob().setInputFormatClass( (Class<? extends org.apache.hadoop.mapreduce.InputFormat>) inputFormat );
}
}
@SuppressWarnings( "unchecked" )
@Override
public void setOutputFormat( Class<?> outputFormat ) {
if ( org.apache.hadoop.mapred.OutputFormat.class.isAssignableFrom( outputFormat ) ) {
setUseOldRedApi();
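      // In a map-only job (no reducers), or when an old-API partitioner is configured, the map side writes
      // through this old-API OutputFormat, so the old map API must be used as well.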
if ( getJobConf().getNumReduceTasks() == 0 || get( "mapred.partitioner.class" ) != null ) {
setUseOldMapApi();
}
getJobConf().setOutputFormat( (Class<? extends org.apache.hadoop.mapred.OutputFormat>) outputFormat );
} else if ( org.apache.hadoop.mapreduce.OutputFormat.class.isAssignableFrom( outputFormat ) ) {
getJob().setOutputFormatClass( (Class<? extends org.apache.hadoop.mapreduce.OutputFormat>) outputFormat );
}
}
@Override
public void setInputPaths( org.pentaho.hadoop.shim.api.fs.Path... paths ) {
if ( paths == null ) {
return;
}
Path[] actualPaths = new Path[ paths.length ];
for ( int i = 0; i < paths.length; i++ ) {
actualPaths[ i ] = ShimUtils.asPath( paths[ i ] );
}
try {
FileInputFormat.setInputPaths( getJob(), actualPaths );
    } catch ( IOException e ) {
      // Fail fast instead of swallowing the error; the interface method does not declare IOException.
      throw new IllegalStateException( e );
    }
}
@Override
public void setOutputPath( org.pentaho.hadoop.shim.api.fs.Path path ) {
FileOutputFormat.setOutputPath( getJob(), ShimUtils.asPath( path ) );
}
@Override
public void setJarByClass( Class<?> c ) {
getJob().setJarByClass( c );
}
@Override
public void setJar( String url ) {
getJob().setJar( url );
}
/**
* Provide a hint to Hadoop for the number of map tasks to start for the MapReduce job submitted with this
* configuration.
*
* @param n the number of map tasks for this job
*/
@Override
public void setNumMapTasks( int n ) {
getJobConf().setNumMapTasks( n );
}
  /**
   * Sets the required number of reduce tasks for the MapReduce job submitted with this configuration. <p>If {@code n}
   * is zero there will not be a reduce (or sort/shuffle) phase, and the output of the map tasks will be written
   * directly to the distributed file system under the path specified via
   * {@link #setOutputPath(org.pentaho.hadoop.shim.api.fs.Path)}.</p>
   *
   * @param n the number of reduce tasks required for this job
   */
@Override
public void setNumReduceTasks( int n ) {
getJob().setNumReduceTasks( n );
}
/**
   * Set the array of string values for the <code>name</code> property as comma-delimited values.
*
* @param name property name.
* @param values The values
*/
@Override
public void setStrings( String name, String... values ) {
getJobConf().setStrings( name, values );
}
/**
* Get the default file system URL as stored in this configuration.
*
* @return the default URL if it was set, otherwise empty string
*/
@Override
public String getDefaultFileSystemURL() {
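    // "fs.default.name" is the legacy key; Hadoop's deprecation mapping resolves it against "fs.defaultFS" on
    // Hadoop 2+.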
return get( "fs.default.name", "" );
}
  /**
   * Hack: return this configuration as an instance of the requested delegate class, if possible.
   *
   * @param delegate the desired return type
   * @return this configuration (or its underlying {@link JobConf}) when it is assignable to {@code delegate},
   *         otherwise {@code null}
   */
  @SuppressWarnings( "unchecked" )
  @Override
  public <T> T getAsDelegateConf( Class<T> delegate ) {
if ( delegate.isAssignableFrom( this.getClass() ) ) {
return (T) this;
} else if ( delegate.isAssignableFrom( JobConf.class ) ) {
return (T) getJobConf();
} else {
return null;
}
}
  /**
   * Submits the job defined by this configuration.
   *
   * @return a {@link RunningJob} wrapping the submitted job
   */
  @Override
  public RunningJob submit() throws IOException, ClassNotFoundException, InterruptedException {
getJob().submit();
return new RunningJobProxyV2( getJob() );
}
}