/*! ******************************************************************************
 *
 * Pentaho Big Data
 *
 * Copyright (C) 2002-2017 by Pentaho : http://www.pentaho.com
 *
 *******************************************************************************
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 ******************************************************************************/

package org.pentaho.hadoop.shim.common;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.pentaho.hadoop.shim.api.Configuration;
import org.pentaho.hadoop.shim.api.mapred.RunningJob;

import java.io.IOException;

/**
 * User: Dzmitry Stsiapanau Date: 7/22/14 Time: 11:59 AM
 */
public class ConfigurationProxyV2 implements Configuration {

  protected Job job;

  public ConfigurationProxyV2() throws IOException {
    job = Job.getInstance();
    addConfigsForJobConf();
  }

  @VisibleForTesting
  void addConfigsForJobConf() {
    job.getConfiguration().addResource( "hdfs-site.xml" );
    job.getConfiguration().addResource( "hive-site.xml" );
    job.getConfiguration().addResource( "hbase-site.xml" );
  }

  public JobConf getJobConf() {
    return (JobConf) job.getConfiguration();
  }

  public Job getJob() {
    return job;
  }

  /**
   * Sets the MapReduce job name.
   *
   * @param jobName Name of job
   */
  @Override
  public void setJobName( String jobName ) {
    getJob().setJobName( jobName );
  }

  /**
   * Sets the property {@code name}'s value to {@code value}.
   *
   * @param name  Name of property
   * @param value Value of property
   */
  @Override
  public void set( String name, String value ) {
    getJobConf().set( name, value );
  }

  /**
   * Look up the value of a property.
   *
   * @param name Name of property
   * @return Value of the property named {@code name}
   */
  @Override
  public String get( String name ) {
    return getJobConf().get( name );
  }

  /**
   * Look up the value of a property, optionally returning a default value if the property is not set.
   *
   * @param name         Name of property
   * @param defaultValue Value to return if the property is not set
   * @return Value of property named {@code name} or {@code defaultValue} if {@code name} is not set
   */
  @Override
  public String get( String name, String defaultValue ) {
    return getJobConf().get( name, defaultValue );
  }

  /**
   * Set the key class for the map output data.
   *
   * @param c the map output key class
   */
  @Override
  public void setMapOutputKeyClass( Class<?> c ) {
    getJob().setMapOutputKeyClass( c );
  }

  /**
   * Set the value class for the map output data.
   *
   * @param c the map output value class
   */
  @Override
  public void setMapOutputValueClass( Class<?> c ) {
    getJob().setMapOutputValueClass( c );
  }
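  /**
   * Sets the mapper class for the job, dispatching on whether {@code c} implements the old
   * {@code org.apache.hadoop.mapred.Mapper} interface or extends the new
   * {@code org.apache.hadoop.mapreduce.Mapper} class. Old-API mappers additionally force
   * {@code mapred.mapper.new-api} to {@code false}.
   *
   * @param c the mapper class, from either the old or the new MapReduce API
   */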
  @SuppressWarnings( "unchecked" )
  @Override
  public void setMapperClass( Class<?> c ) {
    if ( org.apache.hadoop.mapred.Mapper.class.isAssignableFrom( c ) ) {
      setUseOldMapApi();
      getJobConf().setMapperClass( (Class<? extends org.apache.hadoop.mapred.Mapper>) c );
    } else if ( org.apache.hadoop.mapreduce.Mapper.class.isAssignableFrom( c ) ) {
      getJob().setMapperClass( (Class<? extends org.apache.hadoop.mapreduce.Mapper>) c );
    }
  }

  private void setUseOldMapApi() {
    set( "mapred.mapper.new-api", "false" );
  }

  @SuppressWarnings( "unchecked" )
  @Override
  public void setCombinerClass( Class<?> c ) {
    if ( org.apache.hadoop.mapred.Reducer.class.isAssignableFrom( c ) ) {
      setUseOldRedApi();
      getJobConf().setCombinerClass( (Class<? extends org.apache.hadoop.mapred.Reducer>) c );
    } else if ( org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom( c ) ) {
      getJob().setCombinerClass( (Class<? extends org.apache.hadoop.mapreduce.Reducer>) c );
    }
  }

  private void setUseOldRedApi() {
    set( "mapred.reducer.new-api", "false" );
  }

  @SuppressWarnings( "unchecked" )
  @Override
  public void setReducerClass( Class<?> c ) {
    if ( org.apache.hadoop.mapred.Reducer.class.isAssignableFrom( c ) ) {
      setUseOldRedApi();
      getJobConf().setReducerClass( (Class<? extends org.apache.hadoop.mapred.Reducer>) c );
    } else if ( org.apache.hadoop.mapreduce.Reducer.class.isAssignableFrom( c ) ) {
      getJob().setReducerClass( (Class<? extends org.apache.hadoop.mapreduce.Reducer>) c );
    }
  }

  @Override
  public void setOutputKeyClass( Class<?> c ) {
    getJob().setOutputKeyClass( c );
  }

  @Override
  public void setOutputValueClass( Class<?> c ) {
    getJob().setOutputValueClass( c );
  }

  @SuppressWarnings( "unchecked" )
  @Override
  public void setMapRunnerClass( Class<?> c ) {
    if ( org.apache.hadoop.mapred.MapRunnable.class.isAssignableFrom( c ) ) {
      getJobConf().setMapRunnerClass( (Class<? extends org.apache.hadoop.mapred.MapRunnable>) c );
    }
  }

  @SuppressWarnings( "unchecked" )
  @Override
  public void setInputFormat( Class<?> inputFormat ) {
    if ( org.apache.hadoop.mapred.InputFormat.class.isAssignableFrom( inputFormat ) ) {
      setUseOldMapApi();
      getJobConf().setInputFormat( (Class<? extends org.apache.hadoop.mapred.InputFormat>) inputFormat );
    } else if ( org.apache.hadoop.mapreduce.InputFormat.class.isAssignableFrom( inputFormat ) ) {
      getJob().setInputFormatClass( (Class<? extends org.apache.hadoop.mapreduce.InputFormat>) inputFormat );
    }
  }

  @SuppressWarnings( "unchecked" )
  @Override
  public void setOutputFormat( Class<?> outputFormat ) {
    if ( org.apache.hadoop.mapred.OutputFormat.class.isAssignableFrom( outputFormat ) ) {
      setUseOldRedApi();
      // In a map-only job (or when an old-API partitioner is set), the map phase writes the job
      // output itself, so an old-API output format requires the old map API as well
      if ( getJobConf().getNumReduceTasks() == 0 || get( "mapred.partitioner.class" ) != null ) {
        setUseOldMapApi();
      }
      getJobConf().setOutputFormat( (Class<? extends org.apache.hadoop.mapred.OutputFormat>) outputFormat );
    } else if ( org.apache.hadoop.mapreduce.OutputFormat.class.isAssignableFrom( outputFormat ) ) {
      getJob().setOutputFormatClass( (Class<? extends org.apache.hadoop.mapreduce.OutputFormat>) outputFormat );
    }
  }
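  /**
   * Sets the input paths for the job, converting each shim path to a Hadoop {@code Path} first.
   * A {@code null} argument is ignored; an {@code IOException} raised by Hadoop is logged to
   * stderr rather than propagated, since this interface method declares no checked exceptions.
   *
   * @param paths the input paths for the job, or {@code null}
   */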
  @Override
  public void setInputPaths( org.pentaho.hadoop.shim.api.fs.Path... paths ) {
    if ( paths == null ) {
      return;
    }
    Path[] actualPaths = new Path[ paths.length ];
    for ( int i = 0; i < paths.length; i++ ) {
      actualPaths[ i ] = ShimUtils.asPath( paths[ i ] );
    }
    try {
      FileInputFormat.setInputPaths( getJob(), actualPaths );
    } catch ( IOException e ) {
      e.printStackTrace();
    }
  }

  @Override
  public void setOutputPath( org.pentaho.hadoop.shim.api.fs.Path path ) {
    FileOutputFormat.setOutputPath( getJob(), ShimUtils.asPath( path ) );
  }

  @Override
  public void setJarByClass( Class<?> c ) {
    getJob().setJarByClass( c );
  }

  @Override
  public void setJar( String url ) {
    getJob().setJar( url );
  }

  /**
   * Provide a hint to Hadoop for the number of map tasks to start for the MapReduce job submitted with this
   * configuration.
   *
   * @param n the number of map tasks for this job
   */
  @Override
  public void setNumMapTasks( int n ) {
    getJobConf().setNumMapTasks( n );
  }

  /**
   * Sets the requisite number of reduce tasks for the MapReduce job submitted with this configuration.
   * <p>If {@code n} is {@code 0} there will not be a reduce (or sort/shuffle) phase and the output of the
   * map tasks will be written directly to the distributed file system under the path specified via
   * {@link #setOutputPath(org.pentaho.hadoop.shim.api.fs.Path)}.</p>
   *
   * @param n the number of reduce tasks required for this job
   */
  @Override
  public void setNumReduceTasks( int n ) {
    getJob().setNumReduceTasks( n );
  }

  /**
   * Set the array of string values for the <code>name</code> property as comma-delimited values.
   *
   * @param name   property name
   * @param values the values
   */
  @Override
  public void setStrings( String name, String... values ) {
    getJobConf().setStrings( name, values );
  }

  /**
   * Get the default file system URL as stored in this configuration.
   *
   * @return the default URL if it was set, otherwise empty string
   */
  @Override
  public String getDefaultFileSystemURL() {
    // "fs.default.name" is the legacy key for "fs.defaultFS"; Hadoop still resolves it
    // through its deprecation handling
    return get( "fs.default.name", "" );
  }

  /**
   * Hack: return this configuration as an instance of the requested delegate class, if possible.
   *
   * @param delegate class of desired return object
   * @return this configuration as the delegate type, or {@code null} if the conversion is not possible
   */
  @SuppressWarnings( "unchecked" )
  @Override
  public <T> T getAsDelegateConf( Class<T> delegate ) {
    if ( delegate.isAssignableFrom( this.getClass() ) ) {
      return (T) this;
    } else if ( delegate.isAssignableFrom( JobConf.class ) ) {
      return (T) getJobConf();
    } else {
      return null;
    }
  }

  /**
   * Submit the job defined by this configuration.
   *
   * @return RunningJob implementation wrapping the submitted job
   */
  @Override
  public RunningJob submit() throws IOException, ClassNotFoundException, InterruptedException {
    getJob().submit();
    return new RunningJobProxyV2( getJob() );
  }
}
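/*
 * Minimal usage sketch (illustrative only; MyMapper, MyReducer, and the key/value classes
 * shown are hypothetical placeholders, not part of this shim):
 *
 *   ConfigurationProxyV2 conf = new ConfigurationProxyV2();
 *   conf.setJobName( "example-job" );
 *   conf.setMapperClass( MyMapper.class );    // old- or new-API class, dispatched automatically
 *   conf.setReducerClass( MyReducer.class );
 *   conf.setMapOutputKeyClass( org.apache.hadoop.io.Text.class );
 *   conf.setMapOutputValueClass( org.apache.hadoop.io.IntWritable.class );
 *   RunningJob running = conf.submit();
 */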