/**
 *  Copyright 2011 LiveRamp
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package com.liveramp.hank.hadoop;

import java.io.IOException;

import org.apache.commons.lang.NotImplementedException;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.liveramp.hank.config.CoordinatorConfigurator;
import com.liveramp.hank.config.InvalidConfigurationException;
import com.liveramp.hank.config.yaml.YamlCoordinatorConfigurator;
import com.liveramp.hank.util.CommandLineChecker;

/**
 * Domain builder whose job reads its input from a file path using a
 * caller-supplied {@link InputFormat} and {@link Mapper}.
 *
 * <p>{@link #configureJob(JobConf)} wires the input, mapper, reducer and
 * partitioner onto the job configuration. {@link #main(String[])} is a
 * command-line entry point that prepares such a builder.
 */
public class HadoopDomainBuilder extends AbstractHadoopDomainBuilder {

  private static final Logger LOG = LoggerFactory.getLogger(HadoopDomainBuilder.class);

  /** Path handed to {@link FileInputFormat#setInputPaths(JobConf, String)}. */
  private final String inputPath;
  /** Input format installed on the job configuration. */
  private final Class<? extends InputFormat> inputFormatClass;
  /** Mapper installed on the job configuration. */
  private final Class<? extends Mapper> mapperClass;

  /**
   * Creates a builder through the superclass's no-argument constructor.
   *
   * @param inputPath        input path for the job
   * @param inputFormatClass input format used to read {@code inputPath}
   * @param mapperClass      mapper class for the job
   */
  public HadoopDomainBuilder(final String inputPath,
                             final Class<? extends InputFormat> inputFormatClass,
                             final Class<? extends Mapper> mapperClass) {
    this.mapperClass = mapperClass;
    this.inputFormatClass = inputFormatClass;
    this.inputPath = inputPath;
  }

  /**
   * Creates a builder, forwarding {@code conf} to the superclass constructor.
   *
   * @param conf             job configuration passed to the superclass
   * @param inputPath        input path for the job
   * @param inputFormatClass input format used to read {@code inputPath}
   * @param mapperClass      mapper class for the job
   */
  public HadoopDomainBuilder(JobConf conf,
                             final String inputPath,
                             final Class<? extends InputFormat> inputFormatClass,
                             final Class<? extends Mapper> mapperClass) {
    super(conf);
    this.mapperClass = mapperClass;
    this.inputFormatClass = inputFormatClass;
    this.inputPath = inputPath;
  }

  /**
   * Applies this builder's input, map, reduce and partitioning settings to
   * the given job configuration. No output format is set here.
   *
   * @param conf job configuration to populate
   */
  @Override
  protected void configureJob(JobConf conf) {
    // Where the job reads from, and how.
    conf.setInputFormat(inputFormatClass);
    FileInputFormat.setInputPaths(conf, inputPath);

    // Map side: mapper plus the types it emits.
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(KeyAndPartitionWritableComparable.class);
    conf.setMapOutputValueClass(ValueWritable.class);

    // Reduce side: reducer plus the types it emits.
    conf.setReducerClass(DomainBuilderReducer.class);
    conf.setOutputKeyClass(KeyAndPartitionWritable.class);
    conf.setOutputValueClass(ValueWritable.class);

    // How map output is distributed across reducers.
    conf.setPartitionerClass(DomainBuilderPartitioner.class);
  }

  /**
   * Command-line entry point.
   *
   * <p>Expected arguments, in order: domain name, coordinator config path,
   * job jar, input path, output path. The arguments are handed to
   * {@link CommandLineChecker} first (presumably to validate them; its exact
   * behavior is not visible from this file).
   *
   * <p>The build itself is not implemented yet: after the properties, job
   * configuration and builder have been created, this method always throws.
   *
   * @param args command-line arguments as described above
   * @throws IOException                   declared; the call that raises it is not visible here
   * @throws InvalidConfigurationException declared; the call that raises it is not visible here
   * @throws NotImplementedException       always, once setup has completed
   */
  public static void main(String[] args) throws IOException, InvalidConfigurationException {
    final String[] expectedArguments =
        {"domain name", "config path", "jobjar", "input path", "output_path"};
    CommandLineChecker.check(args, expectedArguments, HadoopDomainBuilder.class);

    // Arguments are consumed strictly in positional order.
    final String targetDomain = args[0];
    final CoordinatorConfigurator coordinatorConfig = new YamlCoordinatorConfigurator(args[1]);
    final String jarPath = args[2];
    final String sourcePath = args[3];
    final String destinationPath = args[4];

    // Not consumed yet; intended as the first argument of buildHankDomain (see TODO below).
    final DomainBuilderProperties builderProperties =
        new DomainBuilderProperties(targetDomain, coordinatorConfig).setOutputPath(destinationPath);

    final JobConf jobConf = new JobConf();
    jobConf.setJar(jarPath);
    final String jobName = HadoopDomainBuilder.class.getSimpleName()
        + " Domain " + targetDomain
        + ", Output path: " + destinationPath;
    jobConf.setJobName(jobName);

    // Not consumed yet either; it is the object buildHankDomain will be invoked on.
    final HadoopDomainBuilder domainBuilder = new HadoopDomainBuilder(
        jobConf,
        sourcePath,
        SequenceFileInputFormat.class,
        DomainBuilderMapperDefault.class);

    final String announcement = "Building Hank domain " + targetDomain
        + " from input " + sourcePath
        + " and coordinator configuration " + coordinatorConfig;
    LOG.info(announcement);

    // TODO: Create DomainVersionProperties, then replace the throw with:
    //   domainBuilder.buildHankDomain(builderProperties, <domain version properties>);
    throw new NotImplementedException("TODO: Create DomainVersionProperties");
  }
}