package de.l3s.common.features.hadoop;
/*
* TIMETool - Large-scale Temporal Search in MapReduce
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
/*
* THIS SOFTWARE IS PROVIDED BY THE LEMUR PROJECT AS PART OF THE CLUEWEB09
* PROJECT AND OTHER CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
* NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @author
*/
import java.net.URI;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import de.l3s.common.hadoop.WholeFileInputFormat;
import de.l3s.common.models.timeseries.Timeseries;
public class TimeSeriesJob extends Configured implements Tool{
private static final String JOB_NAME = "name";
private static final String INPUT_OPT = "in";
private static final String OUTPUT_OPT = "out";
private static final String REDUCE_NO = "reduce";
private static final String REMOVE_OUTPUT = "rmo";
private static final String COMPRESS_OPT = "compress";
private static final int DEFAULT_REDUCER_NO = 24;
@Override
public int run(String[] args) throws Exception {
Options opts = new Options();
Option jnameOpt = OptionBuilder.withArgName("job-name").hasArg(true)
.withDescription("Timeseries analysis")
.create(JOB_NAME);
Option inputOpt = OptionBuilder.withArgName("input-path").hasArg(true)
.withDescription("Timeseries file path (required)")
.create(INPUT_OPT);
Option outputOpt = OptionBuilder.withArgName("output-path").hasArg(true)
.withDescription("output file path (required)")
.create(OUTPUT_OPT);
Option reduceOpt = OptionBuilder.withArgName("reduce-no").hasArg(true)
.withDescription("number of reducer nodes").create(REDUCE_NO);
Option rmOpt = OptionBuilder.withArgName("remove-out").hasArg(false)
.withDescription("remove the output then create again before writing files onto it")
.create(REMOVE_OUTPUT);
Option cOpt = OptionBuilder.withArgName("compress-option").hasArg(true)
.withDescription("compression option").create(COMPRESS_OPT);
opts.addOption(jnameOpt);
opts.addOption(inputOpt);
opts.addOption(reduceOpt);
opts.addOption(outputOpt);
opts.addOption(rmOpt);
opts.addOption(cOpt);
CommandLine cl;
CommandLineParser parser = new GnuParser();
try {
cl = parser.parse(opts, args);
} catch (ParseException e) {
System.err.println("Error parsing command line: " + e.getMessage());
return -1;
}
if (!cl.hasOption(INPUT_OPT) || !cl.hasOption(OUTPUT_OPT)) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp(getClass().getName(), opts);
ToolRunner.printGenericCommandUsage(System.out);
return -1;
}
int reduceNo = DEFAULT_REDUCER_NO;
if (cl.hasOption(REDUCE_NO)) {
try {
reduceNo = Integer.parseInt(cl.getOptionValue(REDUCE_NO));
} catch (NumberFormatException e) {
System.err.println("Error parsing reducer number: "
+ e.getMessage());
}
}
String jobName = "Distributed timeseries [R] correlation";
if (cl.hasOption(JOB_NAME)) {
jobName = cl.getOptionValue(JOB_NAME);
jobName = jobName.replace('-', ' ');
}
if (cl.hasOption(REMOVE_OUTPUT)) {
}
String input = cl.getOptionValue(INPUT_OPT);
String output = cl.getOptionValue(OUTPUT_OPT);
Configuration conf = getConf();
//DistributedCache.createSymlink(conf);
//DistributedCache.addCacheFile(new URI("hdfs://master.hadoop:8020/user/nguyen/lib/"), conf);
Job job = Job.getInstance(conf, jobName);
job.setJarByClass(TimeSeriesJob.class);
job.setMapperClass(TimeSeriesMapper.class);
job.setReducerClass(TimeSeriesReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Timeseries.class);
job.setNumReduceTasks(reduceNo);
job.setInputFormatClass(WholeFileInputFormat.class);
WholeFileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, new Path(output));
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new TimeSeriesJob(),
args);
System.exit(res);
}
}