/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */
package edu.indiana.soic.ts.mapreduce;

import edu.indiana.soic.ts.utils.TSConfiguration;
import edu.indiana.soic.ts.utils.TableUtils;
import edu.indiana.soic.ts.utils.Constants;
import edu.indiana.soic.ts.utils.Utils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.text.ParseException;
import java.util.*;
import java.util.concurrent.TimeUnit;

/**
 * Splits the configured date range into sliding time windows and submits one
 * HBase table-scan MapReduce job per window to compute the stock vectors for
 * that window. Each job's output is written to an intermediate directory and
 * then concatenated into the final vector directory.
 */
public class VectorCalculator {
    private static final Logger LOG = LoggerFactory.getLogger(VectorCalculator.class);

    private String startDate;
    private String endDate;
    private int window;
    private int headShift;
    private int tailShift;
    private TSConfiguration tsConfiguration;

    /**
     * Read the start/end dates, window length and head/tail shifts from the
     * configuration and validate the mandatory values.
     */
    public void configure(TSConfiguration tsConfiguration) {
        Map conf = tsConfiguration.getConf();
        this.tsConfiguration = tsConfiguration;

        startDate = (String) conf.get(TSConfiguration.START_DATE);
        endDate = (String) conf.get(TSConfiguration.END_DATE);
        this.window = (int) conf.get(TSConfiguration.TIME_WINDOW);
        this.headShift = (int) conf.get(TSConfiguration.TIME_SHIFT_HEAD);
        this.tailShift = (int) conf.get(TSConfiguration.TIME_SHIFT_TAIL);

        if (startDate == null || startDate.isEmpty()) {
            throw new RuntimeException("Start date should be specified");
        }
        if (endDate == null || endDate.isEmpty()) {
            throw new RuntimeException("End date should be specified");
        }
    }

    /**
     * Generate the time windows and run one table-mapper job per window,
     * scanning only the date columns that fall inside that window.
     */
    public void submitJob() {
        try {
            Configuration config = HBaseConfiguration.create();
            config.set("mapreduce.output.textoutputformat.separator", ",");

            TreeMap<String, List<Date>> genDates = TableUtils.genDates(
                    TableUtils.getDate(startDate), TableUtils.getDate(endDate),
                    this.window, TimeUnit.DAYS, this.headShift, this.tailShift, TimeUnit.DAYS);
            LOG.info("Start Date : {} End Date : {}, Gen dates size: {}",
                    startDate, endDate, genDates.size());

            for (String id : genDates.keySet()) {
                LOG.info("Vector calculation for: {}", id);
                Scan scan = new Scan();
                scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
                scan.setCacheBlocks(false);  // don't set to true for MR jobs

                List<Date> dates = genDates.get(id);
                String start = TableUtils.convertDateToString(dates.get(0));
                String end = TableUtils.convertDateToString(dates.get(1));
                List<String> suitableDateList = TableUtils.getDates(start, end);
                config.set(Constants.Job.NO_OF_DAYS, String.valueOf(suitableDateList.size()));
                LOG.info("Vector calculator for start: {}, end: {} time window: {}, shift: {}, days: {}",
                        startDate, endDate, window, headShift, suitableDateList.size());
                for (String date : suitableDateList) {
                    scan.addColumn(Constants.STOCK_TABLE_CF_BYTES, date.getBytes());
                }

                Job job = new Job(config, "Vector calculation: " + id);
                job.setJarByClass(VectorCalculator.class);
                TableMapReduceUtil.initTableMapperJob(
                        Constants.STOCK_TABLE_NAME,      // input HBase table name
                        scan,                            // Scan instance to control CF and attribute selection
                        VectorCalculatorMapper.class,    // mapper
                        IntWritable.class,               // mapper output key
                        Text.class,                      // mapper output value
                        job);
                // adjust directories as required
                String outPutDir = tsConfiguration.getInterMediateVectorDir() + "/" + id;
                FileOutputFormat.setOutputPath(job, new Path(outPutDir));

                boolean b = job.waitForCompletion(true);
                if (!b) {
                    LOG.error("Error with job for vector calculation");
                    throw new RuntimeException("Error with job for vector calculation");
                }
                Utils.concatOutput(config, id, outPutDir, tsConfiguration.getVectorDir());
            }
        } catch (ParseException e) {
            LOG.error("Error while parsing date", e);
            throw new RuntimeException("Error while parsing date", e);
        } catch (InterruptedException | ClassNotFoundException | IOException e) {
            LOG.error("Error while creating the job", e);
            throw new RuntimeException("Error while creating the job", e);
        }
    }

    public static void main(String[] args) {
        String configFile = Utils.getConfigurationFile(args);
        TSConfiguration tsConfiguration = new TSConfiguration(configFile);
        VectorCalculator vectorCalculator = new VectorCalculator();
        vectorCalculator.configure(tsConfiguration);
        vectorCalculator.submitJob();
    }
}