package de.l3s.common.features.hadoop.movingaverage;

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.PriorityQueue;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import de.l3s.common.models.timeseries.TimeseriesDataPoint;

/**
 * NoShuffleSort_MovingAverageReducer
 *
 * In this version of the reducer the points do not arrive pre-sorted, so we
 * maintain an in-memory priority queue to sort them before windowing.
 */
public class NoShuffleSort_MovingAverageReducer extends MapReduceBase
		implements Reducer<Text, TimeseriesDataPoint, Text, Text> {

	static enum PointCounters {
		POINTS_SEEN, POINTS_ADDED_TO_WINDOWS, MOVING_AVERAGES_CALCD
	};

	// milliseconds in one day
	private static final long DAY_IN_MS = 24L * 60 * 60 * 1000;

	private JobConf configuration;

	@Override
	public void configure(JobConf job) {
		this.configuration = job;
	}

	public void reduce(Text key, Iterator<TimeseriesDataPoint> values,
			OutputCollector<Text, Text> output, Reporter reporter)
			throws IOException {

		TimeseriesDataPoint next_point;
		float point_sum = 0;
		float moving_avg = 0;

		// the window width should match the width of your training samples
		int iWindowSizeInDays = this.configuration.getInt(
				"tv.floe.caduceus.hadoop.movingaverage.windowSize", 30);
		int iWindowStepSizeInDays = this.configuration.getInt(
				"tv.floe.caduceus.hadoop.movingaverage.windowStepSize", 1);

		long iWindowSizeInMS = iWindowSizeInDays * DAY_IN_MS;
		long iWindowStepSizeInMS = iWindowStepSizeInDays * DAY_IN_MS;

		Text out_key = new Text();
		Text out_val = new Text();

		SlidingWindow sliding_window = new SlidingWindow(iWindowSizeInMS,
				iWindowStepSizeInMS, DAY_IN_MS);

		// the shuffle did not sort by timestamp, so buffer every point for
		// this key in a priority queue; poll() then yields them in order
		PriorityQueue<TimeseriesDataPoint> oPointHeapNew =
				new PriorityQueue<TimeseriesDataPoint>();

		while (values.hasNext()) {
			next_point = values.next();
			// copy each point into a new object: MapReduce re-uses the k/v
			// instances it passes to reduce() (to avoid GC churn), so storing
			// the reference directly would fill the heap with one repeated point
			TimeseriesDataPoint point_copy = new TimeseriesDataPoint();
			point_copy.copy(next_point);
			oPointHeapNew.add(point_copy);
		}

		// drain the heap in timestamp order, feeding the sliding window
		while (!oPointHeapNew.isEmpty()) {
			reporter.incrCounter(PointCounters.POINTS_ADDED_TO_WINDOWS, 1);
			next_point = oPointHeapNew.poll();

			try {
				sliding_window.AddPoint(next_point);
			} catch (Exception e) {
				throw new IOException("Could not add point to sliding window", e);
			}

			if (sliding_window.WindowIsFull()) {
				reporter.incrCounter(PointCounters.MOVING_AVERAGES_CALCD, 1);

				LinkedList<TimeseriesDataPoint> oWindow = sliding_window
						.GetCurrentWindow();
				String strBackDate = oWindow.getLast().getDate();

				out_key.set("Group: " + key.toString() + ", Date: "
						+ strBackDate);

				// 1. compute the moving average over the current window;
				// iterate rather than index, since get(x) on a LinkedList
				// is O(n) and would make this loop quadratic
				point_sum = 0;
				for (TimeseriesDataPoint p : oWindow) {
					point_sum += p.fValue;
				}
				moving_avg = point_sum / oWindow.size();

				out_val.set("Moving Average: " + moving_avg);
				output.collect(out_key, out_val);

				// 2. step the window forward by the configured stride
				sliding_window.SlideWindowForward();
			}
		}
	}
}
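
/*
 * Note on ordering: PriorityQueue sorts elements by their natural ordering,
 * so the reducer above relies on TimeseriesDataPoint (defined in
 * de.l3s.common.models.timeseries) implementing
 * Comparable<TimeseriesDataPoint>. A minimal sketch of the comparison that
 * class is assumed to provide, ordering points by timestamp, would be:
 *
 *   public int compareTo(TimeseriesDataPoint other) {
 *       // getTimestamp() is a hypothetical accessor for the point's
 *       // epoch-millisecond timestamp, used here purely for illustration
 *       return Long.compare(this.getTimestamp(), other.getTimestamp());
 *   }
 *
 * Without a timestamp-consistent compareTo, poll() would not drain the heap
 * in time order and the sliding windows would be built from mis-ordered points.
 */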