package de.l3s.common.features.hadoop.movingaverage;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import de.l3s.common.models.timeseries.TimeseriesDataPoint;
import de.l3s.common.models.timeseries.TimeseriesKey;
/**
* MovingAverageReducer
*
* Example use of secondary sort and a sliding window to produce a moving
* average.
*
* Rather than loading all of the points into a data structure up front, this
* Reducer loads only as many points as are needed to fill the window,
* continually streaming the points through the window as it receives them.
*
* Missing values are ignored: the window is defined by a time delta rather
* than by a fixed number of samples/points.
*
* When stepping forward only one day at a time, a simpler and more efficient
* algorithm would suffice; this example is meant to show how a full sliding
* window works.
*
* Also note that each point is copied into the sliding window. This is
* necessary because Hadoop reuses Writable instances between calls to the
* values iterator.
*
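* For context, a minimal driver sketch that wires up the secondary sort might
* look like the following (illustrative only; MovingAverageJob,
* MovingAverageMapper, NaturalKeyPartitioner, CompositeKeyComparator and
* NaturalKeyGroupingComparator are hypothetical class names):
*
* <pre>{@code
* JobConf conf = new JobConf(MovingAverageJob.class);
* conf.setMapperClass(MovingAverageMapper.class);
* conf.setReducerClass(MovingAverageReducer.class);
* conf.setMapOutputKeyClass(TimeseriesKey.class);
* conf.setMapOutputValueClass(TimeseriesDataPoint.class);
* // partition on the natural key (group) only
* conf.setPartitionerClass(NaturalKeyPartitioner.class);
* // order the composite key: group first, then timestamp
* conf.setOutputKeyComparatorClass(CompositeKeyComparator.class);
* // group reduce input by the natural key only
* conf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);
* }</pre>
*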
* @author jpatterson
*
*/
public class MovingAverageReducer extends MapReduceBase implements
Reducer<TimeseriesKey, TimeseriesDataPoint, Text, Text> {
// Custom counters reported via the Reporter for job-level visibility.
enum PointCounters {
POINTS_SEEN, POINTS_ADDED_TO_WINDOWS, MOVING_AVERAGES_CALCD
}
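// 24 h * 60 min * 60 s * 1000 ms = 86,400,000 ms per day; a 30-day window
// (2,592,000,000 ms) exceeds Integer.MAX_VALUE, so the window math below is
// done with longs.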
static final long day_in_ms = 24L * 60 * 60 * 1000;
private JobConf configuration;
@Override
public void configure(JobConf job) {
this.configuration = job;
}
@Override
public void reduce(TimeseriesKey key, Iterator<TimeseriesDataPoint> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
TimeseriesDataPoint next_point;
float point_sum = 0;
float moving_avg = 0;
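// Window parameters are read from the JobConf cached in configure(); a
// driver can set them via JobConf.setInt(...) using the key names below.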
int iWindowSizeInDays = this.configuration.getInt(
"tv.floe.caduceus.hadoop.movingaverage.windowSize", 30);
int iWindowStepSizeInDays = this.configuration.getInt(
"tv.floe.caduceus.hadoop.movingaverage.windowStepSize", 1);
long iWindowSizeInMS = iWindowSizeInDays * day_in_ms;
long iWindowStepSizeInMS = iWindowStepSizeInDays * day_in_ms;
Text out_key = new Text();
Text out_val = new Text();
SlidingWindow sliding_window = new SlidingWindow(iWindowSizeInMS,
iWindowStepSizeInMS, day_in_ms);
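// Stream the sorted points through the window: fill it, emit one moving
// average per full window, slide forward, and repeat until this group's
// points are exhausted.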
while (values.hasNext()) {
while (!sliding_window.WindowIsFull() && values.hasNext()) {
reporter.incrCounter(PointCounters.POINTS_ADDED_TO_WINDOWS, 1);
next_point = values.next();
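// Hadoop reuses the same Writable instance across calls to values.next(),
// so the point must be copied before it is stored in the window; otherwise
// every window entry would alias the most recently read point.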
TimeseriesDataPoint p_copy = new TimeseriesDataPoint();
p_copy.copy(next_point);
try {
sliding_window.AddPoint(p_copy);
} catch (Exception e) {
// surface window errors rather than silently dropping the point
throw new IOException("Failed to add point to sliding window", e);
}
}
if (sliding_window.WindowIsFull()) {
reporter.incrCounter(PointCounters.MOVING_AVERAGES_CALCD, 1);
LinkedList<TimeseriesDataPoint> oWindow = sliding_window
.GetCurrentWindow();
String strBackDate = oWindow.getLast().getDate();
// 1. compute the moving average over the current full window
out_key.set("Group: " + key.getGroup() + ", Date: "
+ strBackDate);
point_sum = 0;
for (TimeseriesDataPoint p : oWindow) {
point_sum += p.fValue;
}
moving_avg = point_sum / oWindow.size();
out_val.set("Moving Average: " + moving_avg);
output.collect(out_key, out_val);
// 2. step window forward
sliding_window.SlideWindowForward();
}
} // while
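// Emit a marker record so group boundaries are visible in the output.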
out_key.set("debug > " + key.getGroup()
+ " --------- end of group -------------");
out_val.set("");
output.collect(out_key, out_val);
} // reduce
}