/**
* Copyright (C) 2011 Brian Ferris <bdferris@onebusaway.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.onebusaway.transit_data_federation.bundle.tasks.history;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.onebusaway.collections.FactoryMap;
import org.onebusaway.collections.Range;
import org.onebusaway.gtfs.model.AgencyAndId;
import org.onebusaway.gtfs.model.AgencyAndIdInstance;
import org.onebusaway.transit_data_federation.impl.realtime.history.BlockLocationArchiveRecord;
import org.onebusaway.transit_data_federation.impl.realtime.history.ScheduleDeviationHistory;
import org.onebusaway.transit_data_federation.services.AgencyAndIdLibrary;
import org.onebusaway.transit_data_federation.services.realtime.ScheduleDeviationHistoryDao;
import org.onebusaway.transit_data_federation.services.transit_graph.TransitGraphDao;
import org.onebusaway.transit_data_federation.services.transit_graph.TripEntry;
import org.onebusaway.utility.InterpolationLibrary;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import cern.colt.list.DoubleArrayList;
import cern.jet.stat.Descriptive;
/**
 * Bundle task that turns archived block location records into
 * {@link ScheduleDeviationHistory} entries, one per trip id found in the
 * archive, and stores them through the {@link ScheduleDeviationHistoryDao}.
 *
 * For every trip returned by the {@link TransitGraphDao} the task loads the
 * archived records from the configured {@link BlockLocationArchiveSource},
 * groups them into one trace per trip instance (trip id + service date),
 * resamples the traces on a common grid of effective schedule times, drops
 * outlier traces and outlier values, and saves the resulting histories.
 *
 * The task is optional: when no source has been configured, {@link #run()}
 * logs a message and returns without doing any work.
 */
public class BlockLocationHistoryTask implements Runnable {

  private static final Logger _log = LoggerFactory.getLogger(BlockLocationHistoryTask.class);

  private TransitGraphDao _transitGraphDao;

  private ScheduleDeviationHistoryDao _scheduleDeviationHistoryDao;

  private BlockLocationArchiveSource _source;

  /**
   * Fallback sampling step, used when no step can be derived from the traces
   * themselves (see {@link #computeSamplingStep(List)}). Same unit as the
   * trace keys, i.e. effective schedule time.
   */
  private int _sampleTimeStep = 300;

  /**
   * Minimum number of trip instances a trip needs, and minimum number of
   * values a sample point needs, to be included in a history.
   */
  private int _minSampleSize = 10;

  /**
   * A value is treated as an outlier when it is further than
   * {@code _outlierRatio * sigma} away from the mean.
   */
  private double _outlierRatio = 2;

  private AgencyAndId _skipToTrip = null;

  @Autowired
  public void setTransitGraphDao(TransitGraphDao transitGraphDao) {
    _transitGraphDao = transitGraphDao;
  }

  @Autowired
  public void setScheduleDeviationHistoryDao(
      ScheduleDeviationHistoryDao scheduleDeviationHistoryDao) {
    _scheduleDeviationHistoryDao = scheduleDeviationHistoryDao;
  }

  /**
   * @param source the archive to read records from; when left {@code null}
   *          the task does nothing
   */
  public void setSource(BlockLocationArchiveSource source) {
    _source = source;
  }

  /**
   * Sets the fallback sampling step used when none can be derived from the
   * data.
   *
   * @param sampleTimeStep the step, must be positive
   * @throws IllegalArgumentException if {@code sampleTimeStep} is not positive
   */
  public void setSampleStepSize(int sampleTimeStep) {
    // A zero or negative step would never advance the sampling loops
    if (sampleTimeStep <= 0)
      throw new IllegalArgumentException("sampleTimeStep must be positive: "
          + sampleTimeStep);
    _sampleTimeStep = sampleTimeStep;
  }

  /**
   * @param minSampleSize minimum number of trip instances per trip, and of
   *          values per sample point, required for inclusion in a history
   */
  public void setMinSampleSize(int minSampleSize) {
    _minSampleSize = minSampleSize;
  }

  /**
   * @param minSampleSize see {@link #setMinSampleSize(int)}
   * @deprecated misspelled name kept only for backward compatibility; use
   *             {@link #setMinSampleSize(int)} instead
   */
  @Deprecated
  public void seMinSampleSize(int minSampleSize) {
    setMinSampleSize(minSampleSize);
  }

  /**
   * @param outlierRatio number of standard deviations beyond which a value is
   *          considered an outlier
   */
  public void setOutlierRatio(double outlierRatio) {
    _outlierRatio = outlierRatio;
  }

  /**
   * @param tripId string form of the trip id to skip to, parsed with
   *          {@link AgencyAndIdLibrary#convertFromString(String)}
   */
  public void setSkipToTrip(String tripId) {
    _skipToTrip = AgencyAndIdLibrary.convertFromString(tripId);
  }

  /**
   * Processes every trip of the transit graph. A failure while processing one
   * trip is logged and does not stop the remaining trips from being processed.
   *
   * If a skip-to trip has been configured, trips are ignored until that trip
   * is encountered; the skip-to trip itself is not processed either, so
   * processing starts with the trip that follows it.
   */
  @Override
  public void run() {

    if (_source == null) {
      _log.info("No BlockLocationHistoryTask data source specified. Skipping this optional task");
      // Without a source every processTrip call below would fail, so stop here
      return;
    }

    int tripIndex = 0;

    Iterable<TripEntry> allTrips = _transitGraphDao.getAllTrips();
    boolean skipTo = _skipToTrip != null;

    for (TripEntry trip : allTrips) {

      if (tripIndex % 20 == 0)
        _log.info("tripsProcessed=" + tripIndex);
      tripIndex++;

      // NOTE(review): the skip-to trip only ends the skipping phase and is
      // not processed itself - confirm that this is the intended behaviour.
      if (_skipToTrip != null && trip.getId().equals(_skipToTrip)) {
        skipTo = false;
      } else if (!skipTo) {
        try {
          processTrip(trip);
        } catch (Throwable ex) {
          // Best effort: keep going with the remaining trips
          _log.warn("error processing trip " + trip.getId(), ex);
        }
      }
    }
  }

  /**
   * Loads the archived records for one trip, builds a history for every trip
   * id found in those records that has at least {@code _minSampleSize} trip
   * instances, and saves the histories if there are any.
   */
  private void processTrip(TripEntry trip) {

    List<BlockLocationArchiveRecord> records = _source.getRecordsForTrip(trip.getId());

    Map<AgencyAndId, BlockLocationArchiveRecordMap> recordsByTrip = loadRecords(records);

    List<ScheduleDeviationHistory> histories = new ArrayList<ScheduleDeviationHistory>();

    for (Map.Entry<AgencyAndId, BlockLocationArchiveRecordMap> entry : recordsByTrip.entrySet()) {

      AgencyAndId tripId = entry.getKey();
      BlockLocationArchiveRecordMap recordsByInstance = entry.getValue();

      /**
       * If we don't have enough samples, skip the trip
       */
      if (recordsByInstance.size() < _minSampleSize)
        continue;

      ScheduleDeviationHistory history = constructHistory(tripId,
          recordsByInstance);
      histories.add(history);
    }

    if (!histories.isEmpty())
      _scheduleDeviationHistoryDao.saveScheduleDeviationHistory(histories);
  }

  /**
   * Groups the records first by trip id and then by trip instance (trip id
   * plus service date).
   *
   * The nested lookups rely on {@link FactoryMap#get(Object)} supplying a
   * value for keys that have not been seen yet.
   */
  private Map<AgencyAndId, BlockLocationArchiveRecordMap> loadRecords(
      List<BlockLocationArchiveRecord> records) {

    Map<AgencyAndId, BlockLocationArchiveRecordMap> recordsByTrip = new FactoryMap<AgencyAndId, BlockLocationArchiveRecordMap>(
        new BlockLocationArchiveRecordMap());

    for (BlockLocationArchiveRecord record : records) {
      AgencyAndId tripId = record.getTripId();
      AgencyAndIdInstance instance = new AgencyAndIdInstance(tripId,
          record.getServiceDate());
      recordsByTrip.get(tripId).get(instance).add(record);
    }

    return recordsByTrip;
  }

  /**
   * Builds the schedule deviation history of one trip.
   *
   * The records of each trip instance become one trace (effective schedule
   * time to schedule deviation). Traces that are outliers as a whole are
   * removed, the remaining ones are interpolated on a regular grid of sample
   * times, and at every sample time the individual outlier values are replaced
   * by {@code NaN}. Sample times with too few usable values are left out.
   *
   * @param tripId the trip the history is built for
   * @param recordsByInstance the archived records of that trip, grouped by
   *          trip instance
   * @return the history; row {@code i} of the deviations array holds the values
   *         of the i-th remaining trace at each retained sample time, with
   *         {@code NaN} where the trace has no usable value
   */
  private ScheduleDeviationHistory constructHistory(AgencyAndId tripId,
      BlockLocationArchiveRecordMap recordsByInstance) {

    List<SortedMap<Integer, Double>> traces = new ArrayList<SortedMap<Integer, Double>>();
    Range tRange = new Range();

    sortAndArrangeTraces(recordsByInstance, traces, tRange);

    // Align the sampling grid on multiples of the step inside the observed
    // range
    int step = computeSamplingStep(traces);
    int from = (int) (Math.ceil(tRange.getMin() / step) * step);
    int to = (int) (Math.floor(tRange.getMax() / step) * step);

    SortedMap<Integer, Double> mus = new TreeMap<Integer, Double>();
    SortedMap<Integer, Double> sigmas = new TreeMap<Integer, Double>();

    computeMeanAndStandardDeviationForTraces(traces, from, to, step, mus,
        sigmas);

    removeOutlierTraces(traces, mus, sigmas);

    int numOfTraces = traces.size();

    DoubleArrayList scheduleTimes = new DoubleArrayList();
    List<DoubleArrayList> scheduleDeviations = new ArrayList<DoubleArrayList>();
    for (int i = 0; i < numOfTraces; i++)
      scheduleDeviations.add(new DoubleArrayList());

    for (int t = from; t <= to; t += step) {

      // rawValues has exactly one slot per trace (NaN when the trace does not
      // cover t) so that its indices line up with the trace list; values
      // holds only the real samples and is used for the statistics.
      DoubleArrayList rawValues = new DoubleArrayList();
      DoubleArrayList values = new DoubleArrayList();

      for (SortedMap<Integer, Double> m : traces) {
        if (t < m.firstKey() || t > m.lastKey()) {
          rawValues.add(Double.NaN);
          continue;
        }
        double schedDev = InterpolationLibrary.interpolate(m, t);
        values.add(schedDev);
        rawValues.add(schedDev);
      }

      // At least two values are needed for a sample standard deviation
      if (values.size() < Math.max(_minSampleSize, 2))
        continue;

      double mu = Descriptive.mean(values);
      double sigma = Descriptive.sampleStandardDeviation(values.size(),
          Descriptive.sampleVariance(values, mu));

      int goodValueCount = pruneOutlierValues(rawValues, mu, sigma);
      if (goodValueCount < _minSampleSize)
        continue;

      scheduleTimes.add(t);
      for (int traceIndex = 0; traceIndex < traces.size(); traceIndex++)
        scheduleDeviations.get(traceIndex).add(rawValues.get(traceIndex));
    }

    // Trim before calling elements() so that the returned arrays contain no
    // unused capacity beyond size()
    scheduleTimes.trimToSize();
    double[] scheduleTimesArray = scheduleTimes.elements();

    double[][] scheduleDeviationsArrays = new double[numOfTraces][];

    for (int traceIndex = 0; traceIndex < numOfTraces; traceIndex++) {
      DoubleArrayList list = scheduleDeviations.get(traceIndex);
      list.trimToSize();
      scheduleDeviationsArrays[traceIndex] = list.elements();
    }

    return new ScheduleDeviationHistory(tripId, scheduleTimesArray,
        scheduleDeviationsArrays);
  }

  /**
   * Converts the records of every trip instance into a trace sorted by
   * effective schedule time and appends it to {@code maps}, widening
   * {@code tRange} to cover every key that was added.
   *
   * Records of the same instance that map to the same effective schedule time
   * overwrite each other; the last one wins.
   */
  private void sortAndArrangeTraces(
      BlockLocationArchiveRecordMap recordsByInstance,
      List<SortedMap<Integer, Double>> maps, Range tRange) {

    for (List<BlockLocationArchiveRecord> records : recordsByInstance.values()) {

      SortedMap<Integer, Double> m = new TreeMap<Integer, Double>();

      for (BlockLocationArchiveRecord record : records) {
        // Time elapsed since the service date, scaled down by 1000, minus the
        // schedule deviation. NOTE(review): presumably getTime() and
        // getServiceDate() are epoch milliseconds, which would make this the
        // scheduled time in seconds - confirm against the record class.
        int effectiveScheduleTime = (int) ((record.getTime() - record.getServiceDate()) / 1000 - record.getScheduleDeviation());
        m.put(effectiveScheduleTime, (double) record.getScheduleDeviation());
        tRange.addValue(effectiveScheduleTime);
      }

      maps.add(m);
    }
  }

  /**
   * Derives the sampling step from the data: the smallest average key spacing
   * among the traces that have at least five points, rounded up to a multiple
   * of 60. Falls back to the configured {@code _sampleTimeStep} when no trace
   * yields a non-zero spacing.
   */
  private int computeSamplingStep(List<SortedMap<Integer, Double>> traces) {

    int minStep = Integer.MAX_VALUE;

    for (SortedMap<Integer, Double> m : traces) {

      if (m.size() < 5)
        continue;

      int step = (m.lastKey() - m.firstKey()) / m.size();

      if (step == 0)
        continue;

      minStep = Math.min(step, minStep);
    }

    if (minStep == Integer.MAX_VALUE || minStep == 0)
      return _sampleTimeStep;

    return (int) (Math.ceil(minStep / 60.0) * 60);
  }

  /**
   * Computes, for every sample time between {@code from} and {@code to}
   * (inclusive, in increments of {@code step}), the mean and the sample
   * standard deviation of the interpolated values of all traces covering that
   * time. Results are written to {@code mus} and {@code sigmas}; sample times
   * covered by fewer than two traces get no entry.
   */
  private void computeMeanAndStandardDeviationForTraces(
      List<SortedMap<Integer, Double>> traces, int from, int to, int step,
      SortedMap<Integer, Double> mus, SortedMap<Integer, Double> sigmas) {

    for (int x = from; x <= to; x += step) {

      DoubleArrayList values = new DoubleArrayList();

      for (SortedMap<Integer, Double> m : traces) {
        if (x < m.firstKey() || x > m.lastKey())
          continue;
        double schedDev = InterpolationLibrary.interpolate(m, x);
        values.add(schedDev);
      }

      if (values.size() < 2)
        continue;

      double mu = Descriptive.mean(values);
      double sigma = Descriptive.sampleStandardDeviation(values.size(),
          Descriptive.sampleVariance(values, mu));

      mus.put(x, mu);
      sigmas.put(x, sigma);
    }
  }

  /**
   * Removes, in place, every trace that {@link #isTraceAnOutlier} flags.
   */
  private void removeOutlierTraces(List<SortedMap<Integer, Double>> maps,
      SortedMap<Integer, Double> mus, SortedMap<Integer, Double> sigmas) {

    Iterator<SortedMap<Integer, Double>> it = maps.iterator();

    while (it.hasNext()) {
      SortedMap<Integer, Double> m = it.next();
      if (isTraceAnOutlier(m, mus, sigmas))
        it.remove();
    }
  }

  /**
   * Tells whether more than half of the points of a trace lie further than
   * {@code _outlierRatio} standard deviations from the mean, with mean and
   * standard deviation interpolated from {@code mus} and {@code sigmas} at the
   * time of each point.
   *
   * NOTE(review): how {@code InterpolationLibrary.interpolate} behaves when
   * {@code mus} / {@code sigmas} are empty is not visible from this class.
   */
  private boolean isTraceAnOutlier(SortedMap<Integer, Double> m,
      SortedMap<Integer, Double> mus, SortedMap<Integer, Double> sigmas) {

    // Kept as a double so that the ratio below is a floating point division
    double outliers = 0;

    for (Map.Entry<Integer, Double> entry : m.entrySet()) {
      int t = entry.getKey();
      double value = entry.getValue();
      double mu = InterpolationLibrary.interpolate(mus, t);
      double sigma = InterpolationLibrary.interpolate(sigmas, t);
      if (Math.abs(value - mu) > _outlierRatio * sigma)
        outliers++;
    }

    /**
     * Is more than half the data an outlier? Then exclude the trace
     */
    return (outliers / m.size() > 0.5);
  }

  /**
   * Replaces, in place, every value of {@code rawValues} that lies further than
   * {@code _outlierRatio * sigma} from {@code mu} by {@code NaN}. Entries that
   * already are {@code NaN} are left untouched.
   *
   * @return the number of values that are neither {@code NaN} nor outliers
   */
  private int pruneOutlierValues(DoubleArrayList rawValues, double mu,
      double sigma) {

    int goodValueCount = 0;

    for (int i = 0; i < rawValues.size(); i++) {
      double value = rawValues.get(i);
      if (Double.isNaN(value))
        continue;
      if (Math.abs(value - mu) > _outlierRatio * sigma)
        rawValues.set(i, Double.NaN);
      else
        goodValueCount++;
    }

    return goodValueCount;
  }

  /**
   * Archive records of a single trip, grouped by trip instance. Constructed
   * as a {@link FactoryMap} with an {@link ArrayList} as its default value.
   */
  public static class BlockLocationArchiveRecordMap extends
      FactoryMap<AgencyAndIdInstance, List<BlockLocationArchiveRecord>> {

    private static final long serialVersionUID = 1L;

    public BlockLocationArchiveRecordMap() {
      super(new ArrayList<BlockLocationArchiveRecord>());
    }
  }
}