/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.ml.data.temporal;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.basic.BasicMLData;
import org.encog.ml.data.basic.BasicMLDataPair;
import org.encog.neural.data.basic.BasicNeuralData;
import org.encog.neural.data.basic.BasicNeuralDataSet;
import org.encog.util.time.TimeSpan;
import org.encog.util.time.TimeUnit;
/**
 * This class implements a temporal neural data set. A temporal neural dataset
 * is designed to use a neural network to predict.
 *
 * A temporal dataset is a stream of data over a time range. This time range is
 * broken up into "points". Each point can contain one or more values. These
 * values are either the values that you would like to predict, or use to
 * predict. It is possible for a value to be both predicted and used to predict.
 * For example, if you were trying to predict a trend in a stock's price
 * fluctuations you might very well use the security price for both.
 *
 * Each point that we have data for is stored in the TemporalPoint class. Each
 * TemporalPoint will contain one or more data values. These data values are
 * described by the TemporalDataDescription class. For example, if you had five
 * TemporalDataDescription objects added to this class, each TemporalPoint
 * object would contain five values.
 *
 * Points are arranged by sequence number. No two points should have the same
 * sequence number. Methods are provided to allow you to add points using the
 * Date class. These dates are resolved to sequence numbers using the level of
 * granularity specified for this class. No two points should occupy the same
 * granularity increment.
 *
 * Typical usage: add descriptions with {@link #addDescription}, create points
 * with one of the {@code createPoint} methods, then call {@link #generate()}
 * to build the training pairs.
 *
 * @author jheaton
 */
public class TemporalMLDataSet extends BasicNeuralDataSet implements Serializable {

    /**
     * The serial id.
     */
    private static final long serialVersionUID = 7846736117000051687L;

    /**
     * Error message: adds are not supported.
     */
    public static final String ADD_NOT_SUPPORTED =
            "Direct adds to the temporal dataset are not supported.  "
            + "Add TemporalPoint objects and call generate.";

    /**
     * Descriptions of the data needed.
     */
    private final List<TemporalDataDescription> descriptions =
            new ArrayList<TemporalDataDescription>();

    /**
     * The temporal points at which we have data.
     */
    private final List<TemporalPoint> points = new ArrayList<TemporalPoint>();

    /**
     * The size of the input window, this is the data being used to predict.
     */
    private int inputWindowSize;

    /**
     * The size of the prediction window.
     */
    private int predictWindowSize;

    /**
     * The lowest sequence number that is considered "in range".
     */
    private int lowSequence;

    /**
     * The highest sequence number that is considered "in range".
     */
    private int highSequence;

    /**
     * Upper bound on the set size used by {@link #calculateActualSetSize()}.
     */
    private int desiredSetSize;

    /**
     * How many input neurons will be used.
     */
    private int inputNeuronCount;

    /**
     * How many output neurons will there be.
     */
    private int outputNeuronCount;

    /**
     * What is the date for the first temporal point.
     */
    private Date startingPoint;

    /**
     * What is the granularity of the temporal points? Days, months, years, etc?
     */
    private TimeUnit sequenceGrandularity;

    /**
     * Construct a dataset. The sequence range defaults to the full int range,
     * the desired set size to {@code Integer.MAX_VALUE}, the starting point to
     * {@code null} and the granularity to days.
     *
     * @param inputWindowSize
     *            What is the input window size.
     * @param predictWindowSize
     *            What is the prediction window size.
     */
    public TemporalMLDataSet(final int inputWindowSize,
            final int predictWindowSize) {
        this.inputWindowSize = inputWindowSize;
        this.predictWindowSize = predictWindowSize;
        this.lowSequence = Integer.MIN_VALUE;
        this.highSequence = Integer.MAX_VALUE;
        this.desiredSetSize = Integer.MAX_VALUE;
        this.startingPoint = null;
        this.sequenceGrandularity = TimeUnit.DAYS;
    }

    /**
     * Adding directly is not supported. Rather, add temporal points and
     * generate the training data.
     *
     * @param data
     *            Not used.
     * @throws TemporalError
     *             Always.
     */
    @Override
    public void add(final MLData data) {
        throw new TemporalError(TemporalMLDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Adding directly is not supported. Rather, add temporal points and
     * generate the training data.
     *
     * @param inputData
     *            Not used.
     * @param idealData
     *            Not used.
     * @throws TemporalError
     *             Always.
     */
    @Override
    public void add(final MLData inputData, final MLData idealData) {
        throw new TemporalError(TemporalMLDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Adding directly is not supported. Rather, add temporal points and
     * generate the training data.
     *
     * @param inputData
     *            Not used.
     * @throws TemporalError
     *             Always.
     */
    @Override
    public void add(final MLDataPair inputData) {
        throw new TemporalError(TemporalMLDataSet.ADD_NOT_SUPPORTED);
    }

    /**
     * Add a data description. The description is assigned the next free index
     * and the neuron counts are recalculated.
     *
     * @param desc
     *            The data description to add.
     * @throws TemporalError
     *             If temporal points have already been created.
     */
    public void addDescription(final TemporalDataDescription desc) {
        if (this.points.size() > 0) {
            final String str = "Can't add anymore descriptions, there are "
                    + "already temporal points defined.";
            throw new TemporalError(str);
        }

        final int index = this.descriptions.size();
        desc.setIndex(index);

        this.descriptions.add(desc);
        calculateNeuronCounts();
    }

    /**
     * Calculate the actual set size: the number of points in range, capped by
     * the desired set size.
     *
     * @return The size of the training set.
     */
    public int calculateActualSetSize() {
        int result = calculatePointsInRange();
        result = Math.min(this.desiredSetSize, result);
        return result;
    }

    /**
     * Calculate how many input and output neurons will be needed for the
     * current data. Every input description contributes one neuron per slot
     * of the input window; every predict description contributes one neuron
     * per slot of the prediction window.
     */
    public void calculateNeuronCounts() {
        this.inputNeuronCount = 0;
        this.outputNeuronCount = 0;

        for (final TemporalDataDescription desc : this.descriptions) {
            if (desc.isInput()) {
                this.inputNeuronCount += this.inputWindowSize;
            }
            if (desc.isPredict()) {
                this.outputNeuronCount += this.predictWindowSize;
            }
        }
    }

    /**
     * Calculate how many points are in the high and low range. These are the
     * points that the training set will be generated on.
     *
     * @return The number of points.
     */
    public int calculatePointsInRange() {
        int result = 0;

        for (final TemporalPoint point : this.points) {
            if (isPointInRange(point)) {
                result++;
            }
        }

        return result;
    }

    /**
     * Calculate the index to start at: the position of the first point in the
     * list that is within range.
     *
     * @return The starting index, or -1 if no point is in range.
     */
    public int calculateStartIndex() {
        for (int i = 0; i < this.points.size(); i++) {
            final TemporalPoint point = this.points.get(i);
            if (isPointInRange(point)) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Clear the entire dataset: descriptions, points and generated data.
     */
    public void clear() {
        this.descriptions.clear();
        this.points.clear();
        getData().clear();
    }

    /**
     * Create a temporal point from a time. Using the granularity each date is
     * given a sequence number. No two dates that fall in the same granularity
     * increment should be specified.
     *
     * @param when
     *            The time that this point should be created at.
     * @return The TemporalPoint created.
     */
    public TemporalPoint createPoint(final Date when) {
        final int sequence = getSequenceFromDate(when);
        final TemporalPoint point = new TemporalPoint(this.descriptions.size());
        point.setSequence(sequence);
        this.points.add(point);
        return point;
    }

    /**
     * Create a temporal data point using a sequence number. They can also be
     * created using time. No two points should have the same sequence number.
     *
     * @param sequence
     *            The sequence number.
     * @return A new TemporalPoint object.
     */
    public TemporalPoint createPoint(final int sequence) {
        final TemporalPoint point = new TemporalPoint(this.descriptions.size());
        point.setSequence(sequence);
        this.points.add(point);
        return point;
    }

    /**
     * Format data according to the type specified in the description, then
     * apply the description's activation function if one is set.
     *
     * @param desc
     *            The data description.
     * @param index
     *            The index to format the data at.
     * @return The formatted data.
     * @throws TemporalError
     *             If the description's type is not one of the handled types.
     */
    private double formatData(final TemporalDataDescription desc,
            final int index) {
        // Held in a one-element array so the array-based activation call
        // below can be applied to the single value.
        final double[] result = new double[1];

        switch (desc.getType()) {
        case DELTA_CHANGE:
            result[0] = getDataDeltaChange(desc, index);
            break;
        case PERCENT_CHANGE:
            result[0] = getDataPercentChange(desc, index);
            break;
        case RAW:
            result[0] = getDataRAW(desc, index);
            break;
        default:
            throw new TemporalError("Unsupported data type.");
        }

        if (desc.getActivationFunction() != null) {
            desc.getActivationFunction().activationFunction(result, 0,
                    result.length);
        }

        return result[0];
    }

    /**
     * Generate the training sets. The points are sorted first; each generated
     * input/ideal pair is appended to the underlying dataset.
     */
    public void generate() {
        sortPoints();

        // add one to the start index so we are "one ahead", needed to
        // calculate DELTA, if that encoding is chosen.
        final int start = calculateStartIndex() + 1;
        final int setSize = calculateActualSetSize();
        final int range = start + setSize - this.predictWindowSize
                - this.inputWindowSize;

        for (int i = start; i < range; i++) {
            final BasicMLData input = generateInputNeuralData(i);
            // The ideal window begins right after the input window.
            final BasicMLData ideal = generateOutputNeuralData(i
                    + this.inputWindowSize);
            final BasicMLDataPair pair = new BasicMLDataPair(input, ideal);
            super.add(pair);
        }
    }

    /**
     * Generate input neural data for the specified index. Values are laid out
     * window slot by window slot; within each slot, one value per input
     * description in description order.
     *
     * @param index
     *            The index to generate neural data for.
     * @return The input neural data generated.
     * @throws TemporalError
     *             If the input window starting at {@code index} would extend
     *             beyond the end of the provided points.
     */
    public BasicNeuralData generateInputNeuralData(final int index) {
        // Mirror the guard in generateOutputNeuralData so that running off
        // the end of the points is reported as a TemporalError rather than a
        // raw IndexOutOfBoundsException from the list.
        if (index + this.inputWindowSize > this.points.size()) {
            final String str = "Can't generate input temporal data "
                    + "beyond the end of provided data.";
            throw new TemporalError(str);
        }

        final BasicNeuralData result = new BasicNeuralData(
                this.inputNeuronCount);
        int resultIndex = 0;

        for (int i = 0; i < this.inputWindowSize; i++) {
            for (final TemporalDataDescription desc : this.descriptions) {
                if (desc.isInput()) {
                    result.setData(resultIndex++, formatData(desc, index + i));
                }
            }
        }

        return result;
    }

    /**
     * Generate neural ideal data for the specified index. Values are laid out
     * window slot by window slot; within each slot, one value per predict
     * description in description order.
     *
     * @param index
     *            The index to generate for.
     * @return The neural data generated.
     * @throws TemporalError
     *             If the prediction window starting at {@code index} would
     *             extend beyond the end of the provided points.
     */
    public BasicNeuralData generateOutputNeuralData(final int index) {
        if (index + this.predictWindowSize > this.points.size()) {
            final String str = "Can't generate prediction temporal data "
                    + "beyond the end of provided data.";
            throw new TemporalError(str);
        }

        final BasicNeuralData result = new BasicNeuralData(
                this.outputNeuronCount);
        int resultIndex = 0;

        for (int i = 0; i < this.predictWindowSize; i++) {
            for (final TemporalDataDescription desc : this.descriptions) {
                if (desc.isPredict()) {
                    result.setData(resultIndex++, formatData(desc, index + i));
                }
            }
        }

        return result;
    }

    /**
     * Get data between two points in delta form: the value at {@code index}
     * minus the value at {@code index - 1}.
     *
     * @param desc
     *            The data description.
     * @param index
     *            The index to get data from.
     * @return The requested data, or 0.0 when {@code index} is zero (there is
     *         no previous point).
     */
    private double getDataDeltaChange(final TemporalDataDescription desc,
            final int index) {
        if (index == 0) {
            return 0.0;
        }

        final TemporalPoint point = this.points.get(index);
        final TemporalPoint previousPoint = this.points.get(index - 1);
        return point.getData(desc.getIndex())
                - previousPoint.getData(desc.getIndex());
    }

    /**
     * Get data between two points in percent form: the change from the value
     * at {@code index - 1} to the value at {@code index}, divided by the
     * previous value.
     *
     * @param desc
     *            The data description.
     * @param index
     *            The index to get data from.
     * @return The requested data, or 0.0 when {@code index} is zero (there is
     *         no previous point).
     */
    private double getDataPercentChange(final TemporalDataDescription desc,
            final int index) {
        if (index == 0) {
            return 0.0;
        }

        final TemporalPoint point = this.points.get(index);
        final TemporalPoint previousPoint = this.points.get(index - 1);
        final double currentValue = point.getData(desc.getIndex());
        final double previousValue = previousPoint.getData(desc.getIndex());
        // NOTE(review): no guard for previousValue == 0; with doubles this
        // yields an infinite or NaN result rather than an exception.
        return (currentValue - previousValue) / previousValue;
    }

    /**
     * Get data in raw form. Note that the value is read from the point at
     * {@code index - 1}, not {@code index}.
     *
     * @param desc
     *            The data description.
     * @param index
     *            The index to get data from; callers are expected to be "one
     *            ahead", so this must be at least 1.
     * @return The requested data.
     */
    private double getDataRAW(final TemporalDataDescription desc,
            final int index) {
        // Note: The reason that we subtract 1 from the index is because we
        // are always one ahead. This allows the DELTA change formatter to
        // work. DELTA change requires two timeslices, so we have to be one
        // ahead. RAW only requires one, so we shift backwards.
        final TemporalPoint point = this.points.get(index - 1);
        return point.getData(desc.getIndex());
    }

    /**
     * @return A list of the data descriptions (the live internal list).
     */
    public List<TemporalDataDescription> getDescriptions() {
        return this.descriptions;
    }

    /**
     * @return the desiredSetSize
     */
    public int getDesiredSetSize() {
        return this.desiredSetSize;
    }

    /**
     * @return the highSequence
     */
    public int getHighSequence() {
        return this.highSequence;
    }

    /**
     * @return the inputNeuronCount
     */
    public int getInputNeuronCount() {
        return this.inputNeuronCount;
    }

    /**
     * @return the inputWindowSize
     */
    public int getInputWindowSize() {
        return this.inputWindowSize;
    }

    /**
     * @return the lowSequence
     */
    public int getLowSequence() {
        return this.lowSequence;
    }

    /**
     * @return the outputNeuronCount
     */
    public int getOutputNeuronCount() {
        return this.outputNeuronCount;
    }

    /**
     * @return The temporal points (the live internal list).
     */
    public List<TemporalPoint> getPoints() {
        return this.points;
    }

    /**
     * @return the predictWindowSize
     */
    public int getPredictWindowSize() {
        return this.predictWindowSize;
    }

    /**
     * Create a sequence number from a time. If no starting point has been set
     * yet, the supplied date becomes the starting point and its sequence is
     * zero; otherwise the sequence is the span between the starting point and
     * the supplied date, measured in the configured granularity.
     *
     * @param when
     *            The date to generate the sequence number for.
     * @return A sequence number.
     */
    public int getSequenceFromDate(final Date when) {
        int sequence;

        if (this.startingPoint != null) {
            final TimeSpan span = new TimeSpan(this.startingPoint, when);
            sequence = (int) span.getSpan(this.sequenceGrandularity);
        } else {
            // First date seen: remember it as the origin of the sequence.
            this.startingPoint = when;
            sequence = 0;
        }

        return sequence;
    }

    /**
     * @return the sequenceGrandularity
     */
    public TimeUnit getSequenceGrandularity() {
        return this.sequenceGrandularity;
    }

    /**
     * @return the startingPoint
     */
    public Date getStartingPoint() {
        return this.startingPoint;
    }

    /**
     * Is the specified point within the range. If a point is in the selection
     * range, then the point will be used to generate the training sets. Both
     * ends of the range are inclusive.
     *
     * @param point
     *            The point to consider.
     * @return True if the point is within the range.
     */
    public boolean isPointInRange(final TemporalPoint point) {
        return (point.getSequence() >= getLowSequence())
                && (point.getSequence() <= getHighSequence());
    }

    /**
     * @param desiredSetSize
     *            the desiredSetSize to set
     */
    public void setDesiredSetSize(final int desiredSetSize) {
        this.desiredSetSize = desiredSetSize;
    }

    /**
     * @param highSequence
     *            the highSequence to set
     */
    public void setHighSequence(final int highSequence) {
        this.highSequence = highSequence;
    }

    /**
     * Note that this setter does not recalculate the neuron counts.
     *
     * @param inputWindowSize
     *            the inputWindowSize to set
     */
    public void setInputWindowSize(final int inputWindowSize) {
        this.inputWindowSize = inputWindowSize;
    }

    /**
     * @param lowSequence
     *            the lowSequence to set
     */
    public void setLowSequence(final int lowSequence) {
        this.lowSequence = lowSequence;
    }

    /**
     * Note that this setter does not recalculate the neuron counts.
     *
     * @param predictWindowSize
     *            the predictWindowSize to set
     */
    public void setPredictWindowSize(final int predictWindowSize) {
        this.predictWindowSize = predictWindowSize;
    }

    /**
     * @param sequenceGrandularity
     *            the sequenceGrandularity to set
     */
    public void setSequenceGrandularity(final TimeUnit sequenceGrandularity) {
        this.sequenceGrandularity = sequenceGrandularity;
    }

    /**
     * @param startingPoint
     *            the startingPoint to set
     */
    public void setStartingPoint(final Date startingPoint) {
        this.startingPoint = startingPoint;
    }

    /**
     * Sort the points using their natural ordering.
     */
    public void sortPoints() {
        Collections.sort(this.points);
    }
}