/*
* Copyright (c) 2010 Pentaho Corporation. All rights reserved.
* This software was developed by Pentaho Corporation and is provided under the terms
* of the GNU Lesser General Public License, Version 2.1. You may not use
* this file except in compliance with the license. If you need a copy of the license,
* please go to http://www.gnu.org/licenses/lgpl-2.1.txt. The Original Code is Time Series
* Forecasting. The Initial Developer is Pentaho Corporation.
*
* Software distributed under the GNU Lesser Public License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. Please refer to
* the license for the specific language governing your rights and limitations.
*/
/*
* TSLagMaker.java
* Copyright (C) 2010 Pentaho Corporation
*/
package weka.classifiers.timeseries.core;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Enumeration;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.SelectedTag;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;
import weka.filters.unsupervised.attribute.AddExpression;
import weka.filters.unsupervised.attribute.AddID;
import weka.filters.unsupervised.attribute.Copy;
import weka.filters.unsupervised.attribute.MathExpression;
import weka.filters.unsupervised.attribute.Remove;
import weka.filters.unsupervised.attribute.RenameAttribute;
/**
* A class for creating lagged versions of target variable(s) for use in time
* series forecasting. Uses the TimeseriesTranslate filter. Has options for
* creating averages of consecutive lagged variables (which can be useful for
* long lagged variables). Some polynomials of time are also created (if there
* is a time stamp), such as time^2 and time^3. Also creates cross products
* between time and the lagged and averaged lagged variables. If there is no
* date time stamp in the data then the user has the option of having an
* artificial time stamp created. Time stamps, real or otherwise, are used for
* modeling trends rather than using a differencing-based approach.
*
* Also has routines for dealing with a date timestamp - i.e. it can detect a
* monthly time period (because months are different lengths) and maps date time
* stamps to equal spaced time intervals. For example, in general, a date time
* stamp is remapped by subtracting the first observed value and adding this
* value divided by the constant delta (difference between consecutive steps) to
* the result. In the case of a detected monthly time period, the remapping
* involves subtracting the base year and then adding to this the number of the
* month within the current year plus twelve times the number of intervening
* years since the base year.
*
* Also has routines for adding new attributes derived from a date time stamp to
* the data - e.g. AM indicator, day of the week, month, quarter etc. In the
* case where there is no real data time stamp, the user may specify a nominal
* periodic variable (if one exists in the data). For example, month might be
* coded as a nominal value. In this case it can be specified as the primary
* periodic variable. The point is, that in all these cases (nominal periodic
* and date-derived periodics), we are able to determine what the value of these
* variables will be in future instances (as computed from the last known
* historic instance).
*
* @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
* @version $Revision: 51180 $
*/
public class TSLagMaker implements Serializable {
/** For serialization */
private static final long serialVersionUID = -1697901820770907975L;
/** The names of the fields to create lagged variables for */
protected List<String> m_fieldsToLag = null;
/**
 * The names of the fields to be considered "overlay" data - i.e. we will be
 * supplied with values for these for future instances.
 */
protected List<String> m_overlayFields = null;
/** The minimum lag (t-1 when 1) */
protected int m_minLag = 1;
/** The maximum lag (t-12 when 12) */
protected int m_maxLag = 12;
/**
 * Optionally fine tune the selection of lagged attributes within the min and
 * max via a range string.
 */
protected String m_lagFineTune = "";
/**
 * Whether to replace a number of consecutive long lagged variables with their
 * average.
 */
protected boolean m_averageConsecutiveLongLags = false;
/**
 * If replacing long lagged variables with averages, do so for those long
 * lagged variables with lag greater than this
 */
protected int m_averageLagsAfter = 2;
/**
 * How many consecutive lagged variables to average, if averaging long lagged
 * variables
 */
protected int m_numConsecutiveToAverage = 2;
/** The name of the timestamp attribute (if there is one); "" means none set */
protected String m_timeStampName = "";
/**
 * Whether to adjust for trends. If a timestamp attribute is named then
 * adjusting for trends will occur. If there is no timestamp attribute in the
 * data, then turning this on will result in an artificial timestamp attribute
 * getting added to the data.
 */
protected boolean m_adjustForTrends = true;
/**
 * Whether to stabilize the variance in the field to be forecast by applying a
 * log transform
 */
protected boolean m_adjustForVariance = false;
/** True if an artificial time index has been added to the data */
protected boolean m_useArtificialTimeIndex = false;
/** Include time/lag interaction terms? */
protected boolean m_includeTimeLagCrossProducts = true;
/** artificial time and last known real time value; -1 until initialized */
protected double m_lastTimeValue = -1;
/**
 * Used to add an artificial time attribute to the data if the user has
 * selected to adjust for trends and there isn't a time stamp in the data
 */
protected AddID m_artificialTimeMaker;
/** Filters for creating the various lagged and derived attributes */
protected List<Filter> m_varianceAdjusters;
protected List<Filter> m_lagMakers;
protected List<Filter> m_averagedLagMakers;
protected List<Filter> m_timeIndexMakers;
protected List<Filter> m_timeLagCrossProductMakers;
/** Removes attributes that should not reach the base learner */
protected Remove m_extraneousAttributeRemover;
/** The name of the primary periodic attribute; "" means none set */
protected String m_primaryPeriodicName = "";
/**
 * Holds a map of primary periodic values as keys and their immediate
 * successors (chronologically) as values. The primary periodic attribute (if
 * available) should relate to the time interval of the instances (e.g.
 * hourly, daily, monthly etc.).
 */
protected Map<String, String> m_primaryPeriodicSequence;
/**
 * A map (keyed by attribute) of maps for looking up the values of secondary
 * periodic attribute values that correspond to the values of the primary
 * periodic attribute
 */
protected Map<Attribute, Map<String, String>> m_secondaryPeriodicLookups;
/** Header of the untransformed input data */
protected Instances m_originalHeader;
/**
 * This holds the most recent (time wise) training or primed instance. We can
 * use it to determine the t+1 periodic value for the primary periodic
 * attribute
 */
protected Instance m_lastHistoricInstance;
/** pre-defined fields that can be derived from a genuine date time stamp */
protected boolean m_am = false;
protected boolean m_dayOfWeek = false;
protected boolean m_weekend = false;
protected boolean m_monthOfYear = false;
protected boolean m_quarter = false;
protected boolean m_dayOfMonth = false;
protected boolean m_numDaysInMonth = false;
/** custom defined fields that can be derived from a genuine date time stamp */
protected Map<String, ArrayList<CustomPeriodicTest>> m_customPeriodics;
/** Filters that compute the date-derived periodic attributes */
protected List<Filter> m_derivedPeriodicMakers;
// protected boolean m_advanceTimeStampByMonth = false;
/** Detects/handles the periodicity of a genuine date time stamp */
protected PeriodicityHandler m_dateBasedPeriodicity = new PeriodicityHandler();
/** User-supplied periodicity hint; UNKNOWN means auto-detect heuristically */
protected Periodicity m_userHintPeriodicity = Periodicity.UNKNOWN;
/**
 * Delete instances from the start of the transformed series where lagged
 * variables are missing? Default leaves missing value handling to the base
 * learner.
 */
protected boolean m_deleteMissingFromStartOfSeries = false;
/** Stores the first time stamp value in the data */
protected long m_dateTimeStampBase;
/** Filter used to add the remapped date attribute */
protected Add m_addDateMap;
/**
 * Holds the difference between the time stamps for the two most recent
 * training instances or the average difference over consecutive training
 * instances if the differences are not constant. Either this or date
 * arithmetic (to advance time stamp by month) is used to advance the
 * timestamp for future instances.
 */
// protected double m_deltaTime = -1;
/**
 * Date time stamps that should be skipped - i.e. not considered as an
 * increment. E.g financial markets don't trade on the weekend, so the
 * difference between friday closing and the following monday closing is one
 * time unit (and not three). Can accept strings such as "sat", "sunday",
 * "jan", "august", or explicit dates (with optional formatting string) such
 * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
 * respect to the periodicity - e.g for daily data they are interpreted as day
 * of the year; for hourly data, hour of the day; weekly data, week of the
 * year.
 */
protected String m_skipEntries;
/** Default formatting string for explicit dates in the skip list */
protected String m_dateFormat = "yyyy-MM-dd'T'HH:mm:ss";
/**
 * Reset the lag maker to its pristine state, discarding all filters and
 * cached time information constructed during a previous run.
 */
public void reset() {
  // discard every constructed filter
  m_varianceAdjusters = null;
  m_lagMakers = null;
  m_averagedLagMakers = null;
  m_timeIndexMakers = null;
  m_timeLagCrossProductMakers = null;
  m_derivedPeriodicMakers = null;
  m_artificialTimeMaker = null;
  m_extraneousAttributeRemover = null;
  // forget the last observed (real or artificial) time value
  m_lastTimeValue = -1;
}
/**
 * Returns an enumeration describing the available options.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration<Option> listOptions() {
  Vector<Option> newVector = new Vector<Option>();
  newVector.add(new Option("\tSet the fields to lag.", "F", 1,
    "-F <comma separated list of names>"));
  newVector.add(new Option("\tSet the fields to be considered "
    + "as overlay data.", "overlay", 1,
    "-overlay <comma separated list of names>"));
  newVector.add(new Option("\tSet the minimum lag length to generate."
    + "\n\t(default = 1)", "L", 1, "-L <num>"));
  newVector.add(new Option("\tSet the maximum lag length to generate."
    + "\n\t(default = 12)", "M", 1, "-M <num>"));
  newVector.add(new Option("\tAverage consecutive long lags.", "A", 0, "-A"));
  // fixed help text: missing space ("number oftime steps") and
  // "conjuction" typo
  newVector.add(new Option("\tAverage those lags longer than this number of"
    + " time steps.\n\tUse in conjunction with -A.\n\t"
    + "(default = 2)", "B", 1, "-B <num>"));
  newVector.add(new Option("\tFine tune selection of lags within min and "
    + "max by specifying" + " ranges", "R", 1, "-R <ranges>"));
  newVector.add(new Option("\tAverage this many consecutive long lags.\n\t"
    + "Use in conjunction with -B (default = 2)", "C", 1, "-C <num>"));
  newVector.add(new Option("\tDon't adjust for trends.", "Z", 0, "-Z"));
  newVector.add(new Option("\tSpecify the name of the timestamp field", "G",
    1, "-G <timestamp name>"));
  newVector.add(new Option("\tAdjust for variance.", "V", 0, "-V"));
  newVector.add(new Option(
    "\tAdd an AM/PM indicator (requires a date timestamp)", "am-pm", 0,
    "-am-pm"));
  // fixed "requres" typos in the date-derived field descriptions
  newVector.add(new Option("\tAdd a day of the week field (requires a date"
    + " timestamp)", "dayofweek", 0, "-dayofweek"));
  newVector.add(new Option("\tAdd a day of the month field (requires a date"
    + " timestamp)", "dayofmonth", 0, "-dayofmonth"));
  newVector.add(new Option(
    "\tAdd a number of days in the month field (requires a date"
      + " timestamp)", "numdaysinmonth", 0, "-numdaysinmonth"));
  newVector.add(new Option(
    "\tAdd a weekend indicator (requires a date timestamp)", "weekend", 0,
    "-weekend"));
  newVector.add(new Option("\tAdd a month field (requires a date timestamp)",
    "month", 0, "-month"));
  newVector.add(new Option("\tAdd a quarter of the year field ("
    + "requires a date timestamp)", "quarter", 0, "-quarter"));
  // advertised here because setOptions() parses -periodic but the original
  // listOptions() never mentioned it
  newVector.add(new Option("\tSet the name of the primary periodic "
    + "attribute (a cyclic nominal attribute)", "periodic", 1,
    "-periodic <name>"));
  newVector.add(new Option("\tAdd a custom date-derived boolean field ("
    + "requires a date timestamp).\n\tFormat: \"fieldName="
    + "Test Test|Test Test| ...\n\twhere "
    + "Test=OPERATORyear:month:week-of-yr:week-of-month:"
    + "day-of-yr:day-of-month:day-of-week:hour:min:second\n\te.g."
    + "XmasHoliday=>:dec::::24::: <:jan::::3:::\n\t"
    + "Legal OPERATORs are =,>,<,>=,<=. For = operator only\n\t"
    + "one Test is needed rather than a pair.\n\tThis option may"
    + " be specified more than once on the command line\n\t"
    + "in order to define multiple variables.", "custom", 1, "-custom"));
  newVector
    .add(new Option(
      "\tAdd a comma-separated 'skip' list of dates that should not\n\t"
        + "be considered as a time step. Days of the week,\n\t"
        + "months of the year, 'weekend', integers (indicating day of year\n\t"
        + ", hour of day etc.) or specific dates are all valid entries.\n\t"
        + "E.g sat,sun,27-08-2011,28-08-2011", "skip", 1, "-skip"));
  return newVector.elements();
}
/**
 * Creates a Range object for the user-specified lag range String.
 *
 * @param lagRange a range as a String (e.g. "2,3,7-9")
 * @return a Range object
 * @throws Exception if the supplied range is illegal with respect to the min
 *           and max lag values.
 */
protected Range getLagRangeSelection(String lagRange) throws Exception {
  // fixed: the original concatenated message fragments without spaces,
  // producing "isillegal" and "maxlags"
  final String badRangeMsg = "The lag selection range '" + lagRange
    + "' is illegal with respect to the specified min and max lags.";
  Range r = new Range(lagRange);
  try {
    r.setUpper(m_maxLag);
  } catch (IllegalArgumentException e) {
    throw new Exception(badRangeMsg, e); // preserve the cause
  }
  // still need to check against the min
  int[] selectedIndexes = r.getSelection();
  int max = selectedIndexes[Utils.maxIndex(selectedIndexes)] + 1;
  int min = selectedIndexes[Utils.minIndex(selectedIndexes)] + 1;
  if (max < m_minLag || min > m_maxLag) {
    throw new Exception(badRangeMsg);
  }
  return r;
}
/**
 * Parses a given list of options.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String fieldsToLag = Utils.getOption('F', options);
  if (fieldsToLag.length() == 0) {
    throw new Exception("Must specify the name of at least one field "
      + "to create lags for!");
  }
  String[] fieldNames = fieldsToLag.split(",");
  List<String> fieldList = new ArrayList<String>();
  for (String f : fieldNames) {
    fieldList.add(f);
  }
  setFieldsToLag(fieldList);
  String overlayFields = Utils.getOption("overlay", options);
  if (overlayFields.length() > 0) {
    String[] names = overlayFields.split(",");
    List<String> nameList = new ArrayList<String>();
    for (String f : names) {
      nameList.add(f);
    }
    setOverlayFields(nameList);
  }
  String minL = Utils.getOption('L', options);
  if (minL.length() > 0) {
    int mL = Integer.parseInt(minL);
    // validate before mutating state (the original called setMinLag() first
    // and then threw, leaving an illegal value behind)
    if (mL < 1) {
      throw new Exception("Minimum lag can't be less than 1!");
    }
    setMinLag(mL);
  }
  String maxL = Utils.getOption('M', options);
  if (maxL.length() > 0) {
    int mL = Integer.parseInt(maxL);
    setMaxLag(mL);
  }
  if (getMaxLag() < getMinLag()) {
    throw new Exception("Can't have the maximum lag set lower than the "
      + "minimum lag!");
  }
  String lagRange = Utils.getOption('R', options);
  m_lagFineTune = lagRange;
  if (m_lagFineTune.length() > 0) {
    // check the range for consistency with respect to min and max
    getLagRangeSelection(lagRange);
  }
  boolean avLongLags = Utils.getFlag('A', options);
  setAverageConsecutiveLongLags(avLongLags);
  String avLongerThan = Utils.getOption('B', options);
  if (avLongerThan.length() > 0) {
    int avL = Integer.parseInt(avLongerThan);
    if (avL < getMinLag() || avL > getMaxLag()) {
      throw new Exception("Average consecutive long lags value can't "
        + "be less than the minimum lag or greater than the "
        + "maximum lag!");
    }
    setAverageLagsAfter(avL);
  }
  String consecutiveLongLagS = Utils.getOption('C', options);
  if (consecutiveLongLagS.length() > 0) {
    int consecutive = Integer.parseInt(consecutiveLongLagS);
    if (consecutive < 1 || consecutive > (getMaxLag() - getMinLag())) {
      throw new Exception("Number of consecutive long lags to average "
        + "must be greater than 0 and less than "
        + (getMaxLag() - getMinLag()));
    }
    setNumConsecutiveLongLagsToAverage(consecutive);
  }
  boolean dontAdjTrends = Utils.getFlag('Z', options);
  setAdjustForTrends(!dontAdjTrends);
  boolean adjVariance = Utils.getFlag("V", options);
  setAdjustForVariance(adjVariance);
  String timeStampF = Utils.getOption('G', options);
  if (timeStampF.length() > 0) {
    setTimeStampField(timeStampF);
  }
  setAddAMIndicator(Utils.getFlag("am-pm", options));
  setAddDayOfWeek(Utils.getFlag("dayofweek", options));
  setAddDayOfMonth(Utils.getFlag("dayofmonth", options));
  setAddNumDaysInMonth(Utils.getFlag("numdaysinmonth", options));
  setAddWeekendIndicator(Utils.getFlag("weekend", options));
  setAddMonthOfYear(Utils.getFlag("month", options));
  setAddQuarterOfYear(Utils.getFlag("quarter", options));
  // custom date-derived periodic fields; -custom may appear multiple times
  String customPeriodic = Utils.getOption("custom", options);
  while (customPeriodic.length() > 0) {
    addCustomPeriodic(customPeriodic);
    // FIX: re-fetch the next -custom value. The original never updated
    // customPeriodic inside the loop, so any -custom option caused an
    // infinite loop.
    customPeriodic = Utils.getOption("custom", options);
  }
  String primaryPeriodicN = Utils.getOption("periodic", options);
  if (primaryPeriodicN.length() > 0) {
    setPrimaryPeriodicFieldName(primaryPeriodicN);
  }
  String skipString = Utils.getOption("skip", options);
  if (skipString.length() > 0) {
    setSkipEntries(skipString);
  }
}
/**
 * Gets the current settings of the LagMaker.
 *
 * @return an array of strings suitable for passing to setOptions
 */
public String[] getOptions() {
  ArrayList<String> options = new ArrayList<String>();
  options.add("-F");
  // FIX: List.toString() would emit "[a, b]" which setOptions()'s
  // split(",") cannot parse back; join with plain commas instead
  options.add(joinNames(getFieldsToLag()));
  if (getOverlayFields() != null && getOverlayFields().size() > 0) {
    // FIX: setOptions() parses "overlay", not 'O'
    options.add("-overlay");
    options.add(joinNames(getOverlayFields()));
  }
  options.add("-L");
  options.add("" + getMinLag());
  options.add("-M");
  options.add("" + getMaxLag());
  if (m_lagFineTune.length() > 0) {
    options.add("-R");
    options.add(getLagRange());
  }
  if (getAverageConsecutiveLongLags()) {
    options.add("-A");
    options.add("-B");
    options.add("" + getAverageLagsAfter());
    options.add("-C");
    options.add("" + getNumConsecutiveLongLagsToAverage());
  }
  if (!getAdjustForTrends()) {
    options.add("-Z");
  }
  if (getAdjustForVariance()) {
    options.add("-V");
  }
  if (getTimeStampField() != null && getTimeStampField().length() > 0) {
    options.add("-G");
    options.add(getTimeStampField());
  }
  if (getAddAMIndicator()) {
    options.add("-am-pm");
  }
  if (getAddDayOfWeek()) {
    options.add("-dayofweek");
  }
  if (getAddDayOfMonth()) {
    options.add("-dayofmonth");
  }
  if (getAddNumDaysInMonth()) {
    options.add("-numdaysinmonth");
  }
  if (getAddWeekendIndicator()) {
    options.add("-weekend");
  }
  if (getAddMonthOfYear()) {
    options.add("-month");
  }
  if (getAddQuarterOfYear()) {
    options.add("-quarter");
  }
  // FIX: the primary periodic field was parsed by setOptions() ("periodic")
  // but never written out here
  if (getPrimaryPeriodicFieldName() != null
    && getPrimaryPeriodicFieldName().length() > 0) {
    options.add("-periodic");
    options.add(getPrimaryPeriodicFieldName());
  }
  if (getSkipEntries() != null && getSkipEntries().length() > 0) {
    options.add("-skip");
    options.add(getSkipEntries());
  }
  if (m_customPeriodics != null && m_customPeriodics.keySet().size() > 0) {
    for (String name : m_customPeriodics.keySet()) {
      List<CustomPeriodicTest> tests = m_customPeriodics.get(name);
      options.add("-custom");
      StringBuilder tempBuff = new StringBuilder();
      // FIX: include the "fieldName=" prefix that addCustomPeriodic()
      // expects when parsing this value back (the original omitted it)
      tempBuff.append("\"").append(name).append("=");
      for (int i = 0; i < tests.size(); i++) {
        tempBuff.append(tests.get(i).toString());
        tempBuff.append(i < tests.size() - 1 ? "|" : "\"");
      }
      options.add(tempBuff.toString());
    }
  }
  return options.toArray(new String[0]);
}

/**
 * Joins field names into the comma-separated form that setOptions() expects.
 *
 * @param names the names to join
 * @return a comma-separated string of names
 */
private static String joinNames(List<String> names) {
  StringBuilder b = new StringBuilder();
  for (int i = 0; i < names.size(); i++) {
    if (i > 0) {
      b.append(",");
    }
    b.append(names.get(i));
  }
  return b.toString();
}
/**
 * Get the date-derived custom periodic attributes currently defined.
 *
 * @return a Map, keyed by field name, of custom date-derived periodic fields
 *         (may be null if none have been added).
 */
public Map<String, ArrayList<CustomPeriodicTest>> getCustomPeriodics() {
  return this.m_customPeriodics;
}
/**
 * Add a custom date-derived periodic.
 *
 * @param customPeriodic the new custom date-derived periodic in textual form:
 *          "fieldName=Test Test|Test Test|..."
 * @throws IllegalArgumentException if the definition lacks the "fieldName="
 *           prefix
 */
public void addCustomPeriodic(String customPeriodic) {
  if (m_customPeriodics == null) {
    m_customPeriodics = new HashMap<String, ArrayList<CustomPeriodicTest>>();
  }
  int nameSplit = customPeriodic.indexOf('=');
  if (nameSplit < 0) {
    // the original fell through to substring(0, -1) and threw an opaque
    // StringIndexOutOfBoundsException
    throw new IllegalArgumentException("Custom periodic definition must "
      + "have the form \"fieldName=Test Test|...\": " + customPeriodic);
  }
  String fieldName = customPeriodic.substring(0, nameSplit);
  String testSpec = customPeriodic.substring(nameSplit + 1);
  ArrayList<CustomPeriodicTest> tests = new ArrayList<CustomPeriodicTest>();
  // FIX: split() takes a regex and '|' is the alternation operator; the
  // unescaped split("|") splits the string into individual characters
  for (String p : testSpec.split("\\|")) {
    tests.add(new CustomPeriodicTest(p));
  }
  m_customPeriodics.put(fieldName, tests);
}
/**
 * Discard all custom date-derived periodic fields.
 */
public void clearCustomPeriodics() {
  this.m_customPeriodics = null;
}

/**
 * Replace the full set of date-derived custom periodic fields to use/compute.
 *
 * @param custom a Map, keyed by field name, of custom date-derived periodic
 *          fields to use.
 */
public void setCustomPeriodics(
  Map<String, ArrayList<CustomPeriodicTest>> custom) {
  this.m_customPeriodics = custom;
}

/**
 * Set the names of the fields for which lagged variables will be created.
 *
 * @param names a List of field names for which to create lagged variables
 * @throws Exception if a problem occurs
 */
public void setFieldsToLag(List<String> names) throws Exception {
  this.m_fieldsToLag = names;
}

/**
 * Get the names of the fields for which lagged variables will be created.
 *
 * @return a List of field names for which lagged variables will be created.
 */
public List<String> getFieldsToLag() {
  return this.m_fieldsToLag;
}

/**
 * Set the names of fields in the data that are to be treated as "overlay"
 * fields - i.e. values for these will be externally provided for future
 * instances.
 *
 * @param overlayNames the names of the fields that are to be considered
 *          "overlay" fields
 */
public void setOverlayFields(List<String> overlayNames) {
  this.m_overlayFields = overlayNames;
}

/**
 * Get the names of the "overlay" fields.
 *
 * @return a list of field names that are set as "overlay" fields
 */
public List<String> getOverlayFields() {
  return this.m_overlayFields;
}

/**
 * Set the name of the time stamp field in the data.
 *
 * @param name the name of the time stamp field
 */
public void setTimeStampField(String name) {
  this.m_timeStampName = name;
}

/**
 * Get the name of the time stamp field.
 *
 * @return the name of the time stamp field or null if one hasn't been
 *         specified.
 */
public String getTimeStampField() {
  return this.m_timeStampName;
}
/**
 * Set whether to adjust for trends. If no time stamp field has been
 * specified and this is true, an artificial time stamp will be created.
 *
 * @param a true if we are to adjust for trends via a real or artificial time
 *          stamp
 */
public void setAdjustForTrends(boolean a) {
  this.m_adjustForTrends = a;
}

/**
 * Returns true if we are adjusting for trends via a real or artificial time
 * stamp.
 *
 * @return true if we are adjusting for trends via a real or artificial time
 *         stamp in the data.
 */
public boolean getAdjustForTrends() {
  return this.m_adjustForTrends;
}

/**
 * Set whether to adjust for variance in the data by taking the log of the
 * target(s).
 *
 * @param v true to adjust for variance by taking the log of the target(s).
 */
public void setAdjustForVariance(boolean v) {
  this.m_adjustForVariance = v;
}

/**
 * Returns true if we are adjusting for variance by taking the log of the
 * target(s).
 *
 * @return true if we are adjusting for variance.
 */
public boolean getAdjustForVariance() {
  return this.m_adjustForVariance;
}

/**
 * Set ranges by which to fine-tune the creation of lagged attributes.
 *
 * @param ranges a list of ranges as a string
 */
public void setFineTuneLags(String ranges) {
  this.m_lagFineTune = ranges;
}

/**
 * Get the ranges used to fine tune the creation of lagged attributes.
 *
 * @return the ranges as a string
 */
public String getFineTuneLags() {
  return this.m_lagFineTune;
}

/**
 * Set the minimum lag to create (default = 1, i.e. t-1).
 *
 * @param min the minimum lag to create
 */
public void setMinLag(int min) {
  this.m_minLag = min;
}

/**
 * Get the minimum lag to create.
 *
 * @return the minimum lag to create.
 */
public int getMinLag() {
  return this.m_minLag;
}

/**
 * Set the maximum lag to create (default = 12, i.e. t-12).
 *
 * @param max the maximum lag to create.
 */
public void setMaxLag(int max) {
  this.m_maxLag = max;
}

/**
 * Get the maximum lag to create.
 *
 * @return the maximum lag to create.
 */
public int getMaxLag() {
  return this.m_maxLag;
}

/**
 * Set ranges to fine tune lag selection. Same backing field as
 * setFineTuneLags().
 *
 * @param lagRange a set of ranges (e.g. 2,3,4,7-9).
 */
public void setLagRange(String lagRange) {
  this.m_lagFineTune = lagRange;
}

/**
 * Get the ranges used to fine tune lag selection.
 *
 * @return the ranges (if any) used to fine tune lag selection
 */
public String getLagRange() {
  return this.m_lagFineTune;
}
/**
 * Sets whether to average consecutive long lagged variables. When true, new
 * variables are created as averages of long lags and the original lagged
 * variables involved are removed.
 *
 * @param avg true if consecutive long lags are to be averaged.
 */
public void setAverageConsecutiveLongLags(boolean avg) {
  this.m_averageConsecutiveLongLags = avg;
}

/**
 * Returns true if consecutive long lagged variables are to be averaged.
 *
 * @return true if consecutive long lagged variables are to be averaged.
 */
public boolean getAverageConsecutiveLongLags() {
  return this.m_averageConsecutiveLongLags;
}

/**
 * Set the lag after which consecutive long lagged variables are averaged
 * (default = 2, i.e. start replacing lagged variables after t-2 with
 * averages).
 *
 * @param a the point at which to start averaging consecutive long lagged
 *          variables.
 */
public void setAverageLagsAfter(int a) {
  this.m_averageLagsAfter = a;
}

/**
 * Return the point after which long lagged variables will be averaged.
 *
 * @return the point after which long lagged variables will be averaged.
 */
public int getAverageLagsAfter() {
  return this.m_averageLagsAfter;
}

/**
 * Set how many long lagged variables are folded into each averaged variable
 * (default = 2; e.g. with average-after = 2 this averages t-3 and t-4 into
 * one new variable, t-5 and t-6 into another, etc.).
 *
 * @param c the number of consecutive long lagged variables to average.
 */
public void setNumConsecutiveLongLagsToAverage(int c) {
  this.m_numConsecutiveToAverage = c;
}

/**
 * Get the number of consecutive long lagged variables to average.
 *
 * @return the number of long lagged variables to average.
 */
public int getNumConsecutiveLongLagsToAverage() {
  return this.m_numConsecutiveToAverage;
}

/**
 * Set the name of the primary periodic attribute in the data. This attribute
 * has to be nominal and cyclic so that the next value can be determined from
 * the current one.
 *
 * @param p the name of the primary periodic attribute (if any) in the data.
 */
public void setPrimaryPeriodicFieldName(String p) {
  this.m_primaryPeriodicName = p;
}

/**
 * The name of the primary periodic attribute.
 *
 * @return the name of the primary periodic attribute or null if one hasn't
 *         been specified.
 */
public String getPrimaryPeriodicFieldName() {
  return this.m_primaryPeriodicName;
}
/**
 * Set whether to create an AM indicator attribute. Has no effect unless the
 * data has a date-based time stamp.
 *
 * @param am true if an AM indicator attribute is to be created.
 */
public void setAddAMIndicator(boolean am) {
  this.m_am = am;
}

/**
 * Return true if an AM indicator attribute is to be created.
 *
 * @return true if an AM indicator attribute is to be created.
 */
public boolean getAddAMIndicator() {
  return this.m_am;
}

/**
 * Set whether to create a day of the week attribute. Has no effect unless
 * the data has a date-based time stamp.
 *
 * @param d true if a day of the week attribute is to be created.
 */
public void setAddDayOfWeek(boolean d) {
  this.m_dayOfWeek = d;
}

/**
 * Return true if a day of the week attribute is to be created.
 *
 * @return true if a day of the week attribute is to be created.
 */
public boolean getAddDayOfWeek() {
  return this.m_dayOfWeek;
}

/**
 * Set whether to create a day of the month attribute. Has no effect unless
 * the data has a date-based time stamp.
 *
 * @param d true if a day of the month attribute is to be created.
 */
public void setAddDayOfMonth(boolean d) {
  this.m_dayOfMonth = d;
}

/**
 * Return true if a day of the month attribute is to be created.
 *
 * @return true if a day of the month attribute is to be created.
 */
public boolean getAddDayOfMonth() {
  return this.m_dayOfMonth;
}

/**
 * Set whether to create a numeric attribute holding the number of days in
 * the month.
 *
 * @param d true if a num days in month attribute is to be created.
 */
public void setAddNumDaysInMonth(boolean d) {
  this.m_numDaysInMonth = d;
}

/**
 * Return true if a num days in the month attribute is to be created.
 *
 * @return true if a num days in the month attribute is to be created.
 */
public boolean getAddNumDaysInMonth() {
  return this.m_numDaysInMonth;
}

/**
 * Set whether to create a weekend indicator attribute. Has no effect unless
 * the data has a date-based time stamp.
 *
 * @param w true if a weekend indicator attribute is to be created.
 */
public void setAddWeekendIndicator(boolean w) {
  this.m_weekend = w;
}

/**
 * Returns true if a weekend indicator attribute is to be created.
 *
 * @return true if a weekend indicator attribute is to be created.
 */
public boolean getAddWeekendIndicator() {
  return this.m_weekend;
}

/**
 * Set whether to create a month of the year attribute. Has no effect unless
 * the data has a date-based time stamp.
 *
 * @param m true if a month of the year attribute is to be created.
 */
public void setAddMonthOfYear(boolean m) {
  this.m_monthOfYear = m;
}

/**
 * Returns true if a month of the year attribute is to be created.
 *
 * @return true if a month of the year attribute is to be created.
 */
public boolean getAddMonthOfYear() {
  return this.m_monthOfYear;
}

/**
 * Set whether to create a quarter attribute. Has no effect unless the data
 * has a date-based time stamp.
 *
 * @param q true if a quarter attribute is to be added.
 */
public void setAddQuarterOfYear(boolean q) {
  this.m_quarter = q;
}

/**
 * Returns true if a quarter attribute is to be created.
 *
 * @return true if a quarter attribute is to be created.
 */
public boolean getAddQuarterOfYear() {
  return this.m_quarter;
}
/**
* Returns true if an artificial time index is in use.
*
* @return true if an artificial time index is in use.
*/
public boolean isUsingAnArtificialTimeIndex() {
return m_useArtificialTimeIndex;
}
/**
* Set the starting value for the artificial time stamp.
*
* @param value the value to initialize the artificial time stamp with.
* @throws Exception if an artificial time stamp is not being used.
*/
public void setArtificialTimeStartValue(double value) throws Exception {
if (isUsingAnArtificialTimeIndex()) {
m_lastTimeValue = value;
} else {
throw new Exception("Not using an artificial time index");
}
}
/**
* Returns the current value of the artificial time stamp. After training,
* after priming, and prior to forecasting, this will be equal to the number
* of training instances seen.
*
* @return the current value of the artificial time stamp.
* @throws Exception if an artificial time stamp is not being used.
*/
public double getArtificialTimeStartValue() throws Exception {
if (!isUsingAnArtificialTimeIndex()) {
throw new Exception("Not using an artificial time index!");
}
return m_lastTimeValue;
}
/**
 * Returns the current (i.e. most recent) time stamp value. Unlike an
 * artificial time stamp, the value after training, after priming and before
 * forecasting, will be equal to the time stamp of the most recent priming
 * instance.
 *
 * @return the current time stamp value
 * @throws Exception if the lag maker is not adjusting for trends or no time
 *           stamp attribute has been specified.
 */
public double getCurrentTimeStampValue() throws Exception {
  // a real time stamp is only maintained when adjusting for trends and a
  // time stamp attribute name has been set
  boolean haveTimeStamp = m_adjustForTrends && m_timeStampName.length() > 0;
  if (!haveTimeStamp) {
    throw new Exception("Not using a time stamp!");
  }
  return m_lastTimeValue;
}
/**
 * Increment the artificial time value with the supplied increment value.
 *
 * @param increment the value to increment by.
 */
public void incrementArtificialTimeValue(int increment) {
m_lastTimeValue += increment;
}
/**
 * Return the difference between time values. This may be only approximate for
 * periods based on dates. It is best to use date-based arithmetic in this
 * case for incrementing/decrementing time stamps.
 *
 * @return the (average) difference between time values.
 */
public double getDeltaTime() {
// delegated to the date-based periodicity handler
return m_dateBasedPeriodicity.deltaTime();
}
/**
 * Gets the Periodicity representing the time stamp in use for this lag maker.
 * If the lag maker is not adjusting for trends, or an artificial time stamp
 * is being used, then null is returned.
 *
 * @return the Periodicity in use, or null if the lag maker is not adjusting
 *         for trends or is using an artificial time stamp.
 */
public Periodicity getPeriodicity() {
  if (m_adjustForTrends && !m_useArtificialTimeIndex) {
    return m_dateBasedPeriodicity.getPeriodicity();
  }
  return null;
}
/**
 * Set the periodicity for the data. This is ignored if the lag maker is not
 * adjusting for trends or is using an artificial time stamp. If not specified,
 * or set to Periodicity.UNKNOWN (the default), then heuristics will be used to
 * try and automatically determine the periodicity.
 *
 * @param toUse the periodicity to use
 */
public void setPeriodicity(Periodicity toUse) {
// recorded as a user-supplied hint only (see m_userHintPeriodicity)
m_userHintPeriodicity = toUse;
}
/**
 * Set the list of time units to be 'skipped' - i.e. not considered as an
 * increment. E.g. financial markets don't trade on the weekend, so the
 * difference between Friday closing and the following Monday closing is one
 * time unit (and not three). Can accept strings such as "sat", "sunday",
 * "jan", "august", or explicit dates (with optional formatting string) such
 * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
 * respect to the periodicity - e.g. for daily data they are interpreted as day
 * of the year; for hourly data, hour of the day; weekly data, week of the
 * year.
 *
 * @param skipEntries a comma separated list of strings, explicit dates and
 *          integers.
 */
public void setSkipEntries(String skipEntries) {
m_skipEntries = skipEntries;
}
/**
 * Get the list of time units to be 'skipped' - i.e. not considered as an
 * increment. E.g. financial markets don't trade on the weekend, so the
 * difference between Friday closing and the following Monday closing is one
 * time unit (and not three). Can accept strings such as "sat", "sunday",
 * "jan", "august", or explicit dates (with optional formatting string) such
 * as "2011-07-04@yyyy-MM-dd", or integers. Integers are interpreted with
 * respect to the periodicity - e.g. for daily data they are interpreted as day
 * of the year; for hourly data, hour of the day; weekly data, week of the
 * year.
 *
 * @return a comma-separated list of strings, explicit dates and integers
 */
public String getSkipEntries() {
return m_skipEntries;
}
/**
 * Builds a simple linear model of the target as a function of an artificial
 * time index (0, 1, 2, ...). Intended for projecting back before the start
 * of the series to estimate values for lag attributes that would otherwise
 * be missing.
 *
 * @param insts the training instances
 * @param targetName the name of the target field to model
 * @return a two element list holding the trained classifier and an empty
 *         header of the two-attribute (time, target) data set it was
 *         trained on
 * @throws Exception if the model can't be built
 */
private List<Object> createLagFiller(Instances insts, String targetName)
  throws Exception {
  Classifier lagFiller = new weka.classifiers.functions.LinearRegression();
  ArrayList<Attribute> atts = new ArrayList<Attribute>();
  atts.add(new Attribute("time"));
  atts.add(new Attribute("target"));
  Instances simple = new Instances("simple", atts, insts.numInstances());
  int targetIndex = insts.attribute(targetName).index();
  for (int i = 0; i < insts.numInstances(); i++) {
    double[] vals = new double[2];
    // artificial time starts at 0 at the beginning of the series
    vals[0] = i;
    vals[1] = insts.instance(i).value(targetIndex);
    simple.add(new DenseInstance(1.0, vals));
  }
  simple.setClassIndex(1);
  lagFiller.buildClassifier(simple);
  // keep just the header of the training format for later predictions
  simple = new Instances(simple, 0);
  List<Object> results = new ArrayList<Object>();
  results.add(lagFiller);
  results.add(simple);
  return results;
}
/**
 * Creates lagged copies of each field to be lagged, for lags between
 * m_minLag and m_maxLag (optionally restricted to the fine-tuning range).
 * Each lag is produced by a Copy/RenameAttribute/TimeSeriesTranslate filter
 * triple; all filters are recorded in m_lagMakers so they can be re-applied
 * to individual instances later.
 *
 * @param insts the instances to append lagged attributes to
 * @return the instances with lagged attributes appended
 * @throws Exception if no field to forecast has been specified or a field
 *           to lag can't be found in the data
 */
private Instances createLags(Instances insts) throws Exception {
  if (m_fieldsToLag == null || m_fieldsToLag.get(0).length() == 0) {
    throw new Exception("Field to forecast is not specified!");
  }
  m_lagMakers = new ArrayList<Filter>();
  // do we have a fine tuning range for lags?
  Range r = null;
  int[] rangeIndexes = null;
  if (m_lagFineTune.length() > 0) {
    r = getLagRangeSelection(m_lagFineTune);
    rangeIndexes = r.getSelection();
  }
  for (int j = 0; j < m_fieldsToLag.size(); j++) {
    // Instances.attribute(String) returns null (not a negative index) for
    // an unknown name, so check for null explicitly - the previous
    // "index < 0" test could never fire (NPE happened first)
    Attribute fieldAtt = insts.attribute(m_fieldsToLag.get(j));
    if (fieldAtt == null) {
      throw new Exception("Can't find field '" + m_fieldsToLag.get(j) + "'!");
    }
    int classIndex = fieldAtt.index();
    for (int i = m_minLag; i <= m_maxLag; i++) {
      // check against fine tuning ranges if set
      if (rangeIndexes != null) {
        boolean ok = false;
        for (int z = 0; z < rangeIndexes.length; z++) {
          if (rangeIndexes[z] + 1 == i) {
            ok = true;
            break;
          }
        }
        if (!ok) {
          continue;
        }
      }
      // copy the field to be lagged ...
      Copy c = new Copy();
      c.setAttributeIndices("" + (classIndex + 1));
      c.setInputFormat(insts);
      insts = Filter.useFilter(insts, c);
      m_lagMakers.add(c);
      // ... rename the copy ...
      RenameAttribute rename = new RenameAttribute();
      rename.setAttributeIndices("last");
      rename.setReplace("Lag_" + m_fieldsToLag.get(j));
      rename.setInputFormat(insts);
      insts = Filter.useFilter(insts, rename);
      m_lagMakers.add(rename);
      // ... and time shift it by -i steps
      TimeSeriesTranslate timeS = new TimeSeriesTranslate();
      timeS.setAttributeIndices("last");
      timeS.setInstanceRange(-i);
      timeS.setInputFormat(insts);
      insts = Filter.useFilter(insts, timeS);
      m_lagMakers.add(timeS);
    }
  }
  return insts;
}
/**
 * Optionally replaces runs of consecutive "long" lags (those with a lag
 * number greater than m_averageLagsAfter) with their average, reducing the
 * number of inputs to the model. The AddExpression/Remove filters created
 * are stored in m_averagedLagMakers for later re-application. Returns the
 * data unchanged (and clears m_averagedLagMakers) if averaging is turned
 * off or there are not enough long lags to average.
 *
 * @param insts the instances containing lagged attributes
 * @return the instances with averaged lag attributes appended and the
 *         individual long lags removed
 * @throws Exception if the first lag attribute for a field can't be found
 */
private Instances createAveragedLags(Instances insts) throws Exception {
if (!m_averageConsecutiveLongLags) {
m_averagedLagMakers = null;
return insts;
}
// clamp the number of lags to average to what is actually available
if (m_numConsecutiveToAverage > getMaxLag() - getAverageLagsAfter()) {
if (getMaxLag() - getAverageLagsAfter() > 1) {
m_numConsecutiveToAverage = getMaxLag() - getAverageLagsAfter();
} else {
// fewer than two long lags - nothing to average
m_averagedLagMakers = null;
return insts;
}
}
m_averagedLagMakers = new ArrayList<Filter>();
int numAtts = insts.numAttributes();
String removeLongLagIndexes = "";
for (int z = 0; z < m_fieldsToLag.size(); z++) {
int firstLagIndex = -1;
// locate the first lagged attribute
for (int i = 0; i < insts.numAttributes(); i++) {
if (insts.attribute(i).name().startsWith("Lag_" + m_fieldsToLag.get(z))) {
firstLagIndex = i;
break;
}
}
if (firstLagIndex < 0) {
throw new Exception("Can't find the first lag attribute for "
+ m_fieldsToLag.get(z) + "!");
}
// scan the block of lag attributes for this field
for (int i = firstLagIndex; i < numAtts;) {
if (!insts.attribute(i).name()
.startsWith("Lag_" + m_fieldsToLag.get(z))) {
// finished
break;
}
// need to parse the lag number out of the name
// (assumes names have the form "Lag_<field>-<number>" - TODO confirm
// against the renaming done in createLags)
String lagNumS = insts.attribute(i).name()
.replace("Lag_" + m_fieldsToLag.get(z) + "-", "");
int lagNum = Integer.parseInt(lagNumS);
int lastLagNum = lagNum;
if (/* (i - firstLagIndex + 1) */lagNum > m_averageLagsAfter) {
// this long lag (and up to m_numConsecutiveToAverage - 1 following
// consecutive ones) will be replaced by a single average attribute
int attNumber = i + 1;
removeLongLagIndexes += (i + 1) + ",";
String avExpression = "(a" + attNumber;
String avAttName = "Avg(" + insts.attribute(i).name();
int denom = 1;
// build the expression
for (int j = 1; j < m_numConsecutiveToAverage; j++) {
if ((i + j) < insts.numAttributes()
&& insts.attribute(i + j).name()
.startsWith("Lag_" + m_fieldsToLag.get(z))) {
String currNumS = insts.attribute(i + j).name()
.replace("Lag_" + m_fieldsToLag.get(z) + "-", "");
int currentLagNum = Integer.parseInt(currNumS);
// only average consecutive long lags (so truncate
// if there is a jump of more than 1)
if (currentLagNum - lastLagNum == 1) {
avExpression += " + a" + (attNumber + j);
avAttName += "," + insts.attribute(i + j).name();
denom++;
removeLongLagIndexes += (i + j + 1) + ",";
lastLagNum = currentLagNum;
} else {
break;
}
} else {
break;
}
}
avExpression += ")/" + denom;
avAttName += ")";
AddExpression addE = new AddExpression();
addE.setName(avAttName);
addE.setExpression(avExpression);
addE.setInputFormat(insts);
insts = Filter.useFilter(insts, addE);
m_averagedLagMakers.add(addE);
// skip past everything that was folded into the average
i += denom;
} else {
i++;
}
}
}
// finally, remove the individual long lags that were averaged
if (removeLongLagIndexes.length() > 0) {
// drop the trailing comma
removeLongLagIndexes = removeLongLagIndexes.substring(0,
removeLongLagIndexes.lastIndexOf(','));
Remove r = new Remove();
r.setAttributeIndices(removeLongLagIndexes);
r.setInputFormat(insts);
insts = Filter.useFilter(insts, r);
m_averagedLagMakers.add(r);
}
return insts;
}
/**
 * Appends squared and cubed versions of the time stamp to the data (for
 * modelling non-linear global trends). Has no effect unless the lag maker
 * is adjusting for trends and a time stamp attribute has been configured.
 * The AddExpression filters created are stored in m_timeIndexMakers.
 *
 * @param insts the instances to process
 * @return the instances with time index attributes appended
 * @throws Exception if the time stamp attribute can't be found or is not
 *           numeric
 */
private Instances createTimeIndexes(Instances insts) throws Exception {
  m_timeIndexMakers = null;
  if (m_timeStampName != null && m_timeStampName.length() > 0
    && m_adjustForTrends) {
    // Instances.attribute(String) returns null for an unknown name, so the
    // previous "index < 0" test could never fire (NPE happened first) -
    // check for null explicitly instead
    Attribute tsAtt = insts.attribute(m_timeStampName);
    if (tsAtt == null) {
      throw new Exception("Can't find time stamp attribute '"
        + m_timeStampName + "' in the data!");
    }
    int timeStampIndex = tsAtt.index();
    String timeStampName = m_timeStampName;
    if (insts.attribute(timeStampIndex).isDate()) {
      // we'll use the remapped one
      timeStampIndex = insts.attribute(m_timeStampName + "-remapped").index();
      timeStampName += "-remapped";
    }
    if (!insts.attribute(timeStampIndex).isNumeric()) {
      throw new Exception("Time stamp attribute '" + m_timeStampName
        + "' is not numeric!");
    }
    m_timeIndexMakers = new ArrayList<Filter>();
    // time^2
    AddExpression addE = new AddExpression();
    addE.setName(timeStampName + "^2");
    addE.setExpression("a" + (timeStampIndex + 1) + "^2");
    addE.setInputFormat(insts);
    insts = Filter.useFilter(insts, addE);
    m_timeIndexMakers.add(addE);
    // time^3
    addE = new AddExpression();
    addE.setName(timeStampName + "^3");
    addE.setExpression("a" + (timeStampIndex + 1) + "^3");
    addE.setInputFormat(insts);
    insts = Filter.useFilter(insts, addE);
    m_timeIndexMakers.add(addE);
  }
  return insts;
}
/**
 * Appends products of the time stamp with each lagged (or averaged lag)
 * attribute. Has no effect unless the lag maker is adjusting for trends, a
 * time stamp attribute is set, and lagged attributes are present. The
 * AddExpression filters created are stored in m_timeLagCrossProductMakers.
 *
 * @param insts the instances to process
 * @return the instances with time/lag cross product attributes appended
 * @throws Exception if a problem occurs applying the filters
 */
public Instances createTimeLagCrossProducts(Instances insts) throws Exception {
  m_timeLagCrossProductMakers = null;
  if (m_timeStampName == null || m_timeStampName.length() == 0
    || !m_adjustForTrends) {
    return insts;
  }
  int numAtts = insts.numAttributes();
  int firstLagIndex = -1;
  // locate the first lagged attribute
  for (int i = 0; i < numAtts; i++) {
    if (insts.attribute(i).name().startsWith("Lag_")) {
      firstLagIndex = i;
      break;
    }
  }
  if (firstLagIndex < 0) {
    m_timeLagCrossProductMakers = null;
    return insts;
  }
  // Instances.attribute(String) returns null for an unknown name, so the
  // previous "index < 0" test could never fire (NPE happened first) - test
  // for null and bail out quietly, as the original check intended
  Attribute tsAtt = insts.attribute(m_timeStampName);
  if (tsAtt == null) {
    return insts;
  }
  int timeStampIndex = tsAtt.index();
  String timeStampName = m_timeStampName;
  if (insts.attribute(timeStampIndex).isDate()) {
    // use the remapped one
    timeStampIndex = insts.attribute(m_timeStampName + "-remapped").index();
    timeStampName += "-remapped";
  }
  m_timeLagCrossProductMakers = new ArrayList<Filter>();
  for (int i = firstLagIndex; i < insts.numAttributes(); i++) {
    if (!(insts.attribute(i).name().startsWith("Lag_") || insts.attribute(i)
      .name().startsWith("Avg("))) {
      break;
    }
    AddExpression addE = new AddExpression();
    addE.setName(timeStampName + "*" + insts.attribute(i).name());
    addE.setExpression("a" + (timeStampIndex + 1) + "*a" + (i + 1));
    addE.setInputFormat(insts);
    insts = Filter.useFilter(insts, addE);
    m_timeLagCrossProductMakers.add(addE);
  }
  return insts;
}
/**
 * Applies a log transform to each field being lagged (used to stabilize
 * variance). Has no effect unless variance adjustment is turned on. The
 * MathExpression filters created are stored in m_varianceAdjusters.
 *
 * @param insts the instances to process
 * @return the instances with log-transformed target fields
 * @throws Exception if the fields to lag are unspecified or a field can't
 *           be found in the data
 */
private Instances createVarianceAdjusters(Instances insts) throws Exception {
  if (!m_adjustForVariance) {
    return insts;
  }
  if (m_fieldsToLag == null || m_fieldsToLag.get(0).length() == 0) {
    throw new Exception("Fields to lag is not specified!");
  }
  m_varianceAdjusters = new ArrayList<Filter>();
  for (String field : m_fieldsToLag) {
    // Instances.attribute(String) returns null for an unknown name, so the
    // previous "index < 0" test could never fire (NPE happened first)
    Attribute fieldAtt = insts.attribute(field);
    if (fieldAtt == null) {
      throw new Exception("Can't find field '" + field + "'!");
    }
    int index = fieldAtt.index();
    // invert the ignore range so that ONLY this field is transformed
    MathExpression mathE = new MathExpression();
    mathE.setIgnoreRange("" + (index + 1));
    mathE.setInvertSelection(true);
    mathE.setExpression("log(A)");
    mathE.setInputFormat(insts);
    insts = Filter.useFilter(insts, mathE);
    m_varianceAdjusters.add(mathE);
  }
  return insts;
}
// this is useful for reducing the scale of a date timestamp. Since dates
// are stored internally in elapsed milliseconds, they are large numbers and
// any model coefficient computed for the timestamp is likely to be extremely
// small (appearing as 0 in output due to 4 decimal places precision).
// Furthermore, date timestamps with a periodicity of a month are not a
// constant number of milliseconds in length from one month to the next -
// remapping corrects this
/**
 * Appends a remapped (small-integer scale) version of a date time stamp to
 * the training data and records the base value and Add filter needed to
 * remap future instances. Has no effect unless the lag maker is adjusting
 * for trends with a real (non-artificial) date time stamp.
 *
 * @param insts the training instances
 * @return the instances with a "-remapped" time stamp attribute appended
 * @throws Exception if a problem occurs during remapping
 */
protected Instances createDateTimestampRemap(Instances insts)
throws Exception {
Instances result = insts;
if (m_adjustForTrends && !m_useArtificialTimeIndex
&& m_timeStampName != null && m_timeStampName.length() > 0) {
if (result.attribute(m_timeStampName).isDate()) {
int origIndex = result.attribute(m_timeStampName).index();
// find first non-missing date and set as base
GregorianCalendar c = new GregorianCalendar();
for (int i = 0; i < result.numInstances(); i++) {
if (!result.instance(i).isMissing(origIndex)) {
if (m_dateBasedPeriodicity.getPeriodicity() == Periodicity.MONTHLY
|| m_dateBasedPeriodicity.getPeriodicity() == Periodicity.WEEKLY
|| m_dateBasedPeriodicity.getPeriodicity() == Periodicity.QUARTERLY) {
// for these periodicities the base is the YEAR of the first
// non-missing date rather than its millisecond value
Date d = new Date((long) result.instance(i).value(origIndex));
c.setTime(d);
m_dateTimeStampBase = c.get(Calendar.YEAR);
} else {
m_dateTimeStampBase = (long) result.instance(i).value(origIndex);
}
break;
}
}
// add the attribute that will hold the remapped value
m_addDateMap = new Add();
m_addDateMap.setAttributeName(m_timeStampName + "-remapped");
m_addDateMap.setInputFormat(result);
result = Filter.useFilter(result, m_addDateMap);
// NOTE(review): assumes at least one training instance is present -
// result.instance(0) will fail on an empty data set; confirm callers
// guard against this
Instance previous = result.instance(0);
// now loop through and compute remapped date
for (int i = 0; i < result.numInstances(); i++) {
Instance current = result.instance(i);
current = m_dateBasedPeriodicity.remapDateTimeStamp(current,
previous, m_timeStampName);
previous = current;
}
}
}
return result;
}
/**
 * Remaps the date time stamp of a single (priming/forecasting) instance by
 * passing it through the Add filter configured during training and then
 * through the date-based periodicity handler. Returns the instance
 * unchanged if no date remapping was set up during training
 * (m_addDateMap is null).
 *
 * @param inst the instance whose date time stamp is to be remapped
 * @return the instance with a remapped date time stamp value appended
 * @throws Exception if a problem occurs during remapping
 */
protected Instance remapDateTimeStamp(Instance inst) throws Exception {
Instance result = inst;
if (m_addDateMap != null) {
m_addDateMap.input(result);
result = m_addDateMap.output();
// no previous instance available in this single-instance context
result = m_dateBasedPeriodicity.remapDateTimeStamp(result, null,
m_timeStampName);
}
return result;
}
/**
 * Enum defining the periodicity of a time series.
 */
public static enum Periodicity {
UNKNOWN, HOURLY, DAILY, WEEKLY, MONTHLY, QUARTERLY, YEARLY;
// NOTE(review): this is mutable state stored on an enum constant, so the
// delta time is effectively shared by every user of the same constant in
// this JVM
private double m_deltaTime;
/**
 * Get the delta time (difference between consecutive time values) for this
 * periodicity.
 *
 * @return the delta time
 */
public double deltaTime() {
return m_deltaTime;
}
/**
 * Set the delta time for this periodicity.
 *
 * @param deltaTime the delta time to use
 */
public void setDeltaTime(double deltaTime) {
m_deltaTime = deltaTime;
}
}
/**
 * Helper class to manage time stamp manipulation with respect to various
 * periodicities. Has a routine to remap the time stamp, which is useful for
 * date time stamps. Since dates are just manipulated internally as the number
 * of milliseconds elapsed since the epoch, any global trend modelling in
 * regression functions results in enormous coefficients for this variable -
 * remapping to a more reasonable scale prevents this. It also makes it easier
 * to handle the case where there are time periods that shouldn't be
 * considered as a time unit increment, e.g. weekends and public holidays for
 * financial trading data. These "holes" in the data can be accommodated by
 * accumulating a negative offset for the remapped date when a particular
 * date/time occurs in a user-specified "skip" list.
 *
 * @author Mark Hall (mhall{[at]}pentaho{[dot]}com)
 */
public static class PeriodicityHandler implements Serializable {
/**
 * For serialization
 */
private static final long serialVersionUID = 6330232772323425050L;
/** Periodicity of this handler */
protected Periodicity m_handlerPeriodicity = Periodicity.UNKNOWN;
/** Delta time between consecutive time units */
private double m_deltaTime;
/** True if we are managing a date-based periodicity */
private boolean m_isDateBased;
/** First date time stamp seen in batch training (ms since the epoch) */
private long m_dateTimeStampInitialVal;
/** Last date time stamp value seen in batch training (ms since the epoch) */
private long m_dateTimeStampFinalVal;
/**
 * The year of the initial time stamp value for weekly, monthly or quarterly
 * periodicities; otherwise the same as the initial time stamp value
 **/
private long m_dateTimeStampBaseVal;
/** Holds the date-based entries that should be 'skipped' */
private List<Object> m_skipList;
/**
 * Any adjustment for remapped date values accumulated via time unit skips
 * that occur during the training data time frame
 */
private long m_trainingRemapSkipAdjust = 0;
/**
 * Set the periodicity for this handler to manage.
 *
 * @param p the periodicity to manage
 */
public void setPeriodicity(Periodicity p) {
m_handlerPeriodicity = p;
}
/**
 * Get the periodicity being managed by this handler.
 *
 * @return the periodicity being managed
 */
public Periodicity getPeriodicity() {
return m_handlerPeriodicity;
}
/**
 * Set a list of skip entries - time units that should not be treated as an
 * increment. Entries may be day or month names ("sat", "sunday", "jan",
 * "august"), explicit dates with an optional per-entry format string
 * ("2011-08-22@yyyy-MM-dd"), or integers, which are interpreted relative to
 * the periodicity (day of the year for daily data, hour of the day for
 * hourly data, week of the year for weekly data, month for monthly data).
 *
 * @param aList a comma separated list of date-based entries; null or the
 *          empty string leaves the current skip list untouched
 * @param dateFormat a default date format to use for parsing explicit dates
 * @throws Exception if an entry in the list is unparsable or unrecognized
 */
public void setSkipList(String aList, String dateFormat) throws Exception {
  if (aList == null || aList.length() == 0) {
    return;
  }
  // reset skip list and skip adjust
  m_skipList = new ArrayList<Object>();
  m_trainingRemapSkipAdjust = 0;
  for (String part : aList.split(",")) {
    String p = part.trim();
    // try as a day of the week or month of the year first - names are only
    // meaningful for these periodicities
    if (m_handlerPeriodicity == Periodicity.UNKNOWN
      || m_handlerPeriodicity == Periodicity.HOURLY
      || m_handlerPeriodicity == Periodicity.DAILY
      || m_handlerPeriodicity == Periodicity.MONTHLY) {
      String day = matchDayOfWeek(p);
      if (day != null) {
        // day-of-week entries are silently ignored for monthly data
        if (m_handlerPeriodicity != Periodicity.MONTHLY) {
          m_skipList.add(day);
        }
        continue;
      }
      if (p.equalsIgnoreCase("weekend")) {
        if (m_handlerPeriodicity != Periodicity.MONTHLY) {
          m_skipList.add("sat");
          m_skipList.add("sun");
          continue;
        }
        // NOTE: for monthly data "weekend" deliberately falls through and
        // is reported as unrecognized (preserves original behavior)
      }
      String month = matchMonthOfYear(p);
      if (month != null) {
        m_skipList.add(month);
        continue;
      }
    }
    // next, try as a number (no checking is done for numbers out of range
    // with respect to a given periodicity)
    try {
      m_skipList.add(Integer.valueOf(Integer.parseInt(p)));
      continue;
    } catch (NumberFormatException ignored) {
      // not a number - fall through to date parsing
    }
    // last of all try as a specific date (if we have a date formatting
    // string)
    if (dateFormat != null && dateFormat.length() > 0) {
      String datePart = p;
      // an entry may carry its own format after an '@'; keep it in a local
      // so it does not clobber the default format for subsequent entries
      String formatToUse = dateFormat;
      if (p.indexOf('@') > 0) {
        String[] dateParts = p.split("@");
        datePart = dateParts[0];
        formatToUse = dateParts[1];
      }
      SimpleDateFormat sdf = new SimpleDateFormat();
      sdf.applyPattern(formatToUse);
      try {
        m_skipList.add(sdf.parse(datePart));
        continue;
      } catch (ParseException ignored) {
        // not a parsable date either - fall through and report
      }
    }
    throw new Exception("Unrecognized skip entry string : " + p);
  }
}

/**
 * Matches a day-of-the-week name (short or long form, case insensitive) and
 * returns its canonical three-letter token, or null if no match.
 */
private static String matchDayOfWeek(String p) {
  String[][] days = { { "mon", "monday" }, { "tue", "tuesday" },
    { "wed", "wednesday" }, { "thu", "thursday" }, { "fri", "friday" },
    { "sat", "saturday" }, { "sun", "sunday" } };
  for (String[] d : days) {
    if (p.equalsIgnoreCase(d[0]) || p.equalsIgnoreCase(d[1])) {
      return d[0];
    }
  }
  return null;
}

/**
 * Matches a month-of-the-year name (short or long form, case insensitive)
 * and returns its canonical three-letter token, or null if no match.
 */
private static String matchMonthOfYear(String p) {
  String[][] months = { { "jan", "january" }, { "feb", "february" },
    { "mar", "march" }, { "apr", "april" }, { "may", "may" },
    { "jun", "june" }, { "jul", "july" }, { "aug", "august" },
    { "sep", "september" }, { "oct", "october" }, { "nov", "november" },
    { "dec", "december" } };
  for (String[] m : months) {
    if (p.equalsIgnoreCase(m[0]) || p.equalsIgnoreCase(m[1])) {
      return m[0];
    }
  }
  return null;
}
/**
 * Get the delta time (difference between consecutive time values) of the
 * periodicity being managed.
 *
 * @return the delta time
 */
public double deltaTime() {
return m_deltaTime;
}
/**
 * Set the delta time for the periodicity being managed. Note that the value
 * is also pushed into the shared Periodicity enum constant, making it
 * visible to all users of that constant.
 *
 * @param deltaTime the delta time to use
 */
public void setDeltaTime(double deltaTime) {
m_deltaTime = deltaTime;
m_handlerPeriodicity.setDeltaTime(m_deltaTime);
}
/**
 * Set the first date time stamp value in the batch training data. Marks the
 * handler as date-based and derives the base value used for remapping: for
 * weekly, monthly or quarterly periodicities this is the year of the
 * supplied time stamp, otherwise the time stamp itself.
 *
 * @param tsbase the first date time stamp value in the batch training data
 *          as a long (num milliseconds since epoch)
 */
public void setDateTimeStampInitial(long tsbase) {
  m_isDateBased = true;
  m_dateTimeStampInitialVal = tsbase;
  GregorianCalendar cal = new GregorianCalendar();
  cal.setTime(new Date(tsbase));
  boolean yearBased = m_handlerPeriodicity == Periodicity.MONTHLY
    || m_handlerPeriodicity == Periodicity.WEEKLY
    || m_handlerPeriodicity == Periodicity.QUARTERLY;
  m_dateTimeStampBaseVal = yearBased ? cal.get(Calendar.YEAR) : tsbase;
}
/**
 * Get the first date time stamp value in the batch training data.
 *
 * @return the first date time stamp value (num milliseconds since the epoch)
 * @throws Exception if the periodicity being managed is not date
 *           timestamp-based
 */
public long getDateTimeStampInitial() throws Exception {
if (!isDateBased()) {
throw new Exception("This periodicity is not date timestamp-based");
}
return m_dateTimeStampInitialVal;
}
/**
 * Set the last date timestamp value in the batch training data.
 *
 * @param tsfinal the last date timestamp value in the batch training data
 *          as a long (num milliseconds since the epoch).
 */
public void setDateTimeStampFinal(long tsfinal) {
m_isDateBased = true;
m_dateTimeStampFinalVal = tsfinal;
}
/**
 * Get the last date timestamp value in the batch training data.
 *
 * @return the last date timestamp value (num milliseconds since the epoch)
 * @throws Exception if the periodicity being managed is not date
 *           timestamp-based
 */
public long getDateTimeStampFinal() throws Exception {
if (!isDateBased()) {
throw new Exception("This periodicity is not date timestamp-based");
}
return m_dateTimeStampFinalVal;
}
/**
 * Set whether the periodicity being managed is date timestamp-based.
 *
 * @param isDateBased true if the periodicity being managed is date
 *          timestamp-based
 */
public void setIsDateBased(boolean isDateBased) {
m_isDateBased = isDateBased;
}
/**
 * Returns true if the periodicity being managed is date timestamp-based.
 *
 * @return true if the periodicity being managed is date timestamp-based
 */
public boolean isDateBased() {
return m_isDateBased;
}
/**
 * Checks to see if the supplied date falls on one of the time units that
 * the user has asked to be 'skipped' (i.e. should not be considered as a
 * time increment).
 *
 * @param toCheck the date to check against the skip list
 * @return true if the date matches an entry in the skip list
 */
public boolean dateInSkipList(Date toCheck) {
  if (m_skipList == null || m_skipList.size() == 0) {
    return false;
  }
  GregorianCalendar cal = new GregorianCalendar();
  cal.setTime(toCheck);
  int dayOfWeek = cal.get(Calendar.DAY_OF_WEEK);
  int monthOfYear = cal.get(Calendar.MONTH);
  for (Object entry : m_skipList) {
    if (entry instanceof String) {
      // canonical day/month tokens produced by setSkipList
      String token = entry.toString();
      boolean matched =
        ("mon".equals(token) && dayOfWeek == Calendar.MONDAY)
        || ("tue".equals(token) && dayOfWeek == Calendar.TUESDAY)
        || ("wed".equals(token) && dayOfWeek == Calendar.WEDNESDAY)
        || ("thu".equals(token) && dayOfWeek == Calendar.THURSDAY)
        || ("fri".equals(token) && dayOfWeek == Calendar.FRIDAY)
        || (("sat".equals(token) || "weekend".equals(token))
          && dayOfWeek == Calendar.SATURDAY)
        || (("sun".equals(token) || "weekend".equals(token))
          && dayOfWeek == Calendar.SUNDAY)
        || ("jan".equals(token) && monthOfYear == Calendar.JANUARY)
        || ("feb".equals(token) && monthOfYear == Calendar.FEBRUARY)
        || ("mar".equals(token) && monthOfYear == Calendar.MARCH)
        || ("apr".equals(token) && monthOfYear == Calendar.APRIL)
        || ("may".equals(token) && monthOfYear == Calendar.MAY)
        || ("jun".equals(token) && monthOfYear == Calendar.JUNE)
        || ("jul".equals(token) && monthOfYear == Calendar.JULY)
        || ("aug".equals(token) && monthOfYear == Calendar.AUGUST)
        || ("sep".equals(token) && monthOfYear == Calendar.SEPTEMBER)
        || ("oct".equals(token) && monthOfYear == Calendar.OCTOBER)
        || ("nov".equals(token) && monthOfYear == Calendar.NOVEMBER)
        || ("dec".equals(token) && monthOfYear == Calendar.DECEMBER);
      if (matched) {
        return true;
      }
    } else if (entry instanceof Integer) {
      // integers are interpreted relative to the periodicity being managed
      int unit = ((Integer) entry).intValue();
      if (m_handlerPeriodicity == Periodicity.DAILY
        || m_handlerPeriodicity == Periodicity.UNKNOWN) {
        // assume value is day of year
        if (cal.get(Calendar.DAY_OF_YEAR) == unit) {
          return true;
        }
      } else if (m_handlerPeriodicity == Periodicity.HOURLY) {
        // assume value is hour of day
        if (cal.get(Calendar.HOUR_OF_DAY) == unit) {
          return true;
        }
      } else if (m_handlerPeriodicity == Periodicity.WEEKLY) {
        // assume value is week of year
        if (cal.get(Calendar.WEEK_OF_YEAR) == unit) {
          return true;
        }
      } else if (m_handlerPeriodicity == Periodicity.MONTHLY) {
        // assume value is month of year
        if (monthOfYear == unit) {
          return true;
        }
      }
    } else if (entry instanceof Date) {
      if (entry.equals(toCheck)) {
        return true;
      }
    }
  }
  return false;
}
/**
* Remaps a date timestamp to an integer starting (from the first time stamp
* seen in the data) at 0. This is makes any coefficients produced by a
* regression model for the timestamp (global trend modelling) of reasonable
* scale. It is also useful for dealing with time units that shouldn't be
* considered an increment as a negative adjustment can be accumulated for
* these.
*
* @param inst the instance containing a date timestamp to be remapped
* @param previous the immediately previous instance in the sequence (may be
* null).
* @param timeStampName the name of the timestamp attribute
* @return
* @throws Exception if an error occurs
*/
public Instance remapDateTimeStamp(Instance inst, Instance previous,
    String timeStampName) throws Exception {
  Instance result = inst;
  if (!isDateBased()) {
    throw new Exception("This periodicity is not date timestamp-based");
  }
  // index of the original (un-remapped) date timestamp attribute
  int origIndex = result.dataset().attribute(timeStampName).index();
  Calendar c = new GregorianCalendar();
  // when true, the skip adjustment accumulated over the whole training
  // period is applied; otherwise a locally computed adjustment is used
  // (priming/forecasting within the training date range)
  boolean applyTrainingSkipAdjust = true;
  long localSkipAdjust = 0;
  if (!result.isMissing(origIndex)) {
    Date d = new Date((long) result.value(origIndex));
    double origValue = result.value(origIndex);
    if (m_skipList != null && m_skipList.size() > 0 && previous != null) {
      // check this instance's date time stamp against the skip list -
      // our fundamental assumption (for the training data) is that these
      // dates
      // are not actually
      // present in the data (i.e. sat and sun for stock market data). If
      // they
      // are in the data (but with missing targets) then the missing value
      // interpolation routine will have filled them in, which is the wrong
      // thing to do if they are supposed to be skipped over
      if (dateInSkipList(d)) {
        throw new Exception(
            "This instance contains a date time stamp that is "
                + "a member of the skip list - skip list entries are not time "
                + "units with respect to the model and should not be present : "
                + inst.toString());
      }
      if (!previous.isMissing(origIndex)) {
        if (result.value(origIndex) >= previous.value(origIndex)) {
          // compared to the previous date are we more than one time unit
          // ahead?
          double start = previous.value(origIndex);
          double end = origValue;
          // walk forward one time unit at a time from the previous
          // instance's date; every intervening step must be in the skip
          // list, and each such step decrements the cumulative training
          // skip adjustment
          while (start < end) {
            start = weka.classifiers.timeseries.core.Utils
                .advanceSuppliedTimeValue(start, this);
            if (start < end) {
              if (dateInSkipList(new Date((long) start))) {
                m_trainingRemapSkipAdjust--;
              } else {
                // oh oh the difference between the current and previous
                // instance
                // is more than one time step but the intervening step(s)
                // are
                // not in the skip list!
                throw new Exception("There is an increment of more than "
                    + "one time step between\n" + previous.toString()
                    + "\nand\n" + inst.toString() + "\n but none of the "
                    + "intervening time steps are in the " + "skip list.");
              }
            }
          }
        } else {
          // we have a problem here - data is not sorted in ascending order
          // of the date time stamp!
          throw new Exception(
              "The data does not seem to be sorted in ascending order "
                  + "of the date time stamp!");
        }
      }
    }
    if (m_skipList != null && m_skipList.size() > 0 && previous == null) {
      // this case indicates that we are being invoked in a
      // priming/forecasting context
      // check that this instance does not occur before the first training
      // instance!!
      if (origValue < m_dateTimeStampInitialVal) {
        throw new Exception(
            "The timestamp for this instance occurs before the "
                + "timestamp of the first training instance!");
      }
      // can't prime/forecast for values that occurred before the training
      // data.
      double end = result.value(origIndex);
      // first advance end until it is not in the skip list (this won't
      // be needed for priming instances that are within the training
      // date range), but might occur for closed-loop forecasting when
      // the date is advanced one time unit for each step
      while (dateInSkipList(new Date((long) end))) {
        end = weka.classifiers.timeseries.core.Utils
            .advanceSuppliedTimeValue(end, this);
      }
      double start = 0;
      if (end < m_dateTimeStampFinalVal) {
        // priming/forecasting within the range of the training data -
        // will have to recompute all skips from the initial training
        // time stamp up to this instance and not apply the pre-computed
        // skip total for the full training period
        applyTrainingSkipAdjust = false;
        start = m_dateTimeStampInitialVal;
      } else {
        // priming/forecasting beyond the last training date time stamp seen
        start = m_dateTimeStampFinalVal;
      }
      // now compute local skip adjust from start up to end
      while (start < end) {
        start = weka.classifiers.timeseries.core.Utils
            .advanceSuppliedTimeValue(start, this);
        if (start < end) {
          if (dateInSkipList(new Date((long) start))) {
            localSkipAdjust--;
          }
        }
      }
      // set end as the current value
      d = new Date((long) end);
      origValue = end;
    }
    if (m_handlerPeriodicity == Periodicity.MONTHLY
        || m_handlerPeriodicity == Periodicity.WEEKLY
        || m_handlerPeriodicity == Periodicity.QUARTERLY) {
      // monthly/weekly/quarterly periods are not a constant number of
      // milliseconds, so remap using calendar fields rather than deltas
      c.setTime(d);
      long year = c.get(Calendar.YEAR);
      long month = c.get(Calendar.MONTH);
      long week = c.get(Calendar.WEEK_OF_YEAR);
      long remapped = 0;
      if (m_handlerPeriodicity == Periodicity.MONTHLY) {
        remapped = ((year - m_dateTimeStampBaseVal) * 12) + month;
      } else if (m_handlerPeriodicity == Periodicity.WEEKLY) {
        remapped = ((year - m_dateTimeStampBaseVal) * 52) + week;
        // adjust for the case where week 1 of the year actually starts
        // in the last week of December
        if (month == Calendar.DECEMBER && week == 1) {
          remapped += 52;
        }
      } else if (m_handlerPeriodicity == Periodicity.QUARTERLY) {
        remapped = ((year - m_dateTimeStampBaseVal) * 4)
            + ((month / 3L) + 1L);
      }
      if (m_skipList != null && m_skipList.size() > 0) {
        remapped += (applyTrainingSkipAdjust) ? m_trainingRemapSkipAdjust
            : 0;
        remapped += localSkipAdjust;
      }
      // NOTE(review): assumes the remapped timestamp attribute is the
      // last attribute in the transformed format - confirm against
      // createDateTimestampRemap()
      result.setValue(result.numAttributes() - 1, remapped);
    } else {
      // constant-delta periodicities: express the timestamp as the number
      // of time units elapsed since the first training timestamp
      double remapped = origValue - m_dateTimeStampInitialVal;
      remapped /= deltaTime();// m_deltaTime;
      // it might (or might not) make sense to take the floor here. For
      // daily data
      // I have the feeling that data arithmetic (adding 1 to day of the
      // year)
      // may actually add slightly more than
      // a day at certain times (to account for) leap seconds/years
      // remapped = Math.floor(remapped);
      if (m_skipList != null && m_skipList.size() > 0) {
        remapped += (applyTrainingSkipAdjust) ? m_trainingRemapSkipAdjust
            : 0;
        remapped += localSkipAdjust;
      }
      result.setValue(result.numAttributes() - 1, remapped);
    }
  }
  return result;
}
}
/**
* Utility method that uses heuristics to identify the periodicity of the data
* with respect to a time stamp. If the time stamp is not a date then the
* periodicity is UNKNOWN with a delta set by computing the average difference
* between consecutive time stamp values. Configures the periodicity with
* first and last time stamp entries in the data.
*
* @param insts the instances to determine the periodicity from
* @param timeName the name of the time stamp attribute
* @param userHint a specific periodicity to defer to. The user should provide
* a specific periodicity when the data has non-constant differences
* in time between consecutive elements and a skip list will be used
* to correct for this. Specifying UNKNOWN as the periodicity here
* will result in the heuristic detection routine being applied.
* @return the configured Periodicity of the data.
*/
public static PeriodicityHandler determinePeriodicity(Instances insts,
    String timeName, Periodicity userHint) {
  // periods and tolerances, in milliseconds
  double fiveMins = 300000D;
  double oneHour = 3600000D;
  double oneDay = oneHour * 24D;
  double oneWeek = oneDay * 7D;
  double thirtyDays = oneHour * 24D * 30D;
  double approxQuarter = thirtyDays * 3D;
  double oneYear = oneDay * 365D;
  double averageDelta = Utils.missingValue();
  PeriodicityHandler result = new PeriodicityHandler();
  // guard against a missing time stamp attribute. The original code
  // dereferenced the attribute before checking its index (NPE risk when
  // the named attribute does not exist).
  Attribute timeAtt = insts.attribute(timeName);
  if (timeAtt == null || timeAtt.index() < 0) {
    result.setPeriodicity(Periodicity.UNKNOWN);
    result.setDeltaTime(Utils.missingValue());
    return result;
  }
  int timeIndex = timeAtt.index();
  if (userHint != Periodicity.UNKNOWN && insts.attribute(timeIndex).isDate()) {
    // trust the user's indication
    result.setPeriodicity(userHint);
    switch (userHint) {
    case HOURLY:
      result.setDeltaTime(oneHour);
      break;
    case DAILY:
      result.setDeltaTime(oneDay);
      break;
    case WEEKLY:
      result.setDeltaTime(oneWeek);
      break;
    case YEARLY:
      result.setDeltaTime(oneYear);
      break;
    // others don't matter as date arithmetic is used
    }
    long initialTS = (long) insts.instance(0).value(timeIndex);
    long finalTS = (long) insts.instance(insts.numInstances() - 1).value(
        timeIndex);
    result.setDateTimeStampInitial(initialTS);
    result.setDateTimeStampFinal(finalTS);
    return result;
  }
  // collect the differences between consecutive (non-missing) time stamps
  List<Double> deltas = new ArrayList<Double>();
  for (int i = 1; i < insts.numInstances(); i++) {
    if (!insts.instance(i).isMissing(timeIndex)
        && !insts.instance(i - 1).isMissing(timeIndex)) {
      deltas.add(insts.instance(i).value(timeIndex)
          - insts.instance(i - 1).value(timeIndex));
    }
  }
  double deltaSum = 0;
  for (Double delta : deltas) {
    deltaSum += delta;
  }
  // NaN (i.e. missing) when no usable consecutive pairs exist, which
  // matches the original 0/0 behavior
  averageDelta = deltaSum / deltas.size();
  if (insts.attribute(timeIndex).isDate()) {
    long initialTS = (long) insts.instance(0).value(timeIndex);
    long finalTS = (long) insts.instance(insts.numInstances() - 1).value(
        timeIndex);
    // allow +-5mins for hourly
    if (Math.abs(oneHour - averageDelta) <= fiveMins) {
      result.setPeriodicity(Periodicity.HOURLY);
      result.setDeltaTime(oneHour);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // allow +- 1 hour for daily
    if (Math.abs(oneDay - averageDelta) <= oneHour) {
      result.setPeriodicity(Periodicity.DAILY);
      result.setDeltaTime(oneDay);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // allow +- 6 hours for weekly
    if (Math.abs(oneWeek - averageDelta) <= (oneDay / 4.0)) {
      result.setPeriodicity(Periodicity.WEEKLY);
      result.setDeltaTime(oneWeek);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // allow +- 3 days for monthly
    if (Math.abs(thirtyDays - averageDelta) <= (oneDay * 3.0)) {
      result.setPeriodicity(Periodicity.MONTHLY);
      result.setDeltaTime(thirtyDays);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // allow +- 1 week for quarterly
    if (Math.abs(approxQuarter - averageDelta) <= oneWeek) {
      result.setPeriodicity(Periodicity.QUARTERLY);
      result.setDeltaTime(approxQuarter);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // allow +- 2 days for yearly
    if (Math.abs(oneYear - averageDelta) <= (oneDay * 2.0)) {
      result.setPeriodicity(Periodicity.YEARLY);
      result.setDeltaTime(oneYear);
      result.setDateTimeStampInitial(initialTS);
      result.setDateTimeStampFinal(finalTS);
      return result;
    }
    // otherwise UNKNOWN but date-based
    result.setPeriodicity(Periodicity.UNKNOWN);
    result.setIsDateBased(true);
    result.setDeltaTime(averageDelta);
    result.setDateTimeStampInitial(initialTS);
    result.setDateTimeStampFinal(finalTS);
    return result;
  }
  // default for non-date-based time stamps
  result.setPeriodicity(Periodicity.UNKNOWN);
  result.setIsDateBased(false);
  result.setDeltaTime(averageDelta);
  return result;
}
/**
 * Adds any requested date-derived periodic attributes (AM, day of week,
 * month, quarter, custom periodics etc.) to the supplied data and computes
 * their values for every instance. Also determines the periodicity of the
 * data with respect to the time stamp.
 *
 * @param insts the instances to augment
 * @return the augmented instances (or the original if no derived periodics
 *         apply)
 * @throws Exception if a filter fails to process the data
 */
protected Instances setupDerivedPeriodics(Instances insts) throws Exception {
  Instances result = insts;
  if (m_adjustForTrends && !m_useArtificialTimeIndex) {
    m_dateBasedPeriodicity = determinePeriodicity(insts, m_timeStampName,
        m_userHintPeriodicity);
    if (m_skipEntries != null && m_skipEntries.length() > 0) {
      m_dateBasedPeriodicity.setSkipList(m_skipEntries, m_dateFormat);
    }
    if (insts.attribute(m_timeStampName).isDate()) {
      m_derivedPeriodicMakers = new ArrayList<Filter>();
      // now add filters for each requested derived periodic value.
      // Each filter's input format must be set from the *current*
      // result (which accumulates attributes as filters are applied),
      // not from the original insts - using insts was a bug for any
      // filter other than the first one applied.
      if (m_am) {
        // numeric binary
        Add a = new Add();
        a.setAttributeName("AM");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_dayOfWeek) {
        // nominal
        Add a = new Add();
        a.setAttributeName("DayOfWeek");
        a.setNominalLabels("sun,mon,tue,wed,thu,fri,sat");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_dayOfMonth) {
        // nominal
        Add a = new Add();
        a.setAttributeName("DayOfMonth");
        a.setNominalLabels("1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,"
            + "20,21,22,23,24,25,26,27,28,29,30,31");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_numDaysInMonth) {
        Add a = new Add();
        a.setAttributeName("NumDaysInMonth");
        // fixed: was setInputFormat(insts), which does not match the
        // data actually fed to the filter once earlier derived
        // attributes have been added
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_weekend) {
        // numeric binary
        Add a = new Add();
        a.setAttributeName("Weekend");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_monthOfYear) {
        // nominal
        Add a = new Add();
        a.setAttributeName("Month");
        a.setNominalLabels("jan,feb,mar,apr,may,jun,jul,aug,sep,oct,nov,dec");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      if (m_quarter) {
        // nominal
        Add a = new Add();
        a.setAttributeName("Quarter");
        a.setNominalLabels("Q1,Q2,Q3,Q4");
        a.setInputFormat(result);
        result = Filter.useFilter(result, a);
        m_derivedPeriodicMakers.add(a);
      }
      // any custom date-derived periodics?
      if (m_customPeriodics != null) {
        for (String name : m_customPeriodics.keySet()) {
          List<CustomPeriodicTest> l = m_customPeriodics.get(name);
          // check to see if we should create a multi-label nominal
          // attribute instead of a numeric binary attribute. If all
          // test intervals have a non-null label then attribute will
          // be nominal
          boolean binary = false;
          String labels = "";
          Set<String> uniqueLabels = new HashSet<String>();
          for (CustomPeriodicTest t : l) {
            if (t.getLabel() == null || t.getLabel().length() == 0) {
              binary = true;
              break;
            } else {
              if (uniqueLabels.add(t.getLabel())) {
                labels += t.getLabel() + ",";
              }
            }
          }
          Add a = new Add();
          a.setAttributeName("c_" + name);
          if (!binary) {
            // strip the trailing comma accumulated above
            labels = labels.substring(0, labels.lastIndexOf(','));
            a.setAttributeType(new SelectedTag("NOM", Add.TAGS_TYPE));
            a.setNominalLabels(labels);
          }
          a.setInputFormat(result);
          result = Filter.useFilter(result, a);
          m_derivedPeriodicMakers.add(a);
        }
      }
      // set the values for each instance in the data
      for (int i = 0; i < result.numInstances(); i++) {
        setDerivedPeriodicValues(result.instance(i));
      }
    }
  }
  return result;
}
/**
 * Computes and sets the values of any derived periodic attributes (AM,
 * DayOfWeek, DayOfMonth, NumDaysInMonth, Weekend, Month, Quarter and custom
 * periodics) for the supplied instance, based on its date time stamp. Any
 * derived attribute is set to missing when the time stamp itself is missing.
 *
 * @param inst the instance to fill in (must belong to a dataset that
 *          contains the derived attributes)
 */
protected void setDerivedPeriodicValues(Instance inst) {
  if (m_adjustForTrends && !m_useArtificialTimeIndex) {
    if (inst.dataset().attribute(m_timeStampName).isDate()) {
      int timeIndex = inst.dataset().attribute(m_timeStampName).index();
      // -1 flags a missing time stamp
      long time = (inst.isMissing(timeIndex)) ? -1 : (long) inst
          .value(timeIndex);
      Date instDate = null;
      GregorianCalendar cal = new GregorianCalendar();
      if (time != -1) {
        instDate = new Date(time);
        cal.setTime(instDate);
      }
      if (m_am) {
        if (instDate == null) {
          inst.setMissing(inst.dataset().attribute("AM"));
        } else {
          if (cal.get(Calendar.AM_PM) == Calendar.AM) {
            inst.setValue(inst.dataset().attribute("AM"), 1);
          } else {
            inst.setValue(inst.dataset().attribute("AM"), 0);
          }
        }
      }
      if (m_dayOfWeek || m_weekend) {
        if (instDate == null) {
          if (m_dayOfWeek) {
            inst.setMissing(inst.dataset().attribute("DayOfWeek"));
          }
          if (m_weekend) {
            inst.setMissing(inst.dataset().attribute("Weekend"));
          }
        } else {
          int dow = cal.get(Calendar.DAY_OF_WEEK);
          String day = "";
          switch (dow) {
          case Calendar.SUNDAY:
            day = "sun";
            break;
          case Calendar.MONDAY:
            day = "mon";
            break;
          case Calendar.TUESDAY:
            day = "tue";
            break;
          case Calendar.WEDNESDAY:
            day = "wed";
            break;
          case Calendar.THURSDAY:
            day = "thu";
            break;
          case Calendar.FRIDAY:
            day = "fri";
            break;
          case Calendar.SATURDAY:
            day = "sat";
            break;
          }
          if (day.length() > 0) {
            if (m_dayOfWeek) {
              inst.setValue(inst.dataset().attribute("DayOfWeek"), day);
            }
            if (m_weekend) {
              if (day.equals("sat") || day.equals("sun")) {
                inst.setValue(inst.dataset().attribute("Weekend"), 1);
              } else {
                inst.setValue(inst.dataset().attribute("Weekend"), 0);
              }
            }
          } else {
            if (m_dayOfWeek) {
              inst.setMissing(inst.dataset().attribute("DayOfWeek"));
            }
            if (m_weekend) {
              inst.setMissing(inst.dataset().attribute("Weekend"));
            }
          }
        }
      }
      if (m_dayOfMonth) {
        if (instDate == null) {
          // fixed: originally set "DayOfWeek" missing here (copy-paste
          // bug) instead of "DayOfMonth"
          inst.setMissing(inst.dataset().attribute("DayOfMonth"));
        } else {
          // DayOfMonth is a nominal att with labels "1".."31", so the
          // calendar's 1-based day maps to a 0-based label index
          int dom = cal.get(Calendar.DAY_OF_MONTH);
          inst.setValue(inst.dataset().attribute("DayOfMonth"), (dom - 1));
        }
      }
      if (m_numDaysInMonth) {
        if (instDate == null) {
          inst.setMissing(inst.dataset().attribute("NumDaysInMonth"));
        } else {
          boolean isLeap = cal.isLeapYear(cal.get(Calendar.YEAR));
          int daysInMonth = 0;
          int month = cal.get(Calendar.MONTH);
          if (month == Calendar.FEBRUARY) {
            daysInMonth = 28;
            if (isLeap) {
              daysInMonth++;
            }
          } else if (month == Calendar.APRIL || month == Calendar.JUNE
              || month == Calendar.SEPTEMBER || month == Calendar.NOVEMBER) {
            daysInMonth = 30;
          } else {
            daysInMonth = 31;
          }
          inst.setValue(inst.dataset().attribute("NumDaysInMonth"),
              daysInMonth);
        }
      }
      if (m_monthOfYear || m_quarter) {
        if (instDate == null) {
          if (m_monthOfYear) {
            inst.setMissing(inst.dataset().attribute("Month"));
          }
          if (m_quarter) {
            inst.setMissing(inst.dataset().attribute("Quarter"));
          }
        } else {
          int moy = cal.get(Calendar.MONTH);
          if (m_monthOfYear) {
            String month = inst.dataset().attribute("Month").value(moy);
            inst.setValue(inst.dataset().attribute("Month"), month);
          }
          if (m_quarter) {
            String quarter = "";
            if (moy == 0 || moy == 1 || moy == 2) {
              quarter = "Q1";
            } else if (moy == 3 || moy == 4 || moy == 5) {
              quarter = "Q2";
            } else if (moy == 6 || moy == 7 || moy == 8) {
              quarter = "Q3";
            } else {
              quarter = "Q4";
            }
            inst.setValue(inst.dataset().attribute("Quarter"), quarter);
          }
        }
      }
      if (m_customPeriodics != null) {
        for (String name : m_customPeriodics.keySet()) {
          Attribute att = inst.dataset().attribute("c_" + name);
          if (att != null) {
            if (instDate == null) {
              inst.setMissing(att);
            } else {
              // evaluate for this periodic - the first matching test
              // interval determines the label
              List<CustomPeriodicTest> l = m_customPeriodics.get(name);
              boolean result = false;
              String label = null;
              for (CustomPeriodicTest t : l) {
                result = (result || t.evaluate(instDate));
                // match?
                if (result) {
                  label = t.getLabel();
                  break;
                } else {
                  label = null;
                }
              }
              if (result) {
                if (att.isNominal()) {
                  if (label == null) {
                    // nominal custom periodics are only created when
                    // every test has a label, so this is unreachable
                    System.err.println("This shouldn't happen!!");
                  } else {
                    inst.setValue(att, att.indexOfValue(label));
                  }
                } else {
                  // numeric binary attribute
                  inst.setValue(att, 1);
                }
              } else {
                if (att.isNominal()) {
                  inst.setMissing(att);
                } else {
                  inst.setValue(att, 0);
                }
              }
            }
          } else {
            System.err.println("WARNING: custom periodic att c_" + name
                + " not found in instances!");
          }
        }
      }
    }
  }
}
/**
 * Builds the successor map for the primary periodic attribute (each value
 * maps to the value that consistently follows it in the training sequence)
 * and lookup maps for any secondary periodic attributes whose values are
 * uniquely determined by the primary (e.g. month determines quarter). If no
 * consistent sequence exists the maps are left null.
 *
 * @param insts the training instances to scan
 */
protected void setupPeriodicMaps(Instances insts) {
  m_primaryPeriodicSequence = null;
  m_secondaryPeriodicLookups = null;
  if (m_primaryPeriodicName != null && m_primaryPeriodicName.length() > 0) {
    int primaryIndex = insts.attribute(m_primaryPeriodicName).index();
    if (primaryIndex < 0) {
      return;
    }
    m_primaryPeriodicSequence = new HashMap<String, String>();
    for (int i = 0; i < insts.numInstances() - 1; i++) {
      Instance current = insts.instance(i);
      Instance next = insts.instance(i + 1);
      if (!Utils.isMissingValue(current.value(primaryIndex))
          && !Utils.isMissingValue(next.value(primaryIndex))) {
        String key = current.stringValue(primaryIndex);
        String value = next.stringValue(primaryIndex);
        if (m_primaryPeriodicSequence.get(key) == null) {
          m_primaryPeriodicSequence.put(key, value);
        } else {
          // check to see if this value is consistent with
          // what we've seen previously
          String previous = m_primaryPeriodicSequence.get(key);
          if (!previous.equals(value)) {
            // we don't have a consistent sequence, so can't
            // use this as the main periodic sequence
            m_primaryPeriodicSequence = null;
            break;
          }
        }
      }
    }
    if (m_primaryPeriodicSequence != null) {
      // now look for any other nominal attributes that
      // might be secondary periodic sequences at a higher
      // granularity than the primary sequence
      m_secondaryPeriodicLookups = new HashMap<Attribute, Map<String, String>>();
      for (int i = 0; i < insts.numAttributes(); i++) {
        if (insts.attribute(i).isNominal() && i != primaryIndex) {
          Attribute candidate = insts.attribute(i);
          Map<String, String> candidateMap = new HashMap<String, String>();
          for (int j = 0; j < insts.numInstances(); j++) {
            Instance current = insts.instance(j);
            // fixed: originally tested Utils.isMissingValue(j) - the
            // loop index - instead of the candidate attribute's value
            if (!Utils.isMissingValue(current.value(primaryIndex))
                && !Utils.isMissingValue(current.value(i))) {
              String key = current.stringValue(primaryIndex);
              String value = current.stringValue(i);
              if (candidateMap.get(key) == null) {
                candidateMap.put(key, value);
              } else {
                // check to see if this value is consistent with what
                // we've seen previously
                String previous = candidateMap.get(key);
                if (!previous.equals(value)) {
                  // we need one unique value of the secondary to occur
                  // in conjunction for each primary (e.g. months of the year
                  // and quarters - each month is associated with only one
                  // quarter
                  // of the year)
                  candidateMap = null;
                  break;
                }
              }
            }
          }
          if (candidateMap != null) {
            m_secondaryPeriodicLookups.put(candidate, candidateMap);
          }
        }
      }
    }
  }
}
/**
 * Fills in the primary periodic attribute of the supplied instance with the
 * successor of the last historic value seen, and fills in any secondary
 * periodic attributes from the corresponding lookup maps. Values with no
 * known successor/lookup entry are set to missing.
 *
 * @param inst the instance to fill in
 * @throws Exception if the primary periodic attribute cannot be found
 */
private void setPeriodicValues(Instance inst) throws Exception {
  if (m_primaryPeriodicName == null || m_primaryPeriodicName.length() == 0) {
    return;
  }
  int primaryIndex = m_originalHeader.attribute(m_primaryPeriodicName)
      .index();
  if (primaryIndex < 0) {
    throw new Exception(
        "Can't find the primary periodic variable in the data!");
  }
  // no historic value to extrapolate from -> missing
  if (Utils.isMissingValue(m_lastHistoricInstance.value(primaryIndex))) {
    inst.setMissing(primaryIndex);
    return;
  }
  String lastSeen = m_lastHistoricInstance.stringValue(primaryIndex);
  String successor = m_primaryPeriodicSequence.get(lastSeen);
  if (successor == null) {
    // TODO
    // We can either set a missing value here if we don't have a successor
    // in the map
    // or we can look at the order that the values are declared in the
    // header for
    // the primary periodic sequence and assume that this order is
    // correct.
    inst.setMissing(primaryIndex);
    return;
  }
  inst.setValue(primaryIndex,
      m_originalHeader.attribute(primaryIndex).indexOfValue(successor));
  if (m_secondaryPeriodicLookups == null) {
    return;
  }
  // propagate the successor through any secondary periodic lookups
  for (int i = 0; i < m_originalHeader.numAttributes(); i++) {
    Attribute att = m_originalHeader.attribute(i);
    Map<String, String> lookup = m_secondaryPeriodicLookups.get(att);
    if (lookup == null) {
      continue;
    }
    String secondaryValue = lookup.get(successor);
    if (secondaryValue != null) {
      inst.setValue(i, att.indexOfValue(secondaryValue));
    } else {
      inst.setMissing(i);
    }
  }
}
/**
 * Removes attributes whose future values cannot be determined at forecast
 * time - i.e. everything that is not a lagged target, an overlay field, the
 * time stamp, the primary periodic attribute, or a secondary periodic
 * attribute derivable from the primary.
 *
 * @param insts the instances to filter
 * @return the instances with extraneous attributes removed (or the input
 *         unchanged if nothing needs removing)
 * @throws Exception if the Remove filter fails
 */
protected Instances removeExtraneousAttributes(Instances insts)
    throws Exception {
  int primaryIndex = -1;
  // StringBuilder instead of repeated String concatenation; appending
  // the separator up front also avoids the trailing-comma trim
  StringBuilder removeList = new StringBuilder();
  if (m_primaryPeriodicName != null && m_primaryPeriodicName.length() > 0) {
    primaryIndex = insts.attribute(m_primaryPeriodicName).index();
  }
  for (int i = 0; i < insts.numAttributes(); i++) {
    if (i == primaryIndex) {
      continue;
    }
    if (m_secondaryPeriodicLookups != null) {
      if (m_secondaryPeriodicLookups.containsKey(insts.attribute(i))) {
        continue;
      }
    }
    boolean target = false;
    for (String s : m_fieldsToLag) {
      if (insts.attribute(i).name().equals(s)) {
        target = true;
        break;
      }
    }
    if (target) {
      continue;
    }
    if (m_overlayFields != null) {
      boolean overlay = false;
      for (String s : m_overlayFields) {
        if (insts.attribute(i).name().equals(s)) {
          overlay = true;
          break;
        }
      }
      if (overlay) {
        continue;
      }
    }
    if (m_adjustForTrends && m_timeStampName != null
        && m_timeStampName.length() > 0) {
      if (i == insts.attribute(m_timeStampName).index()) {
        continue;
      }
    }
    // otherwise, this is some attribute that we are not predicting and
    // wont be able to determine the value for when forecasting future
    // instances. So we can't let the model use it.
    if (removeList.length() > 0) {
      removeList.append(',');
    }
    removeList.append(i + 1); // Remove uses 1-based indices
  }
  if (removeList.length() > 0) {
    m_extraneousAttributeRemover = new Remove();
    m_extraneousAttributeRemover.setAttributeIndices(removeList.toString());
    m_extraneousAttributeRemover.setInputFormat(insts);
    insts = Filter.useFilter(insts, m_extraneousAttributeRemover);
  }
  return insts;
}
/**
* Creates a transformed data set based on the user's settings
*
* @param insts the instances to transform
* @return a transformed data set
* @throws Exception if a problem occurs during the creation of lagged and
* auxiliary attributes.
*/
public Instances getTransformedData(Instances insts) throws Exception {
  m_originalHeader = new Instances(insts, 0);
  Instances result = insts;
  // remember the last raw instance - needed later for extrapolating
  // periodic attribute values when forecasting
  m_lastHistoricInstance = result.instance(result.numInstances() - 1);
  setupPeriodicMaps(result);
  result = removeExtraneousAttributes(insts);
  m_lastTimeValue = -1;
  if (m_adjustForTrends
      && (m_timeStampName == null || m_timeStampName.length() == 0 || insts
          .attribute(m_timeStampName) == null)) {
    // add an artificial time index. This will be problematic when
    // using the built model to forecast for future time points that do
    // not occur immediately after the last training event. Since the time
    // index is artificial, all we can do for future predictions is assume
    // that the n instances provided to the primeForecaster() method overlap
    // the last n instances of the training data and that future predictions
    // occur from the last known artificial time value + 1.
    m_artificialTimeMaker = new AddID();
    m_artificialTimeMaker.setAttributeName("ArtificialTimeIndex");
    m_artificialTimeMaker.setIDIndex("last");
    m_artificialTimeMaker.setInputFormat(result);
    result = Filter.useFilter(result, m_artificialTimeMaker);
    m_useArtificialTimeIndex = true;
    m_timeStampName = "ArtificialTimeIndex";
  } else {
    m_useArtificialTimeIndex = false;
  }
  if (m_adjustForTrends) {
    int timeStampIndex = result.attribute(m_timeStampName).index();
    m_lastTimeValue = result.instance(result.numInstances() - 1).value(
        timeStampIndex);
    // NOTE: the original code also fetched the last and second-to-last
    // instances into unused locals here; the latter threw an
    // IndexOutOfBoundsException on single-instance data, so both have
    // been removed.
    result = setupDerivedPeriodics(result);
    // remap timestamp if it is a date
    result = createDateTimestampRemap(result);
  }
  result = createVarianceAdjusters(result);
  result = createLags(result);
  result = createAveragedLags(result);
  result = createTimeIndexes(result);
  if (m_includeTimeLagCrossProducts) {
    result = createTimeLagCrossProducts(result);
  }
  // remove all instances with missing values at the
  // start of the series?
  if (m_deleteMissingFromStartOfSeries) {
    int start = 0;
    // the first m_maxLag instances can have missing lagged values;
    // count how many leading instances contain any missing value
    for (int i = 0; i <= m_maxLag; i++) {
      boolean ok = true;
      for (int j = 0; j < result.numAttributes(); j++) {
        if (result.instance(i).isMissing(j)) {
          ok = false;
          break;
        }
      }
      if (!ok) {
        start++;
      } else {
        break;
      }
    }
    System.err.println("******** Discarding " + start
        + " instances from the start.");
    result = new Instances(result, start, result.numInstances() - start);
  }
  return result;
}
/**
 * Convenience wrapper that processes an instance non-temporarily (lag
 * histories are updated). See the four-argument overload for details.
 *
 * @param source an instance in original format
 * @param incrementTime true if any time stamp value should be incremented
 * @param setAnyPeriodic true if any user-specified periodic value should be
 *          set based on the last instance seen
 * @return a transformed instance
 * @throws Exception if something goes wrong
 */
public Instance processInstance(Instance source, boolean incrementTime,
    boolean setAnyPeriodic) throws Exception {
  return processInstance(source, incrementTime, setAnyPeriodic, false);
}
/**
 * Convenience wrapper that processes an instance temporarily (i.e. as a
 * preview, without permanently altering the state of lag-making filters).
 * See the four-argument processInstance() overload for details.
 *
 * @param source an instance in original format
 * @param incrementTime true if any time stamp value should be incremented
 * @param setAnyPeriodic true if any user-specified periodic value should be
 *          set based on the last instance seen
 * @return a transformed instance
 * @throws Exception if something goes wrong
 */
public Instance processInstancePreview(Instance source,
    boolean incrementTime, boolean setAnyPeriodic) throws Exception {
  return processInstance(source, incrementTime, setAnyPeriodic, true);
}
/**
* Process an instance in the original format and produce a transformed
* instance as output. Assumes that the lag maker has been configured and
* initialized with a call to getTransformedDataset()
*
* @param source an instance in original format
* @param incrementTime true if any time stamp value should be incremented
* based on the time stamp value from the last instance seen and set
* in the outputted instance
* @param setAnyPeriodic true if any user-specified periodic value should be
* set in the transformed instance based on the value from the last
* instance seen.
* @return a transformed instance
* @throws Exception if something goes wrong.
*/
public Instance processInstance(Instance source, boolean incrementTime,
    boolean setAnyPeriodic, boolean temporary) throws Exception {
  // the incoming instance must match the structure we were trained with
  String message = null;
  if ((message = source.dataset().equalHeadersMsg(m_originalHeader)) != null) {
    throw new Exception("[TSLagMaker] cannot process instance because the "
        + "structure\ndiffers from what we were configured with:\n\n"
        + message);
  }
  Instance result = source;
  if (setAnyPeriodic) {
    setPeriodicValues(result);
  }
  // keep a copy as the most recent historic instance (used for
  // extrapolating periodic values on subsequent calls)
  m_lastHistoricInstance = new DenseInstance(result);
  m_lastHistoricInstance.setDataset(result.dataset());
  // drop attributes the model was not trained on
  if (m_extraneousAttributeRemover != null) {
    m_extraneousAttributeRemover.input(result);
    result = m_extraneousAttributeRemover.output();
  }
  if (m_artificialTimeMaker != null) {
    // artificial time index path
    m_artificialTimeMaker.input(result);
    result = m_artificialTimeMaker.output();
    // set the correct value here - it can't be done after the fact because
    // of other filters that create the product of time and something else.
    if (incrementTime) {
      double newTime = m_lastTimeValue + 1;
      int timeIndex = result.dataset().attribute(m_timeStampName).index();
      result.setValue(timeIndex, newTime);
      m_lastTimeValue = newTime;
    }
  } else {
    // if we have a genuine time stamp field then make sure
    // that we keep track of the most recent time value
    if (m_adjustForTrends) {
      int timeIndex = result.dataset().attribute(m_timeStampName).index();
      if (incrementTime) {
        // advance one time unit according to the detected periodicity
        double newTime = weka.classifiers.timeseries.core.Utils
            .advanceSuppliedTimeValue(m_lastTimeValue, m_dateBasedPeriodicity);
        result.setValue(timeIndex, newTime);
        // NOTE(review): m_lastTimeValue is updated even when temporary
        // is true - confirm whether preview calls should leave it
        // untouched (the commented-out guard suggests this was
        // considered)
        // if (!temporary) {
        m_lastTimeValue = newTime;
        // }
      } else {
        // if (!temporary) {
        // if we have a value, just store it
        if (!result.isMissing(timeIndex)) {
          m_lastTimeValue = result.value(timeIndex);
        }/*
          * else { System.err.println("*****WARNING missing time..."); }
          */
        // }
      }
      // set any derived periodic values
      if (m_derivedPeriodicMakers != null
          && m_derivedPeriodicMakers.size() > 0) {
        for (Filter f : m_derivedPeriodicMakers) {
          f.input(result);
          result = f.output();
        }
        setDerivedPeriodicValues(result);
      }
      // remap the timestamp if necessary
      result = remapDateTimeStamp(result);
    }
  }
  // push the instance through the remaining filter pipeline in the same
  // order used at training time: variance adjust -> lags -> averaged
  // lags -> time indexes -> time/lag cross products
  if (m_adjustForVariance) {
    for (Filter f : m_varianceAdjusters) {
      f.input(result);
      result = f.output();
    }
  }
  for (Filter f : m_lagMakers) {
    // temporary processing must not permanently alter the lag filters'
    // internal histories
    if (temporary && f instanceof TimeSeriesTranslate) {
      result = ((TimeSeriesTranslate) f).inputOneTemporarily(result);
    } else {
      f.input(result);
      result = f.output();
    }
  }
  if (m_averagedLagMakers != null) {
    for (Filter f : m_averagedLagMakers) {
      f.input(result);
      result = f.output();
    }
  }
  if (m_timeIndexMakers != null) {
    for (Filter f : m_timeIndexMakers) {
      f.input(result);
      result = f.output();
    }
  }
  if (m_includeTimeLagCrossProducts && m_timeLagCrossProductMakers != null) {
    for (Filter f : m_timeLagCrossProductMakers) {
      f.input(result);
      result = f.output();
    }
  }
  return result;
}
/**
* Clears any history accumulated in the lag creating filters.
*
* @throws Exception if something goes wrong.
*/
public void clearLagHistories() throws Exception {
  // signal end-of-batch to every stateful filter so accumulated lag
  // histories are discarded
  if (m_artificialTimeMaker != null) {
    m_artificialTimeMaker.batchFinished();
  }
  finishBatch(m_lagMakers);
  if (m_averagedLagMakers != null) {
    finishBatch(m_averagedLagMakers);
  }
  if (m_timeIndexMakers != null) {
    finishBatch(m_timeIndexMakers);
  }
  if (m_includeTimeLagCrossProducts && m_timeLagCrossProductMakers != null) {
    finishBatch(m_timeLagCrossProductMakers);
  }
}

/**
 * Calls batchFinished() on every filter in the supplied collection.
 *
 * @param filters the filters to reset
 * @throws Exception if a filter fails to finish its batch
 */
private void finishBatch(Iterable<Filter> filters) throws Exception {
  for (Filter f : filters) {
    f.batchFinished();
  }
}
/**
* Utility method to advance a supplied time value by one unit according to
* the periodicity set for this LagMaker.
*
* @param valueToAdvance the time value to advance
* @return the advanced value or the original value if this lag maker is not
* adjusting for trends
*/
public double advanceSuppliedTimeValue(double valueToAdvance) {
  // delegate to the shared utility using this lag maker's periodicity
  double advanced = weka.classifiers.timeseries.core.Utils
      .advanceSuppliedTimeValue(valueToAdvance, m_dateBasedPeriodicity);
  return advanced;
}
/**
 * Utility method to decrement a supplied time value by one unit according to
 * the periodicity set for this LagMaker.
 *
 * @param valueToDecrement the time value to decrement
 * @return the decremented value
 */
public double decrementSuppliedTimeValue(double valueToDecrement) {
  // delegate to the shared utility using this lag maker's periodicity
  double decremented = weka.classifiers.timeseries.core.Utils
      .decrementSuppliedTimeValue(valueToDecrement, m_dateBasedPeriodicity);
  return decremented;
}
}