/*
* Copyright: Anthony Bagnall
* NOTE: As shapelet extraction can be time consuming, there is an option to output shapelets
* to a text file (Default location is in the root dir of the project, file name "defaultShapeletOutput.txt").
*
* Default settings are TO NOT PRODUCE OUTPUT FILE - unless file name is changed, each successive filter will
* overwrite the output (see "setLogOutputFile(String fileName)" to change file dir and name).
*
* To reconstruct a filter from this output, please see the method "createFilterFromFile(String fileName)".
*/
package weka.filters.timeseries.shapelet_transforms;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.ListIterator;
import java.util.Scanner;
import java.util.TreeMap;
import weka.core.*;
import weka.core.shapelet.*;
/**
* A filter to transform a dataset by k shapelets. Once built on a training set,
* the filter can be used to transform subsequent datasets using the extracted
* shapelets.
* <p>
* See <a
* href="https://doi.org/10.1145/2339530.2339666">Lines,
* J., Davis, L., Hills, J., Bagnall, A.: A shapelet transform for time series
* classification. In: Proc. 18th ACM SIGKDD (2012)</a>
*
* @author Jason Lines
*/
public class FullShapeletTransform2 extends FullShapeletTransform
{
// when true, Instance.toDoubleArray() conversions are memoised per position (see getToDoubleArrayOfInstance)
protected boolean cacheDoubleArrays = false;
// lazily-filled cache of each instance's double[] form; re-allocated on every call to process()
protected double[][] cachedDoubleArray;
//Variables for experiments
// running count of fundamental operations performed in subsequenceDistance; static and unsynchronised,
// so it is only meaningful for single-threaded experiments
protected static long subseqDistOpCount;
// class value -> frequency of the training data; set in findBestKShapeletsCache, read by quality measures
protected TreeMap<Double, Integer> classDistributions;
/**
 * Default constructor; k, min and max length default to the class constants,
 * and the quality measure defaults to information gain.
 */
public FullShapeletTransform2()
{
this(DEFAULT_NUMSHAPELETS, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
}
/**
 * Constructor for generating a shapelet transform from an ArrayList of
 * Shapelets. The filter is marked as already trained, so process() will
 * transform data without extracting new shapelets.
 *
 * @param shapes the pre-computed shapelets to transform with (stored by
 * reference, not copied)
 */
public FullShapeletTransform2(ArrayList<Shapelet> shapes)
{
this();
this.shapelets = shapes;
this.shapeletsTrained = true;
this.numShapelets = shapelets.size();
}
/**
 * Single param constructor: min/max lengths and quality measure default to
 * the class constants and information gain respectively.
 *
 * @param k the number of shapelets to be generated
 */
public FullShapeletTransform2(int k)
{
this(k, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
}
/**
 * Full constructor to create a usable filter. Quality measure defaults to
 * information gain.
 *
 * @param k the number of shapelets to be generated
 * @param minShapeletLength minimum length of shapelets
 * @param maxShapeletLength maximum length of shapelets
 */
public FullShapeletTransform2(int k, int minShapeletLength, int maxShapeletLength)
{
this(k, minShapeletLength, maxShapeletLength, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
}
/**
 * Full, exhaustive, constructor for a filter. Quality measure set via enum;
 * per the contract of setQualityMeasure, an invalid selection defaults to
 * information gain.
 *
 * @param k the number of shapelets to be generated
 * @param minShapeletLength minimum length of shapelets
 * @param maxShapeletLength maximum length of shapelets
 * @param qualityChoice the shapelet quality measure to be used with this
 * filter
 */
public FullShapeletTransform2(int k, int minShapeletLength, int maxShapeletLength, weka.core.shapelet.QualityMeasures.ShapeletQualityChoice qualityChoice)
{
this.minShapeletLength = minShapeletLength;
this.maxShapeletLength = maxShapeletLength;
this.numShapelets = k;
this.shapelets = new ArrayList<>();
this.shapeletsTrained = false;
this.useCandidatePruning = false;
this.qualityChoice = qualityChoice;
setQualityMeasure(qualityChoice);
}
/**
 * Enables or disables candidate pruning. Enabling it also drops the pruning
 * start percentage to 10; disabling it pushes the start percentage to 100 so
 * pruning never kicks in.
 *
 * @param f true to turn candidate pruning on, false to turn it off
 */
@Override
public void setCandidatePruning(boolean f)
{
    useCandidatePruning = f;
    if (f)
    {
        candidatePruningStartPercentage = 10;
    }
    else
    {
        candidatePruningStartPercentage = 100;
    }
}
/**
 * Sets the format of the filtered instances that are output, i.e. one numeric
 * attribute per extracted shapelet (the distance to it) plus, when the input
 * has a class attribute, a copy of that class attribute as the final attribute.
 * <p>
 * Replaces the deprecated FastVector API with ArrayList-based Attribute and
 * Instances constructors (available since weka 3.7, which this file already
 * requires via DenseInstance).
 *
 * @param inputFormat the format of the input data
 * @return a new Instances object in the desired output format
 * @throws IllegalArgumentException if the filter has not been given a valid k
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws IllegalArgumentException
{
    if (this.numShapelets < 1)
    {
        throw new IllegalArgumentException("ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1");
    }
    // one numeric attribute per shapelet actually found (may be < numShapelets)
    int length = this.shapelets.size();
    ArrayList<Attribute> atts = new ArrayList<>(length + 1);
    for (int i = 0; i < length; i++)
    {
        atts.add(new Attribute("Shapelet_" + i));
    }
    if (inputFormat.classIndex() >= 0)
    {
        // classification set: copy the nominal class attribute's values verbatim
        Attribute target = inputFormat.attribute(inputFormat.classIndex());
        ArrayList<String> vals = new ArrayList<>(target.numValues());
        for (int i = 0; i < target.numValues(); i++)
        {
            vals.add(target.value(i));
        }
        atts.add(new Attribute(target.name(), vals));
    }
    Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances());
    if (inputFormat.classIndex() >= 0)
    {
        result.setClassIndex(result.numAttributes() - 1);
    }
    return result;
}
/**
 * Validates filter configuration against the supplied data before processing.
 * Checks that k has been set and that the min/max shapelet lengths are sane
 * and fit inside a series (excluding the class attribute, if present).
 *
 * @param dataInst the data about to be processed
 * @throws IllegalArgumentException if k or the length parameters are invalid
 */
protected void inputCheck(Instances dataInst) throws IllegalArgumentException
{
    if (numShapelets < 1)
    {
        // message previously had an unclosed parenthesis
        throw new IllegalArgumentException("Number of shapelets initialised incorrectly - please select value of k (Usage: setNumberOfShapelets)");
    }
    // the longest candidate cannot exceed the series length, minus the class attribute when one exists
    int maxPossibleLength = dataInst.instance(0).numAttributes();
    if (dataInst.classIndex() >= 0)
    {
        maxPossibleLength -= 1;
    }
    if (minShapeletLength < 1 || maxShapeletLength < 1 || maxShapeletLength < minShapeletLength || maxShapeletLength > maxPossibleLength)
    {
        throw new IllegalArgumentException("Shapelet length parameters initialised incorrectly");
    }
}
/**
 * Transforms the given Instances using k shapelets. On the first call the
 * shapelets are extracted from {@code data}; every subsequent call reuses the
 * stored shapelets, so a filter trained on a training set can transform a
 * test set without re-extracting (and therefore without bias).
 * <p>
 * Intended use: (1) call once on training data to extract shapelets and
 * obtain the transformed training set; (2) call again on test data to obtain
 * the transformed test set; (3) build a classifier on the transformed data.
 *
 * @param data the input data to be transformed (and to find the shapelets
 * from, if this is the first run)
 * @return the transformed representation of data: the distance from each
 * instance to each of the k shapelets, plus the class value
 * @throws IllegalArgumentException if the filter is misconfigured for data
 */
@Override
public Instances process(Instances data) throws IllegalArgumentException
{
    // validate configuration against this particular data set
    inputCheck(data);
    // re-allocate the cache per call so a later (e.g. test) set never sees stale arrays
    if (cacheDoubleArrays)
    {
        cachedDoubleArray = new double[data.numInstances()][];
    }
    // extract shapelets only once, on the first data set seen
    if (!shapeletsTrained)
    {
        trainShapelets(data);
    }
    // transform using whichever shapelets are now stored
    return buildTansformedDataset(data);
}
/**
 * Extracts the k best shapelets from the (training) data. Optionally reorders
 * the data round-robin by class first, remembering the original positions in
 * dataSourceIDs so that both the data order and the shapelets' series indices
 * can be restored afterwards.
 *
 * @param data the training data to extract shapelets from
 */
protected void trainShapelets(Instances data)
{
    // record where each series originally sat so indices can be mapped back later
    dataSourceIDs = new int[data.numInstances()];
    if (roundRobin)
    {
        // interleave the data by class
        data = roundRobinData(data, dataSourceIDs);
    }
    else
    {
        // identity mapping: nothing was moved
        for (int pos = 0; pos < dataSourceIDs.length; pos++)
        {
            dataSourceIDs[pos] = pos;
        }
    }
    shapelets = findBestKShapeletsCache(data); // get k shapelets
    shapeletsTrained = true;
    outputPrint(shapelets.size() + " Shapelets have been generated");
    // undo the round-robin reordering on both the data and the shapelet indices
    if (roundRobin)
    {
        resetDataOrder(data, dataSourceIDs);
        resetShapeletIndices(shapelets, dataSourceIDs);
    }
}
/**
 * Builds the transformed data set: for every series, one attribute per stored
 * shapelet holding the subsequence distance to it, followed by the series'
 * class value.
 *
 * @param data the data to transform with the stored shapelets
 * @return the transformed Instances, in the format of determineOutputFormat
 */
protected Instances buildTansformedDataset(Instances data)
{
    Instances output = determineOutputFormat(data);
    int numSeries = data.numInstances();
    int numShapeletAtts = shapelets.size();
    for (int row = 0; row < numSeries; row++)
    {
        Instance transformed = new DenseInstance(numShapeletAtts + 1);
        int att = 0;
        // distance from this series to every shapelet, in stored order
        for (Shapelet s : shapelets)
        {
            double dist = subsequenceDistance(s.content, getToDoubleArrayOfInstance(data, row));
            transformed.setValue(att, dist);
            att++;
        }
        // final attribute carries the class value across unchanged
        transformed.setValue(numShapeletAtts, data.instance(row).classValue());
        output.add(transformed);
    }
    return output;
}
/**
 * Extracts the k best shapelets from the data: for each series, all candidate
 * shapelets are generated and assessed, sorted by quality (separation gap
 * order if enabled, otherwise the default reverse quality order), pruned of
 * self-similar entries, and merged into the running best-k list.
 * <p>
 * Side effects: sets {@code classDistributions}, overwrites
 * {@code this.numShapelets} with the number actually found, and writes the
 * shapelet log/console output.
 *
 * @param data the data that the shapelets will be taken from
 * @return an ArrayList of Shapelet objects in order of their fitness (by
 * quality, separationGap, then shortest length)
 */
public ArrayList<Shapelet> findBestKShapeletsCache(Instances data)
{
    ArrayList<Shapelet> kShapelets = new ArrayList<>();
    classDistributions = getClassDistributions(data); // used to calc info gain
    outputPrint("Processing data: ");
    int dataSize = data.numInstances();
    // for all possible time series
    for (int i = 0; i < dataSize; i++)
    {
        outputPrint("data : " + i);
        double[] wholeCandidate = getToDoubleArrayOfInstance(data, i);
        ArrayList<Shapelet> seriesShapelets = findShapeletCandidates(data, i, wholeCandidate, kShapelets);
        // typed comparator instead of the previous raw Comparator
        Comparator<Shapelet> comp = useSeparationGap ? new Shapelet.ReverseSeparationGap() : new Shapelet.ReverseOrder();
        Collections.sort(seriesShapelets, comp);
        // drop overlapping shapelets from the same series before merging
        seriesShapelets = removeSelfSimilar(seriesShapelets);
        kShapelets = combine(numShapelets, kShapelets, seriesShapelets);
    }
    this.numShapelets = kShapelets.size();
    recordShapelets(kShapelets);
    printShapelets(kShapelets);
    return kShapelets;
}
/**
 * Generates and assesses every candidate shapelet of series i: all lengths in
 * [minShapeletLength, maxShapeletLength], all start positions that keep the
 * window clear of the trailing class value. Each candidate is z-normalised
 * once, optionally given a quality bound seeded from the current worst of the
 * best-k, and kept if checkCandidate does not prune it.
 *
 * @param data the full data set, for distance comparisons
 * @param i the index of the series candidates are drawn from
 * @param wholeCandidate the series as a double[] (including class value)
 * @param kShapelets the current best-k, used to seed the quality bound
 * @return all unpruned candidate shapelets of series i
 */
protected ArrayList<Shapelet> findShapeletCandidates(Instances data, int i, double[] wholeCandidate, ArrayList<Shapelet> kShapelets)
{
    ArrayList<Shapelet> seriesShapelets = new ArrayList<>();
    for (int len = minShapeletLength; len <= maxShapeletLength; len++)
    {
        // last admissible start keeps the window off the final (class value) element
        int lastStart = wholeCandidate.length - len - 1;
        double[] window = new double[len];
        for (int start = 0; start <= lastStart; start++)
        {
            // slice out the candidate and normalise it once, rather than per distance call
            System.arraycopy(wholeCandidate, start, window, 0, len);
            window = zNorm(window, false);
            // fresh bounding state for this candidate
            QualityBound.ShapeletQualityBound bound = initializeQualityBound(classDistributions);
            // once best-k is full, the k-th quality becomes the bar to beat
            if (bound != null && kShapelets.size() == numShapelets)
            {
                bound.setBsfQuality(kShapelets.get(numShapelets - 1).qualityValue);
            }
            Shapelet assessed = checkCandidate(window, data, i, start, bound);
            if (assessed != null)
            {
                seriesShapelets.add(assessed);
            }
        }
    }
    return seriesShapelets;
}
/**
 * Writes the given shapelets to the configured log file (two lines per
 * shapelet: "quality,seriesId,startPos" then the comma-separated content),
 * overwriting any existing file. Does nothing unless logging is enabled.
 * <p>
 * Fixes: writer is now closed even when a write fails (try-with-resources),
 * and a file with no parent directory no longer NPEs.
 *
 * @param kShapelets the shapelets to record
 */
protected void recordShapelets(ArrayList<Shapelet> kShapelets)
{
    if (!this.recordShapelets)
    {
        return;
    }
    File file = new File(this.ouputFileLocation);
    // create missing directories; getParentFile() is null for bare file names
    File parent = file.getParentFile();
    if (parent != null)
    {
        parent.mkdirs();
    }
    try (FileWriter out = new FileWriter(file))
    {
        for (Shapelet kShapelet : kShapelets)
        {
            out.append(kShapelet.qualityValue + "," + kShapelet.seriesId + "," + kShapelet.startPos + "\n");
            double[] shapeletContent = kShapelet.content;
            for (int j = 0; j < shapeletContent.length; j++)
            {
                out.append(shapeletContent[j] + ",");
            }
            out.append("\n");
        }
    }
    catch (IOException ex)
    {
        // best-effort logging: report and carry on, as before
        System.out.println("IOException: " + ex);
    }
}
/**
 * Prints the given shapelets to standard output in the same two-line-per-
 * shapelet format as the log file, preceded by a small header. Silent when
 * output suppression is on.
 *
 * @param kShapelets the shapelets to print
 */
protected void printShapelets(ArrayList<Shapelet> kShapelets)
{
    if (supressOutput)
    {
        return;
    }
    System.out.println();
    System.out.println("Output Shapelets:");
    System.out.println("-------------------");
    System.out.println("informationGain,seriesId,startPos");
    System.out.println("<shapelet>");
    System.out.println("-------------------");
    System.out.println();
    for (Shapelet s : kShapelets)
    {
        System.out.println(s.qualityValue + "," + s.seriesId + "," + s.startPos);
        for (double value : s.content)
        {
            System.out.print(value + ",");
        }
        System.out.println();
    }
}
/**
 * Combines two ArrayList collections of Shapelet objects, keeping at most the
 * k best by the filter's comparator (separation gap order when enabled).
 * <p>
 * NOTE: as before, {@code kBestSoFar} is mutated in place (the new shapelets
 * are merged into it and it is sorted) and may be the returned list when
 * fewer than k shapelets exist.
 *
 * @param k the maximum number of shapelets to be returned after combining
 * @param kBestSoFar the (up to) k best shapelets observed so far
 * @param timeSeriesShapelets shapelets from a new series to merge in
 * @return an ordered ArrayList of the best k (or fewer) shapelets from the
 * union of the inputs
 */
@Override
protected ArrayList<Shapelet> combine(int k, ArrayList<Shapelet> kBestSoFar, ArrayList<Shapelet> timeSeriesShapelets)
{
    kBestSoFar.addAll(timeSeriesShapelets);
    // typed comparator instead of the previous raw Comparator
    Comparator<Shapelet> comp = useSeparationGap ? new Shapelet.ReverseSeparationGap() : new Shapelet.ReverseOrder();
    Collections.sort(kBestSoFar, comp);
    if (kBestSoFar.size() < k)
    {
        // fewer than k shapelets exist so far; return them all
        return kBestSoFar;
    }
    // copy of the top-k view, replacing the previous element-by-element loop
    return new ArrayList<>(kBestSoFar.subList(0, k));
}
/**
 * Returns the double[] form of instance {@code pos}, using the per-position
 * cache when caching is enabled (filled lazily on first access).
 *
 * @param data the data set the instance belongs to
 * @param pos the instance's index
 * @return the instance as a double[] (cached array when caching is on)
 */
protected double[] getToDoubleArrayOfInstance(Instances data, int pos)
{
    if (cacheDoubleArrays)
    {
        double[] cached = cachedDoubleArray[pos];
        if (cached == null)
        {
            cached = data.get(pos).toDoubleArray();
            cachedDoubleArray[pos] = cached;
        }
        return cached;
    }
    return data.get(pos).toDoubleArray();
}
/**
 * Removes self-similar shapelets (same source series with overlapping index
 * ranges) from the list. Because the input is sorted best-first, the earlier
 * (better) shapelet always survives and later overlapping ones are dropped.
 *
 * @param shapelets quality-ordered shapelets to prune
 * @return a new ArrayList containing only the non-self-similar shapelets
 */
protected static ArrayList<Shapelet> removeSelfSimilar(ArrayList<Shapelet> shapelets)
{
    // build a fresh pruned list rather than removing in place and reindexing
    ArrayList<Shapelet> kept = new ArrayList<>();
    int n = shapelets.size();
    boolean[] discarded = new boolean[n];
    for (int i = 0; i < n; i++)
    {
        if (discarded[i])
        {
            continue;
        }
        Shapelet keeper = shapelets.get(i);
        kept.add(keeper);
        // mark every later, not-yet-discarded shapelet that overlaps the keeper
        for (int j = i + 1; j < n; j++)
        {
            if (!discarded[j] && selfSimilarity(keeper, shapelets.get(j)))
            {
                discarded[j] = true;
            }
        }
    }
    return kept;
}
/**
 * Computes the class distribution of a data set, mainly for use by the
 * shapelet quality measures.
 *
 * @param data the input data set to derive the distribution from
 * @return a TreeMap of class value to frequency
 */
public static TreeMap<Double, Integer> getClassDistributions(Instances data)
{
    TreeMap<Double, Integer> classDistribution = new TreeMap<>();
    int n = data.numInstances();
    for (int i = 0; i < n; i++)
    {
        double classValue = data.instance(i).classValue();
        Integer count = classDistribution.get(classValue);
        classDistribution.put(classValue, count == null ? 1 : count + 1);
    }
    return classDistribution;
}
/**
 * protected method to check a candidate shapelet. Functions by passing in
 * the raw data, and returning an assessed Shapelet object.
 *
 * @param candidate the data from the candidate FullShapeletTransform
 * @param data the entire data set to compare the candidate to
 * @param seriesId series id from the dataset that the candidate came from
 * @param startPos start position in the series where the candidate came
 * from
 * @param qualityBound bounding object used to abandon hopeless candidates
 * early; may be null, in which case no pruning occurs
 * @return a fully-computed FullShapeletTransform, including the quality of
 * this candidate, or null if the candidate was pruned
 */
protected Shapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos, QualityBound.ShapeletQualityBound qualityBound)
{
// create orderline by looping through data set and calculating the subsequence
// distance from candidate to all data, inserting in order.
ArrayList<OrderLineObj> orderline = new ArrayList<>();
boolean pruned = false;
int dataSize = data.numInstances();
for (int i = 0; i < dataSize; i++)
{
//Check if it is possible to prune the candidate
// (checked BEFORE each distance computation, so a bound can stop mid-orderline)
if (qualityBound != null && qualityBound.pruneCandidate())
{
pruned = true;
break;
}
double distance = 0.0;
//don't compare the shapelet to the the time series it came from.
// (its own entry stays on the orderline with distance 0)
if (i != seriesId)
{
distance = subsequenceDistance(candidate, getToDoubleArrayOfInstance(data, i));
}
double classVal = data.instance(i).classValue();
// without early abandon, it is faster to just add and sort at the end
orderline.add(new OrderLineObj(distance, classVal));
//Update qualityBound - presumably each bounding method for different quality measures will have a different update procedure.
if (qualityBound != null)
{
qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
}
}
// note: early abandon entropy pruning would appear here, but has been ommitted
// in favour of a clear multi-class information gain calculation. Could be added in
// this method in the future for speed up, but distance early abandon is more important
// If shapelet is pruned then it should no longer be considered in further processing
if (!pruned)
{
// create a shapelet object to store all necessary info, i.e.
// content, ORIGINAL series index (via dataSourceIDs), start and quality measure
Shapelet shapelet = new Shapelet(candidate, dataSourceIDs[seriesId], startPos, this.qualityMeasure);
shapelet.calculateQuality(orderline, classDistributions);
return shapelet;
}
return null;
}
/**
 * Computes the information gain of every attribute of a (transformed) data
 * set bar the class attribute, by building an orderline of attribute value
 * versus class value for each attribute.
 * <p>
 * The class distribution map is now fully parameterised (previously a raw
 * TreeMap, which relied on an unchecked conversion).
 *
 * @param trans the (transformed) data set to assess
 * @return information gain per non-class attribute, in attribute order
 */
public static double[] getInfoGain(Instances trans)
{
    double[] quals = new double[trans.numAttributes() - 1];
    // class distribution is shared across all attributes
    TreeMap<Double, Integer> map = getClassDistributions(trans);
    for (int i = 0; i < quals.length; i++)
    {
        // orderline: this attribute's value paired with the class of each instance
        ArrayList<OrderLineObj> orderline = new ArrayList<>();
        double[] dists = trans.attributeToDoubleArray(i);
        for (int j = 0; j < dists.length; j++)
        {
            orderline.add(new OrderLineObj(dists[j], trans.instance(j).classValue()));
        }
        QualityMeasures.InformationGain ig = new QualityMeasures.InformationGain();
        quals[i] = ig.calculateQuality(orderline, map);
    }
    return quals;
}
/**
 * Calculate the distance between a candidate series and an Instance object.
 * Thin instance-level wrapper around the static subsequenceDistance; converts
 * the Instance with toDoubleArray() (no caching) before delegating.
 *
 * @param candidate a double[] representation of a shapelet candidate
 * @param timeSeriesIns an Instance object of a whole time series
 * @return the distance between a candidate and a time series
 */
@Override
protected double subseqDistance(double[] candidate, Instance timeSeriesIns)
{
return subsequenceDistance(candidate, timeSeriesIns.toDoubleArray());
}
/**
 * Calculate the distance between a shapelet candidate and a full time
 * series (both double[]). Slides a window of the candidate's length over the
 * series, z-normalises each window, and returns the minimum squared Euclidean
 * distance divided by the candidate length. The candidate is assumed to be
 * z-normalised already (windows are normalised here; the candidate is not).
 * <p>
 * NOTE(review): the loop bound excludes the series' final element — consistent
 * with the rest of this file, which appends the class value to the series;
 * confirm before calling with a series that has no class value, as the last
 * window would then be skipped.
 *
 * @param candidate a double[] representation of a shapelet candidate
 * @param timeSeries a double[] representation of a whole time series (inc.
 * class value)
 * @return the distance between a candidate and a time series
 */
public static double subsequenceDistance(double[] candidate, double[] timeSeries)
{
double bestSum = Double.MAX_VALUE;
double sum;
double[] subseq;
double temp;
// for all possible subsequences of two
for (int i = 0; i < timeSeries.length - candidate.length; i++)
{
sum = 0;
// get subsequence of two that is the same lengh as one
subseq = new double[candidate.length];
System.arraycopy(timeSeries, i, subseq, 0, candidate.length);
// instrumentation: count the copy as one op per element
subseqDistOpCount+=candidate.length;
subseq = zNormalise(subseq, false); // Z-NORM HERE
//Keep count of fundamental ops for experiment
subseqDistOpCount += 3 * subseq.length;
for (int j = 0; j < candidate.length; j++)
{
temp = (candidate[j] - subseq[j]);
sum += temp * temp;
}
// instrumentation: one op per element for the distance accumulation
subseqDistOpCount+=candidate.length;
if (sum < bestSum)
{
bestSum = sum;
}
}
// length-normalised distance; the 0.0 shortcut just avoids the division
return (bestSum == 0.0) ? 0.0 : (1.0 / candidate.length * bestSum);
}
/**
 * Instance-level z-normalisation hook; delegates to the static zNormalise.
 *
 * @param input the series to normalise
 * @param classValOn true if the final element is a class value and must be
 * carried across unnormalised
 * @return a z-normalised copy of input
 */
@Override
protected double[] zNorm(double[] input, boolean classValOn)
{
return FullShapeletTransform2.zNormalise(input, classValOn);
}
/**
 * Z-normalises a time series: subtracts the mean and divides by the
 * population standard deviation. A series whose variance falls below
 * ROUNDING_ERROR_CORRECTION is treated as constant and mapped to all zeros.
 * When {@code classValOn} is true the final element is excluded from the
 * statistics and copied across untouched.
 *
 * @param input the input time series to be z-normalised
 * @param classValOn whether the series' last element is a class value
 * @return a z-normalised copy of input (the input array is not modified)
 */
public static double[] zNormalise(double[] input, boolean classValOn)
{
    int tail = classValOn ? 1 : 0;
    int n = input.length - tail;
    double[] normalised = new double[input.length];
    // mean over the data portion only
    double total = 0;
    for (int i = 0; i < n; i++)
    {
        total += input[i];
    }
    double mean = total / (double) n;
    // population variance over the data portion
    double variance = 0;
    for (int i = 0; i < n; i++)
    {
        double diff = input[i] - mean;
        variance += diff * diff;
    }
    variance /= (double) n;
    // below the rounding-error threshold the series counts as constant (stdv 0)
    double stdv = (variance < ROUNDING_ERROR_CORRECTION) ? 0.0 : Math.sqrt(variance);
    for (int i = 0; i < n; i++)
    {
        normalised[i] = (stdv == 0.0) ? 0.0 : ((input[i] - mean) / stdv);
    }
    if (classValOn)
    {
        // carry the class value through unchanged
        normalised[normalised.length - 1] = input[input.length - 1];
    }
    return normalised;
}
/**
 * Loads a set of Instances from an ARFF file and sets the last attribute as
 * the class. Returns null (after printing the error) if reading fails.
 * <p>
 * Fix: the FileReader is now closed via try-with-resources; previously it
 * leaked on both the success and failure paths.
 *
 * @param fileName the file name of the ARFF
 * @return a set of Instances from the ARFF, or null on I/O error
 */
public static Instances loadData(String fileName)
{
    Instances data = null;
    try (FileReader r = new FileReader(fileName))
    {
        data = new Instances(r);
        // convention throughout this file: class is the final attribute
        data.setClassIndex(data.numAttributes() - 1);
    }
    catch (IOException e)
    {
        System.out.println(" Error =" + e + " in method loadData");
    }
    return data;
}
/**
 * Decides whether two shapelets are self-similar, i.e. drawn from the same
 * time series with overlapping index ranges.
 *
 * @param shapelet the first shapelet (in practice, the dominant one with
 * quality >= candidate)
 * @param candidate the second shapelet
 * @return true if both come from the same series and their ranges overlap
 */
private static boolean selfSimilarity(Shapelet shapelet, Shapelet candidate)
{
    if (candidate.seriesId != shapelet.seriesId)
    {
        return false;
    }
    // overlap exists iff either shapelet starts inside the other's range
    boolean candidateStartsInside = candidate.startPos >= shapelet.startPos
            && candidate.startPos < shapelet.startPos + shapelet.content.length;
    boolean shapeletStartsInside = shapelet.startPos >= candidate.startPos
            && shapelet.startPos < candidate.startPos + candidate.content.length;
    return candidateStartsInside || shapeletStartsInside;
}
/**
 * A method to read in a FullShapeletTransform log file to reproduce a
 * FullShapeletTransform. Convenience overload that loads every shapelet in
 * the file (delegates with Integer.MAX_VALUE as the cap).
 * <p>
 * NOTE: assumes shapelets from log are Z-NORMALISED
 *
 * @param fileName the name and path of the log file
 * @return a duplicate FullShapeletTransform to the object that created the
 * original log file
 * @throws Exception if the file cannot be read or parsed
 */
public static FullShapeletTransform createFilterFromFile(String fileName) throws Exception
{
return createFilterFromFile(fileName, Integer.MAX_VALUE);
}
/**
 * Returns the lengths of the shapelets found by this transform, in stored
 * order. Empty if the filter has not been trained yet.
 *
 * @return an ArrayList of Integers representing the shapelet lengths
 */
@Override
public ArrayList<Integer> getShapeletLengths()
{
    ArrayList<Integer> lengths = new ArrayList<>();
    if (!this.shapeletsTrained)
    {
        return lengths;
    }
    for (Shapelet s : this.shapelets)
    {
        lengths.add(s.content.length);
    }
    return lengths;
}
/**
 * A method to read in a FullShapeletTransform log file to reproduce a
 * FullShapeletTransform, capped at {@code maxShapelets} shapelets. Each
 * shapelet occupies two lines: "quality,seriesId,startPos" then the
 * comma-separated content, which is re-z-normalised on load.
 * <p>
 * NOTE: assumes shapelets from log are Z-NORMALISED.
 * Fix: all three Scanners are now closed (try-with-resources); previously
 * they all leaked.
 *
 * @param fileName the name and path of the log file
 * @param maxShapelets maximum number of shapelets to load
 * @return a duplicate FullShapeletTransform to the object that created the
 * original log file
 * @throws Exception if the file cannot be read or parsed
 */
public static FullShapeletTransform createFilterFromFile(String fileName, int maxShapelets) throws Exception
{
    FullShapeletTransform sf = new FullShapeletTransform();
    ArrayList<Shapelet> shapelets = new ArrayList<>();
    try (Scanner scan = new Scanner(new File(fileName)))
    {
        scan.useDelimiter("\n");
        int shapeletCount = 0;
        while (shapeletCount < maxShapelets && scan.hasNext())
        {
            String shapeletStatsString = scan.next();
            String shapeletContentString = scan.next();
            // line 1 of the pair: quality, source series id, start position
            double qualVal;
            int serID;
            int starPos;
            try (Scanner statScan = new Scanner(shapeletStatsString))
            {
                statScan.useDelimiter(",");
                qualVal = Double.parseDouble(statScan.next().trim());
                serID = Integer.parseInt(statScan.next().trim());
                starPos = Integer.parseInt(statScan.next().trim());
            }
            // line 2 of the pair: the shapelet's values (trailing comma yields an empty token)
            ArrayList<Double> content = new ArrayList<>();
            try (Scanner lineScan = new Scanner(shapeletContentString))
            {
                lineScan.useDelimiter(",");
                while (lineScan.hasNext())
                {
                    String next = lineScan.next().trim();
                    if (!next.isEmpty())
                    {
                        content.add(Double.parseDouble(next));
                    }
                }
            }
            double[] contentArray = new double[content.size()];
            for (int i = 0; i < content.size(); i++)
            {
                contentArray[i] = content.get(i);
            }
            // re-normalise defensively, matching the original loader
            contentArray = zNormalise(contentArray, false);
            shapelets.add(new Shapelet(contentArray, qualVal, serID, starPos));
            shapeletCount++;
        }
    }
    sf.shapelets = shapelets;
    sf.shapeletsTrained = true;
    sf.numShapelets = shapelets.size();
    sf.setShapeletMinAndMax(1, 1);
    return sf;
}
/**
 * Appends (or creates, if absent) the shapelet log file at the configured
 * location, using the same two-line-per-shapelet format as recordShapelets.
 * <p>
 * Fixes: the writer is now closed even if a write fails (try-with-resources),
 * and a bare file name with no parent directory no longer NPEs.
 *
 * @throws Exception on I/O failure (propagated, as before)
 */
@Override
public void outputLog() throws Exception
{
    File file = new File(this.ouputFileLocation);
    File parent = file.getParentFile();
    if (parent != null)
    {
        parent.mkdirs();
    }
    // append when the log already exists, matching the original behaviour
    try (FileWriter out = new FileWriter(this.ouputFileLocation, file.exists()))
    {
        for (Shapelet shapelet : this.shapelets)
        {
            out.append(shapelet.qualityValue + "," + shapelet.seriesId + "," + shapelet.startPos + "\n");
            double[] shapeletContent = shapelet.content;
            for (int j = 0; j < shapeletContent.length; j++)
            {
                out.append(shapeletContent[j] + ",");
            }
            out.append("\n");
        }
    }
}
/**
 * Remaps each shapelet's series id through the sourcePos lookup, restoring
 * the original positions recorded before a reordering.
 *
 * @param shapelets the shapelets whose series ids are rewritten in place
 * @param sourcePos mapping from current position to original position
 */
public static void resetShapeletIndices(ArrayList<Shapelet> shapelets, int[] sourcePos)
{
    for (int i = 0; i < shapelets.size(); i++)
    {
        Shapelet s = shapelets.get(i);
        s.setSeriesID(sourcePos[s.getSeriesId()]);
    }
}
/**
 * Reorders the given Instances in place so that the instance currently at
 * position i moves to position sourcePos[i] (the inverse of the permutation
 * recorded by roundRobinData).
 * <p>
 * Fix: the error message previously claimed the "series are different
 * lengths" although the check compares the instance count with the mapping
 * length.
 *
 * @param data Instances to be reordered (modified in place)
 * @param sourcePos target position for each current instance position
 */
public static void resetDataOrder(Instances data, int[] sourcePos)
{
    int dataSize = data.numInstances();
    if (dataSize != sourcePos.length)
    {
        // the permutation is undefined unless the sizes agree
        System.out.println(" ERROR, cannot reorder, because the number of instances does not match the sourcePos mapping length");
        return;
    }
    // scatter each instance to its recorded original slot
    Instance[] newOrder = new Instance[sourcePos.length];
    for (int i = 0; i < sourcePos.length; i++)
    {
        newOrder[sourcePos[i]] = data.instance(i);
    }
    for (int i = 0; i < dataSize; i++)
    {
        data.set(i, newOrder[i]);
    }
}
/**
 * Method to reorder the given Instances in round robin order: one instance
 * per class in turn, cycling until all are placed. Note that instances are
 * taken from the END of each per-class list, so within each class the
 * original relative order is reversed.
 *
 * @param data Instances to be reordered (the input is not modified; a new
 * Instances is returned)
 * @param sourcePos Pointer to array of ints, where old positions of
 * instances are to be stored; ignored when null or of the wrong length.
 * sourcePos[newIndex] = oldIndex, suitable for resetDataOrder.
 * @return Instances in round robin order
 */
public static Instances roundRobinData(Instances data, int[] sourcePos)
{
//Count number of classes
TreeMap<Double, ArrayList<Instance>> instancesByClass = new TreeMap<>();
TreeMap<Double, ArrayList<Integer>> positionsByClass = new TreeMap<>();
//Get class distributions
TreeMap<Double, Integer> classDistribution = FullShapeletTransform2.getClassDistributions(data);
//Allocate arrays for instances of every class
for (Double key : classDistribution.keySet())
{
int frequency = classDistribution.get(key);
instancesByClass.put(key, new ArrayList<Instance>(frequency));
positionsByClass.put(key, new ArrayList<Integer>(frequency));
}
int dataSize = data.numInstances();
//Split data according to their class memebership
for (int i = 0; i < dataSize; i++)
{
Instance inst = data.instance(i);
instancesByClass.get(inst.classValue()).add(inst);
positionsByClass.get(inst.classValue()).add(i);
}
//Merge data into single list in round robin order
Instances roundRobinData = new Instances(data, dataSize);
for (int i = 0; i < dataSize;)
{
//Take one instance from every (non-exhausted) class per sweep
for (Double key : classDistribution.keySet())
{
ArrayList<Instance> currentList = instancesByClass.get(key);
ArrayList<Integer> currentPositions = positionsByClass.get(key);
if (!currentList.isEmpty())
{
// remove from the tail: O(1), but reverses order within the class
roundRobinData.add(currentList.remove(currentList.size() - 1));
if (sourcePos != null && sourcePos.length == dataSize)
{
sourcePos[i] = currentPositions.remove(currentPositions.size() - 1);
}
i++;
}
}
}
return roundRobinData;
}
/**
 * Prints a line to standard output unless output suppression is enabled.
 *
 * @param val the text to print
 */
public void outputPrint(String val)
{
    if (this.supressOutput)
    {
        return;
    }
    System.out.println(val);
}
/**
 * Returns a human-readable summary of the stored shapelets, one per line,
 * prefixed with "Shapelets: ".
 * <p>
 * Fix: uses a StringBuilder instead of repeated String concatenation in the
 * loop (previously quadratic in the total output size).
 *
 * @return the shapelet listing
 */
@Override
public String toString()
{
    StringBuilder sb = new StringBuilder("Shapelets: ");
    for (Shapelet s : shapelets)
    {
        sb.append(s.toString()).append("\n");
    }
    return sb.toString();
}
/**
 * An example use of a FullShapeletTransform: extracts k = 10 shapelets of
 * length 10-20 from the given ARFF, logging them to exampleOutput.txt.
 *
 * @param args command line args. args[0] should specify a set of training
 * instances to transform
 */
public static void main(String[] args)
{
    try
    {
        // example configuration: k shapelets, length range, log file destination
        final int numShapelets = 10;
        final int minLength = 10;
        final int maxLength = 20;
        // e.g. args[0] = "ItalyPowerDemand_TRAIN.arff"
        Instances train = FullShapeletTransform2.loadData(args[0]);
        FullShapeletTransform filter = new FullShapeletTransform(numShapelets, minLength, maxLength);
        filter.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
        filter.setLogOutputFile("exampleOutput.txt"); // log file stores shapelet output
        // The first call to process() extracts the shapelets; any later call
        // (e.g. on a test set) reuses them to transform without re-extraction:
        //   Instances transformedTrain = filter.process(trainingData);
        //   Instances transformedTest  = filter.process(testData);
        Instances transformed = filter.process(train);
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
}
}