/*
 * copyright: Anthony Bagnall
 *
 * NOTE: As shapelet extraction can be time consuming, there is an option to output shapelets
 * to a text file (default location is the root dir of the project, file name "defaultShapeletOutput.txt").
 *
 * The default setting is NOT to produce an output file. Unless the file name is changed, each successive
 * filter will overwrite the output (see "setLogOutputFile(String fileName)" to change the file dir and name).
 *
 * To reconstruct a filter from this output, please see the method "createFilterFromFile(String fileName)".
 */
package weka.filters.timeseries.shapelet_transforms;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.ListIterator;
import java.util.Scanner;
import java.util.TreeMap;
import weka.core.*;
import weka.core.shapelet.*;

/**
 * A filter to transform a dataset by k shapelets. Once built on a training set,
 * the filter can be used to transform subsequent datasets using the extracted
 * shapelets.
 * <p>
 * See <a
 * href="http://delivery.acm.org/10.1145/2340000/2339579/p289-lines.pdf?ip=139.222.14.198&acc=ACTIVE%20SERVICE&CFID=221649628&CFTOKEN=31860141&__acm__=1354814450_3dacfa9c5af84445ea2bfd7cc48180c8">Lines,
 * J., Davis, L., Hills, J., Bagnall, A.: A shapelet transform for time series
 * classification. In: Proc. 18th ACM SIGKDD (2012)</a>
 *
 * @author Jason Lines
 */
public class FullShapeletTransform2 extends FullShapeletTransform {

    protected boolean cacheDoubleArrays = false;
    protected double[][] cachedDoubleArray;

    //Variables for experiments
    protected static long subseqDistOpCount;

    protected TreeMap<Double, Integer> classDistributions;

    /**
     * Default constructor; quality measure defaults to information gain.
     */
    public FullShapeletTransform2() {
        this(DEFAULT_NUMSHAPELETS, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH,
                QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
    }

    /**
     * Constructor for generating a shapelet transform from an ArrayList of
     * Shapelets.
     *
     * @param shapes the shapelets to build the transform with
     */
    public FullShapeletTransform2(ArrayList<Shapelet> shapes) {
        this();
        this.shapelets = shapes;
        this.shapeletsTrained = true;
        this.numShapelets = shapelets.size();
    }

    /**
     * Single param constructor: quality measure defaults to information gain.
     *
     * @param k the number of shapelets to be generated
     */
    public FullShapeletTransform2(int k) {
        this(k, DEFAULT_MINSHAPELETLENGTH, DEFAULT_MAXSHAPELETLENGTH,
                QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
    }

    /**
     * Full constructor to create a usable filter. Quality measure defaults to
     * information gain.
     *
     * @param k the number of shapelets to be generated
     * @param minShapeletLength minimum length of shapelets
     * @param maxShapeletLength maximum length of shapelets
     */
    public FullShapeletTransform2(int k, int minShapeletLength, int maxShapeletLength) {
        this(k, minShapeletLength, maxShapeletLength, QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
    }
    /**
     * Full, exhaustive, constructor for a filter. Quality measure is set via enum;
     * an invalid selection defaults to information gain.
     *
     * @param k the number of shapelets to be generated
     * @param minShapeletLength minimum length of shapelets
     * @param maxShapeletLength maximum length of shapelets
     * @param qualityChoice the shapelet quality measure to be used with this
     * filter
     */
    public FullShapeletTransform2(int k, int minShapeletLength, int maxShapeletLength,
            weka.core.shapelet.QualityMeasures.ShapeletQualityChoice qualityChoice) {
        this.minShapeletLength = minShapeletLength;
        this.maxShapeletLength = maxShapeletLength;
        this.numShapelets = k;
        this.shapelets = new ArrayList<>();
        this.shapeletsTrained = false;
        this.useCandidatePruning = false;
        this.qualityChoice = qualityChoice;
        setQualityMeasure(qualityChoice);
    }

    /**
     * Turns candidate pruning on or off and sets the corresponding pruning start
     * percentage (10 when enabled, 100 when disabled).
     *
     * @param f whether to use candidate pruning
     */
    @Override
    public void setCandidatePruning(boolean f) {
        this.useCandidatePruning = f;
        this.candidatePruningStartPercentage = f ? 10 : 100;
    }

    /**
     * Sets the format of the filtered instances that are output, i.e. the output
     * will include one attribute per shapelet distance plus a class value.
     *
     * @param inputFormat the format of the input data
     * @return a new Instances object in the desired output format
     */
    //TODO: Fix deprecated FastVector
    @Override
    protected Instances determineOutputFormat(Instances inputFormat) throws IllegalArgumentException {
        if (this.numShapelets < 1) {
            throw new IllegalArgumentException("ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1");
        }

        //Set up instances size and format.
        //int length = this.numShapelets;
        int length = this.shapelets.size();
        FastVector atts = new FastVector();
        String name;
        for (int i = 0; i < length; i++) {
            name = "Shapelet_" + i;
            atts.addElement(new Attribute(name));
        }

        if (inputFormat.classIndex() >= 0) {
            //Classification set, set class
            //Get the class values as a fast vector
            Attribute target = inputFormat.attribute(inputFormat.classIndex());

            FastVector vals = new FastVector(target.numValues());
            for (int i = 0; i < target.numValues(); i++) {
                vals.addElement(target.value(i));
            }
            atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
        }
        Instances result = new Instances("Shapelets" + inputFormat.relationName(), atts, inputFormat.numInstances());
        if (inputFormat.classIndex() >= 0) {
            result.setClassIndex(result.numAttributes() - 1);
        }
        return result;
    }

    protected void inputCheck(Instances dataInst) throws IllegalArgumentException {
        if (numShapelets < 1) {
            throw new IllegalArgumentException("Number of shapelets initialised incorrectly - please select a value of k (Usage: setNumberOfShapelets)");
        }

        int maxPossibleLength;
        maxPossibleLength = dataInst.instance(0).numAttributes();

        if (dataInst.classIndex() >= 0) {
            maxPossibleLength -= 1;
        }

        if (minShapeletLength < 1 || maxShapeletLength < 1 || maxShapeletLength < minShapeletLength || maxShapeletLength > maxPossibleLength) {
            throw new IllegalArgumentException("Shapelet length parameters initialised incorrectly");
        }
    }
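    /*
     * Illustrative sketch, not part of the original filter: prints the header that
     * determineOutputFormat() builds, i.e. one numeric attribute per extracted shapelet
     * ("Shapelet_0", "Shapelet_1", ...) followed by the original class attribute. The method
     * name is hypothetical and the filter is assumed to have been trained already.
     */
    public void printTransformedHeaderSketch(Instances inputFormat) {
        Instances header = determineOutputFormat(inputFormat);
        for (int i = 0; i < header.numAttributes(); i++) {
            System.out.println(i + ": " + header.attribute(i).name());
        }
    }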
    /**
     * The main logic of the filter; when called for the first time, k shapelets
     * are extracted from the input Instances 'data'. The input 'data' is
     * transformed by the k shapelets, and the filtered data is returned as an
     * output.
     * <p>
     * If called multiple times, shapelet extraction DOES NOT take place again;
     * once k shapelets are established from the initial call to process(), the
     * k shapelets are used to transform subsequent Instances.
     * <p>
     * Intended use:
     * <p>
     * 1. Extract k shapelets from raw training data to build the filter;
     * <p>
     * 2. Use the filter to transform the raw training data into transformed
     * training data;
     * <p>
     * 3. Use the filter to transform the raw testing data into transformed
     * testing data (i.e. the filter never extracts shapelets from the test data,
     * therefore avoiding bias);
     * <p>
     * 4. Build a classifier using the transformed training data, perform
     * classification on the transformed test data.
     *
     * @param data the input data to be transformed (and to find the shapelets
     * if this is the first run)
     * @return the transformed representation of data, according to the
     * distances from each instance to each of the k shapelets
     */
    @Override
    public Instances process(Instances data) throws IllegalArgumentException {
        //check the input data is correct and assess whether the filter has been set up correctly.
        inputCheck(data);

        //instantiate the caching array here, so it gets refreshed if we're using a test set.
        int dataSize = data.numInstances();
        if (cacheDoubleArrays)
            cachedDoubleArray = new double[dataSize][];

        //check whether the shapelets have been found yet; find them if not.
        if (!shapeletsTrained)
            trainShapelets(data);

        //build the transformed dataset with the shapelets we've found, either on this data or the previous training data
        return buildTansformedDataset(data);
    }

    protected void trainShapelets(Instances data) {
        int dataSize = data.numInstances();
        // shapelet discovery has not yet been carried out, so this must be training data
        dataSourceIDs = new int[dataSize];

        if (roundRobin) {
            //Reorder the data in round robin order
            data = roundRobinData(data, dataSourceIDs);
        } else {
            for (int i = 0; i < dataSize; i++) {
                dataSourceIDs[i] = i;
            }
        }

        shapelets = findBestKShapeletsCache(data); // get k shapelets
        shapeletsTrained = true;

        outputPrint(shapelets.size() + " Shapelets have been generated");

        //Reorder the training data and reset the shapelet indexes
        if (roundRobin) {
            resetDataOrder(data, dataSourceIDs);
            resetShapeletIndices(shapelets, dataSourceIDs);
        }
    }

    protected Instances buildTansformedDataset(Instances data) {
        Instances output = determineOutputFormat(data);
        int dataSize = data.numInstances();

        // for each instance, get the distance to each shapelet and create a new transformed instance
        for (int i = 0; i < dataSize; i++) {
            Instance toAdd = new DenseInstance(shapelets.size() + 1);
            int shapeletNum = 0;
            for (Shapelet s : shapelets) {
                double dist = subsequenceDistance(s.content, getToDoubleArrayOfInstance(data, i));
                toAdd.setValue(shapeletNum++, dist);
            }
            toAdd.setValue(shapelets.size(), data.instance(i).classValue());
            output.add(toAdd);
        }
        return output;
    }
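    /*
     * Minimal usage sketch, not part of the original filter: the train/test workflow described
     * in the process() Javadoc. The method name and ARFF file names are hypothetical
     * placeholders; shapelets are extracted on the first call to process() only.
     */
    public static void exampleTrainTestTransformSketch() {
        Instances train = loadData("ExampleDataset_TRAIN.arff");
        Instances test = loadData("ExampleDataset_TEST.arff");
        FullShapeletTransform2 transform = new FullShapeletTransform2(10, 20, 40);
        Instances transformedTrain = transform.process(train); // extracts shapelets and transforms the training data
        Instances transformedTest = transform.process(test);   // reuses the extracted shapelets on the test data
        System.out.println(transformedTrain.numAttributes() + " attributes, " + transformedTest.numInstances() + " test instances");
    }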
    /**
     * Method for extracting the k best shapelets.
     *
     * @param data the data that the shapelets will be taken from
     * @return an ArrayList of Shapelet objects in order of their fitness (by
     * infoGain, separationGap, then shortest length)
     */
    public ArrayList<Shapelet> findBestKShapeletsCache(Instances data) {
        ArrayList<Shapelet> kShapelets = new ArrayList<>();
        ArrayList<Shapelet> seriesShapelets;                // temp store of all shapelets for each time series
        classDistributions = getClassDistributions(data);   // used to calc info gain

        //for all time series
        outputPrint("Processing data: ");

        int dataSize = data.numInstances();
        //for all possible time series.
        for (int i = 0; i < dataSize; i++) {
            outputPrint("data : " + i);

            double[] wholeCandidate = getToDoubleArrayOfInstance(data, i);

            seriesShapelets = findShapeletCandidates(data, i, wholeCandidate, kShapelets);

            //START TEST
            Comparator comp = useSeparationGap ? new Shapelet.ReverseSeparationGap() : new Shapelet.ReverseOrder();
            Collections.sort(seriesShapelets, comp);
            //END TEST

            seriesShapelets = removeSelfSimilar(seriesShapelets);

            //kShapelets = sortedCombine(numShapelets, kShapelets, seriesShapelets); //test
            kShapelets = combine(numShapelets, kShapelets, seriesShapelets);
        }

        this.numShapelets = kShapelets.size();

        recordShapelets(kShapelets);
        printShapelets(kShapelets);

        return kShapelets;
    }

    protected ArrayList<Shapelet> findShapeletCandidates(Instances data, int i, double[] wholeCandidate, ArrayList<Shapelet> kShapelets) {
        //get our time series as a double array.
        ArrayList<Shapelet> seriesShapelets = new ArrayList<>();

        //for all possible lengths
        for (int length = minShapeletLength; length <= maxShapeletLength; length++) {
            double[] candidate = new double[length];

            //for all possible starting positions of that length
            for (int start = 0; start <= wholeCandidate.length - length - 1; start++) {
                //-1 = avoid classVal - handle later for series with no class val

                // CANDIDATE ESTABLISHED - got original series, length and starting position
                // extract relevant part into a double[] for processing
                System.arraycopy(wholeCandidate, start, candidate, 0, length);

                // znorm candidate here so it's only done once, rather than in each distance calculation
                candidate = zNorm(candidate, false);

                //Initialize bounding algorithm for current candidate
                QualityBound.ShapeletQualityBound qualityBound = initializeQualityBound(classDistributions);

                //Set bound of the bounding algorithm
                if (qualityBound != null && kShapelets.size() == numShapelets) {
                    qualityBound.setBsfQuality(kShapelets.get(numShapelets - 1).qualityValue);
                }

                //compare the shapelet candidate to the other time series.
                Shapelet candidateShapelet = checkCandidate(candidate, data, i, start, qualityBound);

                if (candidateShapelet != null)
                    seriesShapelets.add(candidateShapelet);
            }
        }
        return seriesShapelets;
    }

    protected void recordShapelets(ArrayList<Shapelet> kShapelets) {
        if (this.recordShapelets) {
            try {
                //just in case the file or its parent directories don't exist.
                File file = new File(this.ouputFileLocation);
                file.getParentFile().mkdirs();

                FileWriter out = new FileWriter(file);

                for (Shapelet kShapelet : kShapelets) {
                    out.append(kShapelet.qualityValue + "," + kShapelet.seriesId + "," + kShapelet.startPos + "\n");
                    double[] shapeletContent = kShapelet.content;
                    for (int j = 0; j < shapeletContent.length; j++) {
                        out.append(shapeletContent[j] + ",");
                    }
                    out.append("\n");
                }
                out.close();
            } catch (IOException ex) {
                System.out.println("IOException: " + ex);
            }
        }
    }

    protected void printShapelets(ArrayList<Shapelet> kShapelets) {
        if (!supressOutput) {
            System.out.println();
            System.out.println("Output Shapelets:");
            System.out.println("-------------------");
            System.out.println("informationGain,seriesId,startPos");
            System.out.println("<shapelet>");
            System.out.println("-------------------");
            System.out.println();
            for (Shapelet kShapelet : kShapelets) {
                System.out.println(kShapelet.qualityValue + "," + kShapelet.seriesId + "," + kShapelet.startPos);
                double[] shapeletContent = kShapelet.content;
                for (int j = 0; j < shapeletContent.length; j++) {
                    System.out.print(shapeletContent[j] + ",");
                }
                System.out.println();
            }
        }
    }
    /**
     * Protected method to combine two ArrayList collections of Shapelet objects.
     *
     * @param k the maximum number of shapelets to be returned after combining the two lists
     * @param kBestSoFar the (up to) k best shapelets that have been observed so far, passed in to combine with shapelets from a new series
     * @param timeSeriesShapelets the shapelets taken from a new series that are to be merged in descending order of fitness with the kBestSoFar
     * @return an ordered ArrayList of the best k (or fewer) Shapelet objects from the union of the input ArrayLists
     */
    //NOTE: could be more efficient here
    @Override
    protected ArrayList<Shapelet> combine(int k, ArrayList<Shapelet> kBestSoFar, ArrayList<Shapelet> timeSeriesShapelets) {
        ArrayList<Shapelet> newBestSoFar = new ArrayList<>();
        kBestSoFar.addAll(timeSeriesShapelets);
        Comparator comp = useSeparationGap ? new Shapelet.ReverseSeparationGap() : new Shapelet.ReverseOrder();
        Collections.sort(kBestSoFar, comp);

        if (kBestSoFar.size() < k) {
            // no need to return up to k, as there are not k shapelets yet
            return kBestSoFar;
        }

        for (int i = 0; i < k; i++) {
            newBestSoFar.add(kBestSoFar.get(i));
        }

        return newBestSoFar;
    }

    //this is the caching system.
    protected double[] getToDoubleArrayOfInstance(Instances data, int pos) {
        if (!cacheDoubleArrays)
            return data.get(pos).toDoubleArray();

        if (cachedDoubleArray[pos] == null)
            cachedDoubleArray[pos] = data.get(pos).toDoubleArray();

        return cachedDoubleArray[pos];
    }

    /**
     * Protected method to remove self-similar shapelets from an ArrayList (i.e.
     * shapelets that come from the same series and have overlapping indices).
     *
     * @param shapelets the input Shapelets to remove self-similar
     * Shapelet objects from
     * @return a copy of the input ArrayList with self-similar shapelets removed
     */
    protected static ArrayList<Shapelet> removeSelfSimilar(ArrayList<Shapelet> shapelets) {
        // return a new pruned array list - more efficient than removing
        // self-similar entries on the fly and constantly reindexing
        ArrayList<Shapelet> outputShapelets = new ArrayList<>();
        int size = shapelets.size();
        boolean[] selfSimilar = new boolean[size];

        for (int i = 0; i < size; i++) {
            if (selfSimilar[i])
                continue;

            outputShapelets.add(shapelets.get(i));

            for (int j = i + 1; j < size; j++) {
                // no point recalculating if already self-similar to something
                if ((!selfSimilar[j]) && selfSimilarity(shapelets.get(i), shapelets.get(j)))
                    selfSimilar[j] = true;
            }
        }
        return outputShapelets;
    }

    /**
     * Method to calculate the class distributions of a dataset. Its main
     * purpose is for computing shapelet qualities.
     *
     * @param data the input data set that the class distributions are to be
     * derived from
     * @return a TreeMap<Double, Integer> in the form of <Class Value,
     * Frequency>
     */
    public static TreeMap<Double, Integer> getClassDistributions(Instances data) {
        TreeMap<Double, Integer> classDistribution = new TreeMap<>();
        ListIterator<Instance> it = data.listIterator();
        double classValue;
        while (it.hasNext()) {
            classValue = it.next().classValue();
            Integer val = classDistribution.get(classValue);
            val = (val != null) ? val + 1 : 1;
            classDistribution.put(classValue, val);
        }
        return classDistribution;
    }
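    /*
     * Illustrative sketch, not part of the original filter: getClassDistributions() maps each
     * class value to its frequency, e.g. a two-class dataset with 40 instances of class 0.0 and
     * 27 of class 1.0 yields {0.0=40, 1.0=27}. The method name is hypothetical.
     */
    public static void printClassDistributionSketch(Instances data) {
        TreeMap<Double, Integer> distribution = getClassDistributions(data);
        for (Double classValue : distribution.keySet()) {
            System.out.println("class " + classValue + " -> " + distribution.get(classValue) + " instances");
        }
    }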
    /**
     * Protected method to check a candidate shapelet. Functions by passing in
     * the raw data, and returning an assessed Shapelet object.
     *
     * @param candidate the data from the candidate shapelet
     * @param data the entire data set to compare the candidate to
     * @param seriesId series id from the dataset that the candidate came from
     * @param startPos start position in the series where the candidate came
     * from
     * @param qualityBound the quality bounding algorithm used to prune this candidate
     * @return a fully-computed Shapelet, including the quality of this candidate
     */
    protected Shapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos, QualityBound.ShapeletQualityBound qualityBound) {
        // create orderline by looping through data set and calculating the subsequence
        // distance from candidate to all data, inserting in order.
        ArrayList<OrderLineObj> orderline = new ArrayList<>();
        boolean pruned = false;

        int dataSize = data.numInstances();

        for (int i = 0; i < dataSize; i++) {
            //Check if it is possible to prune the candidate
            if (qualityBound != null && qualityBound.pruneCandidate()) {
                pruned = true;
                break;
            }

            double distance = 0.0;
            //don't compare the shapelet to the time series it came from.
            if (i != seriesId) {
                distance = subsequenceDistance(candidate, getToDoubleArrayOfInstance(data, i));
            }

            double classVal = data.instance(i).classValue();
            // without early abandon, it is faster to just add and sort at the end
            orderline.add(new OrderLineObj(distance, classVal));

            //Update qualityBound - presumably each bounding method for different quality measures will have a different update procedure.
            if (qualityBound != null) {
                qualityBound.updateOrderLine(orderline.get(orderline.size() - 1));
            }
        }

        // note: early abandon entropy pruning would appear here, but has been omitted
        // in favour of a clear multi-class information gain calculation. Could be added in
        // this method in the future for speed up, but distance early abandon is more important

        // If the shapelet is pruned then it should no longer be considered in further processing
        if (!pruned) {
            // create a shapelet object to store all necessary info, i.e.
            Shapelet shapelet = new Shapelet(candidate, dataSourceIDs[seriesId], startPos, this.qualityMeasure);
            shapelet.calculateQuality(orderline, classDistributions);
            return shapelet;
        }

        return null;
    }

    public static double[] getInfoGain(Instances trans) {
        double[] quals = new double[trans.numAttributes() - 1];
        TreeMap<Double, Integer> map = getClassDistributions(trans);

        for (int i = 0; i < quals.length; i++) {
            ArrayList<OrderLineObj> orderline = new ArrayList<>();
            double[] dists = trans.attributeToDoubleArray(i);

            for (int j = 0; j < dists.length; j++) {
                double distance = dists[j];
                double classVal = trans.instance(j).classValue();
                orderline.add(new OrderLineObj(distance, classVal));
            }

            QualityMeasures.InformationGain ig = new QualityMeasures.InformationGain();
            double qual = ig.calculateQuality(orderline, map);
            quals[i] = qual;
        }
        return quals;
    }

    /**
     * Calculate the distance between a candidate series and an Instance object.
     *
     * @param candidate a double[] representation of a shapelet candidate
     * @param timeSeriesIns an Instance object of a whole time series
     * @return the distance between a candidate and a time series
     */
    @Override
    protected double subseqDistance(double[] candidate, Instance timeSeriesIns) {
        return subsequenceDistance(candidate, timeSeriesIns.toDoubleArray());
    }
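    /*
     * Illustrative sketch, not part of the original filter: subsequenceDistance() slides the
     * candidate along the series, z-normalises each window, and returns the smallest
     * length-normalised squared Euclidean distance. A z-normalised candidate copied directly
     * from the series therefore gives a distance of 0. The method name and values are
     * hypothetical.
     */
    public static void subsequenceDistanceSketch() {
        double[] series = {1, 2, 3, 4, 5, 4, 3, 2, 1, 0}; // last value stands in for the class label
        double[] candidate = zNormalise(new double[]{3, 4, 5}, false);
        System.out.println(subsequenceDistance(candidate, series)); // prints 0.0
    }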
    /**
     * Calculate the distance between a shapelet candidate and a full time
     * series (both double[]).
     *
     * @param candidate a double[] representation of a shapelet candidate
     * @param timeSeries a double[] representation of a whole time series (inc.
     * class value)
     * @return the distance between a candidate and a time series
     */
    public static double subsequenceDistance(double[] candidate, double[] timeSeries) {

        double bestSum = Double.MAX_VALUE;
        double sum;
        double[] subseq;
        double temp;

        // for all possible subsequences of the time series (excluding the class value)
        for (int i = 0; i < timeSeries.length - candidate.length; i++) {
            sum = 0;
            // get the subsequence of the time series that is the same length as the candidate
            subseq = new double[candidate.length];
            System.arraycopy(timeSeries, i, subseq, 0, candidate.length);
            subseqDistOpCount += candidate.length;

            subseq = zNormalise(subseq, false); // Z-NORM HERE

            //Keep count of fundamental ops for experiment
            subseqDistOpCount += 3 * subseq.length;

            for (int j = 0; j < candidate.length; j++) {
                temp = (candidate[j] - subseq[j]);
                sum += temp * temp;
            }
            subseqDistOpCount += candidate.length;

            if (sum < bestSum) {
                bestSum = sum;
            }
        }
        return (bestSum == 0.0) ? 0.0 : (1.0 / candidate.length * bestSum);
    }

    /**
     * Z-normalise the input (delegates to the static zNormalise method).
     *
     * @param input the input time series
     * @param classValOn whether the time series includes a class value
     * @return a z-normalised version of input
     */
    @Override
    protected double[] zNorm(double[] input, boolean classValOn) {
        return FullShapeletTransform2.zNormalise(input, classValOn);
    }

    /**
     * Z-normalise a time series.
     *
     * @param input the input time series to be z-normalised
     * @param classValOn specify whether the time series includes a class value
     * (e.g. a full instance might, a candidate shapelet wouldn't)
     * @return a z-normalised version of input
     */
    public static double[] zNormalise(double[] input, boolean classValOn) {
        double mean;
        double stdv;

        int classValPenalty = classValOn ? 1 : 0;
        int inputLength = input.length - classValPenalty;

        double[] output = new double[input.length];
        double seriesTotal = 0;

        for (int i = 0; i < inputLength; i++) {
            seriesTotal += input[i];
        }

        mean = seriesTotal / (double) inputLength;
        stdv = 0;
        double temp;
        for (int i = 0; i < inputLength; i++) {
            temp = (input[i] - mean);
            stdv += temp * temp;
        }

        stdv /= (double) inputLength;

        // if the variance is less than the error correction, just set it to 0, else calc stdv.
        stdv = (stdv < ROUNDING_ERROR_CORRECTION) ? 0.0 : Math.sqrt(stdv);

        for (int i = 0; i < inputLength; i++) {
            //if the stdv is 0 then set to 0, else normalise.
            output[i] = (stdv == 0.0) ? 0.0 : ((input[i] - mean) / stdv);
        }

        if (classValOn) {
            output[output.length - 1] = input[input.length - 1];
        }

        return output;
    }
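    /*
     * Illustrative sketch, not part of the original filter: z-normalisation rescales a series to
     * zero mean and unit standard deviation, e.g. {1, 2, 3} becomes roughly {-1.22, 0.0, 1.22},
     * and a constant series maps to all zeros. The method name is hypothetical.
     */
    public static void zNormaliseSketch() {
        double[] normalised = zNormalise(new double[]{1, 2, 3}, false);
        System.out.println(java.util.Arrays.toString(normalised));
    }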
    /**
     * Load a set of Instances from an ARFF file.
     *
     * @param fileName the file name of the ARFF
     * @return a set of Instances from the ARFF
     */
    public static Instances loadData(String fileName) {
        Instances data = null;
        try {
            FileReader r;
            r = new FileReader(fileName);
            data = new Instances(r);
            data.setClassIndex(data.numAttributes() - 1);
        } catch (IOException e) {
            System.out.println(" Error =" + e + " in method loadData");
        }
        return data;
    }

    /**
     * A private method to assess the self-similarity of two Shapelet objects
     * (i.e. whether they have overlapping indices and are taken from the same
     * time series).
     *
     * @param shapelet the first Shapelet object (in practice, this
     * will be the dominant shapelet with quality >= candidate)
     * @param candidate the second Shapelet object
     * @return true if the two shapelets are considered self-similar
     */
    private static boolean selfSimilarity(Shapelet shapelet, Shapelet candidate) {
        if (candidate.seriesId == shapelet.seriesId) {
            if (candidate.startPos >= shapelet.startPos && candidate.startPos < shapelet.startPos + shapelet.content.length) {
                //candidate starts within the existing shapelet
                return true;
            }
            if (shapelet.startPos >= candidate.startPos && shapelet.startPos < candidate.startPos + candidate.content.length) {
                return true;
            }
        }
        return false;
    }

    /**
     * A method to read in a FullShapeletTransform log file to reproduce a
     * FullShapeletTransform.
     * <p>
     * NOTE: assumes shapelets from the log are Z-NORMALISED
     *
     * @param fileName the name and path of the log file
     * @return a duplicate FullShapeletTransform to the object that created the
     * original log file
     * @throws Exception
     */
    public static FullShapeletTransform createFilterFromFile(String fileName) throws Exception {
        return createFilterFromFile(fileName, Integer.MAX_VALUE);
    }

    /**
     * Returns a list of the lengths of the shapelets found by this transform.
     *
     * @return an ArrayList of Integers representing the lengths of the
     * shapelets.
     */
    @Override
    public ArrayList<Integer> getShapeletLengths() {
        ArrayList<Integer> shapeletLengths = new ArrayList<>();

        if (this.shapeletsTrained) {
            for (Shapelet s : this.shapelets) {
                shapeletLengths.add(s.content.length);
            }
        }

        return shapeletLengths;
    }

    /**
     * A method to read in a FullShapeletTransform log file to reproduce a
     * FullShapeletTransform, reading at most maxShapelets shapelets.
     * <p>
     * NOTE: assumes shapelets from the log are Z-NORMALISED
     *
     * @param fileName the name and path of the log file
     * @param maxShapelets the maximum number of shapelets to read from the log file
     * @return a duplicate FullShapeletTransform to the object that created the
     * original log file
     * @throws Exception
     */
    public static FullShapeletTransform createFilterFromFile(String fileName, int maxShapelets) throws Exception {

        File input = new File(fileName);
        Scanner scan = new Scanner(input);
        scan.useDelimiter("\n");

        FullShapeletTransform sf = new FullShapeletTransform();
        ArrayList<Shapelet> shapelets = new ArrayList<>();

        String shapeletContentString;
        String shapeletStatsString;
        ArrayList<Double> content;
        double[] contentArray;
        Scanner lineScan;
        Scanner statScan;
        double qualVal;
        int serID;
        int starPos;

        int shapeletCount = 0;

        while (shapeletCount < maxShapelets && scan.hasNext()) {
            shapeletStatsString = scan.next();
            shapeletContentString = scan.next();

            //Get the shapelet stats
            statScan = new Scanner(shapeletStatsString);
            statScan.useDelimiter(",");

            qualVal = Double.parseDouble(statScan.next().trim());
            serID = Integer.parseInt(statScan.next().trim());
            starPos = Integer.parseInt(statScan.next().trim());
            //End of shapelet stats

            lineScan = new Scanner(shapeletContentString);
            // System.out.println(shapeletContentString);
            lineScan.useDelimiter(",");

            content = new ArrayList<>();
            while (lineScan.hasNext()) {
                String next = lineScan.next().trim();
                if (!next.isEmpty()) {
                    content.add(Double.parseDouble(next));
                }
            }

            contentArray = new double[content.size()];
            for (int i = 0; i < content.size(); i++) {
                contentArray[i] = content.get(i);
            }

            contentArray = zNormalise(contentArray, false);

            Shapelet s = new Shapelet(contentArray, qualVal, serID, starPos);
            shapelets.add(s);

            shapeletCount++;
        }

        sf.shapelets = shapelets;
        sf.shapeletsTrained = true;
        sf.numShapelets = shapelets.size();
        sf.setShapeletMinAndMax(1, 1);

        return sf;
    }
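    /*
     * Illustrative sketch, not part of the original filter: the log written by outputLog() and
     * recordShapelets() stores two lines per shapelet, "quality,seriesId,startPos" followed by
     * the comma-separated shapelet content, which is what createFilterFromFile() parses back.
     * The method name is hypothetical; the file name is the default mentioned in the header.
     */
    public static FullShapeletTransform reloadFilterSketch() throws Exception {
        // read back at most 5 shapelets from a previously written log file
        return createFilterFromFile("defaultShapeletOutput.txt", 5);
    }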
    /**
     * Outputs the log file to the appropriate location.
     *
     * @throws Exception
     */
    @Override
    public void outputLog() throws Exception {
        //just in case the file or its parent directories don't exist.
        File file = new File(this.ouputFileLocation);
        file.getParentFile().mkdirs();

        FileWriter out = new FileWriter(this.ouputFileLocation, file.exists());

        for (Shapelet shapelet : this.shapelets) {
            out.append(shapelet.qualityValue + "," + shapelet.seriesId + "," + shapelet.startPos + "\n");
            double[] shapeletContent = shapelet.content;
            for (int j = 0; j < shapeletContent.length; j++) {
                out.append(shapeletContent[j] + ",");
            }
            out.append("\n");
        }
        out.close();
    }

    /**
     * Method to reset shapelet indices to the values given in sourcePos.
     *
     * @param shapelets the shapelets whose series IDs are to be reset
     * @param sourcePos array of ints holding the original positions of the
     * instances
     */
    public static void resetShapeletIndices(ArrayList<Shapelet> shapelets, int[] sourcePos) {
        for (Shapelet s : shapelets) {
            int pos = s.getSeriesId();
            s.setSeriesID(sourcePos[pos]);
        }
    }

    /**
     * Method to reorder the given Instances into the order given in sourcePos.
     *
     * @param data Instances to be reordered
     * @param sourcePos array of ints holding the original positions of the
     * instances
     */
    public static void resetDataOrder(Instances data, int[] sourcePos) {
        int dataSize = data.numInstances();

        if (dataSize != sourcePos.length) { //ERROR
            System.out.println(" ERROR, cannot reorder, because the number of instances and the source position array are different lengths");
            return;
        }

        Instance[] newOrder = new Instance[sourcePos.length];
        for (int i = 0; i < sourcePos.length; i++) {
            newOrder[sourcePos[i]] = data.instance(i);
        }

        for (int i = 0; i < dataSize; i++) {
            data.set(i, newOrder[i]);
        }
    }

    /**
     * Method to reorder the given Instances in round robin order.
     *
     * @param data Instances to be reordered
     * @param sourcePos array of ints where the old positions of the instances
     * are to be stored
     * @return Instances in round robin order
     */
    public static Instances roundRobinData(Instances data, int[] sourcePos) {
        //Count number of classes
        TreeMap<Double, ArrayList<Instance>> instancesByClass = new TreeMap<>();
        TreeMap<Double, ArrayList<Integer>> positionsByClass = new TreeMap<>();

        //Get class distributions
        TreeMap<Double, Integer> classDistribution = FullShapeletTransform2.getClassDistributions(data);

        //Allocate arrays for instances of every class
        for (Double key : classDistribution.keySet()) {
            int frequency = classDistribution.get(key);
            instancesByClass.put(key, new ArrayList<Instance>(frequency));
            positionsByClass.put(key, new ArrayList<Integer>(frequency));
        }

        int dataSize = data.numInstances();
        //Split data according to their class membership
        for (int i = 0; i < dataSize; i++) {
            Instance inst = data.instance(i);
            instancesByClass.get(inst.classValue()).add(inst);
            positionsByClass.get(inst.classValue()).add(i);
        }

        //Merge data into a single list in round robin order
        Instances roundRobinData = new Instances(data, dataSize);
        for (int i = 0; i < dataSize;) {
            //Take one instance of every class in each pass
            for (Double key : classDistribution.keySet()) {
                ArrayList<Instance> currentList = instancesByClass.get(key);
                ArrayList<Integer> currentPositions = positionsByClass.get(key);

                if (!currentList.isEmpty()) {
                    roundRobinData.add(currentList.remove(currentList.size() - 1));
                    if (sourcePos != null && sourcePos.length == dataSize) {
                        sourcePos[i] = currentPositions.remove(currentPositions.size() - 1);
                    }
                    i++;
                }
            }
        }

        return roundRobinData;
    }
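    /*
     * Illustrative sketch, not part of the original filter: round-robin reordering interleaves
     * the classes (e.g. class labels 0,0,0,1,1,2 become an order such as 0,1,2,0,1,0) and
     * sourcePos records where each reordered instance originally sat, so resetDataOrder() and
     * resetShapeletIndices() can later undo the shuffle. The method name is hypothetical.
     */
    public static Instances roundRobinSketch(Instances data) {
        int[] originalPositions = new int[data.numInstances()];
        // originalPositions[i] holds the original index of the instance now at position i
        return roundRobinData(data, originalPositions);
    }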
    public void outputPrint(String val) {
        if (!this.supressOutput)
            System.out.println(val);
    }

    @Override
    public String toString() {
        String str = "Shapelets: ";
        for (Shapelet s : shapelets) {
            str += s.toString() + "\n";
        }
        return str;
    }

    /**
     * An example use of a FullShapeletTransform.
     *
     * @param args command line args. args[0] should specify a set of training
     * instances to transform
     */
    public static void main(String[] args) {
        try {
            // mandatory requirements: numShapelets (k), min shapelet length, max shapelet length, input data
            // additional information: log output dir
            // example filter: k = 10, minLength = 10, maxLength = 20, data = args[0], output = exampleOutput.txt
            int k = 10;
            int minLength = 10;
            int maxLength = 20;

            // Instances data = FullShapeletTransform2.loadData("ItalyPowerDemand_TRAIN.arff"); // for example
            Instances data = FullShapeletTransform2.loadData(args[0]);

            FullShapeletTransform sf = new FullShapeletTransform(k, minLength, maxLength);
            sf.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN);
            sf.setLogOutputFile("exampleOutput.txt"); // log file stores shapelet output

            // Note: sf.process returns a transformed set of Instances. The first time that
            // thisFilter.process(data) is called, shapelet extraction occurs. Subsequent calls to process
            // use the previously extracted shapelets to transform the data. For example:
            //
            // Instances transformedTrain = sf.process(trainingData); -> extracts shapelets and can be used to transform training data
            // Instances transformedTest = sf.process(testData); -> uses shapelets extracted from trainingData to transform testData
            Instances transformed = sf.process(data);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}