ShapeletTreeClassifier.java example

Explorer
TimeSeriesClassification-master
- TimeSeriesClassification
  - src
/*
     * copyright: Anthony Bagnall
 * */package weka.classifiers.trees.shapelet_trees;

import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.GregorianCalendar;
import java.util.TreeMap;
import java.util.TreeSet;
import weka.classifiers.AbstractClassifier;
import weka.classifiers.Classifier;
import weka.core.*;
import weka.filters.timeseries.shapelet_transforms.FullShapeletTransform;
//import java.io.File;
//import java.util.Scanner;


public class ShapeletTreeClassifier extends AbstractClassifier{


    // Root of the shapelet decision tree; created in the constructor and populated by buildClassifier.
    private ShapeletNode root;
    // Path of the log file that tree construction appends its progress lines to.
    private String logFileName;
    // Inclusive bounds on candidate shapelet length; both stay 0 until setShapeletMinMaxLength is called.
    private int minLength, maxLength;

    /**
     * Creates an untrained tree and creates (or empties) the log file.
     *
     * @param logFileName path of the file that tree construction will append to
     * @throws Exception if the log file cannot be opened for writing
     */
    public ShapeletTreeClassifier(String logFileName) throws Exception{
        // opening without the append flag truncates any log left over from a previous run
        new FileWriter(logFileName).close();

        this.logFileName = logFileName;
        this.minLength = 0;
        this.maxLength = 0;
        this.root = new ShapeletNode();
    }

    /**
     * Sets the range of shapelet lengths searched when the tree is built.
     *
     * @param minLength shortest candidate length to consider
     * @param maxLength longest candidate length to consider
     */
    public void setShapeletMinMaxLength(int minLength, int maxLength){
        this.maxLength = maxLength;
        this.minLength = minLength;
    }
    
    /**
     * Builds the shapelet tree from the training data, starting at the root node.
     *
     * @param data the training instances
     * @throws Exception if the shapelet length bounds are unset or inverted, if the
     *                   data set is empty, or if node construction fails
     */
    @Override
    public void buildClassifier(Instances data) throws Exception{
        if(minLength < 1 || maxLength < 1){
            throw new IllegalArgumentException("Shapelet minimum or maximum length is incorrectly specified!");
        }
        // an inverted range yields no candidates at all, so no shapelet could ever be found
        if(minLength > maxLength){
            throw new IllegalArgumentException("Shapelet minimum length ("+minLength+") exceeds maximum length ("+maxLength+")");
        }
        // the root node reads the first instance unconditionally
        if(data.numInstances() == 0){
            throw new IllegalArgumentException("Cannot build a shapelet tree from an empty data set");
        }

        root.initialiseNode(data, minLength, maxLength,0);
    }

    /**
     * Classifies an instance by passing it down the tree from the root.
     *
     * @param instance the instance to classify
     * @return the class value stored at the leaf that the instance reaches
     */
    @Override
    public double classifyInstance(Instance instance) {
        final ShapeletNode start = this.root;
        return start.classifyInstance(instance);
    }

    /**
     * @return the shapelet held by the root node (null if the root never stored one)
     */
    private Shapelet getRootShapelet(){
        final ShapeletNode top = root;
        return top.shapelet;
    }
    
    /**
     * A node of the shapelet decision tree. A leaf carries a class decision; an
     * internal node carries a shapelet and sends instances whose distance to that
     * shapelet is below the split threshold to the left child, all others to the right.
     */
    private class ShapeletNode {

        private ShapeletNode leftNode;
        private ShapeletNode rightNode;
        private double classDecision;
        private Shapelet shapelet;

        /** Creates a childless node whose class decision is the placeholder -1. */
        public ShapeletNode() {
            leftNode = null;
            rightNode = null;
            classDecision = -1;
        }

        /**
         * Appends text to the classifier's log file, always releasing the file
         * handle even when the write itself fails.
         *
         * @param text the text to append
         * @throws Exception if the log file cannot be opened, written or closed
         */
        private void appendToLog(String text) throws Exception{
            FileWriter fw = new FileWriter(logFileName,true);
            try{
                fw.append(text);
            }finally{
                fw.close();
            }
        }

        /**
         * Turns this node into a leaf (when every instance shares one class value) or
         * into an internal node whose children are built recursively from the two
         * halves of the data split by the best shapelet found.
         *
         * Failures while finding the shapelet or building the children are printed and
         * not rethrown, so the subtree below this node may be left partially built.
         *
         * @param data the instances that reached this node; must not be empty
         * @param minShapeletLength shortest candidate shapelet length
         * @param maxShapeletLength longest candidate shapelet length
         * @param level depth of this node, used only in the log output
         * @throws Exception if writing the initial log lines fails
         */
        public void initialiseNode(Instances data, int minShapeletLength, int maxShapeletLength, int level) throws Exception{
            appendToLog("level:"+level+", numInstances:"+data.numInstances()+"\n");

            // 1. check whether this is a leaf node with only one class present
            double firstClassValue = data.instance(0).classValue();
            boolean oneClass = true;
            for(int i = 1; i < data.numInstances(); i++){
                if(data.instance(i).classValue()!=firstClassValue){
                    oneClass = false;
                    break;
                }
            }

            if(oneClass){
                // base case: no need to find a shapelet
                this.classDecision = firstClassValue;
                appendToLog("class decision here: "+firstClassValue+ "\n");
                return;
            }

            // otherwise split the data and recursively create the left and right children
            try{
                // 2. find the best shapelet to split the data
                this.shapelet = findBestShapelet(1,data,minShapeletLength, maxShapeletLength);

                // 3. split the data using the shapelet
                ArrayList<Instance> splitLeft = new ArrayList<Instance>();
                ArrayList<Instance> splitRight = new ArrayList<Instance>();

                for(int i = 0; i < data.numInstances(); i++){
                    Instance current = data.instance(i);
                    double dist = subsequenceDistance(this.shapelet.content, current.toDoubleArray());
                    if(dist < shapelet.splitThreshold){
                        splitLeft.add(current);
                    }else{
                        splitRight.add(current);
                    }
                }

                // 4. record the chosen shapelet in the log
                StringBuilder description = new StringBuilder();
                description.append("seriesId, startPos, length, infoGain, splitThresh\n");
                description.append(this.shapelet.seriesId+","+this.shapelet.startPos+","+this.shapelet.content.length+","+this.shapelet.informationGain+","+this.shapelet.splitThreshold+"\n");
                for(int j = 0; j < this.shapelet.content.length; j++){
                    description.append(this.shapelet.content[j]).append(",");
                }
                description.append("\n");
                appendToLog(description.toString());

                System.out.println("shapelet completed at:"+System.nanoTime());

                // 5. initialise and recursively compute children nodes
                leftNode = new ShapeletNode();
                rightNode = new ShapeletNode();

                Instances leftInstances = new Instances(data, splitLeft.size());
                for(int i = 0; i < splitLeft.size(); i++){
                    leftInstances.add(splitLeft.get(i));
                }
                Instances rightInstances = new Instances(data, splitRight.size());
                for(int i = 0; i < splitRight.size(); i++){
                    rightInstances.add(splitRight.get(i));
                }

                appendToLog("left size under level "+level+": "+leftInstances.numInstances()+"\n");
                leftNode.initialiseNode(leftInstances, minShapeletLength, maxShapeletLength, (level+1));

                appendToLog("right size under level "+level+": "+rightInstances.numInstances()+"\n");
                rightNode.initialiseNode(rightInstances, minShapeletLength, maxShapeletLength, (level+1));
            }catch(Exception e){
                System.out.println("Problem initialising tree node: "+e);
                e.printStackTrace();
            }
        }

        /**
         * Classifies an instance by descending from this node to a leaf.
         *
         * @param instance the instance to classify
         * @return the class decision of the leaf reached
         */
        public double classifyInstance(Instance instance){
            // a node without a left child is treated as a leaf
            if (this.leftNode == null) {
                return this.classDecision;
            }

            double distance = subsequenceDistance(this.shapelet.content, instance);
            if (distance < this.shapelet.splitThreshold) {
                return leftNode.classifyInstance(instance);
            }
            return rightNode.classifyInstance(instance);
        }

    }


    //#
    /**
     * Measures how long a single best-shapelet search over the data takes.
     *
     * @param data the instances to search
     * @param minShapeletLength shortest candidate shapelet length
     * @param maxShapeletLength longest candidate shapelet length
     * @return elapsed wall-clock time of the search, in seconds
     */
    public double timingForSingleShapelet(Instances data, int minShapeletLength, int maxShapeletLength) {
        final long begun = System.nanoTime();
        findBestShapelet(1, data, minShapeletLength, maxShapeletLength);
        final long elapsedNanos = System.nanoTime() - begun;
        return (double) elapsedNanos / 1000000000.0;
    }
    
    /**
     * Exhaustively searches every subsequence of every series, for every length in
     * the given inclusive range, and returns the one that orders first under
     * Shapelet.compareTo. Progress is printed to standard output per series.
     *
     * The last element of each instance's value array is excluded from candidates
     * (the start-position bound subtracts one to avoid the class value).
     *
     * Edited from findBestKShapeletsCached.
     *
     * @param numShapelets unused; kept so existing callers continue to compile
     * @param data the instances to extract candidates from and evaluate them against
     * @param minShapeletLength shortest candidate length
     * @param maxShapeletLength longest candidate length
     * @return the best candidate found, or null when no candidate could be generated
     */
    private Shapelet findBestShapelet(int numShapelets, Instances data, int minShapeletLength, int maxShapeletLength){
        Shapelet bestShapelet = null;

        TreeMap<Double, Integer> classDistributions = getClassDistributions(data); // used to calc info gain
        final int numSeries = data.numInstances();

        //for all time series
        System.out.println("Processing data: ");
        for(int i = 0; i < numSeries; i++){
            System.out.println((1+i)+"/"+numSeries+"\t Started: "+getTime());

            double[] wholeCandidate = data.instance(i).toDoubleArray();
            // for all lengths
            for(int length = minShapeletLength; length <= maxShapeletLength; length++){
                // -1 = avoid classVal - handle later for series with no class val
                final int lastStart = wholeCandidate.length - length - 1;
                //for all possible starting positions of that length
                for(int start = 0; start <= lastStart; start++){
                    // CANDIDATE ESTABLISHED - got original series, length and starting position
                    double[] candidate = new double[length];
                    System.arraycopy(wholeCandidate, start, candidate, 0, length);

                    candidate = zNorm(candidate, false);
                    Shapelet candidateShapelet = checkCandidate(candidate, data, i, start, classDistributions);

                    if(bestShapelet==null || candidateShapelet.compareTo(bestShapelet) < 0){
                        bestShapelet = candidateShapelet;
                    }
                }
            }
        }

        return bestShapelet;
    }


    /**
     * Filters a list of shapelets so that no retained shapelet is self-similar to an
     * earlier retained one. The input list is walked in order, so earlier entries win.
     *
     * @param shapelets the input Shapelets to remove self similar Shapelet objects from
     * @return a new ArrayList holding the input shapelets with self-similar ones removed
     */
    private static ArrayList<Shapelet> removeSelfSimilar(ArrayList<Shapelet> shapelets){
        final int count = shapelets.size();
        // building a fresh list avoids removing from (and re-indexing) the input
        ArrayList<Shapelet> kept = new ArrayList<Shapelet>();
        // Java zero-initialises the array, i.e. nothing is discarded to begin with
        boolean[] discarded = new boolean[count];

        for(int keeper = 0; keeper < count; keeper++){
            if(discarded[keeper]){
                continue;
            }
            Shapelet survivor = shapelets.get(keeper);
            kept.add(survivor);
            for(int later = keeper + 1; later < count; later++){
                // entries already discarded need no further comparison
                if(!discarded[later] && selfSimilarity(survivor, shapelets.get(later))){
                    discarded[later] = true;
                }
            }
        }
        return kept;
    }


    /**
     * Merges the shapelets of a new series into the best-so-far list and keeps the top k.
     * Note that kBestSoFar is modified in place: the new shapelets are appended to it and
     * it is sorted. When it then holds fewer than k entries, that same list is returned.
     *
     * @param k the maximum number of shapelets to be returned after combining the two lists
     * @param kBestSoFar the (up to) k best shapelets that have been observed so far
     * @param timeSeriesShapelets the shapelets taken from a new series that are to be merged in
     * @return an ordered ArrayList of the best k (or less) Shapelet objects from the union of the input ArrayLists
     */
    private ArrayList<Shapelet> combine(int k, ArrayList<Shapelet> kBestSoFar, ArrayList<Shapelet> timeSeriesShapelets){
        kBestSoFar.addAll(timeSeriesShapelets);
        Collections.sort(kBestSoFar);

        // not yet k shapelets in total: hand back everything seen so far
        if(kBestSoFar.size() < k){
            return kBestSoFar;
        }

        ArrayList<Shapelet> topK = new ArrayList<Shapelet>();
        int taken = 0;
        while(taken < k){
            topK.add(kBestSoFar.get(taken));
            taken++;
        }
        return topK;
    }

    /**
     * Counts how many instances carry each class value.
     *
     * Each instance costs a single map lookup. Keys are matched by the TreeMap's own
     * ordering, so repeated occurrences of any one key (including NaN) accumulate
     * in the same entry.
     *
     * @param data the input data set that the class distributions are to be derived from
     * @return a TreeMap<Double, Integer> in the form of <Class Value, Frequency>
     */
    private static TreeMap<Double, Integer> getClassDistributions(Instances data){
        TreeMap<Double, Integer> classDistribution = new TreeMap<Double, Integer>();
        for(int i = 0; i < data.numInstances(); i++){
            Double classValue = data.instance(i).classValue();
            Integer seenSoFar = classDistribution.get(classValue);
            classDistribution.put(classValue, seenSoFar == null ? 1 : seenSoFar + 1);
        }
        return classDistribution;
    }

    /**
     * Evaluates one candidate shapelet against the whole data set. The distance from the
     * candidate to every instance is placed on an orderline sorted by ascending distance,
     * which is then used to calculate the candidate's quality measures.
     *
     * @param candidate the data from the candidate Shapelet
     * @param data the entire data set to compare the candidate to
     * @param seriesId index of the series the candidate was taken from
     * @param startPos position in that series where the candidate starts
     * @param classDistribution class value frequencies of data, in the form <Class Value, Frequency>
     * @return a Shapelet holding the candidate with its quality measures calculated
     */
    private static Shapelet checkCandidate(double[] candidate, Instances data, int seriesId, int startPos, TreeMap classDistribution){

        // the parameter is declared raw; the visible caller passes a TreeMap<Double, Integer>
        @SuppressWarnings("unchecked")
        TreeMap<Double, Integer> typedDistribution = classDistribution;

        // create orderline by looping through data set and calculating the subsequence
        // distance from candidate to all data, inserting in order.
        ArrayList<OrderLineObj> orderline = new ArrayList<OrderLineObj>();

        for(int i = 0; i < data.numInstances(); i++){
            double distance = subsequenceDistance(candidate, data.instance(i));
            double classVal = data.instance(i).classValue();

            // insert before the first entry that is strictly further away; when there is
            // none the object is the furthest so far and goes at the end
            int insertAt = orderline.size();
            for(int j = 0; j < orderline.size(); j++){
                if(orderline.get(j).distance > distance){
                    insertAt = j;
                    break; // position found, no need to scan the rest
                }
            }
            orderline.add(insertAt, new OrderLineObj(distance, classVal));
        }

        // create a shapelet object to store all necessary info, i.e.
        // content, seriesId, then calc info gain, split threshold and separation gap
        Shapelet shapelet = new Shapelet(candidate, seriesId, startPos);
        shapelet.calcInfoGainAndThreshold(orderline, typedDistribution);

        // note: early abandon entropy pruning would appear here, but has been omitted
        // in favour of a clear multi-class information gain calculation. Could be added in
        // this method in the future for speed up, but distance early abandon is more important

        return shapelet;
    }

    /**
     * Computes the base-2 entropy of a class frequency table.
     *
     * @param classDistributions class frequencies in the form <Class Value, Frequency>
     * @return the entropy; 0 when the table holds exactly one class
     */
    private static double entropy(TreeMap<Double, Integer> classDistributions){
        if(classDistributions.size() == 1){
            return 0;
        }

        int total = 0;
        for(int frequency : classDistributions.values()){
            total += frequency;
        }

        final double logOfTwo = Math.log10(2);
        double entropy = 0;
        for(int frequency : classDistributions.values()){
            double proportion =(double) frequency / total;
            double term = -proportion * Math.log10(proportion) / logOfTwo;
            // a class with zero frequency evaluates to NaN; it contributes nothing
            if(Double.isNaN(term)){
                term = 0;
            }
            entropy += term;
        }
        return entropy;
    }


    /**
     * Convenience overload that measures the distance from a candidate to an instance
     * by converting the instance to its double array first.
     *
     * @param candidate the candidate subsequence
     * @param timeSeriesIns the instance to compare the candidate to
     * @return the result of the array-based subsequenceDistance for the same inputs
     */
    public static double subsequenceDistance(double[] candidate, Instance timeSeriesIns){
        return subsequenceDistance(candidate, timeSeriesIns.toDoubleArray());
    }
    /**
     * Slides the candidate along the series and returns the smallest sum of squared
     * differences between the candidate and a z-normalised window of the same length,
     * scaled by 1 / candidate length. The last element of timeSeries is never part of
     * a window (the offset bound subtracts one).
     *
     * @param candidate the candidate subsequence
     * @param timeSeries the values to slide the candidate along
     * @return the scaled minimum distance; Double.MAX_VALUE scaled by 1 / length when
     *         no window improved on the initial value
     */
    public static double subsequenceDistance(double[] candidate, double[] timeSeries){
        final int width = candidate.length;
        final int lastOffset = timeSeries.length - width - 1;
        double smallest = Double.MAX_VALUE;

        for(int offset = 0; offset <= lastOffset; offset++){
            // copy out the window starting at this offset
            double[] window = new double[width];
            System.arraycopy(timeSeries, offset, window, 0, width);
            window = zNorm(window, false); // Z-NORM HERE

            double squaredError = 0;
            for(int k = 0; k < width; k++){
                double diff = candidate[k] - window[k];
                squaredError += diff * diff;
            }
            if(squaredError < smallest){
                smallest = squaredError;
            }
        }
        return(1.0 / width * smallest);
    }

    /**
     * Same calculation as subsequenceDistance(double[], double[]), except that the
     * arguments are swapped first when needed so that the shorter array is always the
     * one slid along the longer one.
     *
     * The swap must happen when the candidate is the LONGER array; swapping when the
     * series is longer would leave the candidate longer than the series in every case,
     * so that no window would ever be compared.
     *
     * NOTE(review): as in subsequenceDistance, the offset bound subtracts one, so the
     * last element of the longer array is never part of a window - confirm this is
     * wanted when neither array carries a trailing class value.
     *
     * @param candidate one of the two arrays to compare
     * @param timeSeries the other array
     * @return the scaled minimum distance of the shorter array against the longer one
     */
    public static double subsequenceDistanceSwitch(double[] candidate, double[] timeSeries){

        // make sure candidate refers to the shorter of the two arrays
        if(candidate.length > timeSeries.length){
            double[] temp = candidate;
            candidate = timeSeries;
            timeSeries = temp;
        }

        double bestSum = Double.MAX_VALUE;
        double sum;
        double[] subseq;

        // for all possible subsequences of the longer array
        for(int i = 0; i <= timeSeries.length - candidate.length - 1; i++){
            sum = 0;
            // get a subsequence that is the same length as the shorter array
            subseq = new double[candidate.length];
            System.arraycopy(timeSeries, i, subseq, 0, candidate.length);

            subseq = zNorm(subseq, false); // Z-NORM HERE
            for(int j = 0; j < candidate.length; j++){
                sum +=(candidate[j] - subseq[j]) *(candidate[j] - subseq[j]);
            }
            if(sum < bestSum){
                bestSum = sum;
            }
        }
        return(1.0 / candidate.length * bestSum);
    }

    /**
     * Z-normalises a series: subtracts the mean and divides by the (population)
     * standard deviation. When classValOn is set, the final element is treated as a
     * class value: it is excluded from the statistics and copied through unchanged.
     *
     * A series with zero variance produces NaN values because of the division by a
     * zero standard deviation.
     *
     * @param input the series to normalise
     * @param classValOn true when the last element of input is a class value
     * @return a new array of the same length holding the normalised series
     */
    public static double[] zNorm(double[] input, boolean classValOn){
        // number of leading elements that are actual series values
        final int seriesLength = classValOn ? input.length - 1 : input.length;
        double[] output = new double[input.length];

        double seriesTotal = 0;
        for(int i = 0; i < seriesLength; i++){
            seriesTotal += input[i];
        }
        double mean = seriesTotal / seriesLength;

        double squaredDeviations = 0;
        for(int i = 0; i < seriesLength; i++){
            squaredDeviations +=(input[i] - mean) *(input[i] - mean);
        }

        // divide by the number of series values; the class value must not be counted
        double stdv = Math.sqrt(squaredDeviations / seriesLength);

        for(int i = 0; i < seriesLength; i++){
            output[i] =(input[i] - mean) / stdv;
        }

        if(classValOn){
            output[output.length - 1] = input[input.length - 1];
        }

        return output;
    }

    /**
     * Loads a data set from a file and marks its last attribute as the class attribute.
     * Failures are reported on standard output rather than thrown.
     *
     * @param fileName path of the file to read
     * @return the loaded data, or null when the file could not be read
     */
    public static Instances loadData(String fileName){
        Instances data = null;
        FileReader r = null;
        try{
            r = new FileReader(fileName);
            data = new Instances(r);

            data.setClassIndex(data.numAttributes() - 1);
        } catch(Exception e){
            System.out.println(" Error =" + e + " in method loadData");
        } finally{
            // always release the file handle, whether or not loading succeeded
            if(r != null){
                try{
                    r.close();
                } catch(Exception ignored){
                    // the data has already been read (or the failure already reported)
                }
            }
        }
        return data;
    }

    /**
     * Tests whether a candidate, described by its series, start and length, overlaps
     * any already selected shapelet from the same series. Null array entries are skipped.
     *
     * NOTE(review): the upper bounds here are inclusive (<=), unlike the
     * selfSimilarity(Shapelet, Shapelet) overload which uses strict (<) bounds.
     *
     * @param seriesId index of the series the candidate comes from
     * @param startPos start position of the candidate in that series
     * @param length length of the candidate
     * @param selectedShapelets the shapelets to test against
     * @return true if the candidate is self-similar to at least one selected shapelet
     */
    private static boolean selfSimilarity(int seriesId, int startPos, int length, Shapelet[] selectedShapelets){
        for(int idx = 0; idx < selectedShapelets.length; idx++){
            Shapelet existing = selectedShapelets[idx];
            if(existing == null || existing.seriesId != seriesId){
                continue;
            }
            // candidate starts within existing shapelet
            boolean candidateStartsInside = startPos >= existing.startPos
                    && startPos <= existing.startPos + existing.content.length;
            // existing shapelet starts within candidate
            boolean existingStartsInside = existing.startPos >= startPos
                    && existing.startPos <= startPos + length;
            if(candidateStartsInside || existingStartsInside){
                return true;
            }
        }
        return false;
    }


    /**
     * Tests whether a candidate overlaps any shapelet of a set that comes from the
     * same series. Null set entries are skipped.
     *
     * NOTE(review): the upper bounds here are inclusive (<=), unlike the
     * selfSimilarity(Shapelet, Shapelet) overload which uses strict (<) bounds.
     *
     * @param candidate the shapelet to test
     * @param setOfShapelets the shapelets to test against
     * @return true if the candidate is self-similar to at least one member of the set
     */
    private static boolean selfSimilarity(Shapelet candidate, TreeSet<Shapelet> setOfShapelets){
        for(Shapelet existing : setOfShapelets){
            if(existing == null || existing.seriesId != candidate.seriesId){
                continue;
            }
            // candidate starts within existing shapelet
            boolean candidateStartsInside = candidate.startPos >= existing.startPos
                    && candidate.startPos <= existing.startPos + existing.content.length;
            // existing shapelet starts within candidate
            boolean existingStartsInside = existing.startPos >= candidate.startPos
                    && existing.startPos <= candidate.startPos + candidate.content.length;
            if(candidateStartsInside || existingStartsInside){
                return true;
            }
        }
        return false;
    }


    /**
     * Tests whether two shapelets come from the same series and one starts inside the
     * span [startPos, startPos + length) of the other.
     *
     * @param shapelet the first shapelet
     * @param candidate the second shapelet
     * @return true if the two shapelets are self-similar
     */
    private static boolean selfSimilarity(Shapelet shapelet, Shapelet candidate){
        if(candidate.seriesId != shapelet.seriesId){
            return false;
        }
        // candidate starts within existing shapelet
        boolean candidateStartsInside = candidate.startPos >= shapelet.startPos
                && candidate.startPos < shapelet.startPos + shapelet.content.length;
        if(candidateStartsInside){
            return true;
        }
        // existing shapelet starts within candidate
        return shapelet.startPos >= candidate.startPos
                && shapelet.startPos < candidate.startPos + candidate.content.length;
    }

    /**
     * A candidate subsequence together with where it came from (series index and
     * start position) and its quality measures: information gain, the distance
     * threshold achieving that gain, and the separation gap around the threshold.
     * Shapelets order by higher gain, then larger gap, then shorter length.
     */
    private static class Shapelet implements Comparable<Shapelet>{
        private double[] content;
        private int seriesId;
        private int startPos;
        private double splitThreshold;
        private double informationGain;
        private double separationGap;

        private Shapelet(double[] content, int seriesId, int startPos){
            this.content = content;
            this.seriesId = seriesId;
            this.startPos = startPos;
        }

        // TEMPORARY - for testing
        private Shapelet(double[] content, int seriesId, int startPos, double splitThreshold, double gain, double gap){
            this.content = content;
            this.seriesId = seriesId;
            this.startPos = startPos;
            this.splitThreshold = splitThreshold;
            this.informationGain = gain;
            this.separationGap = gap;
        }

        // TEMP - used when processing has been carried out in initial stage, then the shapelets read in via csv later
        private Shapelet(double[] content){
            this.content = content;
        }

        /**
         * Finds the split point on the orderline with the highest information gain and
         * stores the gain, the threshold (midway between the two neighbouring distances)
         * and the resulting separation gap in this shapelet. The fields are left
         * untouched when no split point could be evaluated.
         *
         * Class counts on either side of the split are maintained incrementally while
         * walking the orderline, so each split point costs one update instead of a
         * full recount.
         *
         * @param orderline distances and class values, sorted by ascending distance
         * @param classDistribution class frequencies of the whole data set; every class
         *                          value on the orderline must appear as a key
         */
        private void calcInfoGainAndThreshold(ArrayList<OrderLineObj> orderline, TreeMap<Double, Integer> classDistribution){
            if(orderline.isEmpty()){
                return; // nothing to split
            }

            final int total = orderline.size();
            // the parent entropy does not depend on the split point
            final double parentEntropy = entropy(classDistribution);

            // class counts below (less) and at-or-above (greater) the current split point
            TreeMap<Double, Integer> lessClasses = new TreeMap<Double, Integer>();
            TreeMap<Double, Integer> greaterClasses = new TreeMap<Double, Integer>();
            for(double classVal : classDistribution.keySet()){
                lessClasses.put(classVal, 0);
                greaterClasses.put(classVal, 0);
            }
            // before the first split point everything is on the greater side
            for(int j = 0; j < total; j++){
                double classVal = orderline.get(j).classVal;
                greaterClasses.put(classVal, greaterClasses.get(classVal) + 1);
            }

            double lastDist = orderline.get(0).distance; // no point breaking before any data
            double bsfGain = -1;
            double threshold = -1;

            // for each split point, starting between 0 and 1, ending between end-1 and end
            for(int i = 1; i < total; i++){
                // entry i-1 moves from the greater side to the less side
                double movedClass = orderline.get(i - 1).classVal;
                lessClasses.put(movedClass, lessClasses.get(movedClass) + 1);
                greaterClasses.put(movedClass, greaterClasses.get(movedClass) - 1);

                double thisDist = orderline.get(i).distance;
                // only sample a threshold when it has moved (identical distances give
                // identical thresholds); the first split point is always sampled
                if(i==1 || thisDist != lastDist){
                    double lessFrac =(double) i / total;
                    double entropyLess = entropy(lessClasses);
                    double greaterFrac =(double)(total - i) / total;
                    double entropyGreater = entropy(greaterClasses);

                    double gain = parentEntropy - lessFrac * entropyLess - greaterFrac * entropyGreater;

                    if(gain > bsfGain){
                        bsfGain = gain;
                        threshold =(thisDist - lastDist) / 2 + lastDist;
                    }
                }
                lastDist = thisDist;
            }
            if(bsfGain >= 0){
                this.informationGain = bsfGain;
                this.splitThreshold = threshold;
                this.separationGap = calculateSeparationGap(orderline, threshold);
            }
        }

        /**
         * Calculates the difference between the mean distance of the entries at or
         * above the threshold and the mean distance of the entries below it.
         *
         * @param orderline distances and class values of all instances
         * @param distanceThreshold the split threshold
         * @return the separation gap, or -1 when one side of the threshold is empty
         */
        private double calculateSeparationGap(ArrayList<OrderLineObj> orderline, double distanceThreshold){

            double sumLeft = 0;
            double leftSize = 0;
            double sumRight = 0;
            double rightSize = 0;

            for(int i = 0; i < orderline.size(); i++){
                double distance = orderline.get(i).distance;
                if(distance < distanceThreshold){
                    sumLeft += distance;
                    leftSize++;
                } else{
                    sumRight += distance;
                    rightSize++;
                }
            }

            // no separation, e.g. when all series are the same distance from the candidate;
            // checked before dividing so that no mean of an empty side is ever formed
            if(rightSize == 0 || leftSize == 0){
                return -1;
            }

            return 1 / rightSize * sumRight - 1 / leftSize * sumLeft;
        }


        public double getGain(){
            return this.informationGain;
        }

        public double getGap(){
            return this.separationGap;
        }

        public int getLength(){
            return this.content.length;
        }

        /**
         * Orders shapelets by information gain (higher first), then separation gap
         * (larger first), then length (shorter first).
         *
         * @param shapelet the shapelet to compare this one to
         * @return -1 if this shapelet orders before the argument, 1 if after, 0 if tied
         */
        public int compareTo(Shapelet shapelet) {
            final int BEFORE = -1;
            final int EQUAL = 0;
            final int AFTER = 1;

            if(this.informationGain != shapelet.getGain()){
                return this.informationGain > shapelet.getGain() ? BEFORE : AFTER;
            }
            if(this.separationGap != shapelet.getGap()){
                return this.separationGap > shapelet.getGap() ? BEFORE : AFTER;
            }
            if(this.content.length != shapelet.getLength()){
                return this.content.length < shapelet.getLength() ? BEFORE : AFTER;
            }
            return EQUAL;
        }
    }

    /**
     * One entry of an orderline: the distance from a candidate shapelet to a series,
     * paired with the class value of that series.
     */
    private static class OrderLineObj{

        private double classVal;
        private double distance;

        private OrderLineObj(double distance, double classVal){
            this.classVal = classVal;
            this.distance = distance;
        }
    }

    /**
     * Formats the current local date and time as "d/M/yyyy - H:m:s AM|PM" using
     * unpadded numbers and a 24-hour hour field.
     *
     * @return the formatted timestamp
     */
    public static String getTime(){
        Calendar calendar = new GregorianCalendar();
        // Calendar.MONTH is zero-based (January == 0), so shift it to the usual 1-12
        int month = calendar.get(Calendar.MONTH) + 1;
        // take the marker from the calendar rather than always claiming morning
        String meridiem = calendar.get(Calendar.AM_PM) == Calendar.AM ? "AM" : "PM";
        return calendar.get(Calendar.DAY_OF_MONTH)+"/"+month+"/"+calendar.get(Calendar.YEAR)+" - "+calendar.get(Calendar.HOUR_OF_DAY)+":"+calendar.get(Calendar.MINUTE)+":"+calendar.get(Calendar.SECOND)+" "+meridiem;
    }

   

//    // experimental method for clustering shapelets
//    public static void clusterTestShapelets()throws Exception{
//        ArrayList<double[]> inputShapeletArrays = Main.gunShapelets100();
//        staticShapelet(inputShapeletArrays);
//    }
    
    // Wraps a shapelet with a numeric id and the id of the cluster it currently belongs to.
    private static class ShapeletObj{
        private int shapeletId;
        private int clusterId;
        private Shapelet shapelet;

        // A new wrapper starts with clusterId -1, i.e. not yet assigned to any cluster.
        private ShapeletObj(int shapeletId, Shapelet shapelet){
            this.clusterId = -1;
            this.shapelet = shapelet;
            this.shapeletId = shapeletId;
        }
    }

    // Clusters shapelets around k medoids (PAM-style), measuring dissimilarity with the
    // subsequence distance between shapelet contents, and scores the resulting clustering
    // with the average silhouette value.
    private static class ShapeletPam{
        private ArrayList<ShapeletObj> shapelets;
        private int k;
        // medoids[c] is the index into shapelets of the medoid of cluster c
        private int[] medoids;
        // one member list per cluster; null until clusterShapelets has run
        private ArrayList<ArrayList<ShapeletObj>> clusters;

        // Wraps every input shapelet in a ShapeletObj, in random order, and records k.
        private ShapeletPam(ArrayList<Shapelet> inputShapelets, int k){
            this.shapelets = new ArrayList<ShapeletObj>();
            // Collections.shuffle reorders its argument in place, so shuffle a private copy
            // rather than silently reordering the caller's list
            ArrayList<Shapelet> temp = new ArrayList<Shapelet>(inputShapelets);
            Collections.shuffle(temp);
            int shapeletId = 0;
            for(Shapelet s:temp){
                this.shapelets.add(new ShapeletObj(shapeletId++,s));
            }
            this.k = k;
            this.medoids = new int[k];
        }

        // Partitions the shapelets into k clusters and stores the result in this.clusters.
        //
        // The first k shapelets (after a shuffle) seed the medoids. The method then alternates
        // between picking the best medoid of every cluster and re-assigning every shapelet to
        // its nearest medoid, until no medoid changes.
        //
        // Throws IllegalArgumentException if k is smaller than 1 or larger than the number of
        // shapelets.
        private void clusterShapelets(int k){
            if(k < 1 || k > this.shapelets.size()){
                throw new IllegalArgumentException("k must be between 1 and the number of shapelets ("
                        +this.shapelets.size()+"), but was "+k);
            }
            // keep the field in step with the argument: calculateAvgSil reads the field
            this.k = k;

            // randomise shapelets for easier initial medoid allocation
            Collections.shuffle(this.shapelets);

            // set first k to be the medoids, each one seeding its own cluster
            this.medoids = new int[k];
            for(int i = 0; i < k; i++){
                this.medoids[i]=i;
                this.shapelets.get(i).clusterId=i;
            }

            // calc cluster membership for all remaining shapelets
            assignToNearestMedoid(k);

            // initial cluster memberships now established. repetitive part begins
            // 1. look for best medoids
            // 2. if any medoids change, recompute membership.
            // 3. if no medoids change, end. else, go back to 1.
            while(updateMedoids()){
                assignToNearestMedoid(0);
            }

            // collect the members of every cluster
            this.clusters = new ArrayList<ArrayList<ShapeletObj>>();
            for(int i = 0; i < k; i++){
                ArrayList<ShapeletObj> members = new ArrayList<ShapeletObj>();
                for(ShapeletObj candidate : this.shapelets){
                    if(candidate.clusterId==i){
                        members.add(candidate);
                    }
                }
                this.clusters.add(members);
            }
        }

        // Subsequence distance between two series, always passing the shorter one first
        // (for equal lengths the second argument is passed first).
        private static double orderedDistance(double[] first, double[] second){
            if(first.length < second.length){
                return subsequenceDistance(first, second);
            }
            return subsequenceDistance(second, first);
        }

        // Assigns every shapelet from index `from` onwards to the cluster of its nearest medoid.
        // A shapelet keeps its previous clusterId if no medoid distance is below Double.MAX_VALUE.
        private void assignToNearestMedoid(int from){
            for(int i = from; i < this.shapelets.size(); i++){
                double[] thisShapelet = this.shapelets.get(i).shapelet.content;
                double minDist = Double.MAX_VALUE;
                int nearestCluster = -1;
                for(int j = 0; j < this.k; j++){
                    double[] thisMedoid = this.shapelets.get(this.medoids[j]).shapelet.content;
                    double dist = orderedDistance(thisShapelet, thisMedoid);
                    if(dist < minDist){
                        minDist = dist;
                        nearestCluster = j;
                    }
                }
                if(nearestCluster >= 0){
                    this.shapelets.get(i).clusterId = nearestCluster;
                }
            }
        }

        // Sum of the distances from the shapelet at candidateIndex to every other shapelet
        // currently assigned to cluster clusterNum.
        private double clusterCost(int candidateIndex, int clusterNum){
            double[] candidate = this.shapelets.get(candidateIndex).shapelet.content;
            double total = 0;
            for(int j = 0; j < this.shapelets.size(); j++){
                if(j != candidateIndex && this.shapelets.get(j).clusterId == clusterNum){
                    total += orderedDistance(candidate, this.shapelets.get(j).shapelet.content);
                }
            }
            return total;
        }

        // Replaces the medoid of each cluster by the member with the lowest cluster cost.
        // Returns true if at least one medoid changed.
        private boolean updateMedoids(){
            boolean anyMedoidChanged = false;
            for(int clusterNum = 0; clusterNum < this.k; clusterNum++){
                int currentMedoidId = this.medoids[clusterNum];
                int bestFoundId = currentMedoidId;
                // start from the current medoid's cost so that only a strictly better member
                // replaces it; a tie must not count as a change or the loop could oscillate
                double bestDistance = clusterCost(currentMedoidId, clusterNum);
                for(int i = 0; i < this.shapelets.size(); i++){
                    if(i != currentMedoidId && this.shapelets.get(i).clusterId == clusterNum){
                        double clusterDistance = clusterCost(i, clusterNum);
                        if(clusterDistance < bestDistance){
                            bestDistance = clusterDistance;
                            bestFoundId = i;
                        }
                    }
                }
                if(bestFoundId != currentMedoidId){ // a better medoid has been found
                    anyMedoidChanged = true;
                    this.medoids[clusterNum] = bestFoundId;
                }
            }
            return anyMedoidChanged;
        }

        // Average silhouette value over all shapelets.
        // definition: p.2(88) eq.1 of http://dx.doi.org/10.1016/j.aca.2003.12.020
        //
        // For each clustered shapelet, a = average distance to its own cluster and b = smallest
        // average distance to any other non-empty cluster; its silhouette is (b-a)/max(a,b).
        // A shapelet contributes 0 when there is no other non-empty cluster or when max(a,b) is 0.
        //
        // Throws Exception if clusterShapelets has not been run yet.
        private double calculateAvgSil() throws Exception{
            if(this.clusters == null){
                throw new Exception("Clusters not initialised yet");
            }
            double totalSilVal = 0;
            for(int i = 0; i < this.clusters.size(); i++){ // for each cluster
                ArrayList<ShapeletObj> ownCluster = this.clusters.get(i);
                for(int j = 0; j < ownCluster.size(); j++){ // for each ins in each cluster
                    double[] thisShapelet = ownCluster.get(j).shapelet.content;

                    // get own cluster distance
                    double ownClusterDist = 0;
                    for(int m = 0; m < ownCluster.size(); m++){
                        if(m!=j){ //dist(this,this) == 0, so no point
                            ownClusterDist += orderedDistance(thisShapelet, ownCluster.get(m).shapelet.content);
                        }
                    }
                    // NOTE(review): the sum skips the shapelet itself but is averaged over the
                    // full cluster size; the usual silhouette definition averages over
                    // size-1 - confirm against the cited equation before changing
                    ownClusterDist/=ownCluster.size();

                    // now try find the closest average cluster dist where cluster != i
                    double bestOtherClusterDist = Double.MAX_VALUE;
                    boolean otherClusterFound = false;
                    for(int cluster = 0; cluster < this.clusters.size(); cluster++){
                        ArrayList<ShapeletObj> otherCluster = this.clusters.get(cluster);
                        // skip the own cluster, and empty clusters (their average would be 0/0)
                        if(cluster == i || otherCluster.isEmpty()){
                            continue;
                        }
                        double currentOtherClusterDist = 0;
                        for(int m = 0; m < otherCluster.size(); m++){
                            currentOtherClusterDist += subsequenceDistanceSwitch(thisShapelet, otherCluster.get(m).shapelet.content);
                        }
                        currentOtherClusterDist/=otherCluster.size();
                        if(bestOtherClusterDist > currentOtherClusterDist){
                            bestOtherClusterDist = currentOtherClusterDist;
                        }
                        otherClusterFound = true;
                    }
                    if(!otherClusterFound){
                        continue; // nothing to compare against: contributes 0
                    }

                    // calculate the silhouette value for this point
                    double div = ownClusterDist;
                    if(bestOtherClusterDist > ownClusterDist){
                        div = bestOtherClusterDist;
                    }
                    if(div > 0){ // both averages are 0 otherwise; avoid producing NaN
                        totalSilVal += (bestOtherClusterDist-ownClusterDist)/div;
                    }
                }
            }
            totalSilVal/= this.shapelets.size();
            return totalSilVal;
        }

    }
    
    
    /**
     * Created for testing: takes shapelets that were discovered independently, clusters them
     * for a range of cluster counts and prints the average silhouette value of each clustering.
     *
     * Cluster counts from 1 up to 28 are tried, capped at the number of shapelets supplied, so
     * an empty input prints nothing.
     *
     * @param inputShapeletArrays the contents of the shapelets to cluster, one array each
     * @throws Exception if the silhouette calculation fails
     */
    public static void staticShapelet(ArrayList<double[]> inputShapeletArrays) throws Exception{
        ArrayList<Shapelet> shapelets = new ArrayList<Shapelet>();
        for(double[] content : inputShapeletArrays){
            shapelets.add(new Shapelet(content));
        }

        // there cannot be more medoids than shapelets, so never ask for more clusters than that
        int maxK = Math.min(28, shapelets.size());
        for(int k = 1; k <= maxK; k++){
            ShapeletPam sp = new ShapeletPam(shapelets, k);
            sp.clusterShapelets(k);
            double avgSil = sp.calculateAvgSil();
            System.out.println("K:"+k+": "+avgSil);
        }
    }






}