GrammarVizChartData.java example

Explorer
grammarviz2_src-master
- src
  - main
    - java
      - net
        seninp
        grammarviz
        GrammarSizeSorter.java
        GrammarVizAnomaly.java
        GrammarVizAnomalyParameters.java
        GrammarVizGUI.java
        anomaly
        AnomalyAlgorithm.java
        RRAImplementation.java
        cli
        RulesWriter.java
        TS2GrammarParameters.java
        TS2SequiturGrammar.java
        controller
        GrammarVizController.java
        logic
        CoverageCountStrategy.java
        GrammarVizAnomalyFinder.java
        GrammarVizChartData.java
        IntegerValueFilter.java
        PackedRuleRecord.java
        RulePeriodicityRecord.java
        SAXMotif.java
        SAXPointsNumber.java
        SameLengthMotifs.java
        model
        GrammarVizMessage.java
        GrammarVizModel.java
        session
        UserSession.java
        tinker
        OneEuroFilter.java
        view
        AboutGrammarVizDialog.java
        GrammarRulesPanel.java
        GrammarVizAnomaliesPanel.java
        GrammarVizView.java
        GrammarvizChartPanel.java
        GrammarvizGuesserDialog.java
        GrammarvizGuesserPane.java
        GrammarvizOptionsDialog.java
        GrammarvizOptionsPane.java
        GrammarvizParamsSampler.java
        GrammarvizRuleChartPanel.java
        MouseMarker.java
        PackedRulesPanel.java
        RulesPeriodicityPanel.java
        table
        AnomalyTableColumns.java
        AnomalyTableModel.java
        CellDoubleRenderer.java
        GrammarvizRulesTableColumns.java
        GrammarvizRulesTableDataModel.java
        GrammarvizRulesTableModel.java
        PeriodicityTableColumns.java
        PeriodicityTableModel.java
        PrunedRulesTableColumns.java
        PrunedRulesTableDataModel.java
        PrunedRulesTableModel.java
        tinker
        Interval.java
        MovieMaker.java
        MovieUtils.java
        PagePrinter.java
        PaperDiscordFinder.java
        PaperDiscordFinderHOTSAX.java
        PaperWindowSampler.java
        ParamSampler.java
        SamplerAnomaly.java
        SamplerAnomalyParameters.java
        SamplerRecord.java
        SamplingSorter.java
        util
        SAXFileIOHelper.java
  - test
    - java
      - net
        seninp
        grammarviz
        anomaly
        TestRRAanomaly.java
        tinker
        TestInterval.java
package net.seninp.grammarviz.logic;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Observable;
import java.util.Observer;
import java.util.Random;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import com.apporiented.algorithm.clustering.AverageLinkageStrategy;
import com.apporiented.algorithm.clustering.Cluster;
import com.apporiented.algorithm.clustering.ClusteringAlgorithm;
import com.apporiented.algorithm.clustering.DefaultClusteringAlgorithm;
import net.seninp.gi.logic.GrammarRuleRecord;
import net.seninp.gi.logic.GrammarRules;
import net.seninp.gi.logic.RuleInterval;
import net.seninp.gi.rulepruner.RulePrunerFactory;
import net.seninp.grammarviz.model.GrammarVizMessage;
import net.seninp.jmotif.sax.NumerosityReductionStrategy;
import net.seninp.jmotif.sax.discord.DiscordRecords;

/**
 * The main data structure used in SAXSequitur. It contains all the information needed for charting
 * and tables.
 * 
 * @author Manfred Lerner, seninp
 * 
 */
public class GrammarVizChartData extends Observable implements Observer {

  /** SAX conversion parameters; all are assigned once in the constructor. */
  protected final boolean slidingWindowOn;
  protected final NumerosityReductionStrategy numerosityReductionStrategy;
  protected final int saxWindowSize;
  protected final int saxAlphabetSize;
  protected final int saxPAASize;
  protected final double zNormThreshold;

  /** Original data file name; stored by the constructor but never read inside this class. */
  @SuppressWarnings("unused")
  private final String inputFname;

  /** Original data which will be used for the chart; the constructor stores the caller's array. */
  protected final double[] originalTimeSeries;

  /** The whole time series as a SAX display string; null until setSAXDisplay is called. */
  private String saxDisplayString = null;

  /** The grammar rules; assigned by setGrammarRules and replaced by performRulePruning. */
  private GrammarRules grammarRules;

  /** The discords returned by getAnomalies; not assigned anywhere inside this class. */
  protected DiscordRecords discords;

  /**
   * Pruning related vars: per-point occurrence counters (filled by countPointNumber), the motif
   * classes (filled by classifyMotifs and refinePatternsByClustering), and the packed rule records
   * (filled by performRemoveOverlapping or the setter).
   */
  private SAXPointsNumber[] pointsNumberRemoveStrategy;
  private ArrayList<SameLengthMotifs> allClassifiedMotifs;
  private ArrayList<PackedRuleRecord> arrPackedRuleRecords;

  /**
   * Creates the chart data holder for one time series together with the SAX settings used for it.
   * 
   * @param dataFileName the original filename.
   * @param ts the time series; the array reference is stored as-is, no copy is made.
   * @param useSlidingWindow the sliding window flag.
   * @param numerosityReductionStrategy the numerosity reduction strategy.
   * @param windowSize SAX window size.
   * @param paaSize SAX PAA size.
   * @param alphabetSize SAX alphabet size.
   * @param zNormThreshold the z-normalization threshold.
   */
  public GrammarVizChartData(String dataFileName, double[] ts, boolean useSlidingWindow,
      NumerosityReductionStrategy numerosityReductionStrategy, int windowSize, int paaSize,
      int alphabetSize, double zNormThreshold) {

    // the data and where it came from
    this.inputFname = dataFileName;
    this.originalTimeSeries = ts;

    // discretization switches
    this.slidingWindowOn = useSlidingWindow;
    this.numerosityReductionStrategy = numerosityReductionStrategy;

    // numeric SAX parameters
    this.saxWindowSize = windowSize;
    this.saxPAASize = paaSize;
    this.saxAlphabetSize = alphabetSize;
    this.zNormThreshold = zNormThreshold;
  }

  /**
   * Returns the raw time series this object was constructed with.
   * 
   * @return the internal array itself (not a copy).
   */
  public double[] getOriginalTimeseries() {
    return this.originalTimeSeries;
  }

  /**
   * Replaces the grammar rules collection held by this object.
   * 
   * @param rules the new grammar rules collection.
   */
  public void setGrammarRules(GrammarRules rules) {
    grammarRules = rules;
  }

  /**
   * Returns the grammar rules collection currently held by this object.
   * 
   * @return the grammar rules collection, or null if none has been set yet.
   */
  public GrammarRules getGrammarRules() {
    return grammarRules;
  }

  /**
   * Returns the SAX window size given to the constructor.
   * 
   * @return SAX window size
   */
  public int getSAXWindowSize() {
    return this.saxWindowSize;
  }

  /**
   * Returns the SAX alphabet size given to the constructor.
   * 
   * @return SAX alphabet size
   */
  public int getSAXAlphabetSize() {
    return this.saxAlphabetSize;
  }

  /**
   * Returns the SAX PAA size given to the constructor.
   * 
   * @return SAX PAA size
   */
  public int getSAXPaaSize() {
    return this.saxPAASize;
  }

  /**
   * Returns the sliding window flag given to the constructor.
   * 
   * @return the sliding window flag.
   */
  public boolean isSlidingWindowOn() {
    return slidingWindowOn;
  }

  /**
   * Returns the z-normalization threshold given to the constructor.
   * 
   * @return the z-normalization threshold.
   */
  public double getZNormThreshold() {
    return zNormThreshold;
  }

  /**
   * Returns the packed (per-class) rule records.
   * 
   * @return the list of packed rule records, or null if neither the setter nor
   *         performRemoveOverlapping has been called yet.
   */
  public ArrayList<PackedRuleRecord> getArrPackedRuleRecords() {
    return this.arrPackedRuleRecords;
  }

  /**
   * Replaces the packed (per-class) rule records.
   * 
   * @param arrPackedRuleRecords the new list of packed rule records; stored by reference.
   */
  public void setArrPackedRuleRecords(ArrayList<PackedRuleRecord> arrPackedRuleRecords) {
    ArrayList<PackedRuleRecord> replacement = arrPackedRuleRecords;
    this.arrPackedRuleRecords = replacement;
  }

  /**
   * Converts a rule from a foreign alphabet to the internal original SAX alphabet.
   * 
   * The foreign alphabet is the run of getSAXAlphabetSize() consecutive characters starting at
   * firstForeignAlphabetChar; its i-th character is mapped to ('a' + i). Characters outside that
   * run are left untouched. Every character is mapped exactly once, so a character that has
   * already been converted is never converted again, even when the foreign run and the 'a'-based
   * run overlap.
   * 
   * @param firstForeignAlphabetChar the first character of the foreign SAX alphabet.
   * @param rule the SAX rule in foreign SAX alphabet.
   * @return the SAX string in original alphabet, e.g. aabbdd.
   */
  public String convert2OriginalSAXAlphabet(char firstForeignAlphabetChar, String rule) {
    final int alphabetSize = getSAXAlphabetSize();
    if (alphabetSize <= 0) {
      // nothing to map
      return rule;
    }

    final char[] symbols = rule.toCharArray();
    for (int pos = 0; pos < symbols.length; pos++) {
      // offset of this character inside the foreign alphabet, if it belongs to it at all
      final int offset = symbols[pos] - firstForeignAlphabetChar;
      if (offset >= 0 && offset < alphabetSize) {
        symbols[pos] = (char) ('a' + offset);
      }
    }
    return new String(symbols);
  }

  /**
   * Stores the SAX display string.
   * 
   * @param SAXDisplay SAX display formatted string
   */
  public void setSAXDisplay(String SAXDisplay) {
    this.saxDisplayString = SAXDisplay;
  }

  /**
   * Returns the SAX display string.
   * 
   * @return SAX display formatted string, or null if it has not been set.
   */
  public String getSAXDisplay() {
    return this.saxDisplayString;
  }

  /**
   * Recovers the start and stop coordinates of a rule's subsequences.
   * 
   * @param ruleIdx The rule index.
   * @return The intervals stored in the rule record looked up by that index.
   */
  public ArrayList<RuleInterval> getRulePositionsByRuleNum(Integer ruleIdx) {
    return this.grammarRules.getRuleRecord(ruleIdx).getRuleIntervals();
  }

  /**
   * Get the rule-corresponding subsequences from a class.
   * 
   * A new RuleInterval is created for every motif of the class, so the returned list can be
   * modified without affecting the class itself. No chart series are built here: the
   * XYSeriesCollection the method used to assemble was never read, so that work was removed.
   * 
   * @param clsIdx the class index, an index into the list of classified motifs.
   * @return the class-associated subsequences.
   */
  public ArrayList<RuleInterval> getSubsequencesPositionsByClassNum(Integer clsIdx) {

    // the sub-sequences class container
    SameLengthMotifs thisClass = allClassifiedMotifs.get(clsIdx);

    // copy each motif's interval into the result
    ArrayList<RuleInterval> positions = new ArrayList<RuleInterval>();
    for (SAXMotif subSequence : thisClass.getSameLenMotifs()) {
      positions.add(new RuleInterval(subSequence.getPos().startPos, subSequence.getPos().endPos));
    }

    return positions;
  }

  /**
   * Returns the number of rules in the current grammar rules collection.
   * 
   * @return the size of the grammar rules collection.
   */
  public int getRulesNumber() {
    return this.grammarRules.size();
  }

  // ********************************
  // Refactoring in Xing's code below
  // ********************************

  /**
   * Looks up a rule record in the current grammar rules collection.
   * 
   * @param ruleIndex the rule index.
   * @return whatever the grammar rules collection returns for that index.
   */
  public GrammarRuleRecord getRule(Integer ruleIndex) {
    return grammarRules.get(ruleIndex);
  }

  /**
   * Performs rule pruning via RulePrunerFactory and replaces the current grammar rules with the
   * pruned set, i.e. the un-pruned grammar is no longer reachable through this object afterwards.
   */
  public void performRulePruning() {
    this.grammarRules = RulePrunerFactory.performPruning(this.originalTimeSeries,
        this.grammarRules);
  }

  /**
   * Runs the anomaly finder on this data: a GrammarVizAnomalyFinder is created for this object,
   * this object is registered as its observer, and its run() method is invoked directly by the
   * caller of this method.
   * 
   * @throws Exception declared by the signature; propagated from the finder, if it throws.
   */
  public void findAnomalies() throws Exception {
    GrammarVizAnomalyFinder anomalyFinder = new GrammarVizAnomalyFinder(this);
    anomalyFinder.addObserver(this);
    anomalyFinder.run();
  }

  /**
   * Returns the discords field.
   * 
   * @return the discord records; the field is never assigned inside this class, so it is null
   *         unless something outside this class has set it.
   */
  public DiscordRecords getAnomalies() {
    return discords;
  }

  /**
   * Counts, for every data point, how many rule intervals cover it (e.g. a point that lies inside
   * one interval of R1 and one interval of R2 gets the count two). All rules of the current grammar
   * are taken into account.
   * 
   * The result is stored in the pointsNumberRemoveStrategy field; no file is written.
   */
  protected void countPointNumber() {

    final int seriesLength = this.originalTimeSeries.length;

    // one counter record per data point, seeded with the point's index and value
    SAXPointsNumber[] counters = new SAXPointsNumber[seriesLength];
    for (int idx = 0; idx < seriesLength; idx++) {
      SAXPointsNumber counter = new SAXPointsNumber();
      counter.setPointIndex(idx);
      counter.setPointValue(this.originalTimeSeries[idx]);
      counters[idx] = counter;
    }

    // bump the counter of every point covered by any interval of any rule
    final int totalRules = this.getRulesNumber();
    for (int ruleIdx = 0; ruleIdx < totalRules; ruleIdx++) {
      for (RuleInterval interval : this.getRulePositionsByRuleNum(ruleIdx)) {
        final int from = interval.getStart();
        final int last = interval.getEnd() - 1;
        for (int p = from; p <= last; p++) {
          SAXPointsNumber counter = counters[p];
          counter.setPointOccurenceNumber(counter.getPointOccurenceNumber() + 1);
        }
      }
    }

    this.pointsNumberRemoveStrategy = counters;
  }

  /**
   * Counts, for every data point, how many motifs of the REDUCED set (see getReducedMotifs())
   * cover it.
   * 
   * The counters are built in a local array only: they are neither stored in a field nor returned,
   * and no file is written.
   */
  protected void countPointNumberAfterRemoving() {

    final int seriesLength = this.originalTimeSeries.length;

    // one counter record per data point, seeded with the point's index and value
    SAXPointsNumber[] counters = new SAXPointsNumber[seriesLength];
    for (int idx = 0; idx < seriesLength; idx++) {
      SAXPointsNumber counter = new SAXPointsNumber();
      counter.setPointIndex(idx);
      counter.setPointValue(this.originalTimeSeries[idx]);
      counters[idx] = counter;
    }

    // bump the counter of every point covered by a motif of any class
    for (SameLengthMotifs motifClass : this.getReducedMotifs()) {
      for (SAXMotif motif : motifClass.getSameLenMotifs()) {
        RuleInterval span = motif.getPos();
        for (int p = span.getStart(); p <= span.getEnd() - 1; p++) {
          SAXPointsNumber counter = counters[p];
          counter.setPointOccurenceNumber(counter.getPointOccurenceNumber() + 1);
        }
      }
    }

  }

  /**
   * Cleans up the rules set by classifying the sub-sequences by length.
   * 
   * Sub-sequences whose length differs from the first (shortest) member of a class by less than
   * intraThreshold times that member's length are put into the same class by
   * classifyMotifs(double); e.g. with a threshold of 0.1 a sub-sequence of length 100 and one of
   * length 105 end up in one class, because the difference (5) is less than 0.1 * 100 = 10.
   * 
   * Only the classification step is performed here: the overlap removal step,
   * removeOverlappingInSimiliar(double), is not invoked, so interThreshould is not used.
   * 
   * @param intraThreshold the length threshold used for the classification.
   * @param interThreshould the overlap threshold; currently ignored.
   */
  protected void removeOverlapping(double intraThreshold, double interThreshould) {
    this.classifyMotifs(intraThreshold);
  }

  /**
   * Classifies the motifs based on their length.
   * 
   * All sub-sequences are obtained from getAllMotifs() and walked in the order that method returns
   * them. Each not-yet-classified motif opens a new class; every motif that follows it in the list
   * and whose length differs from the opening motif's length by less than (opening length *
   * lengthThreshold) is added to that class. The resulting classes replace the content of
   * allClassifiedMotifs.
   * 
   * @param lengthThreshold the motif length threshold.
   */
  protected void classifyMotifs(double lengthThreshold) {

    // start from scratch
    allClassifiedMotifs = new ArrayList<SameLengthMotifs>();

    ArrayList<SAXMotif> candidates = getAllMotifs();
    final int total = candidates.size();

    for (int seedIdx = 0; seedIdx < total; seedIdx++) {

      SAXMotif seed = candidates.get(seedIdx);
      if (seed.isClassified()) {
        // already a member of an earlier class
        continue;
      }

      final int seedLen = seed.getPos().getEnd() - seed.getPos().getStart() + 1;
      int shortest = seedLen;
      int longest = seedLen;

      // the seed opens a new class
      ArrayList<SAXMotif> members = new ArrayList<SAXMotif>();
      members.add(seed);
      seed.setClassified(true);

      // collect every later motif whose length is close enough to the seed's
      for (int k = seedIdx + 1; k < total; k++) {
        SAXMotif other = candidates.get(k);
        final int otherLen = other.getPos().getEnd() - other.getPos().getStart() + 1;

        if (Math.abs(otherLen - seedLen) < (seedLen * lengthThreshold)) {
          members.add(other);
          other.setClassified(true);
          if (otherLen > longest) {
            longest = otherLen;
          }
          else if (otherLen < shortest) {
            shortest = otherLen;
          }
        }
      }

      SameLengthMotifs motifClass = new SameLengthMotifs();
      motifClass.setSameLenMotifs(members);
      motifClass.setMinMotifLen(shortest);
      motifClass.setMaxMotifLen(longest);
      allClassifiedMotifs.add(motifClass);
    }
  }

  /**
   * Removes overlapping motifs inside every class of allClassifiedMotifs.
   * 
   * For each pair of motifs of a class whose common part is longer than (length of the first motif
   * of the pair * thresouldCom), one of the two is removed from the class in place and remembered
   * in the returned list, with its "similar with" link pointing to the motif that was kept. After
   * a class has been processed its min/max lengths are re-set from its first and last member.
   * Finally countPointNumberAfterRemoving() and refinePatternsByClustering() are invoked, so
   * allClassifiedMotifs is replaced by the clustering-refined list before this method returns.
   * 
   * @param thresouldCom the overlap threshold, as a fraction of the motif length.
   * @return the motifs that were removed from their classes.
   */
  protected ArrayList<SAXMotif> removeOverlappingInSimiliar(double thresouldCom) {

    ArrayList<SAXMotif> motifsBeDeleted = new ArrayList<SAXMotif>();

    // fills pointsNumberRemoveStrategy, which decideRemove(...) reads
    countPointNumber();
    for (SameLengthMotifs sameLenMotifs : allClassifiedMotifs) {
      // the list is modified while being walked, hence the index loops and the index rewinds below
      outer: for (int j = 0; j < sameLenMotifs.getSameLenMotifs().size(); j++) {
        SAXMotif tempMotif = sameLenMotifs.getSameLenMotifs().get(j);
        int tempMotifLen = tempMotif.getPos().getEnd() - tempMotif.getPos().getStart() + 1;

        for (int i = j + 1; i < sameLenMotifs.getSameLenMotifs().size(); i++) {
          SAXMotif anotherMotif = sameLenMotifs.getSameLenMotifs().get(i);
          int anotherMotifLen = anotherMotif.getPos().getEnd() - anotherMotif.getPos().getStart()
              + 1;

          double minEndPos = Math.min(tempMotif.getPos().getEnd(), anotherMotif.getPos().getEnd());
          double maxStartPos = Math.max(tempMotif.getPos().getStart(),
              anotherMotif.getPos().getStart());
          // the length in common; zero or negative when the two do not overlap
          double commonLen = minEndPos - maxStartPos + 1;

          // if the motifs overlap by more than the threshold, one of the two is removed
          if (commonLen > (tempMotifLen * thresouldCom)) {
            // placeholder instances; both are re-assigned in whichever branch runs below
            SAXMotif deletedMotif = new SAXMotif();
            SAXMotif similarWith = new SAXMotif();

            boolean isAnotherBetter;

            // countPointNumber() above always assigns the array, so the weight-based decision is
            // the one normally taken; the plain length comparison is only a fallback
            if (pointsNumberRemoveStrategy != null) {
              isAnotherBetter = decideRemove(anotherMotif, tempMotif);
            }
            else {
              isAnotherBetter = anotherMotifLen > tempMotifLen;

            }
            if (isAnotherBetter) {
              // drop the outer motif; j-- cancels the j++ executed by "continue outer", so the
              // element that slid into index j is examined next
              deletedMotif = tempMotif;
              similarWith = anotherMotif;
              sameLenMotifs.getSameLenMotifs().remove(j);
              deletedMotif.setSimilarWith(similarWith);
              motifsBeDeleted.add(deletedMotif);
              j--;
              continue outer;
            }
            else {
              // drop the inner motif; i-- cancels the upcoming i++, so the element that slid into
              // index i is examined next
              deletedMotif = anotherMotif;
              similarWith = tempMotif;
              sameLenMotifs.getSameLenMotifs().remove(i);
              deletedMotif.setSimilarWith(similarWith);
              motifsBeDeleted.add(deletedMotif);
              i--;
            }
          }
        }
      }

      // NOTE(review): takes the first member as the shortest and the last as the longest, i.e.
      // assumes the class list is ordered by ascending length -- confirm against SAXMotif ordering.
      // Also assumes the class is not empty (get(0) would throw otherwise).
      int minLength = sameLenMotifs.getSameLenMotifs().get(0).getPos().endPos
          - sameLenMotifs.getSameLenMotifs().get(0).getPos().startPos + 1;
      int sameLenMotifsSize = sameLenMotifs.getSameLenMotifs().size();
      int maxLength = sameLenMotifs.getSameLenMotifs().get(sameLenMotifsSize - 1).getPos().endPos
          - sameLenMotifs.getSameLenMotifs().get(sameLenMotifsSize - 1).getPos().startPos + 1;
      sameLenMotifs.setMinMotifLen(minLength);
      sameLenMotifs.setMaxMotifLen(maxLength);
    }
    countPointNumberAfterRemoving();

    // replaces allClassifiedMotifs with the clustering-refined classes
    refinePatternsByClustering();
    return motifsBeDeleted;
  }

  /**
   * Length-normalized Euclidean distance with early abandoning.
   * 
   * The squared differences are accumulated over the indexes of ts1; as soon as the running sum
   * exceeds (ts1.length * bsfDist)^2 the computation is abandoned and NaN is returned.
   * 
   * @param ts1 the first series; its length drives the loop and the normalization.
   * @param ts2 the second series; must have at least as many elements as ts1.
   * @param bsfDist the best-so-far (normalized) distance used as the abandoning bound.
   * @return sqrt(sum of squared differences) / ts1.length, or NaN if abandoned.
   */
  protected double eculideanDistNormEAbandon(double[] ts1, double[] ts2, double bsfDist) {
    final double n = ts1.length;

    // the best-so-far distance brought to the scale of the raw sum of squares
    final double abandonLimit = Math.pow(n * bsfDist, 2);

    double sumOfSquares = 0;
    int idx = 0;
    while (idx < ts1.length) {
      final double delta = ts1[idx] - ts2[idx];
      sumOfSquares += Math.pow(delta, 2);
      if (sumOfSquares > abandonLimit) {
        return Double.NaN;
      }
      idx++;
    }

    return Math.sqrt(sumOfSquares) / n;
  }

  /**
   * Length-normalized Euclidean distance between two series.
   * 
   * @param ts1 the first series; its length drives the loop and the normalization.
   * @param ts2 the second series; must have at least as many elements as ts1.
   * @return sqrt(sum of squared differences) / ts1.length.
   */
  protected double eculideanDistNorm(double[] ts1, double[] ts2) {
    final double n = ts1.length;

    double sumOfSquares = 0;
    int idx = 0;
    while (idx < ts1.length) {
      final double delta = ts1[idx] - ts2[idx];
      sumOfSquares += Math.pow(delta, 2);
      idx++;
    }

    return Math.sqrt(sumOfSquares) / n;
  }

  /**
   * Calculates the distance between a time series and a pattern: the pattern is compared against
   * every window of the same length inside the time series and the smallest length-normalized
   * Euclidean distance found is returned.
   * 
   * The search is seeded with the distance at one randomly chosen window, which serves as the
   * initial early-abandoning bound; afterwards all windows are scanned from the first to the last.
   * 
   * @param ts the time series to slide over.
   * @param pValue the pattern.
   * @return the smallest distance found, or a very large sentinel value if the pattern is longer
   *         than the time series.
   */
  protected double calcDistTSAndPattern(double[] ts, double[] pValue) {
    final int patternLen = pValue.length;
    final int windowsCount = ts.length - patternLen + 1;

    if (windowsCount < 1) {
      // the pattern does not fit: report the "infinite" distance sentinel
      return 10000000000000000000f;
    }

    final double[] window = new double[patternLen];

    // seed the best-so-far distance from a random window
    final Random rand = new Random();
    final int seedStart = rand.nextInt(windowsCount);
    System.arraycopy(ts, seedStart, window, 0, patternLen);
    double best = eculideanDistNorm(pValue, window);

    // full scan; an abandoned computation yields NaN, which never passes the "<" test
    for (int offset = 0; offset < windowsCount; offset++) {
      System.arraycopy(ts, offset, window, 0, patternLen);
      final double candidate = eculideanDistNormEAbandon(pValue, window, best);
      if (candidate < best) {
        best = candidate;
      }
    }

    return best;
  }

  /**
   * Refines every class of allClassifiedMotifs by hierarchical clustering of its members.
   * 
   * For each class a pairwise distance matrix of the members' raw sub-sequences is built with
   * calcDistTSAndPattern, the members are clustered with average linkage, the dendrogram is cut
   * (initially at 0.67 of the root distance) and the class is split accordingly by
   * SeparateMotifsByClustering. The resulting classes replace allClassifiedMotifs. Classes with
   * fewer than two members, and classes whose root cluster reports no distance, are skipped and
   * therefore do not appear in the new list.
   */
  protected void refinePatternsByClustering() {
    double[] origTS = originalTimeSeries;
    ArrayList<SameLengthMotifs> newAllClassifiedMotifs = new ArrayList<SameLengthMotifs>();
    for (SameLengthMotifs sameLenMotifs : allClassifiedMotifs) {
      // positions of the members, in the same order as the members themselves
      ArrayList<RuleInterval> arrPos = new ArrayList<RuleInterval>();
      ArrayList<SAXMotif> subsequences = sameLenMotifs.getSameLenMotifs();
      for (SAXMotif ss : subsequences) {
        arrPos.add(ss.getPos());
      }

      int patternNum = arrPos.size();
      if (patternNum < 2) {
        // nothing to cluster; note that the class is dropped from the result
        continue;
      }
      double dt[][] = new double[patternNum][patternNum];
      // Build distance matrix.
      for (int i = 0; i < patternNum; i++) {
        RuleInterval saxPos = arrPos.get(i);

        int start1 = saxPos.getStart();
        int end1 = saxPos.getEnd();
        // copyOfRange treats the end index as exclusive
        double[] ts1 = Arrays.copyOfRange(origTS, start1, end1);

        for (int j = 0; j < arrPos.size(); j++) {
          RuleInterval saxPos2 = arrPos.get(j);
          // each cell is written only in its own (i, j) iteration, so on this freshly allocated
          // matrix the check below never skips anything: both (i, j) and (j, i) are computed
          if (dt[i][j] > 0) {
            continue;
          }
          double d = 0;
          dt[i][j] = d;
          if (i == j) {
            // the diagonal stays zero
            continue;
          }
          int start2 = saxPos2.getStart();
          int end2 = saxPos2.getEnd();
          double[] ts2 = Arrays.copyOfRange(origTS, start2, end2);

          // the longer sub-sequence plays the role of the time series, the other one of the pattern
          if (ts1.length > ts2.length)
            d = calcDistTSAndPattern(ts1, ts2);
          else
            d = calcDistTSAndPattern(ts2, ts1);

          // DTW dtw = new DTW(ts1, ts2);
          // d = dtw.warpingDistance;

          dt[i][j] = d;
        }
      }

      // cluster item names are the members' indexes, so they can be parsed back later
      String[] patternsName = new String[patternNum];
      for (int i = 0; i < patternNum; i++) {
        patternsName[i] = String.valueOf(i);
      }

      ClusteringAlgorithm alg = new DefaultClusteringAlgorithm();
      Cluster cluster = alg.performClustering(dt, patternsName, new AverageLinkageStrategy());

      // int minPatternPerCls = (int) (0.3 * patternNum);
      // minPatternPerCls = minPatternPerCls > 0 ? minPatternPerCls : 1;
      // findCluster keeps only clusters with MORE items than this value
      int minPatternPerCls = 1;

      if (cluster.getDistance() == null) {
        // System.out.print(false);
        // no root distance to cut at; note that the class is dropped from the result
        continue;
      }

      // TODO: refine hard coded threshold
      // double cutDist = cluster.getDistance() * 0.67;
      double cutDist = cluster.getDistanceValue() * 0.67;

      // raise the cut level by 50% at a time until at least one cluster qualifies
      ArrayList<String[]> clusterTSIdx = findCluster(cluster, cutDist, minPatternPerCls);
      while (clusterTSIdx.size() <= 0) {
        cutDist += cutDist / 2;
        clusterTSIdx = findCluster(cluster, cutDist, minPatternPerCls);
      }

      newAllClassifiedMotifs.addAll(SeparateMotifsByClustering(clusterTSIdx, sameLenMotifs));
    }
    allClassifiedMotifs = newAllClassifiedMotifs;
  }

  /**
   * Splits a class of motifs into several classes, one per cluster.
   * 
   * When more than one cluster is given, a new class is created for each of them; its min/max
   * motif lengths are computed from the cluster's own members, using the same length definition
   * (end - start + 1) as the rest of this file. When zero or one cluster is given, the original
   * class is returned unchanged as the only element.
   * 
   * @param clusterTSIdx the clusters; each array holds the members' indexes (as decimal strings)
   *        into the class's motif list.
   * @param sameLenMotifs the class to split.
   * @return the resulting classes.
   */
  private ArrayList<SameLengthMotifs> SeparateMotifsByClustering(ArrayList<String[]> clusterTSIdx,
      SameLengthMotifs sameLenMotifs) {
    ArrayList<SameLengthMotifs> newResult = new ArrayList<SameLengthMotifs>();

    if (clusterTSIdx.size() <= 1) {
      // nothing to split
      newResult.add(sameLenMotifs);
      return newResult;
    }

    ArrayList<SAXMotif> subsequences = sameLenMotifs.getSameLenMotifs();
    for (String[] idxesInCluster : clusterTSIdx) {
      ArrayList<SAXMotif> sameLenSS = new ArrayList<SAXMotif>();
      int minL = Integer.MAX_VALUE;
      int maxL = Integer.MIN_VALUE;

      for (String i : idxesInCluster) {
        SAXMotif ssI = subsequences.get(Integer.parseInt(i));
        int len = ssI.getPos().getEnd() - ssI.getPos().getStart() + 1;
        minL = Math.min(minL, len);
        maxL = Math.max(maxL, len);
        sameLenSS.add(ssI);
      }

      if (sameLenSS.isEmpty()) {
        // no member to derive the range from: fall back to the range of the class being split
        minL = sameLenMotifs.getMinMotifLen();
        maxL = sameLenMotifs.getMaxMotifLen();
      }

      SameLengthMotifs newIthSLM = new SameLengthMotifs();
      newIthSLM.setSameLenMotifs(sameLenSS);
      newIthSLM.setMaxMotifLen(maxL);
      newIthSLM.setMinMotifLen(minL);
      newResult.add(newIthSLM);
    }

    return newResult;
  }

  /**
   * Cuts the dendrogram rooted at the given cluster at the given distance.
   * 
   * A cluster whose distance exceeds cutDist is descended into (its first two children are
   * visited); a cluster at or below cutDist contributes the names of all its leaves as one group,
   * provided it has more than minPatternPerCls of them. A cluster without a distance contributes
   * nothing.
   * 
   * @param cluster the (sub)tree root.
   * @param cutDist the cut distance.
   * @param minPatternPerCls a group is kept only if it has more items than this value.
   * @return the groups found, each as an array of leaf names.
   */
  private ArrayList<String[]> findCluster(Cluster cluster, double cutDist, int minPatternPerCls) {

    ArrayList<String[]> groups = new ArrayList<String[]>();

    if (cluster.getDistance() == null) {
      return groups;
    }

    if (cluster.getDistanceValue() > cutDist) {
      // too loose a cluster: look into both of its children instead
      if (cluster.getChildren().size() > 0) {
        for (int childIdx = 0; childIdx < 2; childIdx++) {
          Cluster child = cluster.getChildren().get(childIdx);
          groups.addAll(findCluster(child, cutDist, minPatternPerCls));
        }
      }
      return groups;
    }

    // tight enough: all the leaves below form one group
    ArrayList<String> leafNames = getNameInCluster(cluster);
    if (leafNames.size() > minPatternPerCls) {
      groups.add(leafNames.toArray(new String[leafNames.size()]));
    }
    return groups;
  }

  /**
   * Collects the names of all leaf clusters found in the given cluster's subtree, the cluster
   * itself included, visiting children recursively in their list order.
   * 
   * @param cluster the subtree root.
   * @return the leaf names.
   */
  private ArrayList<String> getNameInCluster(Cluster cluster) {
    ArrayList<String> leafNames = new ArrayList<String>();

    // only leaves contribute a name of their own
    if (cluster.isLeaf()) {
      leafNames.add(cluster.getName());
    }

    for (Cluster child : cluster.getChildren()) {
      leafNames.addAll(getNameInCluster(child));
    }
    return leafNames;
  }

  /**
   * Wraps every interval of every rule of the current grammar into a SAXMotif (marked as not yet
   * classified and tagged with its rule index) and returns them sorted by SAXMotif's natural
   * ordering.
   * 
   * @return the sorted list of all sub-sequences.
   */
  protected ArrayList<SAXMotif> getAllMotifs() {

    ArrayList<SAXMotif> motifs = new ArrayList<SAXMotif>();

    // rule by rule, interval by interval
    for (int ruleIdx = 0; ruleIdx < this.getRulesNumber(); ruleIdx++) {
      for (RuleInterval interval : this.getRulePositionsByRuleNum(ruleIdx)) {
        SAXMotif wrapped = new SAXMotif();
        wrapped.setPos(interval);
        wrapped.setRuleIndex(ruleIdx);
        wrapped.setClassified(false);
        motifs.add(wrapped);
      }
    }

    Collections.sort(motifs);
    return motifs;
  }

  /**
   * Compares the weights of two overlapping sub-sequences; the caller removes the one with the
   * smaller weight.
   * 
   * The weight is S / (A * L). S is the sum of the occurrence counts of the points the
   * sub-sequence covers, from getStart() inclusive to getEnd() exclusive; A is the average
   * occurrence count over the whole time series; L is the number of covered points.
   * 
   * The occurrence counts are read from pointsNumberRemoveStrategy, which must have been filled
   * (see countPointNumber()) before this method is called.
   * 
   * @param motif1 the first sub-sequence.
   * @param motif2 the second sub-sequence.
   * 
   * @return true if the weight of motif1 is strictly greater than the weight of motif2.
   */
  protected boolean decideRemove(SAXMotif motif1, SAXMotif motif2) {

    // A: the average occurrence count over all points of the series
    long totalOccurrences = 0;
    for (SAXPointsNumber point : pointsNumberRemoveStrategy) {
      totalOccurrences += point.getPointOccurenceNumber();
    }
    double averageWeight = (double) totalOccurrences / (double) pointsNumberRemoveStrategy.length;

    // each weight is derived from its own motif only
    double weight1 = computeMotifWeight(motif1, averageWeight);
    double weight2 = computeMotifWeight(motif2, averageWeight);

    return weight1 > weight2;
  }

  /**
   * Computes S / (A * L) for a single sub-sequence, see decideRemove(SAXMotif, SAXMotif).
   * 
   * @param motif the sub-sequence.
   * @param averageWeight A, the average occurrence count over the whole time series.
   * @return the weight of the sub-sequence.
   */
  private double computeMotifWeight(SAXMotif motif, double averageWeight) {
    int firstPoint = motif.getPos().getStart();
    int lastPoint = motif.getPos().getEnd() - 1;

    // L: the number of points summed below
    int coveredPoints = lastPoint - firstPoint + 1;

    // S: the occurrence counts of the covered points
    long occurrences = 0;
    for (int i = firstPoint; i <= lastPoint; i++) {
      occurrences += pointsNumberRemoveStrategy[i].getPointOccurenceNumber();
    }

    return occurrences / (averageWeight * coveredPoints);
  }

  /**
   * Classifies the motifs via removeOverlapping(double, double) and then rebuilds the list of
   * packed rule records: one record per motif class, carrying the class index, the number of
   * sub-sequences in the class and the class's min/max motif lengths.
   * 
   * @param thresholdLength the length threshold, passed on to removeOverlapping.
   * @param thresholdCom the overlap threshold, passed on to removeOverlapping.
   */
  public void performRemoveOverlapping(double thresholdLength, double thresholdCom) {

    removeOverlapping(thresholdLength, thresholdCom);

    arrPackedRuleRecords = new ArrayList<PackedRuleRecord>();

    for (int classIndex = 0; classIndex < allClassifiedMotifs.size(); classIndex++) {
      SameLengthMotifs motifClass = allClassifiedMotifs.get(classIndex);

      PackedRuleRecord record = new PackedRuleRecord();
      record.setClassIndex(classIndex);
      record.setSubsequenceNumber(motifClass.getSameLenMotifs().size());
      record.setMinLength(motifClass.getMinMotifLen());
      record.setMaxLength(motifClass.getMaxMotifLen());

      arrPackedRuleRecords.add(record);
    }

  }

  /**
   * Returns the current motif classes.
   * 
   * @return the list of motif classes (the internal list itself), or null if no classification has
   *         been performed yet.
   */
  public ArrayList<SameLengthMotifs> getReducedMotifs() {
    return this.allClassifiedMotifs;
  }

  /**
   * Relays GrammarVizMessage notifications to this object's own observers; any other argument is
   * ignored.
   * 
   * @param o the observable that sent the notification (not used).
   * @param arg the notification argument.
   */
  @Override
  public void update(Observable o, Object arg) {
    if (!(arg instanceof GrammarVizMessage)) {
      return;
    }
    setChanged();
    this.notifyObservers(arg);
  }

  /**
   * Computes the root of the mean squared deviation of the gaps between consecutive start
   * positions from the given mean period.
   * 
   * @param starts the start positions.
   * @param meanPeriod the mean period to measure the deviation from.
   * @return sqrt(sum of squared deviations / (starts.length - 1)).
   */
  @SuppressWarnings("unused")
  private double getPeriodError(int[] starts, double meanPeriod) {
    final int gaps = starts.length - 1;
    double squaredDeviations = 0.0;
    for (int k = 0; k < gaps; k++) {
      final double deviation = ((double) starts[k + 1] - starts[k]) - meanPeriod;
      squaredDeviations = squaredDeviations + deviation * deviation;
    }
    return Math.sqrt(squaredDeviations / gaps);
  }

  /**
   * Computes the mean gap between consecutive start positions.
   * 
   * @param starts the start positions.
   * @return (sum of consecutive differences) / (starts.length - 1).
   */
  @SuppressWarnings("unused")
  private double getMeanPeriod(int[] starts) {
    int gapsTotal = 0;
    for (int k = 0; k + 1 < starts.length; k++) {
      gapsTotal += starts[k + 1] - starts[k];
    }
    final double gapsCount = starts.length - 1;
    return ((double) gapsTotal) / gapsCount;
  }

  /**
   * Computes the mean of the given lengths using integer division.
   * 
   * @param lengths the lengths; must not be empty (division by zero otherwise).
   * @return the sum of the lengths divided by their number, truncated.
   */
  @SuppressWarnings("unused")
  private Integer getMeanLength(int[] lengths) {
    int total = 0;
    for (int k = 0; k < lengths.length; k++) {
      total += lengths[k];
    }
    return total / lengths.length;
  }

}