package net.seninp.grammarviz.logic;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Observable;
import java.util.Observer;
import java.util.Random;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import com.apporiented.algorithm.clustering.AverageLinkageStrategy;
import com.apporiented.algorithm.clustering.Cluster;
import com.apporiented.algorithm.clustering.ClusteringAlgorithm;
import com.apporiented.algorithm.clustering.DefaultClusteringAlgorithm;
import net.seninp.gi.logic.GrammarRuleRecord;
import net.seninp.gi.logic.GrammarRules;
import net.seninp.gi.logic.RuleInterval;
import net.seninp.gi.rulepruner.RulePrunerFactory;
import net.seninp.grammarviz.model.GrammarVizMessage;
import net.seninp.jmotif.sax.NumerosityReductionStrategy;
import net.seninp.jmotif.sax.discord.DiscordRecords;

/**
 * The main data structure used in SAXSequitur. It contains all the information needed for charting
 * and tables: the original time series, the SAX discretization parameters, the grammar rules, and
 * the rule classes produced by the overlap-removal / clustering steps.
 *
 * The class is an {@link Observer} that re-broadcasts {@link GrammarVizMessage} notifications to
 * its own observers.
 *
 * @author Manfred Lerner, seninp
 *
 */
public class GrammarVizChartData extends Observable implements Observer {

  /** SAX conversion parameters. */
  protected final boolean slidingWindowOn;
  protected final NumerosityReductionStrategy numerosityReductionStrategy;
  protected final int saxWindowSize;
  protected final int saxAlphabetSize;
  protected final int saxPAASize;
  protected final double zNormThreshold;

  /** Original data file name. */
  @SuppressWarnings("unused")
  private final String inputFname;

  /** Original data which will be used for the chart. */
  protected final double[] originalTimeSeries;

  /** The whole time series as a string. */
  private String saxDisplayString = null;

  /** The grammar rules. */
  private GrammarRules grammarRules;

  /** The discords. This class never assigns the field itself; it is only read here. */
  protected DiscordRecords discords;

  /** Per-point rule coverage counts, populated by {@link #countPointNumber()}. */
  private SAXPointsNumber[] pointsNumberRemoveStrategy;

  /** The rule sub-sequences grouped into classes, populated by {@link #classifyMotifs(double)}. */
  private ArrayList<SameLengthMotifs> allClassifiedMotifs;

  /** Table-ready summaries of the classes, see {@link #performRemoveOverlapping(double, double)}. */
  private ArrayList<PackedRuleRecord> arrPackedRuleRecords;

  /**
   * Constructor.
   *
   * @param dataFileName the original filename.
   * @param ts the time series; the array is stored as is, it is not copied.
   * @param useSlidingWindow the sliding window flag.
   * @param numerosityReductionStrategy the numerosity reduction strategy.
   * @param windowSize SAX window size.
   * @param paaSize SAX PAA size.
   * @param alphabetSize SAX alphabet size.
   * @param zNormThreshold the z-normalization threshold.
   */
  public GrammarVizChartData(String dataFileName, double[] ts, boolean useSlidingWindow,
      NumerosityReductionStrategy numerosityReductionStrategy, int windowSize, int paaSize,
      int alphabetSize, double zNormThreshold) {

    this.inputFname = dataFileName;

    this.slidingWindowOn = useSlidingWindow;
    this.numerosityReductionStrategy = numerosityReductionStrategy;

    this.originalTimeSeries = ts;

    this.saxWindowSize = windowSize;
    this.saxPAASize = paaSize;
    this.saxAlphabetSize = alphabetSize;

    this.zNormThreshold = zNormThreshold;
  }

  /**
   * Get the original, untransformed time series.
   *
   * @return the original time series (the internal array, not a copy).
   */
  public double[] getOriginalTimeseries() {
    return originalTimeSeries;
  }

  /**
   * Sets the grammar rules data.
   *
   * @param rules the grammar rules collection.
   */
  public void setGrammarRules(GrammarRules rules) {
    this.grammarRules = rules;
  }

  /**
   * Get the grammar rules.
   *
   * @return the grammar rules collection.
   */
  public GrammarRules getGrammarRules() {
    return this.grammarRules;
  }

  /**
   * @return SAX window size
   */
  public int getSAXWindowSize() {
    return saxWindowSize;
  }

  /**
   * @return SAX alphabet size
   */
  public int getSAXAlphabetSize() {
    return saxAlphabetSize;
  }

  /**
   * @return SAX PAA size
   */
  public int getSAXPaaSize() {
    return saxPAASize;
  }

  /**
   * @return the sliding window flag given to the constructor.
   */
  public boolean isSlidingWindowOn() {
    return this.slidingWindowOn;
  }

  /**
   * @return the z-normalization threshold given to the constructor.
   */
  public double getZNormThreshold() {
    return this.zNormThreshold;
  }

  /**
   * Get the collection of transformed rule records.
   *
   * @return the collection of transformed rules.
   */
  public ArrayList<PackedRuleRecord> getArrPackedRuleRecords() {
    return arrPackedRuleRecords;
  }

  /**
   * Set the collection of transformed rule records.
   *
   * @param arrPackedRuleRecords the collection of transformed rules.
   */
  public void setArrPackedRuleRecords(ArrayList<PackedRuleRecord> arrPackedRuleRecords) {
    this.arrPackedRuleRecords = arrPackedRuleRecords;
  }

  /**
   * Converts rules from a foreign alphabet to the internal original SAX alphabet.
   *
   * The i-th letter of the foreign alphabet (counting from {@code firstForeignAlphabetChar}) is
   * replaced by the i-th letter counting from 'a', for every i below the SAX alphabet size. The
   * replacements are applied one after another on the running result.
   *
   * @param firstForeignAlphabetChar the first character of the foreign alphabet.
   * @param rule the SAX rule in foreign SAX alphabet.
   * @return the SAX string in original alphabet, e.g. aabbdd.
   */
  public String convert2OriginalSAXAlphabet(char firstForeignAlphabetChar, String rule) {
    String textRule = rule;
    for (int i = 0; i < getSAXAlphabetSize(); i++) {
      char c1 = (char) (firstForeignAlphabetChar + i);
      char c2 = (char) ('a' + i);
      textRule = textRule.replace(c1, c2);
    }
    return textRule;
  }

  /**
   * @param SAXDisplay SAX display formatted string
   */
  public void setSAXDisplay(String SAXDisplay) {
    saxDisplayString = SAXDisplay;
  }

  /**
   * @return SAX display formatted string.
   */
  public String getSAXDisplay() {
    return saxDisplayString;
  }

  /**
   * Recovers start and stop coordinates of a rule's subsequences.
   *
   * @param ruleIdx The rule index.
   * @return The array of all intervals corresponding to this rule.
   */
  public ArrayList<RuleInterval> getRulePositionsByRuleNum(Integer ruleIdx) {
    GrammarRuleRecord ruleRec = this.grammarRules.getRuleRecord(ruleIdx);
    return ruleRec.getRuleIntervals();
  }

  /**
   * Get the rule-corresponding subsequences from a class.
   *
   * @param clsIdx the class index.
   * @return the class-associated subsequences, as freshly created intervals.
   */
  public ArrayList<RuleInterval> getSubsequencesPositionsByClassNum(Integer clsIdx) {

    // this will be the result
    ArrayList<RuleInterval> positions = new ArrayList<RuleInterval>();

    // the sub-sequences class container
    SameLengthMotifs thisClass = allClassifiedMotifs.get(clsIdx);

    double[] values = this.getOriginalTimeseries();

    // NOTE(review): this collection was the payload of a file export
    // (SAXFileIOHelper.writeFileXYSeries) which is disabled; it is populated below but never read
    // in this method. Kept as is -- candidate for removal together with the jfree imports.
    XYSeriesCollection data = new XYSeriesCollection();

    for (SAXMotif subSequence : thisClass.getSameLenMotifs()) {
      positions.add(new RuleInterval(subSequence.getPos().startPos, subSequence.getPos().endPos));
    }

    int index = 0;
    for (RuleInterval pos : positions) {
      XYSeries dataset = new XYSeries("Daten" + String.valueOf(index));

      int start = pos.getStart();
      int end = pos.getEnd() - 1;

      int count = 0;
      // the second condition keeps the read inside the time series
      for (int i = start; (i <= end) && (i < values.length); i++) {
        dataset.add(count++, values[i]);
      }
      data.addSeries(dataset);
      index++;
    }

    return positions;
  }

  /**
   * @return the number of rules in the current grammar rules collection.
   */
  public int getRulesNumber() {
    return grammarRules.size();
  }

  // ********************************
  // Refactoring in Xing's code below
  // ********************************

  /**
   * Get a rule record by its index.
   *
   * @param ruleIndex the rule index.
   * @return the rule record.
   */
  public GrammarRuleRecord getRule(Integer ruleIndex) {
    return this.grammarRules.get(ruleIndex);
  }

  /**
   * Performs greedy rule pruning; the current grammar rules are replaced by the pruned set.
   */
  public void performRulePruning() {
    GrammarRules prunedRulesSet = RulePrunerFactory.performPruning(this.originalTimeSeries,
        this.grammarRules);
    this.grammarRules = prunedRulesSet;
  }

  /**
   * This computes anomalies. A {@link GrammarVizAnomalyFinder} is created for this data, this
   * object is registered as its observer, and its {@code run()} method is invoked directly on the
   * calling thread.
   *
   * @throws Exception if the finder fails.
   */
  public void findAnomalies() throws Exception {
    GrammarVizAnomalyFinder finder = new GrammarVizAnomalyFinder(this);
    finder.addObserver(this);
    finder.run();
  }

  /**
   * @return the discords.
   */
  public DiscordRecords getAnomalies() {
    return this.discords;
  }

  /**
   * This method counts how many times each data point is used in ANY sequitur rule (i.e. data point
   * 1 appears only in R1 and R2, the number for data point 1 is two). The result is stored in
   * {@code pointsNumberRemoveStrategy}; the former text-file export ("PointsNumber.txt") is
   * disabled.
   */
  protected void countPointNumber() {

    // init the data structure and copy the original values
    SAXPointsNumber pointsNumber[] = new SAXPointsNumber[this.originalTimeSeries.length];
    for (int i = 0; i < this.originalTimeSeries.length; i++) {
      pointsNumber[i] = new SAXPointsNumber();
      pointsNumber[i].setPointIndex(i);
      pointsNumber[i].setPointValue(this.originalTimeSeries[i]);
    }

    // get all the rules and populate the occurrence density
    int rulesNum = this.getRulesNumber();
    for (int i = 0; i < rulesNum; i++) {
      ArrayList<RuleInterval> arrPos = this.getRulePositionsByRuleNum(i);
      for (RuleInterval saxPos : arrPos) {
        int start = saxPos.getStart();
        int end = saxPos.getEnd() - 1;
        for (int position = start; position <= end; position++) {
          pointsNumber[position]
              .setPointOccurenceNumber(pointsNumber[position].getPointOccurenceNumber() + 1);
        }
      }
    }

    this.pointsNumberRemoveStrategy = pointsNumber;
  }

  /**
   * This method counts how many times each data point is used in the REDUCED set of rule
   * sub-sequences, i.e. those returned by {@link #getReducedMotifs()}.
   *
   * NOTE(review): the counts are accumulated in a local array only; since the text-file export
   * ("PointsNumberAfterRemoving.txt") is disabled, the result is neither stored nor returned.
   */
  protected void countPointNumberAfterRemoving() {

    // init the data structure and copy the original values
    SAXPointsNumber pointsNumber[] = new SAXPointsNumber[this.originalTimeSeries.length];
    for (int i = 0; i < this.originalTimeSeries.length; i++) {
      pointsNumber[i] = new SAXPointsNumber();
      pointsNumber[i].setPointIndex(i);
      pointsNumber[i].setPointValue(this.originalTimeSeries[i]);
    }

    for (SameLengthMotifs sameLenMotifs : this.getReducedMotifs()) {
      for (SAXMotif motif : sameLenMotifs.getSameLenMotifs()) {
        RuleInterval pos = motif.getPos();
        for (int i = pos.getStart(); i <= pos.getEnd() - 1; i++) {
          pointsNumber[i].setPointOccurenceNumber(pointsNumber[i].getPointOccurenceNumber() + 1);
        }
      }
    }
  }

  /**
   * Cleans-up the rules set by classifying the sub-sequences by length and removing the overlapping
   * in the same length range.
   *
   * Sub-sequences with the length difference within the length threshold will be classified as a
   * class with the function "classifyMotifs(double)", i.e. 1-100 and 101-205 will be classified as
   * a class when the threshold is 0.1, because the length difference is 5, which is less than the
   * threshold (0.1 * 100 = 10). If two sub-sequences within one class share a common part which is
   * more than the overlap threshold, one of them is meant to be removed by the function
   * "removeOverlappingInSimiliar(double)", i.e. 1-100 and 21-120.
   *
   * Currently only the classification step runs: the call to
   * {@code removeOverlappingInSimiliar(interThreshould)} is disabled, so the second parameter is
   * not used.
   *
   * @param intraThreshold the length threshold passed to {@link #classifyMotifs(double)}.
   * @param interThreshould the overlap threshold; currently unused.
   */
  protected void removeOverlapping(double intraThreshold, double interThreshould) {

    classifyMotifs(intraThreshold);

    // disabled second stage:
    // ArrayList<SAXMotif> motifsBeDeleted = removeOverlappingInSimiliar(interThreshould);
  }

  /**
   * Classify the motifs based on their length.
   *
   * It calls "getAllMotifs()" to get all the sub-sequences that were generated by Sequitur rules in
   * ascending order. Then bins all the sub-sequences by length based on the length of the first
   * sub-sequence in each class, that is, the shortest sub-sequence in each class.
   *
   * The result replaces the content of {@code allClassifiedMotifs}.
   *
   * @param lengthThreshold the motif length threshold, a fraction of the class seed length.
   */
  protected void classifyMotifs(double lengthThreshold) {

    // reset vars
    allClassifiedMotifs = new ArrayList<SameLengthMotifs>();

    // down to business
    ArrayList<SAXMotif> allMotifs = getAllMotifs();

    // index of the element following the current one in allMotifs
    int currentIndex = 0;
    for (SAXMotif tmpMotif : allMotifs) {

      currentIndex++;

      if (tmpMotif.isClassified()) {
        // already a member of an earlier class, go to the next motif
        continue;
      }

      SameLengthMotifs tmpSameLengthMotifs = new SameLengthMotifs();
      int tmpMotifLen = tmpMotif.getPos().getEnd() - tmpMotif.getPos().getStart() + 1;
      int minLen = tmpMotifLen;
      int maxLen = tmpMotifLen;

      // this motif has not been processed yet, so it seeds a new class
      ArrayList<SAXMotif> newMotifClass = new ArrayList<SAXMotif>();
      newMotifClass.add(tmpMotif);
      tmpMotif.setClassified(true);

      // this motif is the first one of its class, traverse the rest down
      // NOTE(review): candidates are not checked with isClassified() here, so a motif may be
      // added to more than one class -- confirm whether that is intended.
      for (int i = currentIndex; i < allMotifs.size(); i++) {

        SAXMotif anotherMotif = allMotifs.get(i);

        int anotherMotifLen = anotherMotif.getPos().getEnd() - anotherMotif.getPos().getStart()
            + 1;

        // if they have the similar length.
        if (Math.abs(anotherMotifLen - tmpMotifLen) < (tmpMotifLen * lengthThreshold)) {
          newMotifClass.add(anotherMotif);
          anotherMotif.setClassified(true);
          if (anotherMotifLen > maxLen) {
            maxLen = anotherMotifLen;
          }
          else if (anotherMotifLen < minLen) {
            minLen = anotherMotifLen;
          }
        }
      }

      tmpSameLengthMotifs.setSameLenMotifs(newMotifClass);
      tmpSameLengthMotifs.setMinMotifLen(minLen);
      tmpSameLengthMotifs.setMaxMotifLen(maxLen);
      allClassifiedMotifs.add(tmpSameLengthMotifs);
    }
  }

  /**
   * Within each class removes one motif out of every pair whose common part is longer than
   * {@code thresouldCom} times the length of the first motif of the pair. Which of the two is
   * dropped is decided by {@link #decideRemove(SAXMotif, SAXMotif)}. Afterwards the min/max lengths
   * of each class are refreshed from its first and last member, and the classes are refined by
   * {@link #refinePatternsByClustering()}.
   *
   * @param thresouldCom the overlap threshold, a fraction of the motif length.
   * @return the removed motifs, each one linked to the motif it was found similar with.
   */
  protected ArrayList<SAXMotif> removeOverlappingInSimiliar(double thresouldCom) {

    ArrayList<SAXMotif> motifsBeDeleted = new ArrayList<SAXMotif>();

    // populates pointsNumberRemoveStrategy used by decideRemove
    countPointNumber();

    for (SameLengthMotifs sameLenMotifs : allClassifiedMotifs) {
      outer: for (int j = 0; j < sameLenMotifs.getSameLenMotifs().size(); j++) {
        SAXMotif tempMotif = sameLenMotifs.getSameLenMotifs().get(j);
        int tempMotifLen = tempMotif.getPos().getEnd() - tempMotif.getPos().getStart() + 1;

        for (int i = j + 1; i < sameLenMotifs.getSameLenMotifs().size(); i++) {
          SAXMotif anotherMotif = sameLenMotifs.getSameLenMotifs().get(i);
          int anotherMotifLen = anotherMotif.getPos().getEnd()
              - anotherMotif.getPos().getStart() + 1;

          double minEndPos = Math.min(tempMotif.getPos().getEnd(), anotherMotif.getPos().getEnd());
          double maxStartPos = Math.max(tempMotif.getPos().getStart(),
              anotherMotif.getPos().getStart());

          // the length in common.
          double commonLen = minEndPos - maxStartPos + 1;

          // if they are overlapped motifs, remove one of them
          if (commonLen > (tempMotifLen * thresouldCom)) {
            SAXMotif deletedMotif = new SAXMotif();
            SAXMotif similarWith = new SAXMotif();

            boolean isAnotherBetter;
            if (pointsNumberRemoveStrategy != null) {
              isAnotherBetter = decideRemove(anotherMotif, tempMotif);
            }
            else {
              isAnotherBetter = anotherMotifLen > tempMotifLen;
            }

            if (isAnotherBetter) {
              deletedMotif = tempMotif;
              similarWith = anotherMotif;
              sameLenMotifs.getSameLenMotifs().remove(j);
              deletedMotif.setSimilarWith(similarWith);
              motifsBeDeleted.add(deletedMotif);
              // the element at j was removed: step back so the loop increment re-visits index j
              j--;
              continue outer;
            }
            else {
              deletedMotif = anotherMotif;
              similarWith = tempMotif;
              sameLenMotifs.getSameLenMotifs().remove(i);
              deletedMotif.setSimilarWith(similarWith);
              motifsBeDeleted.add(deletedMotif);
              // the element at i was removed: step back so the loop increment re-visits index i
              i--;
            }
          }
        }
      }

      int minLength = sameLenMotifs.getSameLenMotifs().get(0).getPos().endPos
          - sameLenMotifs.getSameLenMotifs().get(0).getPos().startPos + 1;
      int sameLenMotifsSize = sameLenMotifs.getSameLenMotifs().size();
      int maxLength = sameLenMotifs.getSameLenMotifs().get(sameLenMotifsSize - 1).getPos().endPos
          - sameLenMotifs.getSameLenMotifs().get(sameLenMotifsSize - 1).getPos().startPos + 1;
      sameLenMotifs.setMinMotifLen(minLength);
      sameLenMotifs.setMaxMotifLen(maxLength);
    }

    countPointNumberAfterRemoving();
    refinePatternsByClustering();

    return motifsBeDeleted;
  }

  /**
   * Length-normalized Euclidean distance with early abandoning.
   *
   * @param ts1 the first series.
   * @param ts2 the second series, must be at least as long as the first one.
   * @param bsfDist the best-so-far normalized distance.
   * @return the Euclidean distance divided by the length of {@code ts1}, or {@code Double.NaN}
   *         once the running sum of squares exceeds {@code (ts1.length * bsfDist)^2}.
   */
  protected double eculideanDistNormEAbandon(double[] ts1, double[] ts2, double bsfDist) {
    double dist = 0;
    double tsLen = ts1.length;

    double bsf = Math.pow(tsLen * bsfDist, 2);

    for (int i = 0; i < ts1.length; i++) {
      double diff = ts1[i] - ts2[i];
      dist += Math.pow(diff, 2);

      if (dist > bsf)
        return Double.NaN;
    }

    return Math.sqrt(dist) / tsLen;
  }

  /**
   * Length-normalized Euclidean distance.
   *
   * @param ts1 the first series.
   * @param ts2 the second series, must be at least as long as the first one.
   * @return the Euclidean distance divided by the length of {@code ts1}.
   */
  protected double eculideanDistNorm(double[] ts1, double[] ts2) {
    double dist = 0;
    double tsLen = ts1.length;

    for (int i = 0; i < ts1.length; i++) {
      double diff = ts1[i] - ts2[i];
      dist += Math.pow(diff, 2);
    }

    return Math.sqrt(dist) / tsLen;
  }

  /**
   * Calculating the distance between time series and pattern: the pattern is slid over the time
   * series and the smallest length-normalized Euclidean distance is kept.
   *
   * @param ts a series of points for time series.
   * @param pValue a series of points for pattern.
   * @return the best distance found, or a very large constant if the pattern is longer than the
   *         time series.
   */
  protected double calcDistTSAndPattern(double[] ts, double[] pValue) {
    double INF = 10000000000000000000f;
    double bestDist = INF;
    int patternLen = pValue.length;

    int lastStartP = ts.length - pValue.length + 1;
    if (lastStartP < 1)
      return bestDist;

    // a randomly placed window gives the initial best-so-far value for early abandoning
    Random rand = new Random();
    int startP = rand.nextInt(lastStartP);

    double[] slidingWindow = new double[patternLen];

    System.arraycopy(ts, startP, slidingWindow, 0, patternLen);
    bestDist = eculideanDistNorm(pValue, slidingWindow);

    for (int i = 0; i < lastStartP; i++) {
      System.arraycopy(ts, i, slidingWindow, 0, patternLen);

      // an abandoned computation yields NaN which never passes the comparison below
      double tempDist = eculideanDistNormEAbandon(pValue, slidingWindow, bestDist);

      if (tempDist < bestDist) {
        bestDist = tempDist;
      }
    }

    return bestDist;
  }

  /**
   * Refines every class of sub-sequences by hierarchical (average linkage) clustering over the
   * pairwise pattern distances, and replaces {@code allClassifiedMotifs} with the outcome. Classes
   * with fewer than two members, and classes whose root cluster has no distance, are dropped.
   */
  protected void refinePatternsByClustering() {
    double[] origTS = originalTimeSeries;
    ArrayList<SameLengthMotifs> newAllClassifiedMotifs = new ArrayList<SameLengthMotifs>();

    for (SameLengthMotifs sameLenMotifs : allClassifiedMotifs) {

      ArrayList<RuleInterval> arrPos = new ArrayList<RuleInterval>();
      ArrayList<SAXMotif> subsequences = sameLenMotifs.getSameLenMotifs();
      for (SAXMotif ss : subsequences) {
        arrPos.add(ss.getPos());
      }

      int patternNum = arrPos.size();
      if (patternNum < 2) {
        continue;
      }

      double dt[][] = new double[patternNum][patternNum];

      // Build distance matrix.
      for (int i = 0; i < patternNum; i++) {
        RuleInterval saxPos = arrPos.get(i);

        int start1 = saxPos.getStart();
        int end1 = saxPos.getEnd();
        double[] ts1 = Arrays.copyOfRange(origTS, start1, end1);

        for (int j = 0; j < arrPos.size(); j++) {
          RuleInterval saxPos2 = arrPos.get(j);
          if (dt[i][j] > 0) {
            continue;
          }
          double d = 0;
          dt[i][j] = d;
          if (i == j) {
            continue;
          }
          int start2 = saxPos2.getStart();
          int end2 = saxPos2.getEnd();
          double[] ts2 = Arrays.copyOfRange(origTS, start2, end2);

          // the longer series plays the role of the time series, the shorter one of the pattern
          if (ts1.length > ts2.length)
            d = calcDistTSAndPattern(ts1, ts2);
          else
            d = calcDistTSAndPattern(ts2, ts1);

          dt[i][j] = d;
        }
      }

      // the cluster leaf names are the indexes of the sub-sequences within the class
      String[] patternsName = new String[patternNum];
      for (int i = 0; i < patternNum; i++) {
        patternsName[i] = String.valueOf(i);
      }

      ClusteringAlgorithm alg = new DefaultClusteringAlgorithm();
      Cluster cluster = alg.performClustering(dt, patternsName, new AverageLinkageStrategy());

      int minPatternPerCls = 1;

      if (cluster.getDistance() == null) {
        continue;
      }

      // TODO: refine hard coded threshold
      double cutDist = cluster.getDistanceValue() * 0.67;
      ArrayList<String[]> clusterTSIdx = findCluster(cluster, cutDist, minPatternPerCls);

      // relax the cut until at least one cluster is found
      while (clusterTSIdx.size() <= 0) {
        cutDist += cutDist / 2;
        clusterTSIdx = findCluster(cluster, cutDist, minPatternPerCls);
      }

      newAllClassifiedMotifs.addAll(SeparateMotifsByClustering(clusterTSIdx, sameLenMotifs));
    }

    allClassifiedMotifs = newAllClassifiedMotifs;
  }

  /**
   * Splits a class of sub-sequences according to the clusters found.
   *
   * @param clusterTSIdx the clusters, each one an array of sub-sequence indexes (as strings) within
   *        the class.
   * @param sameLenMotifs the class to split.
   * @return one new class per cluster if there is more than one cluster, otherwise a list holding
   *         the original class only.
   */
  private ArrayList<SameLengthMotifs> SeparateMotifsByClustering(ArrayList<String[]> clusterTSIdx,
      SameLengthMotifs sameLenMotifs) {

    ArrayList<SameLengthMotifs> newResult = new ArrayList<SameLengthMotifs>();

    if (clusterTSIdx.size() > 1) {
      ArrayList<SAXMotif> subsequences = sameLenMotifs.getSameLenMotifs();
      for (String[] idxesInCluster : clusterTSIdx) {
        SameLengthMotifs newIthSLM = new SameLengthMotifs();
        ArrayList<SAXMotif> sameLenSS = new ArrayList<SAXMotif>();

        // NOTE(review): min/max are seeded from the parent class, and the length below is
        // end - start without the "+ 1" used by classifyMotifs -- confirm this is intended.
        int minL = sameLenMotifs.getMinMotifLen();
        int maxL = sameLenMotifs.getMaxMotifLen();

        for (String i : idxesInCluster) {
          SAXMotif ssI = subsequences.get(Integer.parseInt(i));
          int len = ssI.getPos().getEnd() - ssI.getPos().getStart();
          if (len < minL) {
            minL = len;
          }
          else if (len > maxL) {
            maxL = len;
          }
          sameLenSS.add(ssI);
        }

        newIthSLM.setSameLenMotifs(sameLenSS);
        newIthSLM.setMaxMotifLen(maxL);
        newIthSLM.setMinMotifLen(minL);
        newResult.add(newIthSLM);
      }
    }
    else {
      newResult.add(sameLenMotifs);
    }

    return newResult;
  }

  /**
   * Cuts the cluster tree at the given distance. Nodes whose distance is above the cut are
   * descended into (first two children only); any other node with a distance becomes a cluster if
   * it holds more than {@code minPatternPerCls} leaves. Nodes without a distance contribute
   * nothing.
   *
   * @param cluster the (sub)tree root.
   * @param cutDist the cut distance.
   * @param minPatternPerCls a cluster is reported only if it has more leaves than this value.
   * @return the leaf names of the clusters found.
   */
  private ArrayList<String[]> findCluster(Cluster cluster, double cutDist, int minPatternPerCls) {

    ArrayList<String[]> clusterTSIdx = new ArrayList<String[]>();

    if (cluster.getDistance() != null) {
      if (cluster.getDistanceValue() > cutDist) {
        if (cluster.getChildren().size() > 0) {
          clusterTSIdx
              .addAll(findCluster(cluster.getChildren().get(0), cutDist, minPatternPerCls));
          clusterTSIdx
              .addAll(findCluster(cluster.getChildren().get(1), cutDist, minPatternPerCls));
        }
      }
      else {
        ArrayList<String> itemsInCluster = getNameInCluster(cluster);
        String[] idxes = itemsInCluster.toArray(new String[itemsInCluster.size()]);
        if (idxes.length > minPatternPerCls) {
          clusterTSIdx.add(idxes);
        }
      }
    }

    return clusterTSIdx;
  }

  /**
   * Collects the names of all the leaves below (and including) the given node.
   *
   * @param cluster the (sub)tree root.
   * @return the leaf names in depth-first order.
   */
  private ArrayList<String> getNameInCluster(Cluster cluster) {
    ArrayList<String> itemsInCluster = new ArrayList<String>();

    if (cluster.isLeaf()) {
      itemsInCluster.add(cluster.getName());
    }

    for (Cluster child : cluster.getChildren()) {
      itemsInCluster.addAll(getNameInCluster(child));
    }

    return itemsInCluster;
  }

  /**
   * Stores all the sub-sequences generated by Sequitur rules into an array list and sorts it with
   * {@link Collections#sort(java.util.List)}, i.e. by the natural ordering of {@code SAXMotif}
   * (presumably the sub-sequence length in ascending order -- verify against
   * {@code SAXMotif.compareTo}).
   *
   * @return the sorted list of all sub-sequences.
   */
  protected ArrayList<SAXMotif> getAllMotifs() {

    // result
    ArrayList<SAXMotif> allMotifs = new ArrayList<SAXMotif>();

    // iterate over all rules
    for (int i = 0; i < this.getRulesNumber(); i++) {

      // iterate over all segments/motifs/sub-sequences which correspond to the rule
      ArrayList<RuleInterval> arrPos = this.getRulePositionsByRuleNum(i);
      for (RuleInterval saxPos : arrPos) {
        SAXMotif motif = new SAXMotif();
        motif.setPos(saxPos);
        motif.setRuleIndex(i);
        motif.setClassified(false);
        allMotifs.add(motif);
      }
    }

    // ascending order
    Collections.sort(allMotifs);
    return allMotifs;
  }

  /**
   * Decide which one from overlapping subsequences should be removed. The decision rule is that
   * each sub-sequence has a weight, the one with the smaller weight should be removed.
   *
   * The weight is S/(A * L). S is the sum of occurrence time of all data points in that
   * sub-sequence, A is the average weight of the whole time series, and L is the length of that
   * sub-sequence.
   *
   * Requires {@code pointsNumberRemoveStrategy} to be populated, see {@link #countPointNumber()}.
   *
   * @param motif1 the first sub-sequence.
   * @param motif2 the second sub-sequence.
   *
   * @return true if the weight of {@code motif1} is strictly greater than the weight of
   *         {@code motif2}, false otherwise.
   */
  protected boolean decideRemove(SAXMotif motif1, SAXMotif motif2) {

    // motif1 details
    // NOTE(review): the length is one less than the number of points summed below (start..end
    // inclusive); the same convention is used for both motifs -- confirm it is intended.
    int motif1Start = motif1.getPos().getStart();
    int motif1End = motif1.getPos().getEnd() - 1;
    int length1 = motif1End - motif1Start;

    // motif2 details; the end must come from motif2 itself (it used to be read from motif1,
    // which corrupted both the summed range and the length of the second motif)
    int motif2Start = motif2.getPos().getStart();
    int motif2End = motif2.getPos().getEnd() - 1;
    int length2 = motif2End - motif2Start;

    int countsMotif1 = 0;
    int countsMotif2 = 0;

    // compute the averageWeight
    double averageWeight = 1;
    int count = 0;
    for (int i = 0; i < pointsNumberRemoveStrategy.length; i++) {
      count += pointsNumberRemoveStrategy[i].getPointOccurenceNumber();
    }
    averageWeight = (double) count / (double) pointsNumberRemoveStrategy.length;

    // compute counts for motif 1
    for (int i = motif1Start; i <= motif1End; i++) {
      countsMotif1 += pointsNumberRemoveStrategy[i].getPointOccurenceNumber();
    }

    // compute counts for motif 2
    for (int i = motif2Start; i <= motif2End; i++) {
      countsMotif2 += pointsNumberRemoveStrategy[i].getPointOccurenceNumber();
    }

    // get weights
    double weight1 = countsMotif1 / (averageWeight * length1);
    double weight2 = countsMotif2 / (averageWeight * length2);

    if (weight1 > weight2) {
      return true;
    }

    return false;
  }

  /**
   * Performs rules pruning based on their overlap, then rebuilds the collection of packed rule
   * records: one record per class, carrying the class index, the number of sub-sequences, and the
   * min/max sub-sequence length.
   *
   * @param thresholdLength the length threshold used for classifying the sub-sequences.
   * @param thresholdCom the overlap threshold, see {@link #removeOverlapping(double, double)}.
   */
  public void performRemoveOverlapping(double thresholdLength, double thresholdCom) {

    removeOverlapping(thresholdLength, thresholdCom);

    arrPackedRuleRecords = new ArrayList<PackedRuleRecord>();

    int i = 0;
    for (SameLengthMotifs subsequencesInClass : allClassifiedMotifs) {
      int subsequencesNumber = subsequencesInClass.getSameLenMotifs().size();
      int minLength = subsequencesInClass.getMinMotifLen();
      int maxLength = subsequencesInClass.getMaxMotifLen();

      PackedRuleRecord packedRuleRecord = new PackedRuleRecord();
      packedRuleRecord.setClassIndex(i);
      packedRuleRecord.setSubsequenceNumber(subsequencesNumber);
      packedRuleRecord.setMinLength(minLength);
      packedRuleRecord.setMaxLength(maxLength);

      arrPackedRuleRecords.add(packedRuleRecord);
      i++;
    }
  }

  /**
   * @return the current classes of rule sub-sequences; null until a classification was performed.
   */
  public ArrayList<SameLengthMotifs> getReducedMotifs() {
    return allClassifiedMotifs;
  }

  /**
   * Forwards {@link GrammarVizMessage} notifications to the observers of this object; any other
   * argument is ignored.
   */
  @Override
  public void update(Observable o, Object arg) {
    if (arg instanceof GrammarVizMessage) {
      this.setChanged();
      notifyObservers(arg);
    }
  }

  /**
   * Root of the mean squared deviation of the consecutive start differences from the mean period.
   *
   * @param starts the start positions.
   * @param meanPeriod the mean period.
   * @return the period error.
   */
  @SuppressWarnings("unused")
  private double getPeriodError(int[] starts, double meanPeriod) {
    double sqd = 0.0;
    for (int i = 1; i < starts.length; i++) {
      double periodDiff = ((double) starts[i] - starts[i - 1]) - meanPeriod;
      sqd = sqd + periodDiff * periodDiff;
    }
    return Math.sqrt(sqd / (starts.length - 1));
  }

  /**
   * Mean of the differences between consecutive start positions.
   *
   * @param starts the start positions.
   * @return the mean period.
   */
  @SuppressWarnings("unused")
  private double getMeanPeriod(int[] starts) {
    int sum = 0;
    for (int i = 1; i < starts.length; i++) {
      sum = sum + starts[i] - starts[i - 1];
    }
    return ((double) sum) / (double) (starts.length - 1);
  }

  /**
   * Mean of the lengths, computed with integer division.
   *
   * @param lengths the lengths, must not be empty.
   * @return the mean length, truncated.
   */
  @SuppressWarnings("unused")
  private Integer getMeanLength(int[] lengths) {
    int sum = 0;
    for (int l : lengths) {
      sum = sum + l;
    }
    return sum / lengths.length;
  }

}