package edu.hawaii.jmotif.sequitur; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import edu.hawaii.jmotif.distance.EuclideanDistance; import edu.hawaii.jmotif.performance.UCRUtils; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.timeseries.TSException; import edu.hawaii.jmotif.timeseries.TSUtils; /** * Helper-runner for CBF test. * * @author psenin * */ public class SAXVSMSequiturClassifier { // various variables // classifier test parameters // /** The timeseries length. */ private static final int SERIES_LENGTH = 128; /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { if (6 != args.length) { System.out.println("Expecting parameters W P A Strategy TRAIN_FILE TEST_FILE"); } Integer windowSize = Integer.valueOf(args[0]); Integer paaSize = Integer.valueOf(args[1]); Integer alphabetSize = Integer.valueOf(args[2]); SAXCollectionStrategy strategy = SAXCollectionStrategy.NOREDUCTION; if ("exact".equalsIgnoreCase(args[3])) { strategy = SAXCollectionStrategy.EXACT; } else if ("classic".equalsIgnoreCase(args[3])) { strategy = SAXCollectionStrategy.CLASSIC; } // making training and test collections Map<String, List<double[]>> trainSet = UCRUtils.readUCRData(args[4]); Map<String, List<double[]>> testSet = UCRUtils.readUCRData(args[5]); if (windowSize < paaSize + 1) { System.exit(0); } // Euclidean section // // int classifiedSeriesNum = 0; int positiveEuclideanTests = 0; long euclidenStart = System.currentTimeMillis(); for (String label : testSet.keySet()) { List<double[]> testD = testSet.get(label); for (double[] series : testD) { double bestDistance = Double.MAX_VALUE; String bestClass = ""; for (Entry<String, List<double[]>> refClass : trainSet.entrySet()) { for (double[] refSeries : refClass.getValue()) { Double distance = EuclideanDistance.earlyAbandonedDistance(TSUtils.zNormalize(series), TSUtils.zNormalize(refSeries), bestDistance); if (null != distance && distance.doubleValue() < bestDistance) { bestDistance = distance.doubleValue(); bestClass = refClass.getKey(); } } } if (label.equalsIgnoreCase(bestClass)) { positiveEuclideanTests = positiveEuclideanTests + 1; } classifiedSeriesNum++; } } double accuracyE = (double) positiveEuclideanTests / (double) classifiedSeriesNum; double errorE = 1.0d - accuracyE; long euclideanEnd = System.currentTimeMillis(); // TF-IDF statistics section // // long tfidfStart = System.currentTimeMillis(); List<WordBag> basSaxVsm = TextUtils.labeledSeries2WordBags(trainSet, paaSize, alphabetSize, windowSize, strategy); HashMap<String, HashMap<String, Double>> tfidfSaxVsm = TextUtils.computeTFIDF(basSaxVsm); tfidfSaxVsm = TextUtils.normalizeToUnitVectors(tfidfSaxVsm); long tfidfEndSaxVsm = System.currentTimeMillis(); List<WordBag> bagsSequitur = SequiturFactory.labeledSeries2WordBags(trainSet, paaSize, alphabetSize, windowSize, strategy); HashMap<String, HashMap<String, Double>> tfidfSequitur = TextUtils.computeTFIDF(bagsSequitur); tfidfSequitur = TextUtils.normalizeToUnitVectors(tfidfSequitur); long tfidfEndSequitur = System.currentTimeMillis(); System.out.println(TextUtils.tfidfToTable(tfidfSaxVsm)); System.out.println(TextUtils.tfidfToTable(tfidfSequitur)); // SAX-VSM section // // classifiedSeriesNum = 0; int positiveSaxVsmTests = 0; for (String label : tfidfSaxVsm.keySet()) { List<double[]> testD = testSet.get(label); int positives = 0; for (double[] series : testD) { positives = positives + TextUtils.classify(label, series, tfidfSaxVsm, paaSize, alphabetSize, windowSize, strategy); classifiedSeriesNum++; } positiveSaxVsmTests = positiveSaxVsmTests + positives; } double accuracySaxVsm = (double) positiveSaxVsmTests / (double) classifiedSeriesNum; double errorSaxVsm = 1.0d - accuracySaxVsm; long saxVsmEnd = System.currentTimeMillis(); // SAX-VSM-Sequitur section // // classifiedSeriesNum = 0; int positiveTestsSequitur = 0; for (String label : tfidfSaxVsm.keySet()) { List<double[]> testD = testSet.get(label); int positivesS = 0; for (double[] series : testD) { positivesS = positivesS + SequiturFactory.classify(label, series, tfidfSequitur, paaSize, alphabetSize, windowSize, strategy); classifiedSeriesNum++; } positiveTestsSequitur = positiveTestsSequitur + positivesS; } double accuracySequitur = (double) positiveTestsSequitur / (double) classifiedSeriesNum; double errorSequitur = 1.0d - accuracySequitur; long sequiturEnd = System.currentTimeMillis(); // Output // // StringBuffer sb = new StringBuffer(); sb.append("cmprun "); sb.append(trainSet.size() + ", ").append(testSet.size() + ", "); sb.append(windowSize + ", ").append(paaSize + ", ").append(alphabetSize + ", "); sb.append(errorE + ", ").append(errorSaxVsm + ", ").append(errorSequitur + ", "); sb.append(String.valueOf(euclideanEnd - euclidenStart) + ", "); sb.append(String.valueOf(tfidfEndSaxVsm - tfidfStart) + ", "); sb.append(String.valueOf(tfidfEndSequitur - tfidfEndSaxVsm) + ", "); sb.append(String.valueOf(saxVsmEnd - tfidfEndSequitur) + ", "); sb.append(String.valueOf(sequiturEnd - saxVsmEnd)); System.out.println(sb.toString()); } }