package edu.hawaii.jmotif.experiment.cbf;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.timeseries.TSException;

/**
 * Helper-runner for CBF test.
 *
 * Generates cylinder, bell and funnel series, splits each class into a training and a test part,
 * builds a normalized TF-IDF model from the training part and prints the classification accuracy
 * and error for every combination of the configured SAX parameters.
 *
 * @author psenin
 *
 */
public class CBFClassifier {

  // classifier test parameters
  //
  /** The timeseries length. */
  private static final int SERIES_LENGTH = 128;

  /** The training set size (per class); may be overridden by the first command line argument. */
  private static int TRAINING_SET_SIZE = 300;

  /** The test set size (per class). */
  private static final int TEST_SAMPLE_SIZE = 1000;

  // SAX parameters to use
  //
  private static final int[] PAA_SIZES = { 4 };
  private static final int[] ALPHABET_SIZES = { 12 };
  private static final int[] WINDOW_SIZES = { 55 };

  private static final SAXCollectionStrategy strategy = SAXCollectionStrategy.NOREDUCTION;

  /**
   * Runs the experiment and prints one line per parameter combination to the standard output:
   * PAA size, alphabet size, window size, accuracy, error.
   *
   * @param args optional; if present, args[0] is parsed as the per-class training set size.
   * @throws TSException if thrown by the underlying bag building or classification routines.
   * @throws IndexOutOfBoundsException declared by the original signature; kept for compatibility.
   * @throws IOException if the output file can not be created or closed.
   * @throws NumberFormatException if args[0] is not a valid integer.
   * @throws IllegalArgumentException if the requested training set size is negative.
   */
  public static void main(String[] args) throws IndexOutOfBoundsException, TSException,
      IOException {

    if (args.length > 0) {
      // parseInt avoids the needless boxing of Integer.valueOf
      TRAINING_SET_SIZE = Integer.parseInt(args[0]);
    }
    if (TRAINING_SET_SIZE < 0) {
      // fail early with a clear message instead of an index error deep inside extract()
      throw new IllegalArgumentException("Training set size must be non-negative, got "
          + TRAINING_SET_SIZE);
    }

    // NOTE(review): nothing is ever written to this file, the results go to System.out only;
    // confirm whether the CSV output was intended.
    BufferedWriter bw = new BufferedWriter(new FileWriter("output_"
        + String.valueOf(TRAINING_SET_SIZE) + ".csv"));

    // the try/finally guarantees the writer is closed even if the experiment fails midway
    try {

      // making training and test collections
      Map<String, List<double[]>> trainData = new HashMap<String, List<double[]>>();
      Map<String, List<double[]>> testData = new HashMap<String, List<double[]>>();

      // ticks
      int[] t = new int[SERIES_LENGTH];
      for (int i = 0; i < SERIES_LENGTH; i++) {
        t[i] = i;
      }

      int totalPerClass = TRAINING_SET_SIZE + TEST_SAMPLE_SIZE;

      // The three generation loops are intentionally kept separate and in this order, so that
      // any state shared inside CBFGenerator is consumed exactly as before.

      // cylinder sample
      List<double[]> cylinders = new ArrayList<double[]>(totalPerClass);
      for (int i = 0; i < totalPerClass; i++) {
        cylinders.add(CBFGenerator.cylinder(t));
      }
      split("cylinder", cylinders, trainData, testData);

      // bell sample
      List<double[]> bells = new ArrayList<double[]>(totalPerClass);
      for (int i = 0; i < totalPerClass; i++) {
        bells.add(CBFGenerator.bell(t));
      }
      split("bell", bells, trainData, testData);

      // funnel sample
      List<double[]> funnels = new ArrayList<double[]>(totalPerClass);
      for (int i = 0; i < totalPerClass; i++) {
        funnels.add(CBFGenerator.funnel(t));
      }
      split("funnel", funnels, trainData, testData);

      for (int paaSize : PAA_SIZES) {
        for (int alphabetSize : ALPHABET_SIZES) {
          for (int windowSize : WINDOW_SIZES) {

            // the sliding window must be strictly longer than the PAA size
            if (windowSize < paaSize + 1) {
              continue;
            }

            // making training bags collection
            List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainData, paaSize,
                alphabetSize, windowSize, strategy);

            HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
            tfidf = TextUtils.normalizeToUnitVectors(tfidf);

            int totalTestSample = 0;
            int totalPositiveTests = 0;

            for (String label : tfidf.keySet()) {
              List<double[]> testD = testData.get(label);
              int positives = 0;
              for (double[] series : testD) {
                // presumably classify() yields 1 for a correctly labeled series and 0
                // otherwise -- TODO confirm against TextUtils
                positives = positives
                    + TextUtils.classify(label, series, tfidf, paaSize, alphabetSize,
                        windowSize, strategy);
                totalTestSample++;
              }
              totalPositiveTests = totalPositiveTests + positives;
            }

            double accuracy = (double) totalPositiveTests / (double) totalTestSample;
            double error = 1.0d - accuracy;

            System.out.println(paaSize + "," + alphabetSize + ", " + windowSize + "," + accuracy
                + "," + error);
          }
        }
      }
    }
    finally {
      bw.close();
    }
  }

  /**
   * Splits a generated sample into its training and test parts and registers both under the
   * label: the first TRAINING_SET_SIZE series go to the training map, the following
   * TEST_SAMPLE_SIZE series go to the test map.
   *
   * @param label the class label.
   * @param series the generated series of this class.
   * @param trainData the training collection to update.
   * @param testData the test collection to update.
   */
  private static void split(String label, List<double[]> series,
      Map<String, List<double[]>> trainData, Map<String, List<double[]>> testData) {
    trainData.put(label, extract(series, 0, TRAINING_SET_SIZE));
    testData.put(label, extract(series, TRAINING_SET_SIZE, TRAINING_SET_SIZE + TEST_SAMPLE_SIZE));
  }

  /**
   * Copies the elements with indexes from start (inclusive) to end (exclusive) into a new list.
   * An empty list is returned when end is not greater than start.
   *
   * @param series the source list.
   * @param start the first index to copy, inclusive.
   * @param end the last index to copy, exclusive.
   * @return a new list holding the selected elements in their original order.
   */
  private static List<double[]> extract(List<double[]> series, int start, int end) {
    List<double[]> res = new ArrayList<double[]>(Math.max(0, end - start));
    for (int i = start; i < end; i++) {
      res.add(series.get(i));
    }
    return res;
  }

}