package edu.hawaii.jmotif.experiment.synthetic; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.ConsoleHandler; import java.util.logging.Formatter; import java.util.logging.Handler; import java.util.logging.Logger; import org.hackystat.utilities.logger.HackystatLogger; import edu.hawaii.jmotif.performance.UCRUtils; import edu.hawaii.jmotif.text.CosineDistanceMatrix; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.text.cluster.Cluster; import edu.hawaii.jmotif.text.cluster.HC; import edu.hawaii.jmotif.text.cluster.LinkageCriterion; import edu.hawaii.jmotif.timeseries.TSException; import edu.hawaii.jmotif.util.BriefFormatter; /** * Helper-runner for CBF test. * * @author psenin * */ public class SyntheticControlHClust { // series to take private static final int SERIES_NUM = 2; // prefix for all of the output private static final String PREFIX = "/home/psenin/dendroscope/"; // data locations private static final String TRAINING_DATA = "data/synthetic_control/synthetic_control_TRAIN"; private static final String TEST_DATA = "data/synthetic_control/synthetic_control_TEST"; // SAX parameters to use // private static final int WINDOW_SIZE = 45; private static final int PAA_SIZE = 5; private static final int ALPHABET_SIZE = 5; private static final SAXCollectionStrategy STRATEGY = SAXCollectionStrategy.EXACT; protected static Logger consoleLogger; private static String LOGGING_LEVEL = "FINE"; static { consoleLogger = HackystatLogger.getLogger("debug.console", "preseries"); consoleLogger.setUseParentHandlers(false); for (Handler handler : consoleLogger.getHandlers()) { consoleLogger.removeHandler(handler); } ConsoleHandler handler = new ConsoleHandler(); Formatter formatter = new BriefFormatter(); handler.setFormatter(formatter); consoleLogger.addHandler(handler); HackystatLogger.setLoggingLevel(consoleLogger, LOGGING_LEVEL); } /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { int[][] params = new int[1][4]; params[0][0] = WINDOW_SIZE; params[0][1] = PAA_SIZE; params[0][2] = ALPHABET_SIZE; params[0][3] = STRATEGY.index(); // reading training and test collections // Map<String, List<double[]>> trainData = UCRUtils.readUCRData(TRAINING_DATA); consoleLogger.fine("trainData classes: " + trainData.size() + ", series length: " + trainData.entrySet().iterator().next().getValue().get(0).length); for (Entry<String, List<double[]>> e : trainData.entrySet()) { consoleLogger.fine(" training class: " + e.getKey() + " series: " + e.getValue().size()); } // make a map of resulting bags List<WordBag> preRes = new ArrayList<WordBag>(); // process series one by one building word bags for (Entry<String, List<double[]>> e : trainData.entrySet()) { String classLabel = e.getKey(); if (classLabel.equalsIgnoreCase("2") || classLabel.equalsIgnoreCase("3") || classLabel.equalsIgnoreCase("6")) { continue; } int i = 0; int skip = 0; for (double[] series : e.getValue()) { skip++; // if (skip < 0) { if (skip < 15) { continue; } WordBag cb = TextUtils.seriesToWordBag(classLabel + String.valueOf(i), series, params[0]); System.out.println("series" + classLabel + String.valueOf(i) + " = c" + Arrays.toString(series).replace("[", "(").replace("]", ")")); preRes.add(cb); i++; if (i > SERIES_NUM) { break; } } } // compute TFIDF statistics for training set HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(preRes); // normalize to unit vectors to avoid false discrimination by vector magnitude tfidf = TextUtils.normalizeToUnitVectors(tfidf); // launch KMeans with random centers Cluster clusters = HC.Hc(tfidf, LinkageCriterion.SINGLE); System.out.println((new CosineDistanceMatrix(tfidf)).toString()); BufferedWriter bw = new BufferedWriter(new FileWriter(PREFIX + "test.newick")); bw.write("(" + clusters.toNewick() + ")"); bw.close(); } }