package edu.hawaii.jmotif.experiment.cbf; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import edu.hawaii.jmotif.sax.SAXFactory; import edu.hawaii.jmotif.sax.alphabet.Alphabet; import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet; import edu.hawaii.jmotif.text.CosineDistanceMatrix; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.text.cluster.Cluster; import edu.hawaii.jmotif.text.cluster.HC; import edu.hawaii.jmotif.text.cluster.LinkageCriterion; import edu.hawaii.jmotif.timeseries.TSException; import edu.hawaii.jmotif.timeseries.TSUtils; /** * Helper-runner for CBF Hierarchical web example test. * * @author psenin * */ public class CBFHClust { // prefix for all of the output private static final String PREFIX = "/home/psenin/dendroscope/"; // we really need an alphabet for SAX private final static Alphabet a = new NormalAlphabet(); // The timeseries length private static final int SERIES_LENGTH = 128; // Number of samples to generate from each subset private static final int SET_SAMPLES_NUM = 10; // Number of samples within the each bag of words private static final int TRAINING_SET_REPETITIONS = 1; // SAX parameters to use // private static final int PAA_SIZE = 6; private static final int ALPHABET_SIZE = 5; private static final int WINDOW_SIZE = 40; // processing strategy to utilize // private static final SAXCollectionStrategy STRATEGY = SAXCollectionStrategy.NOREDUCTION; /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { // time ticks int[] t = new int[SERIES_LENGTH]; for (int i = 0; i < SERIES_LENGTH; i++) { t[i] = i; } // cylinder sample double[][] cylinder = new double[SET_SAMPLES_NUM * TRAINING_SET_REPETITIONS][SERIES_LENGTH]; for (int i = 0; i < cylinder.length; i++) { cylinder[i] = CBFGenerator.cylinder(t); } // bell sample double[][] bell = new double[SET_SAMPLES_NUM * TRAINING_SET_REPETITIONS][SERIES_LENGTH]; for (int i = 0; i < bell.length; i++) { bell[i] = CBFGenerator.bell(t); } // funnel sample double[][] funnel = new double[SET_SAMPLES_NUM * TRAINING_SET_REPETITIONS][SERIES_LENGTH]; for (int i = 0; i < funnel.length; i++) { funnel[i] = CBFGenerator.funnel(t); } // making bags collection List<WordBag> bags = new ArrayList<WordBag>(); bags.addAll(getWordBags("cylinder", cylinder, TRAINING_SET_REPETITIONS, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE)); bags.addAll(getWordBags("bell", bell, TRAINING_SET_REPETITIONS, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE)); bags.addAll(getWordBags("funnel", funnel, TRAINING_SET_REPETITIONS, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE)); // for (WordBag b : bags) { // System.out.println(b.getName()); // } // create the TFIDF data structure HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags); tfidf = TextUtils.normalizeToUnitVectors(tfidf); // launch KMeans with random centers Cluster clusters = HC.Hc(tfidf, LinkageCriterion.COMPLETE); System.out.println((new CosineDistanceMatrix(tfidf)).toString()); BufferedWriter bw = new BufferedWriter(new FileWriter(PREFIX + "test.newick")); bw.write("(" + clusters.toNewick() + ")"); bw.close(); } private static List<WordBag> getWordBags(String bagPrefix, double[][] series, int repeats, int windowSize, int paaSize, int alphabetSize) throws IndexOutOfBoundsException, TSException, IOException { List<WordBag> res = new ArrayList<WordBag>(); for (int i = 0; i < series.length / repeats; i++) { WordBag bag = new WordBag(bagPrefix + String.valueOf(i)); for (int r = 0; r < repeats; r++) { int seriesIdx = i + r; String oldStr = ""; for (int j = 0; j < series[seriesIdx].length - windowSize; j++) { double[] paa = TSUtils.paa( TSUtils.zNormalize(TSUtils.subseries(series[seriesIdx], j, windowSize)), PAA_SIZE); char[] sax = TSUtils.ts2String(paa, a.getCuts(ALPHABET_SIZE)); if (SAXCollectionStrategy.CLASSIC.equals(STRATEGY)) { if (oldStr.length() > 0 && SAXFactory.strDistance(sax, oldStr.toCharArray()) == 0) { continue; } } else if (SAXCollectionStrategy.EXACT.equals(STRATEGY)) { if (oldStr.equalsIgnoreCase(String.valueOf(sax))) { continue; } } oldStr = String.valueOf(sax); bag.addWord(String.valueOf(sax)); } } res.add(bag); } return res; } }