package edu.hawaii.jmotif.experiment.synthetic; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.TreeSet; import edu.hawaii.jmotif.sax.SAXFactory; import edu.hawaii.jmotif.sax.alphabet.Alphabet; import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.text.cluster.FurthestFirstStrategy; import edu.hawaii.jmotif.text.cluster.TextKMeans; import edu.hawaii.jmotif.timeseries.TSException; import edu.hawaii.jmotif.timeseries.TSUtils; /** * Helper-runner for CBF test. * * @author psenin * */ public class TestSyntheticControlKMeans { // string constants private static final String COMMA = ","; // prefix for all of the output private static final String PREFIX = "RCode/synthetic.control/"; // various variables private final static Alphabet a = new NormalAlphabet(); private static final DecimalFormat df = new DecimalFormat("#0.0000000000"); // SAX parameters to use // // private static final int WINDOW_SIZE1 = 12; // private static final int PAA_SIZE1 = 4; // private static final int ALPHABET_SIZE1 = 3; // private static final int[][] params = { { 9, 3, 3 }, { 15, 3, 3 }, { 12, 3, 5 }, { 45, 3, 3 } // }; // private static final int[][] params = { { 12, 3, 5 }, { 44, 4, 5 } }; private static final int[][] params = { { 15, 5, 5 }, { 44, 4, 6 } }; private static final SAXCollectionStrategy STRATEGY = SAXCollectionStrategy.EXACT; private static final Integer NUM_CLUSTERS = 2; /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { // get the data loaded into memory // double[][] data = new double[600][60]; BufferedReader br = new BufferedReader(new FileReader(PREFIX + "synthetic_control.data")); String line = null; int i = 0; while ((line = br.readLine()) != null) { String[] dat = line.split("\\s+"); for (int j = 0; j < 60; j++) { data[i][j] = Double.valueOf(dat[j]).doubleValue(); } i++; } List<WordBag> bags = new ArrayList<WordBag>(); String tag = "A"; for (int k = 0; k < 600; k++) { if (k > 99) { tag = "B"; } if (k > 199) { tag = "C"; } if (k > 299) { tag = "D"; } if (k > 399) { tag = "E"; } if (k > 499) { tag = "F"; } if ("E".equalsIgnoreCase(tag) || "C".equalsIgnoreCase(tag)) bags.add(makeAbag(tag, k, data[k], params)); if (k == 9) { k = 99; } if (k == 109) { k = 199; } if (k == 209) { k = 299; } if (k == 309) { k = 399; } if (k == 409) { k = 499; } if (k == 509) { break; } } // create the TFIDF data structure HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags); tfidf = TextUtils.normalizeToUnitVectors(tfidf); // launch KMeans with random centers // Cluster clusters = HC.Hc(tfidf, LinkageCriterion.SINGLE); // // BufferedWriter bw = new BufferedWriter(new FileWriter(PREFIX + "test.newick")); // bw.write("(" + clusters.toNewick() + ")"); // bw.close(); // launch KMeans with random centers HashMap<String, List<String>> clusters = TextKMeans.cluster(tfidf, NUM_CLUSTERS, new FurthestFirstStrategy()); } private static WordBag makeAbag(String tag, int k, double[] ds, int[][] params) throws IndexOutOfBoundsException, TSException { WordBag bag = new WordBag(tag + String.valueOf(k)); for (int[] p : params) { String oldStr = ""; int ws = p[0]; int paaNum = p[1]; int aSize = p[2]; for (int j = 0; j < ds.length - ws; j++) { double[] paa = TSUtils.paa(TSUtils.zNormalize(TSUtils.subseries(ds, j, ws)), paaNum); char[] sax = TSUtils.ts2String(paa, a.getCuts(aSize)); if (SAXCollectionStrategy.CLASSIC.equals(STRATEGY)) { if (oldStr.length() > 0 && SAXFactory.strDistance(sax, oldStr.toCharArray()) == 0) { continue; } } else if (SAXCollectionStrategy.EXACT.equals(STRATEGY)) { if (oldStr.equalsIgnoreCase(String.valueOf(sax))) { continue; } } oldStr = String.valueOf(sax); bag.addWord(String.valueOf(sax)); } } return bag; } private static void writePreClusterTable(HashMap<String, HashMap<String, Double>> tfidf, String fname) throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter(new File(fname))); // melt together sets of keys // TreeSet<String> words = new TreeSet<String>(); for (HashMap<String, Double> t : tfidf.values()) { words.addAll(t.keySet()); } // print keys - the dictionaries names // StringBuilder sb = new StringBuilder("\"\","); for (String key : tfidf.keySet()) { sb.append("\"").append(key).append("\","); } bw.write(sb.delete(sb.length() - 1, sb.length()).append("\n").toString()); // print rows, one by one // for (String w : words) { sb = new StringBuilder(); sb.append("\"").append(w).append("\","); for (String key : tfidf.keySet()) { HashMap<String, Double> data = tfidf.get(key); if (data.keySet().contains(w)) { sb.append(df.format(data.get(w))).append(COMMA); } else { sb.append(df.format(0.0d)).append(COMMA); } } bw.write(sb.delete(sb.length() - 1, sb.length()).append("\n").toString()); } bw.close(); } }