package edu.hawaii.jmotif.experiment.clustering; import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import cc.mallet.util.Randoms; import edu.hawaii.jmotif.performance.UCRUtils; import edu.hawaii.jmotif.sax.alphabet.Alphabet; import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.text.cluster.RandomStartStrategy; import edu.hawaii.jmotif.text.cluster.TextKMeans; import edu.hawaii.jmotif.timeseries.TSException; /** * Helper-runner for CBF test. * * @author psenin * */ public class TestPhysioKMeans { protected final static int CLASSIC = 0; protected final static int EXACT = 1; protected final static int NOREDUCTION = 2; // string constants private static final String COMMA = ","; // prefix for all of the output private static final String DATA = "physio/PHYSIO_CLUSTER.csv"; // various variables private final static Alphabet a = new NormalAlphabet(); private static final DecimalFormat df = new DecimalFormat("#0.0000000000"); // SAX parameters to use // private static final int[][] params = { { 51, 10, 10, EXACT } }; private static final String[] keys = { "II", "AVR", "RESP", "PLETH", "CO2" }; private static final int[] key2 = { 1449, 235, 1058, 668, 1310, 230, 501, 865, 551, 700, 1077, 1242, 528, 1107, 881, 314, 483, 234, 1475, 725 }; // [1] "II_1449" "II_235" "II_1058" "II_668" "II_1310" "II_230" "II_501" "II_865" // [9] "II_551" "II_700" "II_1077" "II_1242" "II_528" "II_1107" "II_881" "II_314" // [17] "II_483" "II_234" "II_1475" "II_725" private static final int[] keyV = { 1212, 164, 141, 998, 720, 357, 1019, 455, 481, 739, 880, 1136, 1222, 492, 406, 654, 834, 1500, 1116, 1116 }; // [1] "V_1212" "V_164" "V_141" "V_998" "V_720" "V_357" "V_1019" "V_455" "V_481" // [10] "V_739" "V_880" "V_1136" "V_1222" "V_492" "V_406" "V_654" "V_834" // [19] "V_1500" "V_1116" "V_1116" private static final int[] keyRESP = { 793, 224, 1111, 774, 1148, 1091, 1167, 764, 593, 177, 612, 1011, 231, 701, 565, 279, 246, 359, 1376, 149 }; // [1] "RESP_793" "RESP_224" "RESP_1111" "RESP_774" "RESP_1148" "RESP_1091" "RESP_1167" // [8] "RESP_764" "RESP_593" "RESP_177" "RESP_612" "RESP_1011" "RESP_231" "RESP_701" // [15] "RESP_565" "RESP_279" "RESP_246" "RESP_359" "RESP_1376" "RESP_149" private static final int[] keyPLETH = { 189, 82, 926, 13, 1005, 1220, 539, 1328, 958, 337, 226, 640, 755, 931, 549, 905, 171, 567, 891, 1398 }; // [1] "PLETH_189" "PLETH_82" "PLETH_926" "PLETH_13" "PLETH_1005" "PLETH_1220" // [7] "PLETH_539" "PLETH_1328" "PLETH_958" "PLETH_337" "PLETH_226" "PLETH_640" // [13] "PLETH_755" "PLETH_931" "PLETH_549" "PLETH_905" "PLETH_171" "PLETH_567" // [19] "PLETH_891" "PLETH_1398" private static final int[] keyAVR = { 744, 681, 1038, 865, 855, 927, 1187, 814, 938, 88, 1012, 716, 370, 1162, 923, 1361, 1368, 169, 1242, 1147 }; // "AVR_744" "AVR_681" "AVR_1038" "AVR_865" "AVR_855" "AVR_927" "AVR_1187" // "AVR_814" "AVR_938" "AVR_88" "AVR_1012" "AVR_716" "AVR_370" "AVR_1162" // "AVR_923" "AVR_1361" "AVR_1368" "AVR_169" "AVR_1242" "AVR_1147" private static final int[] keyCO2 = { 467, 77, 161, 312, 408, 239, 1, 383, 264, 143, 423, 443, 58, 428, 183, 276, 416, 11, 334, 365 }; private static final Integer NUM_CLUSTERS = 5; private static final Integer NUM_REPEATS = 10; private static final int NUM_SAMPLES = 15; private static Randoms randoms; /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { randoms = new Randoms(); int[][] p = new int[1][4]; p[0][0] = params[0][0]; p[0][1] = params[0][1]; p[0][2] = params[0][2]; SAXCollectionStrategy strategy = SAXCollectionStrategy.CLASSIC; if (EXACT == params[0][3]) { strategy = SAXCollectionStrategy.EXACT; } else if (NOREDUCTION == params[0][3]) { strategy = SAXCollectionStrategy.NOREDUCTION; } p[0][3] = strategy.index(); // get the data loaded into memory // Map<String, List<double[]>> trainData = UCRUtils.readUCRData(DATA); List<double[]> zeroes = trainData.get(keys[0]); System.out.println("Zeroes: " + zeroes.size()); for (int repeat = 0; repeat < NUM_REPEATS; repeat++) { System.out.println("REPEAT " + repeat + " OUT OF " + NUM_REPEATS); List<WordBag> bags = new ArrayList<WordBag>(); bags.addAll(getSeries(trainData, keys[0], key2, p)); bags.addAll(getSeries(trainData, keys[1], keyAVR, p)); bags.addAll(getSeries(trainData, keys[2], keyRESP, p)); bags.addAll(getSeries(trainData, keys[3], keyPLETH, p)); bags.addAll(getSeries(trainData, keys[4], keyCO2, p)); // create the TFIDF data structure HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags); tfidf = TextUtils.normalizeToUnitVectors(tfidf); // launch KMeans with random centers // Cluster clusters = HC.Hc(tfidf, LinkageCriterion.SINGLE); // // BufferedWriter bw = new BufferedWriter(new FileWriter(PREFIX + "test.newick")); // bw.write("(" + clusters.toNewick() + ")"); // bw.close(); // launch KMeans with random centers HashMap<String, List<String>> clusters = TextKMeans.cluster(tfidf, NUM_CLUSTERS, new RandomStartStrategy()); } } private static Collection<? extends WordBag> getSeries(Map<String, List<double[]>> trainData, String label, int[] indexes, int[][] p) throws IndexOutOfBoundsException, TSException { List<double[]> set = trainData.get(label); Collection<WordBag> wb = new ArrayList<WordBag>(); for (int i : indexes) { wb.add(TextUtils.seriesToWordBag(label + "_" + String.valueOf(i - 1), set.get(i - 1), p[0])); } return wb; } private static Collection<WordBag> getSeries(Map<String, List<double[]>> trainData, String label, String prefix2, int[] is, int[][] p) throws IndexOutOfBoundsException, TSException { List<double[]> set = trainData.get(label); Collection<WordBag> wb = new ArrayList<WordBag>(); for (int i : is) { wb.add(TextUtils.seriesToWordBag(prefix2 + String.valueOf(i - 1), set.get(i), p[0])); } return wb; } private static Collection<WordBag> getSeries(Map<String, List<double[]>> trainData, String label, String prefix, int num, int[][] p) throws IndexOutOfBoundsException, TSException { List<double[]> set = trainData.get(label); Collection<WordBag> wb = new ArrayList<WordBag>(); for (int i = 0; i < num; i++) { int idx = randoms.nextInt(set.size()); wb.add(TextUtils.seriesToWordBag(prefix + "_" + String.valueOf(idx), set.get(idx), p[0])); } return wb; } }