package edu.hawaii.jmotif.experiment;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import edu.hawaii.jmotif.performance.UCRUtils;
import edu.hawaii.jmotif.text.CosineDistanceMatrix;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.text.cluster.Cluster;
import edu.hawaii.jmotif.text.cluster.HC;
import edu.hawaii.jmotif.text.cluster.LinkageCriterion;
import edu.hawaii.jmotif.timeseries.TSException;
/**
 * Helper-runner for CBF Hierarchical web example test.
 *
 * Reads a labeled time series collection from {@link #DATA}, converts it into word bags using
 * the SAX parameters below, builds unit-normalized TFIDF vectors, clusters them with
 * {@link HC#Hc} using {@link LinkageCriterion#COMPLETE}, prints the cosine distance matrix and
 * the TFIDF table to standard output, and writes the cluster tree to {@link #NEWICK_OUTPUT}.
 *
 * @author psenin
 *
 */
public class HClust {

  // input file with the labeled series, read through UCRUtils.readUCRData
  private static final String DATA = "/home/psenin/tmp/series.csv";

  // output file which receives the cluster tree produced by Cluster.toNewick()
  private static final String NEWICK_OUTPUT = "/home/psenin/tmp/test2.newick";

  // SAX parameters to use
  //
  private static final int PAA_SIZE = 10;
  private static final int ALPHABET_SIZE = 8;
  private static final int WINDOW_SIZE = 40;

  // processing strategy to utilize
  //
  private static final SAXCollectionStrategy STRATEGY = SAXCollectionStrategy.NOREDUCTION;

  /**
   * Runs the whole pipeline: load data, build word bags, compute TFIDF, cluster, report.
   *
   * @param args command line arguments, not used.
   * @throws TSException declared for the jmotif calls made here.
   * @throws IndexOutOfBoundsException if the first class read from the data file has no series.
   * @throws IOException if reading the input or writing the output file fails.
   */
  public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException {

    Map<String, List<double[]>> trainData = UCRUtils.readUCRData(DATA);

    // NOTE: the header below takes the length of the first series of the first class, so it
    // requires the data set to contain at least one class holding at least one series.
    System.out.println("trainData classes: " + trainData.size() + ", series length: "
        + trainData.entrySet().iterator().next().getValue().get(0).length);
    for (Entry<String, List<double[]>> e : trainData.entrySet()) {
      System.out.println(" training class: " + e.getKey() + " series: " + e.getValue().size());
    }

    System.out.println("\nParams: WINDOW " + WINDOW_SIZE + ", PAA " + PAA_SIZE + ", ALPHABET "
        + ALPHABET_SIZE + ", Strategy " + STRATEGY + "\n\nDistance matrix:");

    // parameters, packed in the order: window, PAA, alphabet, strategy index
    int[] params = new int[4];
    params[0] = WINDOW_SIZE;
    params[1] = PAA_SIZE;
    params[2] = ALPHABET_SIZE;
    params[3] = STRATEGY.index();

    // making bags collection
    List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainData, params);

    // create the TFIDF data structure and normalize it to unit vectors
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
    tfidf = TextUtils.normalizeToUnitVectors(tfidf);

    // launch the hierarchical clustering with the complete linkage criterion
    Cluster clusters = HC.Hc(tfidf, LinkageCriterion.COMPLETE);

    System.out.println((new CosineDistanceMatrix(tfidf)).toString());
    System.out.println(TextUtils.tfidfToTable(tfidf));

    // save the tree; the finally clause releases the file handle even if building or writing
    // the Newick string fails
    BufferedWriter bw = new BufferedWriter(new FileWriter(NEWICK_OUTPUT));
    try {
      bw.write("(" + clusters.toNewick() + ")");
    }
    finally {
      bw.close();
    }
  }
}