package edu.hawaii.jmotif.experiment.clustering;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.hawaii.jmotif.performance.UCRUtils;
import edu.hawaii.jmotif.sax.alphabet.Alphabet;
import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.text.cluster.RandomStartStrategy;
import edu.hawaii.jmotif.text.cluster.TextKMeans;
import edu.hawaii.jmotif.timeseries.TSException;
/**
 * Experiment runner: converts a hand-picked sample of the ElectricDevices training set into word
 * bags, weights them with TF-IDF and clusters the result with k-means using random start centers.
 *
 * @author psenin
 *
 */
public class TestSyntheticControlKMeans {

  // codes used in the last column of params to select the SAX collection strategy
  protected final static int CLASSIC = 0;
  protected final static int EXACT = 1;
  protected final static int NOREDUCTION = 2;

  // string constants (not referenced within this class)
  private static final String COMMA = ",";

  // folder the input data is read from
  private static final String PREFIX = "data/ElectricDevices/";

  // various variables (not referenced within this class)
  private final static Alphabet a = new NormalAlphabet();
  private static final DecimalFormat df = new DecimalFormat("#0.0000000000");

  // SAX parameters to use; the last element is one of CLASSIC, EXACT, NOREDUCTION
  //
  private static final int[][] params = { { 17, 13, 6, NOREDUCTION } };

  // the number of clusters requested from k-means
  private static final Integer NUM_CLUSTERS = 7;

  /**
   * Loads the training data, builds the word bags for the selected series of every class, computes
   * the normalized TF-IDF vectors, runs k-means and prints the resulting clusters.
   *
   * @param args not used.
   * @throws TSException if the series to word bag conversion fails.
   * @throws IndexOutOfBoundsException if one of the hard-coded series indexes is outside of the
   * loaded data.
   * @throws IllegalArgumentException if one of the expected class labels is missing in the data.
   * @throws IOException if the data file can not be read.
   */
  public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException {

    // the first three parameters are passed through as they are, the strategy code is translated
    // into the index of the corresponding SAXCollectionStrategy
    //
    SAXCollectionStrategy strategy = toStrategy(params[0][3]);
    int[][] p = { { params[0][0], params[0][1], params[0][2], strategy.index() } };

    // get the data loaded into memory
    //
    Map<String, List<double[]>> trainData = UCRUtils.readUCRData(PREFIX + "ElectricDevices_TRAIN");
    System.out.println("Zeroes: " + seriesForLabel(trainData, "0").size());

    // build a bag for each of the selected series, five per class
    //
    List<WordBag> bags = new ArrayList<WordBag>();
    bags.addAll(getSeries(trainData, "0", "TV", new int[] { 409, 410, 412, 413, 414 }, p));
    bags.addAll(getSeries(trainData, "1", "D", new int[] { 368, 375, 383, 386, 390 }, p));
    bags.addAll(getSeries(trainData, "2", "C", new int[] { 332, 334, 335, 336, 346 }, p));
    bags.addAll(getSeries(trainData, "3", "I", new int[] { 283, 303, 305, 317, 322 }, p));
    bags.addAll(getSeries(trainData, "4", "K", new int[] { 105, 32, 52, 76, 81 }, p));
    bags.addAll(getSeries(trainData, "5", "O", new int[] { 201, 321, 331, 333, 352 }, p));
    bags.addAll(getSeries(trainData, "6", "W", new int[] { 18, 21, 26, 7, 8 }, p));

    // create the TFIDF data structure
    //
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
    tfidf = TextUtils.normalizeToUnitVectors(tfidf);

    // disabled alternative: hierarchical clustering written out in the newick format
    //
    // Cluster clusters = HC.Hc(tfidf, LinkageCriterion.SINGLE);
    //
    // BufferedWriter bw = new BufferedWriter(new FileWriter(PREFIX + "test.newick"));
    // bw.write("(" + clusters.toNewick() + ")");
    // bw.close();

    // launch KMeans with random centers
    //
    HashMap<String, List<String>> clusters = TextKMeans.cluster(tfidf, NUM_CLUSTERS,
        new RandomStartStrategy());

    // report the outcome, previously the result was computed and silently dropped
    // NOTE(review): the keys are presumably cluster names and the values bag labels - confirm
    // against TextKMeans
    //
    if (null == clusters) {
      System.out.println("No clusters were produced.");
    }
    else {
      for (Map.Entry<String, List<String>> e : clusters.entrySet()) {
        System.out.println(e.getKey() + ": " + e.getValue());
      }
    }
  }

  /**
   * Translates the integer strategy code used in params into the strategy constant.
   *
   * @param code one of CLASSIC, EXACT, NOREDUCTION.
   * @return the matching strategy, any unrecognized code yields SAXCollectionStrategy.CLASSIC.
   */
  private static SAXCollectionStrategy toStrategy(int code) {
    switch (code) {
    case EXACT:
      return SAXCollectionStrategy.EXACT;
    case NOREDUCTION:
      return SAXCollectionStrategy.NOREDUCTION;
    default:
      return SAXCollectionStrategy.CLASSIC;
    }
  }

  /**
   * Looks up all the series of a class, failing with a readable message when the class is absent.
   *
   * @param trainData the data, keyed by the class label.
   * @param label the class label to look for.
   * @return the series of the class.
   * @throws IllegalArgumentException if there is no entry for the label.
   */
  private static List<double[]> seriesForLabel(Map<String, List<double[]>> trainData, String label) {
    List<double[]> set = trainData.get(label);
    if (null == set) {
      throw new IllegalArgumentException("No series with the class label \"" + label
          + "\" found in the data.");
    }
    return set;
  }

  /**
   * Converts the series found at the specified positions of a class into word bags.
   *
   * @param trainData the data, keyed by the class label.
   * @param label the class label to take the series from.
   * @param prefix2 the prefix for the bag names.
   * @param is the positions of the series within the class list.
   * @param p the conversion parameters, only the first row is used.
   * @return the bags, in the order of the positions given.
   * @throws IndexOutOfBoundsException if a position is outside of the class list.
   * @throws TSException if the conversion fails.
   */
  private static Collection<WordBag> getSeries(Map<String, List<double[]>> trainData, String label,
      String prefix2, int[] is, int[][] p) throws IndexOutOfBoundsException, TSException {
    List<double[]> set = seriesForLabel(trainData, label);
    Collection<WordBag> wb = new ArrayList<WordBag>(is.length);
    for (int i : is) {
      // NOTE(review): the bag name uses i - 1 while the series is read at the position i, the
      // other overload uses the same number for both - kept as it was, confirm which is intended
      wb.add(TextUtils.seriesToWordBag(prefix2 + String.valueOf(i - 1), set.get(i), p[0]));
    }
    return wb;
  }

  /**
   * Converts the first num series of a class into word bags named prefix0, prefix1 and so on.
   *
   * @param trainData the data, keyed by the class label.
   * @param label the class label to take the series from.
   * @param prefix the prefix for the bag names.
   * @param num the number of leading series to convert.
   * @param p the conversion parameters, only the first row is used.
   * @return the bags, in the order of the series.
   * @throws IndexOutOfBoundsException if the class has less than num series.
   * @throws TSException if the conversion fails.
   */
  private static Collection<WordBag> getSeries(Map<String, List<double[]>> trainData, String label,
      String prefix, int num, int[][] p) throws IndexOutOfBoundsException, TSException {
    List<double[]> set = seriesForLabel(trainData, label);
    Collection<WordBag> wb = new ArrayList<WordBag>();
    for (int i = 0; i < num; i++) {
      wb.add(TextUtils.seriesToWordBag(prefix + String.valueOf(i), set.get(i), p[0]));
    }
    return wb;
  }
}