package edu.hawaii.jmotif.experiment.activity;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeSet;
import edu.hawaii.jmotif.sax.SAXFactory;
import edu.hawaii.jmotif.sax.alphabet.Alphabet;
import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet;
import edu.hawaii.jmotif.text.CosineDistanceMatrix;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.text.cluster.Cluster;
import edu.hawaii.jmotif.text.cluster.FurthestFirstStrategy;
import edu.hawaii.jmotif.text.cluster.HC;
import edu.hawaii.jmotif.text.cluster.LinkageCriterion;
import edu.hawaii.jmotif.text.cluster.TextKMeans;
import edu.hawaii.jmotif.timeseries.TSException;
import edu.hawaii.jmotif.timeseries.TSUtils;
/**
 * Helper-runner for White Paper span example.
 *
 * Generates three sets of series through {@link ActivityGenerator#threePeriods}, writes them to a
 * file, converts every series into a bag of SAX words, builds unit-normalized TF*IDF vectors for
 * the bags and then writes out the cosine distance matrix, the hierarchical clustering result (in
 * Newick format) and the table of TF*IDF weights. All output files are created under
 * {@link #PREFIX}.
 *
 * @author psenin
 *
 */
public class ActivityClustDuration {

  // string constants
  private static final String COMMA = ",";
  private static final DecimalFormat df = new DecimalFormat("#0.0000000000");

  // prefix for all of the output
  private static final String PREFIX = "RCode/activity/";

  // we really need an alphabet for SAX
  private final static Alphabet a = new NormalAlphabet();

  // The timeseries length
  private static final int SERIES_LENGTH = 60;

  // Number of samples to generate from each subset
  private static final int SET_SAMPLES_NUM = 5;

  // SAX parameters to use
  //
  private static final int PAA_SIZE = 4;
  private static final int ALPHABET_SIZE = 8;
  private static final int WINDOW_SIZE = 8;

  // processing strategy to utilize
  //
  private static final SAXCollectionStrategy STRATEGY = SAXCollectionStrategy.CLASSIC;

  /**
   * Main runnable.
   *
   * @param args None used.
   * @throws TSException if the series processing or clustering code reports an error.
   * @throws IndexOutOfBoundsException if thrown by the series processing code.
   * @throws IOException if any of the output files cannot be written.
   */
  public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException {

    // one hour runs
    double[][] one = generateSet(1);
    // two hours runs
    double[][] two = generateSet(2);
    // three hours runs
    double[][] three = generateSet(3);

    // write down the series
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(PREFIX
        + "test-duration-series.csv")));
    try {
      writeSeries(bw, "one", one);
      writeSeries(bw, "two", two);
      // each set is written using its own length
      writeSeries(bw, "three", three);
    }
    finally {
      bw.close();
    }

    // making bags collection
    //
    List<WordBag> bags = new ArrayList<WordBag>();
    bags.addAll(getWordBags("one", one, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE));
    bags.addAll(getWordBags("two", two, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE));
    bags.addAll(getWordBags("three", three, WINDOW_SIZE, PAA_SIZE, ALPHABET_SIZE));

    // build the TFIDF data structure
    //
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
    tfidf = TextUtils.normalizeToUnitVectors(tfidf);

    writeTextFile(PREFIX + "test-duration-cosineDM.csv", new CosineDistanceMatrix(tfidf).toString()
        + "\n");

    // launch HC algorithm
    //
    Cluster clusters = HC.Hc(tfidf, LinkageCriterion.COMPLETE);
    writeTextFile(PREFIX + "test-duration-HC-SAX-VSM.newick", "(" + clusters.toNewick() + ")");

    // launch KMeans; the returned clusters are not written anywhere by this runner, the call is
    // kept so that whatever the clustering itself does still happens
    TextKMeans.cluster(tfidf, 3, new FurthestFirstStrategy());

    // write down tf*idf vectors for each class
    writePreClusterTable(tfidf, PREFIX + "test-duration-terms.csv");

  }

  /**
   * Generates {@link #SET_SAMPLES_NUM} series, each obtained from
   * {@link ActivityGenerator#threePeriods} called with {@link #SERIES_LENGTH} and the same value
   * repeated three times.
   *
   * @param period the value passed three times to the generator.
   * @return the generated series, one per row.
   */
  private static double[][] generateSet(int period) {
    double[][] res = new double[SET_SAMPLES_NUM][];
    for (int i = 0; i < res.length; i++) {
      res[i] = ActivityGenerator.threePeriods(SERIES_LENGTH, new int[] { period, period, period });
    }
    return res;
  }

  /**
   * Writes a set of series, one per line: the label followed by the row index, then the
   * space-separated values.
   *
   * @param bw the writer to use; it is not closed here.
   * @param label the line prefix.
   * @param series the series to write.
   * @throws IOException if writing fails.
   */
  private static void writeSeries(BufferedWriter bw, String label, double[][] series)
      throws IOException {
    for (int i = 0; i < series.length; i++) {
      String str = label.concat(String.valueOf(i))
          + Arrays.toString(series[i]).replace("[", " ").replace("]", "").replace(", ", " ") + "\n";
      bw.write(str);
    }
  }

  /**
   * Writes a string into a file, closing the file even if the write fails.
   *
   * @param fname the output file name.
   * @param content the text to write.
   * @throws IOException if the file cannot be opened or written.
   */
  private static void writeTextFile(String fname, String content) throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(fname)));
    try {
      bw.write(content);
    }
    finally {
      bw.close();
    }
  }

  /**
   * Converts each of the series into a bag of SAX words using a sliding window. A word is skipped
   * when, under the configured {@link #STRATEGY}, it is considered the same as the previously
   * collected word of that series.
   *
   * @param bagPrefix the bag name prefix, the series index is appended to it.
   * @param series the input series, one per row.
   * @param windowSize the sliding window size.
   * @param paaSize the PAA size to use for each window.
   * @param alphabetSize the SAX alphabet size to use.
   * @return the list of bags, one for each of the series, in the input order.
   * @throws IndexOutOfBoundsException if thrown by the underlying series routines.
   * @throws TSException if thrown by the underlying series routines.
   * @throws IOException declared for compatibility with existing callers.
   */
  private static List<WordBag> getWordBags(String bagPrefix, double[][] series, int windowSize,
      int paaSize, int alphabetSize) throws IndexOutOfBoundsException, TSException, IOException {

    List<WordBag> res = new ArrayList<WordBag>();

    for (int i = 0; i < series.length; i++) {

      WordBag bag = new WordBag(bagPrefix + String.valueOf(i));
      double[] current = series[i];
      String oldStr = "";

      // NOTE(review): if TSUtils.subseries takes (start, length), this bound leaves out the
      // window starting at length - windowSize -- confirm before changing, it alters the bags
      for (int j = 0; j < current.length - windowSize; j++) {

        // the method parameters are used here, not the class-level constants
        double[] paa = TSUtils.paa(TSUtils.zNormalize(TSUtils.subseries(current, j, windowSize)),
            paaSize);
        char[] sax = TSUtils.ts2String(paa, a.getCuts(alphabetSize));
        String word = String.valueOf(sax);

        if (SAXCollectionStrategy.CLASSIC.equals(STRATEGY)) {
          if (oldStr.length() > 0 && SAXFactory.strDistance(sax, oldStr.toCharArray()) == 0) {
            continue;
          }
        }
        else if (SAXCollectionStrategy.EXACT.equals(STRATEGY)) {
          if (oldStr.equalsIgnoreCase(word)) {
            continue;
          }
        }

        oldStr = word;
        bag.addWord(word);
      }

      res.add(bag);
    }

    return res;
  }

  /**
   * Writes the TF*IDF weights as a CSV table: the header row lists the outer map keys, each
   * following row starts with a word and lists its weight in every vector, 0 when the vector does
   * not have the word. Rows are sorted by word.
   *
   * @param tfidf the weight vectors, keyed by the vector name and then by the word.
   * @param fname the output file name.
   * @throws IOException if the file cannot be opened or written.
   */
  private static void writePreClusterTable(HashMap<String, HashMap<String, Double>> tfidf,
      String fname) throws IOException {

    // melt together sets of keys
    //
    TreeSet<String> words = new TreeSet<String>();
    for (HashMap<String, Double> t : tfidf.values()) {
      words.addAll(t.keySet());
    }

    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(fname)));
    try {

      // print keys - the dictionaries names
      //
      StringBuilder sb = new StringBuilder("\"\"");
      for (String key : tfidf.keySet()) {
        sb.append(COMMA).append("\"").append(key).append("\"");
      }
      bw.write(sb.append("\n").toString());

      // print rows, one by one
      //
      for (String w : words) {
        sb = new StringBuilder();
        sb.append("\"").append(w).append("\"");
        for (String key : tfidf.keySet()) {
          // single lookup, a missing word is reported as zero
          Double weight = tfidf.get(key).get(w);
          double value = (null == weight) ? 0.0d : weight.doubleValue();
          sb.append(COMMA).append(df.format(value));
        }
        bw.write(sb.append("\n").toString());
      }
    }
    finally {
      bw.close();
    }
  }

}