package edu.hawaii.jmotif.thesis;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import edu.hawaii.jmotif.experiment.cbf.CBFGenerator;
import edu.hawaii.jmotif.sequitur.SequiturFactory;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.timeseries.TSException;
/**
* Helper-runner for CBF test.
*
* @author psenin
*
*/
public class CBFSequiturClassifier {

  // classifier test parameters
  //
  /** The timeseries length; every generated series has this many points. */
  private static final int SERIES_LENGTH = 128;

  /** Not referenced anywhere in this class at the moment. */
  private static final int NUM_REPETITIONS = 10;

  /** Not referenced anywhere in this class at the moment. */
  private static final int[] SIZES = { 10, 20, 50, 100, 200, 400, 800, 1600, 3200, 6400 };

  /** Number of command line arguments {@link #main(String[])} requires. */
  private static final int EXPECTED_ARGS = 6;

  /**
   * Builds two TF*IDF models from a freshly generated training set -- one through
   * {@link TextUtils}, one through {@link SequiturFactory} -- classifies a freshly generated test
   * set with each of them and prints a single comma-separated <code>cmprun</code> line with the
   * parameters, accuracy and error of both models, and four elapsed times in milliseconds (first
   * model build, second model build, first classification, second classification).
   *
   * If the number of arguments is wrong, the usage message is printed and the method returns
   * without doing any work. If the window size is not greater than the PAA size, the JVM exits
   * with status 0 before any data is generated.
   *
   * @param args six values: window size, PAA size, alphabet size, strategy name ("exact" or
   * "classic", case-insensitive; anything else selects NOREDUCTION), train size, test size. The
   * sizes are the number of series generated per class.
   * @throws TSException declared by the signature; presumably raised by the jmotif routines.
   * @throws IndexOutOfBoundsException declared by the signature.
   * @throws IOException declared by the signature.
   */
  public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException {

    if (EXPECTED_ARGS != args.length) {
      System.out.println("Expecting parameters W P A Strategy trainSize testSize");
      // without this return the args[] accesses below would fail or silently ignore extra values
      return;
    }

    Integer windowSize = Integer.valueOf(args[0]);
    Integer paaSize = Integer.valueOf(args[1]);
    Integer alphabetSize = Integer.valueOf(args[2]);

    SAXCollectionStrategy strategy = SAXCollectionStrategy.NOREDUCTION;
    if ("exact".equalsIgnoreCase(args[3])) {
      strategy = SAXCollectionStrategy.EXACT;
    }
    else if ("classic".equalsIgnoreCase(args[3])) {
      strategy = SAXCollectionStrategy.CLASSIC;
    }

    Integer trainSize = Integer.valueOf(args[4]);
    Integer testSize = Integer.valueOf(args[5]);

    // reject an unusable window/PAA combination before spending time on data generation
    if (windowSize < paaSize + 1) {
      System.exit(0);
    }

    // making training and test collections
    Map<String, List<double[]>> trainSet = makeSet(trainSize);
    Map<String, List<double[]>> testSet = makeSet(testSize);

    long tfidfStart = System.currentTimeMillis();

    // making training bags collection
    List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainSet, paaSize, alphabetSize,
        windowSize, strategy);
    HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
    tfidf = TextUtils.normalizeToUnitVectors(tfidf);
    long tfidfEnd = System.currentTimeMillis();

    // the same pipeline, but the bags are produced by SequiturFactory
    List<WordBag> bagsS = SequiturFactory.labeledSeries2WordBags(trainSet, paaSize, alphabetSize,
        windowSize, strategy);
    HashMap<String, HashMap<String, Double>> tfidfS = TextUtils.computeTFIDF(bagsS);
    tfidfS = TextUtils.normalizeToUnitVectors(tfidfS);
    long tfidfEndS = System.currentTimeMillis();

    // classifying the test set with the first model
    int totalTestSample = 0;
    int totalPositiveTests = 0;
    for (String label : tfidf.keySet()) {
      for (double[] series : testSet.get(label)) {
        totalPositiveTests += TextUtils.classify(label, series, tfidf, paaSize, alphabetSize,
            windowSize, strategy);
        totalTestSample++;
      }
    }
    // yields NaN when no test series were classified
    double accuracy = (double) totalPositiveTests / (double) totalTestSample;
    double error = 1.0d - accuracy;
    long tfidfClassEnd = System.currentTimeMillis();

    // classifying the test set with the second model
    // NOTE(review): labels are taken from tfidf rather than tfidfS; both are built from the same
    // trainSet, so the label sets are presumably identical -- confirm.
    int totalTestSampleS = 0;
    int totalPositiveTestsS = 0;
    for (String label : tfidf.keySet()) {
      for (double[] series : testSet.get(label)) {
        totalPositiveTestsS += SequiturFactory.classify(label, series, tfidfS, paaSize,
            alphabetSize, windowSize, strategy);
        totalTestSampleS++;
      }
    }
    double accuracyS = (double) totalPositiveTestsS / (double) totalTestSampleS;
    double errorS = 1.0d - accuracyS;
    long tfidfClassEndS = System.currentTimeMillis();

    // the report line; the field order is kept stable for whatever consumes the output
    StringBuilder sb = new StringBuilder();
    sb.append("cmprun ");
    sb.append(trainSize).append(", ").append(testSize).append(", ");
    sb.append(windowSize).append(", ").append(paaSize).append(", ");
    sb.append(alphabetSize).append(", ");
    sb.append(accuracy).append(", ").append(error).append(", ");
    sb.append(accuracyS).append(", ").append(errorS).append(", ");
    sb.append(tfidfEnd - tfidfStart).append(", ");
    sb.append(tfidfEndS - tfidfEnd).append(", ");
    sb.append(tfidfClassEnd - tfidfEndS).append(", ");
    sb.append(tfidfClassEndS - tfidfClassEnd);
    System.out.println(sb.toString());
  }

  /**
   * Generates a labeled data set using {@link CBFGenerator}: <code>num</code> cylinder series
   * under the label "1", <code>num</code> bell series under "2" and <code>num</code> funnel series
   * under "3". Every series is generated over the ticks 0..SERIES_LENGTH-1.
   *
   * @param num the number of series to generate for each of the three classes.
   * @return the map from the class label to the list of generated series.
   */
  private static Map<String, List<double[]>> makeSet(int num) {

    // ticks - i.e. time
    int[] t = new int[SERIES_LENGTH];
    for (int i = 0; i < SERIES_LENGTH; i++) {
      t[i] = i;
    }

    List<double[]> c = new ArrayList<double[]>();
    for (int i = 0; i < num; i++) {
      c.add(CBFGenerator.cylinder(t));
    }

    List<double[]> b = new ArrayList<double[]>();
    for (int i = 0; i < num; i++) {
      b.add(CBFGenerator.bell(t));
    }

    List<double[]> f = new ArrayList<double[]>();
    for (int i = 0; i < num; i++) {
      f.add(CBFGenerator.funnel(t));
    }

    Map<String, List<double[]>> set = new HashMap<String, List<double[]>>();
    set.put("1", c);
    set.put("2", b);
    set.put("3", f);
    return set;
  }

  /**
   * Writes a labeled data set into a text file, one series per line: the label followed by the
   * series values, all separated by single spaces. Not called from within this class at the
   * moment.
   *
   * @param fname the name of the file to write.
   * @param set the map from the class label to the series of that class.
   * @throws IOException if the file cannot be opened or written.
   */
  private static void save(String fname, Map<String, ArrayList<double[]>> set) throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(fname)));
    try {
      for (Entry<String, ArrayList<double[]>> e : set.entrySet()) {
        for (double[] a : e.getValue()) {
          // "[1.0, 2.0]" -> "1.0,2.0" -> "1.0 2.0"
          String values = Arrays.toString(a).replace("[", "").replace("]", "")
              .replaceAll(" ", "").replace(",", " ");
          bw.write(e.getKey() + " " + values + "\n");
        }
      }
    }
    finally {
      // release the file handle even when a write fails
      bw.close();
    }
  }
}