package edu.hawaii.jmotif.experiment.cbf;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.ConsoleHandler;
import java.util.logging.Formatter;
import java.util.logging.Handler;
import java.util.logging.Logger;
import org.hackystat.utilities.logger.HackystatLogger;
import cc.mallet.util.Randoms;
import edu.hawaii.jmotif.distance.EuclideanDistance;
import edu.hawaii.jmotif.sax.alphabet.Alphabet;
import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.timeseries.TSException;
import edu.hawaii.jmotif.util.BriefFormatter;
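/**
* Experiment runner for the CBF (cylinder-bell-funnel) synthetic data set: for a range of
* training-set sizes and noise levels it reports the classification error of the JMotif TF*IDF
* classifier next to that of a Euclidean 1-NN classifier, together with their run times.
*
* @author psenin
*
*/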
public class CBFProgressivePrecisionExperiment {
// Integer codes used in the fourth column of SAX_PARAMS_POINTS to select a SAXCollectionStrategy;
// main() decodes them (anything that is neither EXACT nor NOREDUCTION is treated as CLASSIC).
protected final static int CLASSIC = 0;
protected final static int EXACT = 1;
protected final static int NOREDUCTION = 2;
// Noise levels swept by main(); each value is handed to damage() as the fraction of a series
// that gets overwritten with noise.
private static final double[] NOISE_SAMPLING_POINTS = { 0.0, 0.05, 0.1, 0.15, 0.20, 0.25, 0.30,
0.35, 0.4, 0.45, 0.5 };
// Training-set sizes swept by main(); each value is the number of series generated PER CLASS
// by getDataSet().
private static final int[] TRAIN_SAMPLE_SIZE = { 5, 10, 25, 50, 75, 100, 150, 250, 500, 1000 };
// Alternative noise schedule (disabled):
// private static final double[] NOISE_SAMPLING_POINTS = { 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
// 0.4 };
// SAX parameters, one row per noise level: { window size, PAA size, alphabet size, strategy code }.
// main() reads row i for NOISE_SAMPLING_POINTS[i], so this table must have at least as many rows
// as NOISE_SAMPLING_POINTS has entries.
private static final int[][] SAX_PARAMS_POINTS = { { 60, 7, 7, NOREDUCTION },
{ 55, 6, 4, NOREDUCTION }, { 50, 7, 7, NOREDUCTION }, { 50, 7, 7, NOREDUCTION },
{ 45, 6, 6, NOREDUCTION }, { 45, 6, 6, NOREDUCTION }, { 40, 6, 5, NOREDUCTION },
{ 30, 6, 4, NOREDUCTION }, { 30, 6, 4, NOREDUCTION }, { 24, 8, 4, NOREDUCTION },
{ 24, 8, 5, NOREDUCTION } };
// Alternative parameter rows (disabled):
// { 30, 6, 4, NOREDUCTION },
// { 32, 6, 4, NOREDUCTION },
// { 36, 6, 6, NOREDUCTION },
// { 38, 7, 5, NOREDUCTION },
// { 40, 7, 6, NOREDUCTION },
// { 44, 8, 6, NOREDUCTION },
// { 50, 8, 6, NOREDUCTION }, };
// Number of test series generated PER CLASS for every (training size, noise level) combination.
private static final int TEST_SET_SIZE = 10000;
// Number of times the whole sweep in main() is repeated.
private static final int REPEATS = 10;
// Console logger; created and configured in the static initializer.
private static Logger consoleLogger;
// Level name applied to consoleLogger in the static initializer.
private static String LOGGING_LEVEL = "FINE";
// Shared random source used by damage(); created in the static initializer.
private static Randoms randoms;
// NOTE(review): not referenced anywhere in this class - confirm whether it can be removed.
private static final Alphabet a = new NormalAlphabet();
// Field separator for the CSV-style output lines.
private static final String COMMA = ",";
// Passed to damage() as its noiseStandardDeviation argument for every noise level.
private static final double NOISE_STDEV = 0.167;
// One-time class setup: creates the shared random source and a console logger whose only
// handler is a ConsoleHandler using BriefFormatter, at the level named by LOGGING_LEVEL.
static {
  randoms = new Randoms();

  consoleLogger = HackystatLogger.getLogger("debug.console", "preseries");
  // keep records away from parent handlers and drop whatever handlers are already attached
  consoleLogger.setUseParentHandlers(false);
  Handler[] attached = consoleLogger.getHandlers();
  for (int idx = 0; idx < attached.length; idx++) {
    consoleLogger.removeHandler(attached[idx]);
  }

  ConsoleHandler briefConsole = new ConsoleHandler();
  briefConsole.setFormatter(new BriefFormatter());
  consoleLogger.addHandler(briefConsole);

  HackystatLogger.setLoggingLevel(consoleLogger, LOGGING_LEVEL);
}
/**
 * Runs the experiment and prints one CSV line per (repetition, training size, noise level).
 *
 * <p>For each of {@code REPEATS} repetitions and each entry of {@code TRAIN_SAMPLE_SIZE} a
 * training set is generated once. Then, for every index {@code i} of
 * {@code NOISE_SAMPLING_POINTS}, a fresh test set of {@code TEST_SET_SIZE} series per class is
 * generated, damaged with {@code damage()}, and classified twice: with the JMotif TF*IDF
 * classifier built from the training set using {@code SAX_PARAMS_POINTS[i]}, and with a Euclidean
 * 1-NN search over the training set.
 *
 * <p>Each line written to standard output contains, comma-separated: the prefix built by
 * {@code toLogStr}, the training size, the JMotif error, the Euclidean 1-NN error, the total
 * JMotif time, the JMotif model-building time and the Euclidean 1-NN time (times in
 * milliseconds).
 *
 * @param args command-line arguments; not used.
 * @throws IndexOutOfBoundsException if {@code SAX_PARAMS_POINTS} has fewer rows than
 *         {@code NOISE_SAMPLING_POINTS} has entries, or a row has fewer than four values.
 * @throws TSException presumably propagated from the JMotif utility calls made here.
 * @throws IOException declared by the signature; no statement visible in this method performs
 *         I/O that throws it directly.
 */
public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException {
  for (int rep = 0; rep < REPEATS; rep++) {
    for (int tSize : TRAIN_SAMPLE_SIZE) {

      // one training set per (repetition, size); it is reused for every noise level below
      Map<String, List<double[]>> trainData = getDataSet(tSize);

      for (int i = 0; i < NOISE_SAMPLING_POINTS.length; i++) {

        double lossPercentage = NOISE_SAMPLING_POINTS[i];
        double lossStDev = NOISE_STDEV;
        // SAX parameters are paired with noise levels by index
        int[] params = SAX_PARAMS_POINTS[i];

        String msg = toLogStr(params, lossPercentage, lossStDev);
        consoleLogger.info(msg);

        Map<String, List<double[]>> testData = getDataSet(TEST_SET_SIZE);
        testData = damage(testData, lossPercentage, lossStDev);

        // ---- JMotif: build the TF*IDF model from the training data, then classify ----
        int testSampleSize = 0;
        int positiveTestCounter = 0;
        long jmotifStart = System.currentTimeMillis();

        int windowSize = params[0];
        int paaSize = params[1];
        int alphabetSize = params[2];
        SAXCollectionStrategy strategy = decodeStrategy(params[3]);

        List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainData, paaSize, alphabetSize,
            windowSize, strategy);
        HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
        tfidf = TextUtils.normalizeToUnitVectors(tfidf);
        long jmotifTFIDF = System.currentTimeMillis();

        // classify() results are summed as the number of correct answers
        // (presumably 1 for a hit and 0 for a miss - confirm against TextUtils)
        for (String label : tfidf.keySet()) {
          List<double[]> labeledTestSeries = testData.get(label);
          for (double[] series : labeledTestSeries) {
            positiveTestCounter += TextUtils.classify(label, series, tfidf, paaSize,
                alphabetSize, windowSize, strategy);
            testSampleSize++;
          }
        }
        long jmotifFinish = System.currentTimeMillis();

        double accuracy = (double) positiveTestCounter / (double) testSampleSize;
        double error = 1.0d - accuracy;

        // ---- Euclidean 1-NN over the same train/test data ----
        long euclideanStart = System.currentTimeMillis();
        double euclideanAccuracy = euclideanOneNnAccuracy(trainData, testData);
        long euclideanFinish = System.currentTimeMillis();
        double euclideanError = 1.0d - euclideanAccuracy;

        System.out.println(msg + COMMA + tSize + COMMA + error + COMMA + euclideanError
            + COMMA + (jmotifFinish - jmotifStart) + COMMA + (jmotifTFIDF - jmotifStart) + COMMA
            + (euclideanFinish - euclideanStart));
      }
    }
  }
}

/**
 * Translates a strategy code from {@code SAX_PARAMS_POINTS} into the corresponding enum value.
 *
 * @param code one of {@code CLASSIC}, {@code EXACT}, {@code NOREDUCTION}.
 * @return the matching strategy; any unrecognized code yields {@code CLASSIC}.
 */
private static SAXCollectionStrategy decodeStrategy(int code) {
  if (EXACT == code) {
    return SAXCollectionStrategy.EXACT;
  }
  if (NOREDUCTION == code) {
    return SAXCollectionStrategy.NOREDUCTION;
  }
  return SAXCollectionStrategy.CLASSIC;
}

/**
 * Classifies every test series by its nearest training series and reports the hit ratio.
 *
 * @param trainData labeled reference series.
 * @param testData labeled query series.
 * @return correctly labeled queries divided by all queries (NaN when there are no queries).
 * @throws TSException presumably propagated from the distance computation.
 */
private static double euclideanOneNnAccuracy(Map<String, List<double[]>> trainData,
    Map<String, List<double[]>> testData) throws TSException {
  int hits = 0;
  int queries = 0;
  for (Entry<String, List<double[]>> querySet : testData.entrySet()) {
    for (double[] querySeries : querySet.getValue()) {
      String predicted = nearestNeighborLabel(querySeries, trainData);
      if (predicted.equalsIgnoreCase(querySet.getKey())) {
        hits++;
      }
      queries++;
    }
  }
  return (double) hits / (double) queries;
}

/**
 * Finds the label of the training series closest to the query.
 *
 * @param querySeries the series to label.
 * @param trainData labeled reference series.
 * @return the label of the closest reference series, or an empty string if none qualified.
 * @throws TSException presumably propagated from the distance computation.
 */
private static String nearestNeighborLabel(double[] querySeries,
    Map<String, List<double[]>> trainData) throws TSException {
  double bestDistance = Double.MAX_VALUE;
  String bestClass = "";
  for (Entry<String, List<double[]>> referenceSet : trainData.entrySet()) {
    for (double[] referenceSeries : referenceSet.getValue()) {
      // the best distance so far is passed as the cut-off; a null result is skipped
      // (presumably it signals that the computation was abandoned early)
      Double distance = EuclideanDistance.earlyAbandonedDistance(querySeries, referenceSeries,
          bestDistance);
      if (null != distance && distance.doubleValue() < bestDistance) {
        bestDistance = distance.doubleValue();
        bestClass = referenceSet.getKey();
      }
    }
  }
  return bestClass;
}
/**
 * Builds the comma-separated prefix of an output line: strategy name, window size, PAA size,
 * alphabet size, loss percentage and loss standard deviation.
 *
 * <p>The strategy code is decoded with the same class constants and the same fallback that
 * {@code main} uses to pick the strategy (anything that is neither {@code EXACT} nor
 * {@code NOREDUCTION} is {@code CLASSIC}), so the logged name always matches the strategy that
 * is actually run and the strategy column is never missing.
 *
 * @param p SAX parameters: {window size, PAA size, alphabet size, strategy code}.
 * @param lossPercentage the noise level being tested.
 * @param lossStDev the noise standard deviation being tested.
 * @return the formatted prefix, without a trailing separator.
 */
private static String toLogStr(int[] p, double lossPercentage, double lossStDev) {
  StringBuilder sb = new StringBuilder();
  if (EXACT == p[3]) {
    sb.append("EXACT,");
  }
  else if (NOREDUCTION == p[3]) {
    sb.append("NOREDUCTION,");
  }
  else {
    sb.append("CLASSIC,");
  }
  sb.append(p[0]).append(COMMA);
  sb.append(p[1]).append(COMMA);
  sb.append(p[2]).append(COMMA);
  sb.append(lossPercentage).append(COMMA);
  sb.append(lossStDev);
  return sb.toString();
}
/**
 * Returns a copy of the data set in which one contiguous interval of every series has been
 * overwritten with random noise.
 *
 * <p>For each series the interval start is drawn uniformly from the positions that leave room
 * for the whole interval, and the interval covers {@code damagedIntervalLength} of the series
 * length. The input map and its arrays are left untouched; the damage is applied to copies.
 *
 * @param trainData the labeled series to damage (despite the name, {@code main} passes the test
 *        set here).
 * @param damagedIntervalLength fraction of each series to overwrite, expected in [0, 1].
 * @param noiseStandardDeviation second argument handed to {@code randoms.nextGaussian}.
 * @return a new map with the same labels and damaged copies of the series.
 */
private static Map<String, List<double[]>> damage(Map<String, List<double[]>> trainData,
    double damagedIntervalLength, double noiseStandardDeviation) {
  Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();
  for (Entry<String, List<double[]>> referenceSet : trainData.entrySet()) {
    List<double[]> newData = new ArrayList<double[]>(referenceSet.getValue().size());
    for (double[] referenceSeries : referenceSet.getValue()) {
      // work on a copy so that the caller's data is not modified behind its back
      double[] damaged = referenceSeries.clone();
      int length = damaged.length;
      int noiseStart = (int) Math.floor(randoms.nextUniform(0D, length
          * (1 - damagedIntervalLength)));
      int noiseEnd = noiseStart + (int) (length * damagedIntervalLength);
      // keep the interval inside the array even for out-of-range fractions
      noiseStart = Math.max(0, noiseStart);
      noiseEnd = Math.min(length, noiseEnd);
      for (int i = noiseStart; i < noiseEnd; i++) {
        // NOTE(review): Mallet documents Randoms.nextGaussian(m, s2) as taking a variance -
        // confirm whether the standard deviation should be squared here.
        damaged[i] = randoms.nextGaussian(0, noiseStandardDeviation);
      }
      newData.add(damaged);
    }
    res.put(referenceSet.getKey(), newData);
  }
  return res;
}
/**
 * Writes a labeled data set to a text file, one series per line: the class label followed by
 * the space-separated values.
 *
 * @param fileName path of the file to create or overwrite.
 * @param data the labeled series to write.
 * @throws IOException if the file cannot be opened or written.
 */
private static void saveData(String fileName, Map<String, List<double[]>> data)
    throws IOException {
  BufferedWriter wr = new BufferedWriter(new FileWriter(new File(fileName)));
  try {
    for (Entry<String, List<double[]>> cClass : data.entrySet()) {
      for (double[] series : cClass.getValue()) {
        // Arrays.toString yields "[a, b, c]"; strip the brackets, turn ", " into " " and
        // terminate the line
        wr.write(String.valueOf(cClass.getKey()) + " "
            + Arrays.toString(series).replace("[", "").replace(", ", " ").replace("]", "\n"));
      }
    }
  }
  finally {
    // release the file handle even when a write fails
    wr.close();
  }
}
/**
 * Generates a labeled CBF data set with the same number of series in each of the three classes.
 *
 * <p>Series are produced by {@code CBFGenerator} over the time axis 0..127 and stored under the
 * labels {@code "0"} (cylinder), {@code "1"} (bell) and {@code "2"} (funnel).
 *
 * @param size the number of series to generate per class.
 * @return a map from class label to the generated series.
 */
private static Map<String, List<double[]>> getDataSet(int size) {

  // the time axis handed to every generator call: 0, 1, ..., 127
  final int[] ticks = new int[128];
  int tick = 0;
  while (tick < ticks.length) {
    ticks[tick] = tick;
    tick++;
  }

  Map<String, List<double[]>> dataSet = new HashMap<String, List<double[]>>();

  // class "0": cylinders
  List<double[]> cylinderSeries = new ArrayList<double[]>();
  for (int remaining = size; remaining > 0; remaining--) {
    cylinderSeries.add(CBFGenerator.cylinder(ticks));
  }
  dataSet.put("0", cylinderSeries);

  // class "1": bells
  List<double[]> bellSeries = new ArrayList<double[]>();
  for (int remaining = size; remaining > 0; remaining--) {
    bellSeries.add(CBFGenerator.bell(ticks));
  }
  dataSet.put("1", bellSeries);

  // class "2": funnels
  List<double[]> funnelSeries = new ArrayList<double[]>();
  for (int remaining = size; remaining > 0; remaining--) {
    funnelSeries.add(CBFGenerator.funnel(ticks));
  }
  dataSet.put("2", funnelSeries);

  return dataSet;
}
/**
 * Returns the lower-case name used as a prefix for the given strategy.
 *
 * @param strategy the SAX collection strategy; may be {@code null}.
 * @return {@code "classic"} for CLASSIC, {@code "exact"} for EXACT, and {@code "noreduction"}
 *         for anything else (including {@code null}).
 */
protected static String getStrategyPrefix(SAXCollectionStrategy strategy) {
  if (SAXCollectionStrategy.CLASSIC.equals(strategy)) {
    return "classic";
  }
  if (SAXCollectionStrategy.EXACT.equals(strategy)) {
    return "exact";
  }
  return "noreduction";
}
}