package edu.hawaii.jmotif.performance;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.logging.ConsoleHandler;
import java.util.logging.Formatter;
import java.util.logging.Handler;
import java.util.logging.Logger;
import org.hackystat.utilities.logger.HackystatLogger;
import org.hackystat.utilities.stacktrace.StackTrace;
import edu.hawaii.jmotif.text.Bigram;
import edu.hawaii.jmotif.text.BigramBag;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
import edu.hawaii.jmotif.timeseries.TSException;
import edu.hawaii.jmotif.timeseries.TSUtils;
import edu.hawaii.jmotif.util.BriefFormatter;
/**
* Helper-runner for CBF test.
*
* @author psenin
*
*/
public class UCRGenericClassifier {
protected final static int CLASSIC = 0;
protected final static int EXACT = 1;
protected final static int NOREDUCTION = 2;
// output stuff
//
protected static final String COMMA = ",";
protected static final String CR = "\n";
// logger
//
protected static Logger consoleLogger;
private static String LOGGING_LEVEL = "FINE";
// static blok to init logger
//
static {
consoleLogger = HackystatLogger.getLogger("debug.console", "preseries");
consoleLogger.setUseParentHandlers(false);
for (Handler handler : consoleLogger.getHandlers()) {
consoleLogger.removeHandler(handler);
}
ConsoleHandler handler = new ConsoleHandler();
Formatter formatter = new BriefFormatter();
handler.setFormatter(formatter);
consoleLogger.addHandler(handler);
HackystatLogger.setLoggingLevel(consoleLogger, LOGGING_LEVEL);
}
/**
* This implements k-leave out classification. It iterates over possible sets of parameters
* running training on the subset of N-k series, while validating over k series. Result is going
* to be the map of experiment abbreviation (window_paa_alphabet) and an entry combining mean
* error value and a set of SAX parameters.
*
* @param threadsNum How many threads to use.
* @param windowSizes possible sliding window sizes.
* @param paaSizes possible PAA sizes.
* @param alphabetSizes possible alphabet sizes.
* @param strategy the bag building strategy to employ.
* @param trainData training data.
* @param validationSampleSize validation sample size.
* @return
* @throws IndexOutOfBoundsException if error occurs.
* @throws TSException if error occurs.
*/
protected static List<String> trainKNNFoldJMotifThreaded(int threadsNum, int[] windowSizes,
int[] paaSizes, int[] alphabetSizes, SAXCollectionStrategy strategy,
Map<String, List<double[]>> trainData, int validationSampleSize)
throws IndexOutOfBoundsException, TSException {
// make a result map
//
// here keys are parameters like window _ PAA _ Alphabet
//
//
List<String> results = new ArrayList<String>();
// create thread pool for processing these users
//
ExecutorService executorService = Executors.newFixedThreadPool(threadsNum);
CompletionService<String> completionService = new ExecutorCompletionService<String>(
executorService);
int totalTaskCounter = 0;
// here is a loop over SAX parameters, strategy is fixed
//
for (int windowSize : windowSizes) {
for (int paaSize : paaSizes) {
for (int alphabetSize : alphabetSizes) {
// make sure to brake if PAA greater than window
if (windowSize < paaSize + 1) {
continue;
}
// create and submit the job
final UCRKNNloocvJob job = new UCRKNNloocvJob(trainData, validationSampleSize,
windowSize, paaSize, alphabetSize, strategy);
completionService.submit(job);
totalTaskCounter++;
}
}
}
// waiting for completion, shutdown pool disabling new tasks from being submitted
executorService.shutdown();
consoleLogger.info("Submitted " + totalTaskCounter + " jobs, shutting down the pool");
try {
while (totalTaskCounter > 0) {
//
// poll with a wait up to FOUR hours
Future<String> finished = completionService.poll(96, TimeUnit.HOURS);
if (null == finished) {
// something went wrong - break from here
System.err.println("Breaking POLL loop after 48 HOURS of waiting...");
break;
}
else {
String res = finished.get();
if (!(res.startsWith("ok_"))) {
System.err.println("Exception caught: " + finished.get());
break;
}
else {
String record = res.substring(3);
consoleLogger.info(record);
results.add(record);
}
totalTaskCounter--;
}
}
consoleLogger.info("All jobs completed.");
}
catch (Exception e) {
System.err.println("Error while waiting results: " + StackTrace.toString(e));
}
finally {
// wait at least 1 more hour before terminate and fail
try {
if (!executorService.awaitTermination(1, TimeUnit.HOURS)) {
executorService.shutdownNow(); // Cancel currently executing tasks
if (!executorService.awaitTermination(30, TimeUnit.MINUTES))
System.err.println("Pool did not terminate... FATAL ERROR");
}
}
catch (InterruptedException ie) {
System.err.println("Error while waiting interrupting: " + StackTrace.toString(ie));
// (Re-)Cancel if current thread also interrupted
executorService.shutdownNow();
// Preserve interrupt status
Thread.currentThread().interrupt();
}
}
return results;
}
/**
* This implements k-leave out classification. It iterates over possible sets of parameters
* running training on the subset of N-k series, while validating over k series. Result is going
* to be the map of experiment abbreviation (window_paa_alphabet) and an entry combining mean
* error value and a set of SAX parameters.
*
* @param windowSizes possible sliding window sizes.
* @param paaSizes possible PAA sizes.
* @param alphabetSizes possible alphabet sizes.
* @param strategy the bag building strategy to employ.
* @param trainData training data.
* @param validationSampleSize validation sample size.
* @return
* @throws IndexOutOfBoundsException if error occurs.
* @throws TSException if error occurs.
*/
protected static List<String> trainKNNFoldJMotif(int[] windowSizes, int[] paaSizes,
int[] alphabetSizes, SAXCollectionStrategy strategy, Map<String, List<double[]>> trainData,
int validationSampleSize) throws IndexOutOfBoundsException, TSException {
// make a result map
//
// here keys are parameters like window _ PAA _ Alphabet
//
//
List<String> results = new ArrayList<String>();
// here is a loop over SAX parameters, strategy is fixed
//
for (int windowSize : windowSizes) {
for (int paaSize : paaSizes) {
for (int alphabetSize : alphabetSizes) {
// make sure to brake if PAA greater than window
if (windowSize < paaSize + 1) {
continue;
}
// parameters
int[] params = new int[4];
params[0] = windowSize;
params[1] = paaSize;
params[2] = alphabetSize;
params[3] = strategy.index();
// push into stack all the samples we are going to validate for
Stack<KNNOptimizedStackEntry> samples2go = new Stack<KNNOptimizedStackEntry>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
String key = e.getKey();
int index = 0;
for (double[] sample : e.getValue()) {
samples2go.push(new KNNOptimizedStackEntry(key, sample, index));
index++;
}
}
// total counter
int totalSamples = samples2go.size();
// missclassified counter
int missclassifiedSamples = 0;
// cache for bags
HashMap<String, WordBag> cache = new HashMap<String, WordBag>();
// while something in stack
while (!samples2go.isEmpty()) {
// extracting validation samples
//
List<KNNOptimizedStackEntry> currentValidationSample = new ArrayList<KNNOptimizedStackEntry>();
Set<Integer> currentValidationIndexes = new TreeSet<Integer>();
for (int i = 0; i < validationSampleSize; i++) {
if (samples2go.isEmpty()) {
break;
}
KNNOptimizedStackEntry sample = samples2go.pop();
String cKey = sample.getKey();
if (i > 0) {
if (!(cKey.equalsIgnoreCase(currentValidationSample.get(i - 1).getKey()))) {
samples2go.push(sample);
break;
}
}
currentValidationSample.add(sample);
currentValidationIndexes.add(sample.getIndex());
}
// check if something in the validation sample
//
if (currentValidationSample.isEmpty()) {
break;
}
String validationKey = currentValidationSample.get(0).getKey();
// re-build bags if there is a need or pop them from the stack
//
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
// if there is a hit - need to rebuild that bag and replace it in the cache
if (e.getKey().equalsIgnoreCase(validationKey)) {
WordBag bag = new WordBag(validationKey);
int index = -1;
for (double[] series : e.getValue()) {
index++;
if (currentValidationIndexes.contains(index)) {
// if (sampleContainsSeries(currentValidationSample, series)) {
// System.out.println("bingo! ");
// }
// else {
// System.out.println("Wrong! ");
// System.exit(10);
// }
continue;
}
WordBag cb = TextUtils.seriesToWordBag("tmp", series, params);
bag.mergeWith(cb);
}
cache.put(validationKey, bag);
}
// else we just check if a bag is in place, if not - we put it in
else {
if (!cache.containsKey(e.getKey())) {
WordBag bag = new WordBag(e.getKey());
for (double[] series : e.getValue()) {
WordBag cb = TextUtils.seriesToWordBag("tmp", series, params);
bag.mergeWith(cb);
}
cache.put(e.getKey(), bag);
}
}
} // end of cache update loop
// all stuff from the cache will build a classifier vectors
//
// compute TFIDF statistics for training set
HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(cache.values());
// normalize to unit vectors to avoid false discrimination by vector magnitude
tfidf = TextUtils.normalizeToUnitVectors(tfidf);
// Classifying...
//
// is this sample correctly classified?
for (KNNOptimizedStackEntry e : currentValidationSample) {
int res = TextUtils.classify(e.getKey(), e.getValue(), tfidf, params);
if (0 == res) {
missclassifiedSamples = missclassifiedSamples + 1;
}
}
}
double error = Integer.valueOf(missclassifiedSamples).doubleValue()
/ Integer.valueOf(totalSamples).doubleValue();
results.add(toLogStr(params, 1.0D - error, error));
consoleLogger.fine(toLogStr(params, 1.0D - error, error));
}
}
}
return results;
}
protected static void run2GrammClassificationExperiment(String trainingDataName,
String testDataName, int windowSize, int[] paa_sizes, int[] alphabet_sizes,
SAXCollectionStrategy strategy, String outFname) throws IOException,
IndexOutOfBoundsException, TSException {
BufferedWriter bw = new BufferedWriter(new FileWriter(outFname));
// reading training and test collections
//
Map<String, List<double[]>> trainData = UCRUtils.readUCRData(trainingDataName);
consoleLogger.fine("trainData classes: " + trainData.size() + ", series length: "
+ trainData.entrySet().iterator().next().getValue().get(0).length);
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
consoleLogger.fine(" training class: " + e.getKey() + " series: " + e.getValue().size());
}
Map<String, List<double[]>> testData = UCRUtils.readUCRData(testDataName);
consoleLogger.fine("testData classes: " + testData.size());
for (Entry<String, List<double[]>> e : testData.entrySet()) {
consoleLogger.fine(" test class: " + e.getKey() + " series: " + e.getValue().size());
}
for (int paaSize : paa_sizes) {
for (int alphabetSize : alphabet_sizes) {
if (windowSize < paaSize + 1) {
continue;
}
int[][] params = new int[1][4];
params[0][0] = windowSize;
params[0][1] = paaSize;
params[0][2] = alphabetSize;
params[0][3] = strategy.index();
// making training bags collection
List<BigramBag> bags = TextUtils.labeledSeries2BigramBags(trainData, params);
HashMap<String, HashMap<Bigram, Double>> tfidf = TextUtils.computeTFIDF(bags);
tfidf = TextUtils.normalizeBigramsToUnitVectors(tfidf);
int totalTestSample = 0;
int totalPositiveTests = 0;
for (String currenClassUnderTest : testData.keySet()) {
List<double[]> testD = testData.get(currenClassUnderTest);
int positives = 0;
for (double[] series : testD) {
positives = positives
+ TextUtils.classifyBigrams(currenClassUnderTest, series, tfidf, params);
totalTestSample++;
}
totalPositiveTests = totalPositiveTests + positives;
}
double accuracy = (double) totalPositiveTests / (double) totalTestSample;
double error = 1.0d - accuracy;
String str = windowSize + COMMA + paaSize + COMMA + alphabetSize + COMMA + accuracy + COMMA
+ error;
bw.write(str + CR);
consoleLogger.fine(str);
}
}
bw.close();
}
protected static void runClassificationExperiment(String trainingDataName, String testDataName,
Integer windowSize, int[] paa_sizes, int[] alphabet_sizes, SAXCollectionStrategy strategy,
String outFname) throws IOException, IndexOutOfBoundsException, TSException {
BufferedWriter bw = new BufferedWriter(new FileWriter(outFname));
// reading training and test collections
//
Map<String, List<double[]>> trainData = UCRUtils.readUCRData(trainingDataName);
consoleLogger.fine("trainData classes: " + trainData.size() + ", series length: "
+ trainData.entrySet().iterator().next().getValue().get(0).length);
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
consoleLogger.fine(" training class: " + e.getKey() + " series: " + e.getValue().size());
}
Map<String, List<double[]>> testData = UCRUtils.readUCRData(testDataName);
consoleLogger.fine("testData classes: " + testData.size());
for (Entry<String, List<double[]>> e : testData.entrySet()) {
consoleLogger.fine(" test class: " + e.getKey() + " series: " + e.getValue().size());
}
for (int paaSize : paa_sizes) {
for (int alphabetSize : alphabet_sizes) {
if (windowSize < paaSize + 1) {
continue;
}
int[] params = new int[4];
params[0] = windowSize;
params[1] = paaSize;
params[2] = alphabetSize;
params[3] = strategy.index();
// making training bags collection
List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainData, params);
HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
tfidf = TextUtils.normalizeToUnitVectors(tfidf);
int totalTestSample = 0;
int totalPositiveTests = 0;
for (String currenClassUnderTest : testData.keySet()) {
List<double[]> testD = testData.get(currenClassUnderTest);
int positives = 0;
for (double[] series : testD) {
positives = positives + TextUtils.classify(currenClassUnderTest, series, tfidf, params);
totalTestSample++;
}
totalPositiveTests = totalPositiveTests + positives;
}
double accuracy = (double) totalPositiveTests / (double) totalTestSample;
double error = 1.0d - accuracy;
String str = windowSize + COMMA + paaSize + COMMA + alphabetSize + COMMA + accuracy + COMMA
+ error;
bw.write(str + CR);
consoleLogger.fine(str);
}
}
bw.close();
}
protected static void runKNNExperiment(Map<String, List<double[]>> trainData,
Map<String, List<double[]>> testData, Integer windowSize, int paaSize, int alphabetSize,
SAXCollectionStrategy strategy, String outFname) throws IOException,
IndexOutOfBoundsException, TSException {
BufferedWriter bw = new BufferedWriter(new FileWriter(outFname));
// make parameters array
//
int[][] params = new int[1][4];
params[0][0] = windowSize;
params[0][1] = paaSize;
params[0][2] = alphabetSize;
params[0][3] = strategy.index();
// figuring out a total test collection size
//
int totalTestSample = 0;
for (Entry<String, List<double[]>> e : testData.entrySet()) {
totalTestSample = totalTestSample + e.getValue().size();
}
// build huge TFIDF table for all of trainData
//
long start = System.currentTimeMillis();
List<WordBag> trainBags = new ArrayList<WordBag>();
for (Entry<String, List<double[]>> referenceSet : trainData.entrySet()) {
int counter = 0;
for (double[] series : referenceSet.getValue()) {
WordBag newBag = TextUtils.seriesToWordBag(
referenceSet.getKey() + "_" + String.valueOf(counter), series, params[0]);
trainBags.add(newBag);
counter++;
}
}
HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(trainBags);
tfidf = TextUtils.normalizeToUnitVectors(tfidf);
consoleLogger.fine("TFIDF statistics table is built in "
+ timeToString(start, System.currentTimeMillis()));
// ################ begin classification
//
int totalPositiveTests = 0;
int queryCounter = 0;
// #### here we iterate over all TEST series, class by class, series by series
//
for (Entry<String, List<double[]>> querySet : testData.entrySet()) {
for (double[] querySeries : querySet.getValue()) {
consoleLogger.fine("classifying query " + queryCounter + " of class " + querySet.getKey());
// this holds the closest neighbor out of all training data with its class
//
double bestDistance = Double.MIN_VALUE;
String bestClass = "";
// the query word bag
WordBag queryBag = TextUtils.seriesToWordBag("query", querySeries, params[0]);
for (Entry<String, HashMap<String, Double>> neighbor : tfidf.entrySet()) {
double similarity = TextUtils.cosineSimilarity(queryBag, neighbor.getValue());
if (similarity > bestDistance) {
bestDistance = similarity;
bestClass = neighbor.getKey();
consoleLogger.fine(" + closest class: " + bestClass + " distance: " + bestDistance);
}
}
// best distance inner loop - over references
if (bestClass.substring(0, bestClass.indexOf('_')).equalsIgnoreCase(querySet.getKey())) {
totalPositiveTests++;
consoleLogger.fine(" * hit!");
}
else {
consoleLogger.fine(" ? miss!");
}
queryCounter++;
}
}
double accuracy = (double) totalPositiveTests / (double) totalTestSample;
double error = 1.0d - accuracy;
System.out.println(accuracy + "," + error + "\n");
bw.write(accuracy + "," + error + "\n");
bw.close();
}
protected static void runKNNExperiment(String trainingDataName, String testDataName,
Integer windowSize, int paa_size, int alphabet_size, SAXCollectionStrategy strategy,
String outFname) throws IOException, IndexOutOfBoundsException, TSException {
// reading training and test collections
//
Map<String, List<double[]>> trainData = UCRUtils.readUCRData(trainingDataName);
consoleLogger.fine("trainData classes: " + trainData.size() + ", series length: "
+ trainData.entrySet().iterator().next().getValue().get(0).length);
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
consoleLogger.fine(" training class: " + e.getKey() + " series: " + e.getValue().size());
}
int totalTestSample = 0;
Map<String, List<double[]>> testData = UCRUtils.readUCRData(testDataName);
consoleLogger.fine("testData classes: " + testData.size());
for (Entry<String, List<double[]>> e : testData.entrySet()) {
consoleLogger.fine(" test class: " + e.getKey() + " series: " + e.getValue().size());
totalTestSample = totalTestSample + e.getValue().size();
}
runKNNExperiment(trainData, testData, windowSize, paa_size, alphabet_size, strategy, outFname);
}
protected static int[] makeArray(int minValue, int maxValue, int incrementValue) {
ArrayList<Integer> preRes = new ArrayList<Integer>();
int curValue = minValue;
do {
preRes.add(curValue);
curValue = curValue + incrementValue;
}
while (curValue <= maxValue);
int[] res = new int[preRes.size()];
for (int i = 0; i < preRes.size(); i++) {
res[i] = preRes.get(i).intValue();
}
return res;
}
/**
* This implements k-leave out classification. It iterates over possible sets of parameters
* running training on the subset of N-k series, while validating over k series. Result is going
* to be the map of experiment abbreviation (window_paa_alphabet) and an entry combining mean
* error value and a set of SAX parameters.
*
* @param windowSizes possible sliding window sizes.
* @param paaSizes possible PAA sizes.
* @param alphabetSizes possible alphabet sizes.
* @param strategy the bag building strategy to employ.
* @param trainData training data.
* @param validationSampleSize validation sample size.
* @return
* @throws IndexOutOfBoundsException if error occurs.
* @throws TSException if error occurs.
*/
protected static Map<String, Entry<Double, int[][]>> trainKNNFold(int[] windowSizes,
int[] paaSizes, int[] alphabetSizes, SAXCollectionStrategy strategy,
Map<String, List<double[]>> trainData, int validationSampleSize)
throws IndexOutOfBoundsException, TSException {
// make a result map
HashMap<String, Entry<Double, int[][]>> results = new HashMap<String, Map.Entry<Double, int[][]>>();
// minimal subclass length
//
int minLen = Integer.MAX_VALUE;
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
if (minLen > e.getValue().size()) {
minLen = e.getValue().size();
}
}
int slicesNum = minLen / validationSampleSize;
HashMap<String, Integer> classIncerements = new HashMap<String, Integer>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
Integer currSliceSize = e.getValue().size() / slicesNum;
classIncerements.put(e.getKey(), currSliceSize);
}
for (int windowSize : windowSizes) {
for (int paaSize : paaSizes) {
for (int alphabetSize : alphabetSizes) {
if (windowSize < paaSize + 1) {
continue;
}
// get the iteration number
int slices = minLen / validationSampleSize;
// init the iteration's error rates array
double[] errors = new double[slices];
// iterate over possible sets
for (int currentSlice = 0; currentSlice < slices; currentSlice++) {
// training subset
Map<String, List<double[]>> innerTrainData = remove(trainData, classIncerements,
currentSlice);
// validation subset
Map<String, List<double[]>> innerTestData = extract(trainData, classIncerements,
currentSlice);
// sometimes classes are of different sizes; we took care about not getting out of
// boundaries, but we need to take care about the last iteration
if (currentSlice == slices - 1) {
innerTrainData = removeMax(trainData, classIncerements, currentSlice);
innerTestData = extractMax(trainData, classIncerements, currentSlice);
}
// making training bags collection
List<WordBag> bags = TextUtils.labeledSeries2WordBags(innerTrainData, paaSize,
alphabetSize, windowSize, strategy);
// compute TFIDF statistics for training set
HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags);
// normalize to unit vectors to avoid false discrimination by vector magnitude
tfidf = TextUtils.normalizeToUnitVectors(tfidf);
// init counters
int totalTestSample = 0;
int totalPositiveTests = 0;
// let's see the error rate for this fold
// iterating over class labels
for (String label : tfidf.keySet()) {
List<double[]> testD = innerTestData.get(label);
int positives = 0;
for (double[] series : testD) {
positives = positives
+ TextUtils.classify(label, series, tfidf, paaSize, alphabetSize, windowSize,
strategy);
totalTestSample++;
}
totalPositiveTests = totalPositiveTests + positives;
}
// compute accuracy and the error rate
double accuracy = (double) totalPositiveTests / (double) totalTestSample;
double error = 1.0d - accuracy;
// save the error rate value
errors[currentSlice] = error;
}
// here cross-validation stuff finished
//
int[][] params = new int[2][3];
params[0][0] = windowSize;
params[0][1] = paaSize;
params[0][2] = alphabetSize;
results.put(
String.valueOf(windowSize) + "_" + String.valueOf(paaSize) + "_"
+ String.valueOf(alphabetSize),
new KNNStackEntry<Double, int[][]>(TSUtils.mean(errors), params));
consoleLogger.fine("params " + Arrays.toString(params[0]) + ", max. error: "
+ TSUtils.max(errors) + ", mean error: " + TSUtils.mean(errors) + ", min. error: "
+ TSUtils.min(errors));
}
}
}
return results;
}
/**
* Extract subset.
*
* @param trainData
* @param classIncerements
* @param currentSlice
* @return
*/
private static Map<String, List<double[]>> extract(Map<String, List<double[]>> trainData,
HashMap<String, Integer> classIncerements, int currentSlice) {
Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
String className = e.getKey();
Integer classSliseSize = classIncerements.get(className);
List<double[]> classSample = new ArrayList<double[]>();
int lowBound = classSliseSize * currentSlice;
int highBound = classSliseSize * (currentSlice + 1);
for (int i = lowBound; i < highBound; i++) {
classSample.add(e.getValue().get(i));
}
res.put(className, classSample);
}
return res;
}
private static Map<String, List<double[]>> extractMax(Map<String, List<double[]>> trainData,
HashMap<String, Integer> classIncerements, int currentSlice) {
Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
String className = e.getKey();
Integer classSliseSize = classIncerements.get(className);
List<double[]> classSample = new ArrayList<double[]>();
int lowBound = classSliseSize * currentSlice;
int highBound = e.getValue().size();
for (int i = lowBound; i < highBound; i++) {
classSample.add(e.getValue().get(i));
}
res.put(className, classSample);
}
return res;
}
/**
* Remove subset.
*
* @param trainData
* @param classIncerements
* @param currentSlice
* @return
*/
private static Map<String, List<double[]>> remove(Map<String, List<double[]>> trainData,
HashMap<String, Integer> classIncerements, int currentSlice) {
Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
String className = e.getKey();
Integer classSliseSize = classIncerements.get(className);
List<double[]> classSample = new ArrayList<double[]>();
int lowBound = classSliseSize * currentSlice;
int highBound = classSliseSize * (currentSlice + 1);
for (int i = 0; i < e.getValue().size(); i++) {
if (lowBound <= i && i < highBound) {
continue;
}
classSample.add(e.getValue().get(i));
}
res.put(className, classSample);
}
return res;
}
private static Map<String, List<double[]>> removeMax(Map<String, List<double[]>> trainData,
HashMap<String, Integer> classIncerements, int currentSlice) {
Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();
for (Entry<String, List<double[]>> e : trainData.entrySet()) {
String className = e.getKey();
Integer classSliseSize = classIncerements.get(className);
List<double[]> classSample = new ArrayList<double[]>();
int lowBound = classSliseSize * currentSlice;
int highBound = e.getValue().size();
for (int i = 0; i < e.getValue().size(); i++) {
if (lowBound <= i && i < highBound) {
continue;
}
classSample.add(e.getValue().get(i));
}
res.put(className, classSample);
}
return res;
}
private static String timeToString(long start, long finish) {
StringBuffer sb = new StringBuffer();
long diff = finish - start;
final long secondInMillis = 1000;
final long minuteInMillis = secondInMillis * 60;
final long hourInMillis = minuteInMillis * 60;
// final long dayInMillis = hourInMillis * 24;
// final long yearInMillis = dayInMillis * 365;
// long elapsedYears = diff / yearInMillis;
// diff = diff % yearInMillis;
// long elapsedDays = diff / dayInMillis;
// diff = diff % dayInMillis;
long elapsedHours = diff / hourInMillis;
if (elapsedHours > 0) {
sb.append(String.valueOf(elapsedHours) + "h ");
}
diff = diff % hourInMillis;
long elapsedMinutes = diff / minuteInMillis;
if (elapsedMinutes > 0) {
sb.append(String.valueOf(elapsedMinutes) + "m ");
}
diff = diff % minuteInMillis;
long elapsedSeconds = diff / secondInMillis;
if (elapsedSeconds > 0) {
sb.append(String.valueOf(elapsedSeconds) + "s ");
}
diff = diff % secondInMillis;
if (diff > 0) {
sb.append(String.valueOf(diff) + "ms");
}
return sb.toString();
}
protected static String getStrategyPrefix(SAXCollectionStrategy strategy) {
String strategyP = "noreduction";
if (SAXCollectionStrategy.EXACT.equals(strategy)) {
strategyP = "exact";
}
if (SAXCollectionStrategy.CLASSIC.equals(strategy)) {
strategy = SAXCollectionStrategy.CLASSIC;
strategyP = "classic";
}
return strategyP;
}
protected static String toLogStr(int[] p, double accuracy, double error) {
StringBuffer sb = new StringBuffer();
if (SAXCollectionStrategy.CLASSIC.index() == p[3]) {
sb.append("CLASSIC,");
}
else if (SAXCollectionStrategy.EXACT.index() == p[3]) {
sb.append("EXACT,");
}
else if (SAXCollectionStrategy.NOREDUCTION.index() == p[3]) {
sb.append("NOREDUCTION,");
}
sb.append(p[0]).append(COMMA);
sb.append(p[1]).append(COMMA);
sb.append(p[2]).append(COMMA);
sb.append(accuracy).append(COMMA);
sb.append(error);
return sb.toString();
}
protected static String toLogStr(int[][] params, SAXCollectionStrategy strategy, double accuracy,
double error) {
StringBuffer sb = new StringBuffer();
if (strategy.equals(SAXCollectionStrategy.CLASSIC)) {
sb.append("CLASSIC,");
}
else if (strategy.equals(SAXCollectionStrategy.EXACT)) {
sb.append("EXACT,");
}
else if (strategy.equals(SAXCollectionStrategy.NOREDUCTION)) {
sb.append("NOREDUCTION,");
}
sb.append(params[0][0]).append(COMMA);
sb.append(params[0][1]).append(COMMA);
sb.append(params[0][2]).append(COMMA);
sb.append(Double.valueOf(accuracy).toString()).append(COMMA);
sb.append(Double.valueOf(error).toString());
return sb.toString();
}
}