package edu.hawaii.jmotif.performance;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import weka.classifiers.Classifier;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Instance;
import weka.core.Instances;
import edu.hawaii.jmotif.text.SAXCollectionStrategy;
import edu.hawaii.jmotif.text.TextUtils;
import edu.hawaii.jmotif.text.WordBag;
/**
 * Weka classifier wrapper around the SAX / TF-IDF text utilities in {@link TextUtils}.
 *
 * <p>Training groups the numeric attribute values of every instance by class label, converts each
 * group into a {@link WordBag}, computes TF-IDF weights for the bags and normalizes them to unit
 * vectors. Prediction converts the instance into a word bag and compares it with the stored
 * per-class vectors.
 *
 * <p>{@link #setSAXParams(int, int, int, String)} must be called before the classifier is used.
 */
public class SAXVSMClassifier extends weka.classifiers.AbstractClassifier implements Classifier {

  /** Serialization version identifier. */
  private static final long serialVersionUID = 1L;

  /** Window size handed to the SAX transform. */
  private int window;

  /** PAA size handed to the SAX transform. */
  private int paa;

  /** Alphabet size handed to the SAX transform. */
  private int alphabet;

  /** Strategy used when collecting SAX words; null until {@link #setSAXParams} is called. */
  private SAXCollectionStrategy strategy;

  /** Header (attribute definitions, no rows) of the training data; null until built. */
  private Instances m_Instances;

  /** Unit-normalized TF-IDF vectors keyed by class label; null until built. */
  private HashMap<String, HashMap<String, Double>> tfidf;

  /**
   * Builds the per-class TF-IDF vectors from the training data.
   *
   * @param instances the training data; it is copied, the caller's object is not modified.
   * @throws Exception if the data violates {@link #getCapabilities()} or the text utilities fail.
   */
  @Override
  public void buildClassifier(Instances instances) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // work on a copy so that dropping rows with a missing class does not affect the caller
    Instances training = new Instances(instances);
    training.deleteWithMissingClass();

    // group the series by class label
    int classIndex = training.classIndex();
    Map<String, List<double[]>> trainData = new HashMap<String, List<double[]>>();
    for (Instance instance : training) {
      String label = training.classAttribute().value((int) instance.classValue());
      List<double[]> seriesOfClass = trainData.get(label);
      if (seriesOfClass == null) {
        seriesOfClass = new ArrayList<double[]>();
        trainData.put(label, seriesOfClass);
      }
      seriesOfClass.add(toSeries(instance, classIndex));
    }

    // making training bags collection
    List<WordBag> bags = TextUtils.labeledSeries2WordBags(trainData, this.paa, this.alphabet,
        this.window, this.strategy);

    // getting TFIDF done and normalizing the vectors
    HashMap<String, HashMap<String, Double>> rawTfidf = TextUtils.computeTFIDF(bags);
    this.tfidf = TextUtils.normalizeToUnitVectors(rawTfidf);

    // only the header is needed later (class attribute and class index), so the training rows
    // are not retained
    this.m_Instances = new Instances(training, 0);
  }

  /**
   * Predicts the class of an instance.
   *
   * @param instance the instance to classify.
   * @return the index of the predicted class value, or {@code Double.NaN} (Weka's encoding of a
   * missing value) when the predicted label is null or is not a value of the class attribute.
   * @throws Exception if the classifier is not built or configured, or the text utilities fail.
   */
  @Override
  public double classifyInstance(Instance instance) throws Exception {
    WordBag bag = toWordBag(instance);
    String label = TextUtils.classify(bag, this.tfidf);
    if (label == null) {
      return Double.NaN;
    }
    int index = m_Instances.classAttribute().indexOfValue(label);
    // indexOfValue reports an unknown label as -1, which is not a valid class index
    return index < 0 ? Double.NaN : index;
  }

  /**
   * Computes a class membership estimate for an instance from the cosine similarity between its
   * word bag and each class vector.
   *
   * @param instance the instance to score.
   * @return one value per class; the values are scaled to sum to 1 when their sum is positive and
   * are returned unscaled otherwise.
   * @throws Exception if the classifier is not built or configured, or the text utilities fail.
   */
  @Override
  public double[] distributionForInstance(Instance instance) throws Exception {
    WordBag bag = toWordBag(instance);
    Attribute classAttribute = m_Instances.classAttribute();

    double[] res = new double[m_Instances.numClasses()];
    double total = 0.0;
    for (int i = 0; i < res.length; i++) {
      HashMap<String, Double> classVector = tfidf.get(classAttribute.value(i));
      // a class label without any training rows has no vector; score it as zero
      res[i] = (classVector == null) ? 0.0 : TextUtils.cosineSimilarity(bag, classVector);
      total += res[i];
    }

    // rescaling keeps the ranking of the classes and makes the result sum to 1
    if (total > 0.0) {
      for (int i = 0; i < res.length; i++) {
        res[i] /= total;
      }
    }
    return res;
  }

  /**
   * Declares the data this classifier accepts: numeric attributes and a nominal class. Rows with
   * a missing class value are accepted because {@link #buildClassifier(Instances)} removes them.
   *
   * @return the capabilities of this classifier.
   */
  @Override
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();
    // start from an empty set so that only the capabilities enabled below are accepted
    result.disableAll();
    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);
    return result;
  }

  /**
   * Configures the SAX transform.
   *
   * @param i the window size.
   * @param j the PAA size.
   * @param k the alphabet size.
   * @param string the collection strategy name, compared case-insensitively: "EXACT" or
   * "CLASSIC"; any other value, including null, selects NOREDUCTION.
   */
  public void setSAXParams(int i, int j, int k, String string) {
    this.window = i;
    this.paa = j;
    this.alphabet = k;
    // configuring strategy
    if ("EXACT".equalsIgnoreCase(string)) {
      this.strategy = SAXCollectionStrategy.EXACT;
    }
    else if ("CLASSIC".equalsIgnoreCase(string)) {
      this.strategy = SAXCollectionStrategy.CLASSIC;
    }
    else {
      this.strategy = SAXCollectionStrategy.NOREDUCTION;
    }
  }

  /**
   * Converts an instance into a word bag using the configured SAX parameters.
   *
   * @param instance the instance to convert.
   * @return the word bag, labeled "test".
   * @throws Exception if the classifier is not built or configured, or the text utilities fail.
   */
  private WordBag toWordBag(Instance instance) throws Exception {
    if (this.tfidf == null || this.m_Instances == null) {
      throw new IllegalStateException("Classifier is not built; call buildClassifier first.");
    }
    if (this.strategy == null) {
      throw new IllegalStateException("SAX parameters are not set; call setSAXParams first.");
    }
    // the class index comes from the training header, so the instance does not need a dataset
    double[] series = toSeries(instance, this.m_Instances.classIndex());
    int[] params = { this.window, this.paa, this.alphabet, this.strategy.index() };
    return TextUtils.seriesToWordBag("test", series, params);
  }

  /**
   * Copies the attribute values of an instance into an array, leaving out the class attribute so
   * that the label (or a missing value in its place) never becomes part of the series.
   *
   * @param instance the instance to read.
   * @param classIndex the index of the class attribute; an index outside the attribute range
   * means that nothing is left out.
   * @return the attribute values in attribute order, without the class value.
   */
  private static double[] toSeries(Instance instance, int classIndex) {
    int numAttributes = instance.numAttributes();
    boolean hasClass = classIndex >= 0 && classIndex < numAttributes;
    double[] series = new double[hasClass ? numAttributes - 1 : numAttributes];
    int next = 0;
    for (int k = 0; k < numAttributes; k++) {
      if (k != classIndex) {
        series[next++] = instance.value(k);
      }
    }
    return series;
  }
}