/*
* copyright: Anthony Bagnall
* A filter for using the shapelet transform with hierarchical
* clustering of shapelets.
*
* Recommended usage: Build the shapelet transform outside of this class and pass in.
*
* FullShapeletTransform shape=new FullShapeletTransform();
* //Build and use shape here
*
* int nosClusters=10;
* ClusteredShapeletTransform cShape=new ClusteredShapeletTransform(shape,nosClusters);
*
* it will work like this with any of the numerous constructors
* ClusteredShapeletTransform cShape=new ClusteredShapeletTransform();
* Instances c=cShape.process(data)
*
*
*/
package weka.filters.timeseries.shapelet_transforms;
import java.io.FileWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.shapelet.QualityMeasures;
import weka.core.shapelet.Shapelet;
import weka.filters.SimpleBatchFilter;
/**
*
* @author Jon Hills - j.hills@uea.ac.uk
*/
public class ClusteredShapeletTransform extends SimpleBatchFilter{
FullShapeletTransform st;
protected double[][] distanceMap;
protected ArrayList<int[]> clusterPairs;
protected ArrayList<Shapelet> clusteredShapelets;
protected ArrayList<Shapelet> allShapelets;
protected int noClust;
public static int DEFAULT_NUMCLUSTERS=1;
/*
*
*/
public ClusteredShapeletTransform(FullShapeletTransform shapes,int n){
st=shapes;
this.clusteredShapelets = new ArrayList<Shapelet>();
noClust=n;
}
/**
* Fully specified constructor.
*
* @param k The number of shapelets to store.
* @param minShapeletLength The minimum shapelet langth.
* @param maxShapeletLength The maximum shapelet length.
* @param qualityChoice The quality measure to use for assessing candidates.
* @param cluster Whether to cluster the shapelets for the transform.
* @param noClust The number of clusters.
*/
public ClusteredShapeletTransform(int k, int minShapeletLength,
int maxShapeletLength, QualityMeasures.ShapeletQualityChoice qualityChoice,
int noClust) {
st=new FullShapeletTransform(k, minShapeletLength, maxShapeletLength, qualityChoice);
this.noClust=noClust;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Partially specified constructor. Defaults to clustering. If
* clustering is used, defaults to one cluster, i.e., the best
* shapelet only.
*
* @param k The number of shapelets to store.
* @param minShapeletLength The minimum shapelet langth.
* @param maxShapeletLength The maximum shapelet length.
* @param qualityChoice The quality measure to use for assessing candidates.
*/
public ClusteredShapeletTransform(int k, int minShapeletLength,
int maxShapeletLength, QualityMeasures.ShapeletQualityChoice qualityChoice) {
st=new ShapeletTransform(k, minShapeletLength, maxShapeletLength, qualityChoice);
this.noClust=DEFAULT_NUMCLUSTERS;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Partially specified constructor. Defaults to Information Gain quality
* measure. Defaults to no clustering. If clustering is used, defaults to
* one cluster, i.e., the best shapelet only.
*
* @param k The number of shapelets to store.
* @param minShapeletLength The minimum shapelet langth.
* @param maxShapeletLength The maximum shapelet length.
*/
public ClusteredShapeletTransform(int k, int minShapeletLength,
int maxShapeletLength) {
st=new ShapeletTransform(k, minShapeletLength, maxShapeletLength);
this.noClust=DEFAULT_NUMCLUSTERS;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Partially specified constructor. Defaults to Information Gain quality
* measure.
*
* @param k The number of shapelets to store.
* @param minShapeletLength The minimum shapelet langth.
* @param maxShapeletLength The maximum shapelet length.
* @param cluster Whether to cluster the shapelets for the transform.
* @param noClust The number of clusters.
*/
public ClusteredShapeletTransform(int k, int minShapeletLength,
int maxShapeletLength, int noClust) {
st=new ShapeletTransform(k, minShapeletLength, maxShapeletLength);
this.noClust=noClust;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Partially specified constructor. Defaults to Information Gain quality
* measure. Minimum and maximum shapelet lengths must be set before use.
* Defaults to no clustering. Defaults to one cluster.
*
* @param k The number of shapelets to store.
*/
public ClusteredShapeletTransform(int k) {
st=new ShapeletTransform(k);
this.noClust=DEFAULT_NUMCLUSTERS;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Partially specified constructor. Defaults to Information Gain quality
* measure. Minimum and maximum shapelet lengths must be set before use.
*
* @param k The number of shapelets to store.
* @param cluster Whether or not to use clustering.
* @param noClust Then number of clusters.
*/
public ClusteredShapeletTransform(int k, boolean cluster, int noClust) {
st=new ShapeletTransform(k);
this.noClust=noClust;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Empty constructor. Defaults to Information Gain quality measure, no
* clustering, one cluster if clustering turned on. Shapelet lengths must
* be set. K must be set.
*/
public ClusteredShapeletTransform()
{
st=new ShapeletTransform();
this.noClust=DEFAULT_NUMCLUSTERS;
this.clusteredShapelets = new ArrayList<Shapelet>();
}
/**
* Transform datasets. If cluster=true, shapelets will be clustered into
* noClust clusters prior to transformation.
*
* @param data - the input data to be transformed (and to find the shapelets if this is the first run)
* @return the transformed Instances in terms of the distance from each shapelet
* @throws Exception - if the number of shapelets or the length parameters specified are incorrect
*/
@Override
public Instances process(Instances data) throws Exception{
int size=st.getNumberOfShapelets();
if(size < 1)
throw new Exception("Number of shapelets initialised incorrectly - please select value of k (Usage: setNumberOfShapelets");
if(size<noClust)
throw new Exception("Trying to produce more clusters than there are shapelets!");
// We only want the shapelets from st, so could optimize this to not work out the transform too. However, cleaner this way
if(!st.foundShapelets())
st.process(data);
allShapelets=st.shapelets;
clusterShapelets();
Instances output = determineOutputFormat(data);
for (int i = 0; i < data.numInstances(); i++) { // for each data
Instance toAdd = new DenseInstance(this.clusteredShapelets.size() + 1);
int shapeletNum = 0;
for (Shapelet s : this.clusteredShapelets) {
double dist = FullShapeletTransform.subsequenceDistance(s.content, data.instance(i));
toAdd.setValue(shapeletNum++, dist);
}
toAdd.setValue(this.clusteredShapelets.size(), data.instance(i).classValue());
output.add(toAdd);
}
return output;
}
/**
*
* @param inputFormat - the format of the input data
* @return a new Instances object in the desired output format
* @throws Exception - if all required attributes of the filter are not initialised correctly
*/
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception{
int s=st.getNumberOfShapelets();
if(s < 1 || s<noClust){
throw new Exception("ShapeletFilter not initialised correctly - please specify a value of k that is greater than or equal to 1. You entered s="+s+" num clusters ="+noClust);
}
FastVector atts = new FastVector();
String name;
for(int i = 0; i < noClust; i++){
name = "CShapelet_" + i;
atts.addElement(new Attribute(name));
}
Attribute target = inputFormat.attribute(inputFormat.classIndex());
FastVector vals = new FastVector(target.numValues());
for(int i = 0; i < target.numValues(); i++){
vals.addElement(target.value(i));
}
atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(), vals));
Instances result = new Instances("CShapelets" + inputFormat.relationName(), atts, inputFormat.numInstances());
result.setClassIndex(result.numAttributes() - 1);
return result;
}
/**
* Creates a set of clustered shapelets with a noClust clusters.
*/
public void clusterShapelets()
{
// System.out.println("Clustering shapelets: "+this.noClust);
double[][] shapeletSet = new double[allShapelets.size()][];
for(int i=0;i<shapeletSet.length;i++)
{
shapeletSet[i] = allShapelets.get(i).content;
}
distanceMap = getDistanceMap(shapeletSet);
clusterPairs = new ArrayList();
this.clusteredShapelets.clear();
//Adds an int[] of each index to clusterPairs
for(int i=0;i<distanceMap.length;i++)
{
int[] tmp = {i};
clusterPairs.add(tmp);
}
//Returns pair of indexes to clusterPairs/adjusted distanceMap
//Is the index of the ArrayList ever a factor? It should be done with
//just the stored indexes.
int[] bestPair = findClosestPair(distanceMap);
double[][] map = new double[2][];
while(clusterPairs.size()>noClust)
{
adjustClusterPairs(bestPair);
map = adjustDistanceMap();
bestPair = findClosestPair(map);
}
//Select the best shapelet in each cluster
//Make sure that the index stored in clusterPairs is the index of
//the shapelet stored in the shapelet ArrayList.
for(int i=0;i<clusterPairs.size();i++)
{
if(clusterPairs.get(i).length==1)
clusteredShapelets.add(allShapelets.get(clusterPairs.get(i)[0]));
else
{
double best = Double.MIN_VALUE;
int position = 0;
for(int j=0;j<clusterPairs.get(i).length;j++)
{
//Infogain will need to be changed to quality measure
if(allShapelets.get(clusterPairs.get(i)[j]).qualityValue >best)
{
best = allShapelets.get(clusterPairs.get(i)[j]).qualityValue;
position =j;
}
}
clusteredShapelets.add(allShapelets.get(clusterPairs.get(i)[position]));
//System.out.println("Added shapelet at position"+position);
}
}
}
/**
* Finds the pair on a distance map with the least distance between them.
*
* @param map The current distance map
* @return The indexes of the best-matching pair.
*/
private int[] findClosestPair(double[][] map)
{
int[] pair = new int[2];
double best = Double.MAX_VALUE;
for(int i=0;i<map.length;i++)
{
for(int j=i+1;j<map[i].length;j++)
{
if(map[i][j]<best)
{
best = map[i][j];
pair[0] = i;
pair[1] = j;
}
}
}
return pair;
}
/**
* Creates complete distance map with identities and redundant information.
*
* @param shapeletSet An array of shapelet content double arrays.
* @return The distance map for the shapelet set.
*/
private double[][] getDistanceMap(double[][] shapeletSet)
{
double[][] map = new double[shapeletSet.length][];
//Initialise double[]
for(int i=0;i<shapeletSet.length;i++)
{
double[] tmp = new double[shapeletSet.length];
map[i] = tmp;
}
for(int i=0;i<shapeletSet.length;i++)
{
map[i][i] = 0;
for(int j=i+1;j<shapeletSet.length;j++)
{
map[i][j] = findMinDistance(shapeletSet[i],shapeletSet[j]);
map[j][i] = map[i][j];
}
}
return map;
}
/**
* Returns the shapelet distance between two shapelets, that is, the
* shortest distance between the shorter shapelet and the best-matching
* subsequence of the longer shapelet.
*
* @param first One shapelet content array.
* @param second The other shapelet content array.
* @return The shapelet distance between the shapelets.
*/
private double findMinDistance(double[] first, double[] second)
{
double distance = 0;
double bestDist = Double.MAX_VALUE;
if (first.length == second.length){
bestDist = getDistance(first,second);
}
else{
if(first.length>second.length){
for(int i=0;i<(first.length-second.length)+1;i++){
double [] temp= Arrays.copyOfRange(first, i, i+second.length);
distance = getDistance(temp,second);
if(distance<bestDist)
bestDist=distance;
}
}
else{
for(int i=0;i<(second.length-first.length)+1;i++){
double [] temp= Arrays.copyOfRange(second, i, i+first.length);
distance = getDistance(temp,first);
if(distance<bestDist)
bestDist=distance;
}
}
}
return bestDist;
}
/**
* Returns squared Euclidean distance between two series of equal length.
*
* @param first The first series.
* @param second The second series.
* @return The Euclidean distance between the series.
*/
private double getDistance(double[] first, double[] second){
double distance = 0;
for(int i=0;i<first.length;i++)
distance = distance+ ((first[i]-second[i])*(first[i]-second[i]));
return Math.sqrt(distance);
}
/**
* Rebuilds distance map from scratch - not efficient.
*
* @return The adjusted distance map.
*/
private double[][] adjustDistanceMap()
{
double[][] map = new double[clusterPairs.size()][];
//Initialise distance map
for(int i=0;i<map.length;i++)
{
double[] tmp=new double[clusterPairs.size()];
map[i] = tmp;
}
//Retrieve distances from original distance map.
for(int i=0;i<clusterPairs.size();i++)
{
map[i][i]=0;
for(int j=i+1;j<clusterPairs.size();j++)
{
map[i][j] = averageDistance(clusterPairs.get(i),clusterPairs.get(j));
map[j][i] = map[i][j];
}
}
return map;
}
/**
* Returns the average distance for the distance map.
*
* @param first First cluster.
* @param second Second cluster.
* @return Average distance.
*/
private double averageDistance(int[] first,int[] second)
{
double dist = 0;
for(int i=0;i<first.length;i++)
{
for(int j=0;j<second.length;j++)
{
dist = dist+distanceMap[first[i]][second[j]];
}
}
dist = dist/(first.length*second.length);
return dist;
}
//
/**
* Takes a pair of indexes to the clusterPair ArrayList and
* merges the entries.
*
* @param pair A pair of indexes to the clusterPair ArrayList.
*/
private void adjustClusterPairs(int[] pair)
{
int[] first = clusterPairs.get(pair[0]);
int[] second = clusterPairs.get(pair[1]);
int[] tmp = new int[first.length+second.length];
for(int i=0;i<tmp.length;i++)
{
if(i<first.length)
{
tmp[i]=first[i];
}
else
{
tmp[i]=second[i-first.length];
}
}
clusterPairs.remove(pair[0]);
clusterPairs.add(pair[0],tmp);
clusterPairs.remove(pair[1]);
}
/**
* Returns the noClust variable.
*
* @return noClust.
*/
public int getNoClust()
{
return this.noClust;
}
/**
* Sets the number of clusters to use.
*
* @param num The number of clusters.
*/
public void setNoClust(int num){
this.noClust = num;
}
public void setShapeletTransform(ShapeletTransform s){
st=s;
}
public void outputLog(String outfile) throws Exception
{
PrintWriter cout = new PrintWriter( new FileWriter(outfile), true);
for(int i=0;i<clusteredShapelets.size();i++)
{
// System.out.println("******************************************");
Shapelet s = clusteredShapelets.get(i);
cout.println(s.qualityValue+","+s.seriesId+","+s.startPos);
cout.flush();
double[] con = s.getContent();
cout.print(con[0]);
cout.flush();
for(int j=1;j<con.length;j++)
{
cout.print(","+con[j]);
cout.flush();
}
cout.println();
cout.flush();
}
cout.close();
}
@Override
public String globalInfo() {
throw new UnsupportedOperationException("Not supported yet.");
}
}