/* * This class contains a static method for generating parameterised simulated * time-series datasets. The datasets are designed for shapelet approaches. * This will produce a two-class problem. */ package statistics.simulators; import java.util.Random; import weka.core.Instances; /** * * @author Jon Hills * j.hills@uea.ac.uk */ public class SimulateShapeletDataset { /** * This method creates and returns a set of Instances representing a * simulated two-class time-series problem. * * @param casesPerClass An array of two integers indicating the number of * instances of class 0 and class 1. * @param seriesLength The length of the series. All time series in the * dataset are the same length. * @return Instances representing the time-series dataset. The Instances * returned will be empty if the casesPerClass parameter does not contain * exactly two values. */ public static Instances getShapeletData(int []casesPerClass, int seriesLength) { if( casesPerClass.length != 2) { System.err.println("Incorrect parameters, dataset will not be co" + "rrect."); int[] tmp = {0,0}; casesPerClass = tmp; } ShapeletModel[] shapeMod = new ShapeletModel[2]; populateShapeletArray(shapeMod, seriesLength); DataSimulator sim = new DataSimulator(shapeMod); return sim.generateDataSet(seriesLength, casesPerClass); } /** * This is a support method for getShapeletData * * @param array An array of two ShapeletModel2 models, representing the * simulated shapes inserted into the respective classes. * @param seriesLength The length of the series. */ private static void populateShapeletArray(ShapeletModel [] s, int seriesLength) { double[] p1={1,seriesLength}; double[] p2={1,seriesLength}; //Create two ShapeleModels with different base Shapelets s[0]=new ShapeletModel(p1); ShapeletModel.ShapeType st=s[0].getShapeType(); s[1]=new ShapeletModel(p2); while(st==s[1].getShapeType()) s[1]=new ShapeletModel(p2); } /** * This method converts a set of Instances into two sets of instances, * randomly divided into training and test sets. * * @param orig The full set of Instances. * @param trainSize The number of cases in the training set. * @return An array of Instances of length two. */ public static Instances[] trainTestSplit(Instances orig, int trainSize) { Random r = new Random(); orig.randomize(r); Instances tr = new Instances(orig,0,trainSize); Instances te = new Instances(orig,trainSize,orig.numInstances()-(trainSize)); Instances[] tt = {tr,te}; return tt; } /** * * This creates a set of Instances representing a two-class problem with * a 50/50 balance of classes, 1100 instances of length 500. The set is * then split into training and testing 100/1000. */ public static void main(String[] args) { int[] casesPerClass = {550,550}; int seriesLength = 500; int trainSize = 100; Instances data = SimulateShapeletDataset.getShapeletData(casesPerClass, seriesLength); Instances [] trainTest = SimulateShapeletDataset.trainTestSplit(data, trainSize); } }