package edu.hawaii.jmotif.experiment.cbf; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.ConsoleHandler; import java.util.logging.Formatter; import java.util.logging.Handler; import java.util.logging.Logger; import org.hackystat.utilities.logger.HackystatLogger; import cc.mallet.util.Randoms; import edu.hawaii.jmotif.sax.SAXFactory; import edu.hawaii.jmotif.sax.alphabet.Alphabet; import edu.hawaii.jmotif.sax.alphabet.NormalAlphabet; import edu.hawaii.jmotif.text.SAXCollectionStrategy; import edu.hawaii.jmotif.text.TextUtils; import edu.hawaii.jmotif.text.WordBag; import edu.hawaii.jmotif.timeseries.TSException; import edu.hawaii.jmotif.timeseries.TSUtils; import edu.hawaii.jmotif.util.BriefFormatter; /** * Helper-runner for CBF test. * * @author psenin * */ public class CBFVectors { private static final int[] SAMPLING_POINTS = { 10, 50, 125, 250, 500, 750, 1000, 10000, 25000, 50000, 75000, 100000, 150000, 200000, 500000, 1000000 }; private static final int REPEATS = 2; private static int WINDOW_SIZE = 60; private static int PAA_SIZE = 7; private static int ALPHABET_SIZE = 7; private static SAXCollectionStrategy strategy = SAXCollectionStrategy.CLASSIC; private static Logger consoleLogger; private static String LOGGING_LEVEL = "FINE"; private static Randoms randoms; private static final Alphabet a = new NormalAlphabet(); private static final String COMMA = ","; private static final String CR = "\n"; static { randoms = new Randoms(); consoleLogger = HackystatLogger.getLogger("debug.console", "preseries"); consoleLogger.setUseParentHandlers(false); for (Handler handler : consoleLogger.getHandlers()) { consoleLogger.removeHandler(handler); } ConsoleHandler handler = new ConsoleHandler(); Formatter formatter = new BriefFormatter(); handler.setFormatter(formatter); consoleLogger.addHandler(handler); HackystatLogger.setLoggingLevel(consoleLogger, LOGGING_LEVEL); } /** * @param args * @throws TSException * @throws IndexOutOfBoundsException * @throws IOException */ public static void main(String[] args) throws IndexOutOfBoundsException, TSException, IOException { // WINDOW_SIZE = Integer.valueOf(args[0]); // PAA_SIZE = Integer.valueOf(args[1]); // ALPHABET_SIZE = Integer.valueOf(args[2]); // // if ("exact".equalsIgnoreCase(args[3])) { // strategy = SAXCollectionStrategy.EXACT; // } // else if ("classic".equalsIgnoreCase(args[3])) { // strategy = SAXCollectionStrategy.CLASSIC; // } String msg = "W: " + WINDOW_SIZE + ", PAA: " + PAA_SIZE + ", A: " + ALPHABET_SIZE + ", S: " + getStrategyPrefix(strategy); consoleLogger.info(msg); System.out.println("# " + msg); for (int rep = 0; rep < REPEATS; rep++) { for (int currentStep : SAMPLING_POINTS) { // making training and test collections Map<String, List<double[]>> trainData = getDataSet(currentStep); // building vectors // List<WordBag> bags = labeledSeries2WordBags(trainData, PAA_SIZE, ALPHABET_SIZE, WINDOW_SIZE, strategy); HashMap<String, HashMap<String, Double>> tfidf = TextUtils.computeTFIDF(bags); tfidf = TextUtils.normalizeToUnitVectors(tfidf); int totalWords = tfidf.get("0").size(); int cylinderWords = countNonZero(tfidf.get("0")); int bellWords = countNonZero(tfidf.get("1")); int funnelWords = countNonZero(tfidf.get("2")); // the words nums System.out.print(currentStep + COMMA + totalWords + COMMA + cylinderWords + COMMA + bellWords + COMMA + funnelWords + CR); // if (currentStep == 1000) { // BufferedWriter bw = new BufferedWriter(new FileWriter("series.csv")); // for (Entry<String, List<double[]>> set : trainData.entrySet()) { // for (double[] series : set.getValue()) { // bw.write(set.getKey() + ","); // bw.write(Arrays.toString(series).toString().replace("[", "").replace("]", "") + CR); // } // } // bw.close(); // // bw = new BufferedWriter(new FileWriter("table.csv")); // bw.write(TextUtils.tfidfToTable(tfidf)); // bw.close(); // pairwise words int areaBell = 0; int areaFunnel = 0; int areaCylinder = 0; int nCB = 0; int nCF = 0; int nBF = 0; int nCBF = 0; for (String w : tfidf.get("0").keySet()) { // cylinder if (tfidf.get("0").get(w) > 0) { areaCylinder++; } // bell if (tfidf.get("1").get(w) > 0) { areaBell++; } // funnel if (tfidf.get("2").get(w) > 0) { areaFunnel++; } // pairs if (tfidf.get("0").get(w) > 0 && tfidf.get("1").get(w) > 0) { nCB++; } if (tfidf.get("1").get(w) > 0 && tfidf.get("2").get(w) > 0) { nBF++; } if (tfidf.get("0").get(w) > 0 && tfidf.get("2").get(w) > 0) { nCF++; } // all together if (tfidf.get("0").get(w) > 0 && tfidf.get("1").get(w) > 0 && tfidf.get("2").get(w) > 0) { nCBF++; } } System.out.println("areaBell=" + Integer.valueOf(areaBell - nCB - nBF - nCBF)); System.out.println("areaFunnel=" + Integer.valueOf(areaFunnel - nCF - nBF - nCBF)); System.out.println("areaCylinder=" + Integer.valueOf(areaCylinder - nCF - nCB - nCBF)); System.out.println("nCB=" + nCB); System.out.println("nCF=" + nCF); System.out.println("nBF=" + nBF); System.out.println("nCBF=" + nCBF); System.out.print(">" + currentStep + COMMA + totalWords + COMMA + cylinderWords + COMMA + bellWords + COMMA + funnelWords + COMMA + String.valueOf(Integer.valueOf(areaBell - nCB - nBF - nCBF)) + COMMA + String.valueOf(Integer.valueOf(areaFunnel - nCF - nBF - nCBF)) + COMMA + String.valueOf(Integer.valueOf(areaCylinder - nCF - nCB - nCBF)) + CR); // if (currentStep == 10) { // for (Entry<String, HashMap<String, Double>> e : tfidf.entrySet()) { // String className = e.getKey(); // ArrayList<Entry<String, Double>> values = new ArrayList<Entry<String, Double>>(); // values.addAll(e.getValue().entrySet()); // Collections.sort(values, new TfIdfEntryComparator()); // System.out.print("Class key: " + className + CR); // for (int i = 0; i < 10; i++) { // String pattern = values.get(i).getKey(); // Double weight = values.get(i).getValue(); // System.out.println("\"" + pattern + "\", " + weight); } // } } } private static int countNonZero(HashMap<String, Double> hashMap) { int res = 0; for (Entry<String, Double> e : hashMap.entrySet()) { if (e.getValue() > 0) { res++; } } return res; } private static List<WordBag> labeledSeries2WordBags(Map<String, List<double[]>> data, int paaSize, int alphabetSize, int windowSize, SAXCollectionStrategy strategy) throws IndexOutOfBoundsException, TSException { // make a map of resulting bags Map<String, WordBag> preRes = new HashMap<String, WordBag>(); // process series one by one building word bags for (Entry<String, List<double[]>> e : data.entrySet()) { String classLabel = e.getKey(); WordBag bag = new WordBag(classLabel); for (double[] series : e.getValue()) { // // series to words String oldStr = ""; for (int i = 0; i <= series.length - windowSize; i++) { double[] paa = TSUtils.optimizedPaa( TSUtils.zNormalize(TSUtils.subseries(series, i, windowSize)), paaSize); char[] sax = TSUtils.ts2String(paa, a.getCuts(alphabetSize)); if (SAXCollectionStrategy.CLASSIC.equals(strategy)) { if (oldStr.length() > 0 && SAXFactory.strDistance(sax, oldStr.toCharArray()) == 0) { continue; } } else if (SAXCollectionStrategy.EXACT.equals(strategy)) { if (oldStr.equalsIgnoreCase(String.valueOf(sax))) { continue; } } oldStr = String.valueOf(sax); bag.addWord(String.valueOf(sax)); } // // } preRes.put(classLabel, bag); System.out.println(classLabel + ", " + bag.getWordSet().size()); } List<WordBag> res = new ArrayList<WordBag>(); res.addAll(preRes.values()); return res; } private static Map<String, List<double[]>> damage(Map<String, List<double[]>> trainData, double d, double sd) { Map<String, List<double[]>> res = new HashMap<String, List<double[]>>(); for (Entry<String, List<double[]>> referenceSet : trainData.entrySet()) { List<double[]> newData = new ArrayList<double[]>(); int seriesCounter = 0; for (double[] referenceSeries : referenceSet.getValue()) { // if (seriesCounter > 3 && seriesCounter < 5) { // System.out.println(referenceSet.getKey() + " = " + Arrays.toString(referenceSeries)); // } int noiseStart = Double.valueOf(Math.floor(randoms.nextUniform(0D, 128D * (1 - d)))) .intValue(); int noiseEnd = noiseStart + Double.valueOf(128D * d).intValue(); for (int i = noiseStart; i < noiseEnd; i++) { referenceSeries[i] = randoms.nextGaussian(0, sd); } // if (seriesCounter > 3 && seriesCounter < 5) { // System.out.println(referenceSet.getKey() + "<-" + Arrays.toString(referenceSeries)); // } newData.add(referenceSeries); seriesCounter++; } res.put(referenceSet.getKey(), newData); } return res; } private static void saveData(String fileName, Map<String, List<double[]>> data) throws IOException { BufferedWriter wr = new BufferedWriter(new FileWriter(new File(fileName))); for (Entry<String, List<double[]>> cClass : data.entrySet()) { for (double[] series : cClass.getValue()) { wr.write(String.valueOf(cClass.getKey()) + " " + Arrays.toString(series).replace("[", "").replace(", ", " ").replace("]", "\n")); } } wr.close(); } private static Map<String, List<double[]>> getDataSet(int size) { Map<String, List<double[]>> res = new HashMap<String, List<double[]>>(); // ticks - i.e. time int[] t = new int[128]; for (int i = 0; i < 128; i++) { t[i] = i; } // cylinder sample List<double[]> cylinders = new ArrayList<double[]>(); for (int i = 0; i < size; i++) { cylinders.add(CBFGenerator.cylinder(t)); } res.put("0", cylinders); // bell sample List<double[]> bells = new ArrayList<double[]>(); for (int i = 0; i < size; i++) { bells.add(CBFGenerator.bell(t)); } res.put("1", bells); // funnel sample List<double[]> funnels = new ArrayList<double[]>(); for (int i = 0; i < size; i++) { funnels.add(CBFGenerator.funnel(t)); } res.put("2", funnels); return res; } protected static String getStrategyPrefix(SAXCollectionStrategy strategy) { String strategyP = "noreduction"; if (SAXCollectionStrategy.EXACT.equals(strategy)) { strategyP = "exact"; } if (SAXCollectionStrategy.CLASSIC.equals(strategy)) { strategy = SAXCollectionStrategy.CLASSIC; strategyP = "classic"; } return strategyP; } }