package net.seninp.jmotif.sax.bitmap;
import java.awt.Color;
import java.awt.Dimension;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.beust.jcommander.JCommander;
import net.seninp.jmotif.sax.SAXException;
import net.seninp.jmotif.sax.SAXProcessor;
import net.seninp.util.HeatChart;
import net.seninp.util.UCRUtils;
/**
 * Command-line converter that turns every time series of a UCR-formatted dataset into a vector of
 * values that represent occurrence frequencies of n-grams (shingles). The vectors are written to
 * a comma-separated output file and, when a bitmap file name is configured, also rendered as a
 * heat map with all-zero columns removed.
 *
 * @author psenin
 */
public class UCRdataBitmapPrinter {

  // formatting constants
  //
  private static final String QUOTE = "\'";
  private static final String COMMA = ",";
  private static final String CR = "\n";

  // classes needed for the workflow
  //
  private static final SAXProcessor sp = new SAXProcessor();

  // logging stuff
  //
  private static final Logger LOGGER = LoggerFactory.getLogger(UCRdataBitmapPrinter.class);

  /**
   * The CLI entry point. Prints the usage when called without arguments; otherwise reads the input
   * dataset, shingles every series, writes the frequency table and optionally the heat map.
   *
   * @param args the command line arguments, parsed by JCommander into {@link BitmapParameters}.
   * @throws SAXException if the series-to-shingles conversion fails.
   * @throws IOException if reading the input or writing any of the outputs fails.
   */
  public static void main(String[] args) throws SAXException, IOException {

    BitmapParameters params = new BitmapParameters();
    JCommander jct = new JCommander(params, args);

    if (0 == args.length) {
      jct.usage();
      return;
    }

    // get params printed
    //
    LOGGER.info("{}", describeParameters());

    // read the file
    //
    Map<String, List<double[]>> data = UCRUtils.readUCRData(BitmapParameters.IN_FILE);
    LOGGER.info("read from {}", BitmapParameters.IN_FILE);
    LOGGER.info("{}", UCRUtils.datasetStats(data, ""));

    // resulting shingle frequencies and the keys array
    //
    Map<String, List<Integer[]>> res = new HashMap<String, List<Integer[]>>();
    TreeSet<String> shinglesSet = shingleDataset(data, res);
    if (null == shinglesSet) {
      // without a single series there are no columns to report
      LOGGER.warn("no time series found in {}, nothing to convert", BitmapParameters.IN_FILE);
      return;
    }

    // produce the output
    //
    LOGGER.info("writing shingled output...");
    writeShingles(shinglesSet, res);
    LOGGER.info("done!");

    // produce the bitmap
    //
    if (null == BitmapParameters.BITMAP_FILE) {
      // NOTE(review): the process exits with status 10 although nothing failed; the status is
      // kept as is because external scripts may rely on it -- confirm before changing.
      System.exit(10);
    }
    produceBitmap(shinglesSet, res);
  }

  /**
   * Builds the human-readable summary of the effective run parameters.
   *
   * @return the multi-line parameters report.
   */
  private static String describeParameters() {
    StringBuilder sb = new StringBuilder(1024);
    sb.append("SAXBitmap CLI converter v.1").append(CR);
    sb.append("parameters:").append(CR);
    sb.append(" input file: ").append(BitmapParameters.IN_FILE).append(CR);
    sb.append(" output file: ").append(BitmapParameters.OUT_FILE).append(CR);
    sb.append(" SAX sliding window size: ").append(BitmapParameters.SAX_WINDOW_SIZE)
        .append(CR);
    sb.append(" SAX PAA size: ").append(BitmapParameters.SAX_PAA_SIZE).append(CR);
    sb.append(" SAX alphabet size: ").append(BitmapParameters.SAX_ALPHABET_SIZE)
        .append(CR);
    sb.append(" SAX numerosity reduction: ").append(BitmapParameters.SAX_NR_STRATEGY)
        .append(CR);
    sb.append(" SAX normalization threshold: ").append(BitmapParameters.SAX_NORM_THRESHOLD)
        .append(CR);
    sb.append(" Bitmap shingle size: ").append(BitmapParameters.SHINGLE_SIZE).append(CR);
    if (null == BitmapParameters.BITMAP_FILE) {
      // the shingle size does not belong on this line; it is already reported above
      sb.append(" No bitmap will be produced").append(CR);
    }
    else {
      sb.append(" Bitmap filename specified: ").append(BitmapParameters.BITMAP_FILE)
          .append(CR);
    }
    sb.append(CR);
    return sb.toString();
  }

  /**
   * Converts every series of the dataset into an array of shingle counts and groups the arrays by
   * the class label. The columns (and their order) are the sorted shingles of the first processed
   * series; a shingle absent from a later series is counted as zero.
   *
   * @param data the dataset, series grouped by their class label.
   * @param res the map which receives the count arrays, grouped by the class label.
   * @return the sorted set of shingles defining the columns, or null if the dataset has no series.
   * @throws SAXException if the series-to-shingles conversion fails.
   */
  private static TreeSet<String> shingleDataset(Map<String, List<double[]>> data,
      Map<String, List<Integer[]>> res) throws SAXException {

    TreeSet<String> shinglesSet = null;

    for (Entry<String, List<double[]>> e : data.entrySet()) {
      String classLabel = e.getKey();
      for (double[] series : e.getValue()) {

        Map<String, Integer> shingledData = sp.ts2Shingles(series,
            BitmapParameters.SAX_WINDOW_SIZE, BitmapParameters.SAX_PAA_SIZE,
            BitmapParameters.SAX_ALPHABET_SIZE, BitmapParameters.SAX_NR_STRATEGY,
            BitmapParameters.SAX_NORM_THRESHOLD, BitmapParameters.SHINGLE_SIZE);

        if (null == shinglesSet) {
          shinglesSet = new TreeSet<String>(shingledData.keySet());
        }

        Integer[] counts = new Integer[shinglesSet.size()];
        int column = 0;
        for (String shingle : shinglesSet) {
          Integer count = shingledData.get(shingle);
          // a missing key must not leak a null into the output or into the zero-column check
          counts[column] = (null == count) ? Integer.valueOf(0) : count;
          column++;
        }

        List<Integer[]> classRows = res.get(classLabel);
        if (null == classRows) {
          classRows = new ArrayList<Integer[]>();
          res.put(classLabel, classRows);
        }
        classRows.add(counts);
      }
    }

    return shinglesSet;
  }

  /**
   * Writes the header line and one line of counts per series into the configured output file.
   *
   * @param shinglesSet the sorted shingles, one per column.
   * @param res the count arrays grouped by the class label.
   * @throws IOException if the output file can not be written.
   */
  private static void writeShingles(TreeSet<String> shinglesSet, Map<String, List<Integer[]>> res)
      throws IOException {

    // every column is prefixed with the separator, so an empty shingle set needs no trimming
    StringBuilder header = new StringBuilder("\'class_label\'");
    for (String shingle : shinglesSet) {
      header.append(COMMA).append(QUOTE).append(shingle).append(QUOTE);
    }

    BufferedWriter bw = new BufferedWriter(new FileWriter(new File(BitmapParameters.OUT_FILE)));
    try {
      bw.write(header.toString());
      bw.write(CR);
      for (Entry<String, List<Integer[]>> e : res.entrySet()) {
        String classLabel = e.getKey();
        for (Integer[] counts : e.getValue()) {
          StringBuilder line = new StringBuilder();
          line.append("\'").append(classLabel).append("\'");
          for (Integer count : counts) {
            line.append(COMMA).append(count);
          }
          line.append(CR);
          bw.write(line.toString());
        }
      }
    }
    finally {
      // release the file handle even when a write fails
      bw.close();
    }
  }

  /**
   * Renders the shingle counts as a heat map (a row per series, a column per shingle which is not
   * zero in every series) and saves it into the configured bitmap file.
   *
   * @param shinglesSet the sorted shingles, one per column.
   * @param res the count arrays grouped by the class label.
   * @throws IOException if the bitmap file can not be written.
   */
  private static void produceBitmap(TreeSet<String> shinglesSet, Map<String, List<Integer[]>> res)
      throws IOException {

    LOGGER.info("producing bitmap for the dataset");

    // remove the columns which are all zeros, build an index of those: start with every column
    // and drop each one as soon as a non-zero count is seen in it
    //
    HashSet<Integer> zeroIndices = new HashSet<Integer>(shinglesSet.size());
    for (int i = 0; i < shinglesSet.size(); i++) {
      zeroIndices.add(i);
    }

    // count the number of rows needed and refine zeroed columns
    //
    int rows = 0;
    for (Entry<String, List<Integer[]>> e : res.entrySet()) {
      rows = rows + e.getValue().size();
      for (Integer[] counts : e.getValue()) {
        for (int i = 0; i < counts.length; i++) {
          if (0 != counts[i].intValue()) {
            zeroIndices.remove(i);
          }
        }
      }
    }

    ArrayList<String> prunedShingles = new ArrayList<String>();
    int column = 0;
    for (String shingle : shinglesSet) {
      if (zeroIndices.contains(column)) {
        prunedShingles.add(shingle);
      }
      column++;
    }
    LOGGER.info("dropped zero-column shingles: {}",
        Arrays.toString(prunedShingles.toArray(new String[prunedShingles.size()])));

    // future heatmap datastructure and the Y labels data
    //
    double[][] heatmapData = new double[rows][];
    ArrayList<String> yLabels = new ArrayList<String>(rows);

    // and fill the rows
    //
    int currRow = 0;
    for (Entry<String, List<Integer[]>> e : res.entrySet()) {
      int currArrayIdx = 0;
      for (Integer[] counts : e.getValue()) {
        yLabels.add(e.getKey() + "_" + currArrayIdx);
        heatmapData[currRow] = toDoubleArray(counts, zeroIndices);
        currRow++;
        currArrayIdx++;
      }
    }

    // makeup a heatmap
    //
    HeatChart chart = new HeatChart(heatmapData);
    chart.setAxisColour(Color.WHITE);
    chart.setAxisThickness(2);
    chart.setYValues(yLabels.toArray(new String[yLabels.size()]));
    chart.setShowYAxisValues(true);
    chart.setXValues(
        toShingleLabelsArray(shinglesSet.toArray(new String[shinglesSet.size()]), zeroIndices));
    chart.setShowXAxisValues(true);
    chart.setXValuesHorizontal(false);
    chart.setTitle(BitmapParameters.IN_FILE);
    chart.setCellSize(new Dimension(10, 10));
    chart.saveToFile(new File(BitmapParameters.BITMAP_FILE));
  }

  /**
   * Counts the positions of an array of the given length that are not listed in the skip index.
   *
   * @param length the array length.
   * @param skipIndex the positions to skip.
   * @return the number of positions that are kept.
   */
  private static int keptCount(int length, HashSet<Integer> skipIndex) {
    int kept = 0;
    for (int i = 0; i < length; i++) {
      if (!skipIndex.contains(i)) {
        kept++;
      }
    }
    return kept;
  }

  /**
   * Copies the shingle labels skipping specified indices.
   *
   * @param array the input labels.
   * @param zeroIndices the positions to skip.
   * @return the labels at the positions that are not skipped, in the original order.
   */
  private static String[] toShingleLabelsArray(String[] array, HashSet<Integer> zeroIndices) {
    // sized by counting, so skip positions outside of the array can not corrupt the length
    String[] res = new String[keptCount(array.length, zeroIndices)];
    int next = 0;
    for (int i = 0; i < array.length; i++) {
      if (!zeroIndices.contains(i)) {
        res[next] = array[i];
        next++;
      }
    }
    return res;
  }

  /**
   * Converts an array into array of doubles skipping specified indices.
   *
   * @param intArray the input array.
   * @param skipIndex skip index list.
   * @return array of doubles.
   */
  private static double[] toDoubleArray(Integer[] intArray, HashSet<Integer> skipIndex) {
    // sized by counting, so skip positions outside of the array can not corrupt the length
    double[] res = new double[keptCount(intArray.length, skipIndex)];
    int next = 0;
    for (int i = 0; i < intArray.length; i++) {
      if (!skipIndex.contains(i)) {
        res[next] = intArray[i].doubleValue();
        next++;
      }
    }
    return res;
  }

}