package net.seninp.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

/**
 * Implements a few useful functions for reading and writing UCR-formatted data: one series per
 * line, the first token being the class label and the remaining tokens the series values.
 *
 * @author psenin
 *
 */
public class UCRUtils {

  private static final String CR = "\n";

  /**
   * Reads a set of labeled series from a file. The first column of each line is treated as the
   * class label, the rest as a real-valued series. Tokens may be separated by commas and/or
   * whitespace; blank lines are skipped. A label that parses as a number is normalized to its
   * integer part (so "1", "1.0" and "1.0000e+00" all map to the class "1"); any other label is
   * used verbatim.
   *
   * @param fileName the input filename.
   * @return the series read, grouped by class label.
   * @throws IOException if the file cannot be opened or read.
   * @throws NumberFormatException if a series value is not a valid number.
   */
  public static Map<String, List<double[]>> readUCRData(String fileName)
      throws IOException, NumberFormatException {

    Map<String, List<double[]>> res = new HashMap<String, List<double[]>>();

    // try-with-resources guarantees the reader is closed even when a line fails to parse
    try (BufferedReader br = new BufferedReader(new FileReader(new File(fileName)))) {
      String line;
      while ((line = br.readLine()) != null) {
        String trimmed = line.trim();
        if (trimmed.length() == 0) {
          continue;
        }
        String[] split = trimmed.split("[\\,\\s]+");

        // numeric labels are collapsed to their integer part, others are kept as they are
        String seriesType = split[0];
        double num = parseValue(seriesType);
        if (!Double.isNaN(num)) {
          seriesType = String.valueOf((int) num);
        }

        double[] series = new double[split.length - 1];
        for (int i = 1; i < split.length; i++) {
          series[i - 1] = Double.parseDouble(split[i].trim());
        }

        List<double[]> classSeries = res.get(seriesType);
        if (classSeries == null) {
          classSeries = new ArrayList<double[]>();
          res.put(seriesType, classSeries);
        }
        classSeries.add(series);
      }
    }
    return res;
  }

  /**
   * Builds a one-line summary of the dataset: the number of classes, the minimal and maximal
   * series length, the minimal and maximal observed value, and the number of series per class.
   * NaN values never satisfy the comparisons and are therefore ignored. If the dataset contains
   * no series (or no values), the corresponding initial sentinel values are reported.
   *
   * @param data the UCR dataset.
   * @param name the dataset name used as a prefix of each section of the summary.
   * @return the statistics string.
   */
  public static String datasetStats(Map<String, List<double[]>> data, String name) {

    int globalMinLength = Integer.MAX_VALUE;
    int globalMaxLength = Integer.MIN_VALUE;

    double globalMinValue = Double.MAX_VALUE;
    // Double.MIN_VALUE is the smallest POSITIVE double, so it must not seed a maximum search:
    // an all-negative dataset would never replace it. The most negative finite double is used.
    double globalMaxValue = -Double.MAX_VALUE;

    for (Entry<String, List<double[]>> e : data.entrySet()) {
      for (double[] dataEntry : e.getValue()) {

        if (dataEntry.length > globalMaxLength) {
          globalMaxLength = dataEntry.length;
        }
        if (dataEntry.length < globalMinLength) {
          globalMinLength = dataEntry.length;
        }

        for (double value : dataEntry) {
          if (value > globalMaxValue) {
            globalMaxValue = value;
          }
          if (value < globalMinValue) {
            globalMinValue = value;
          }
        }
      }
    }

    StringBuilder sb = new StringBuilder();

    sb.append(name).append("classes: ").append(data.size());
    sb.append(", series length min: ").append(globalMinLength);
    sb.append(", max: ").append(globalMaxLength);
    sb.append(", min value: ").append(globalMinValue);
    sb.append(", max value: ").append(globalMaxValue).append(";");
    for (Entry<String, List<double[]>> e : data.entrySet()) {
      sb.append(name).append(" class: ").append(e.getKey());
      sb.append(" series: ").append(e.getValue().size()).append(";");
    }

    // every section ends with ';' -- drop the trailing one
    return sb.delete(sb.length() - 1, sb.length()).toString();
  }

  /**
   * Attempts to parse the string as a double.
   *
   * @param string the string to parse.
   * @return the parsed value, or NaN if the string is not a valid number.
   */
  private static double parseValue(String string) {
    try {
      return Double.parseDouble(string);
    }
    catch (NumberFormatException ignored) {
      // a non-numeric token is an expected input here (a textual class label), not an error
      return Double.NaN;
    }
  }

  /**
   * Saves the dataset in the UCR format: one series per line, the class label first, followed by
   * the comma-separated series values.
   *
   * @param data the dataset.
   * @param file the file to write to; an existing file is overwritten.
   * @throws IOException if the file cannot be opened or written.
   */
  public static void saveData(Map<String, List<double[]>> data, File file) throws IOException {

    // try-with-resources guarantees the writer is flushed and closed even when a write fails
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(file))) {
      for (Entry<String, List<double[]>> classEntry : data.entrySet()) {
        String classLabel = classEntry.getKey();
        for (double[] arr : classEntry.getValue()) {
          // "[1.0, 2.0]" -> "1.0,2.0": strip the brackets and the spaces after the commas
          String arrStr = Arrays.toString(arr).replaceAll("[\\]\\[\\s]+", "");
          bw.write(classLabel + "," + arrStr + CR);
        }
      }
    }
  }

}