package com.ckd.ts;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;

/**
 * Extracts K continuous monthly values from a time series.
 *
 * Usage:- $JAVA_HOME/bin/java -cp TimeSeriesExtractor.jar com.ckd.ts.TimeSeriesExtractor [input directory] [output directory] [K]
 * <ul>
 * <li>Input directory path is optional; the default value is "data".</li>
 * <li>Output directory path is optional; the default value is "result".</li>
 * <li>K is optional; the default value is 10. It must be a positive integer.</li>
 * </ul>
 *
 * The input directory is expected to hold one file per key, with the filename as the key.
 * Each line has the form <code>timestamp,value</code>, where the timestamp uses the pattern
 * <code>yyyy-MM-dd HH:mm:ss</code>.
 *
 * For every key that yields K values, an output file with the same name is written,
 * containing a single line of K values separated by commas.
 *
 * The readings are not sorted by this class: the first and last entries of a series are
 * treated as its earliest and latest readings, so input files should list readings in
 * ascending timestamp order.
 *
 * @author biplap
 */
public class TimeSeriesExtractor {
    public static String DATA_DIR = "data";
    public static String RESULT_DIR = "result";
    public static int K = 10;

    /**
     * Command line entry point; see the class documentation for the arguments.
     *
     * @param args optional [input directory] [output directory] [K]
     */
    public static void main(String[] args) {
        try {
            if (args.length > 2) {
                K = Integer.parseInt(args[2]);
            }
            if (args.length > 1) {
                RESULT_DIR = args[1];
            }
            if (args.length > 0) {
                DATA_DIR = args[0];
            }
            if (K < 1) {
                // A non-positive K has no meaningful window and would produce empty output lines.
                System.err.println("K must be a positive integer but was " + K);
                return;
            }
            if (RESULT_DIR.length() == 0) {
                // An empty argument means "current directory"; charAt(len-1) would throw here.
                RESULT_DIR = "./";
            } else if (!RESULT_DIR.endsWith("/")) {
                RESULT_DIR += "/";
            }
            System.out.println("Data dir:- " + DATA_DIR);
            System.out.println("Result dir:- " + RESULT_DIR);

            File dataDir = new File(DATA_DIR);
            // listFiles() returns null (not an empty array) when the path is not a readable directory.
            File[] files = dataDir.listFiles();
            if (files == null) {
                System.err.println("Data directory does not exist or cannot be read: " + DATA_DIR);
                return;
            }

            File resultDir = new File(RESULT_DIR);
            // mkdirs() also creates missing parent directories, unlike mkdir().
            if (!resultDir.exists() && !resultDir.mkdirs()) {
                System.err.println("Unable to create result directory: " + RESULT_DIR);
                return;
            }

            TimeSeriesExtractor tsExtractor = new TimeSeriesExtractor();
            for (File input : files) {
                if (!input.isFile()) {
                    // Sub-directories cannot be opened as series files.
                    continue;
                }
                try {
                    ArrayList<TimeVal> timeSeries = tsExtractor.extractTimeSeriesFromFile(input);
                    ArrayList<TimeVal> kTimeSeries = tsExtractor.extractKTimeValue(timeSeries, K);
                    if (kTimeSeries.size() < K) {
                        continue;
                    }
                    tsExtractor.writeTimeSeries(kTimeSeries, RESULT_DIR + input.getName());
                } catch (Exception e) {
                    // One malformed or unreadable file must not abort the remaining keys.
                    System.err.println("Skipping " + input.getPath() + ": " + e);
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads a time series from a file, one reading per line in the form
     * <code>timestamp,value</code>. Lines with fewer than two comma separated fields are
     * skipped; fields after the second one are ignored. Readings are returned in file order.
     *
     * @param file the file to read
     * @return the readings found in the file, possibly empty
     * @throws IOException if the file cannot be opened or read
     * @throws ParseException if a timestamp does not match <code>yyyy-MM-dd HH:mm:ss</code>
     * @throws NumberFormatException if a value cannot be parsed as a float
     */
    public ArrayList<TimeVal> extractTimeSeriesFromFile(File file) throws IOException, ParseException {
        ArrayList<TimeVal> timeSeries = new ArrayList<TimeVal>();
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        BufferedReader br = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = line.split(",");
                if (fields.length < 2) {
                    continue;
                }
                // SimpleDateFormat does not skip leading whitespace, so trim the timestamp first.
                Date date = df.parse(fields[0].trim());
                float val = Float.parseFloat(fields[1]);
                TimeVal tv = new TimeVal();
                tv.setDate(date);
                tv.setValue(val);
                timeSeries.add(tv);
            }
        } finally {
            // Close the reader even when a line fails to parse.
            br.close();
        }
        return timeSeries;
    }

    /**
     * Extracts K continuous monthly values from a given time series.
     *
     * A window of length K months is anchored at each reading in turn, as long as the window
     * ends no later than the last reading, and the anchor whose window contains the most
     * readings is chosen (the first reading is used when no window fits). The first output
     * value is the chosen reading itself; each following value is taken one month further on
     * and is filled with the most recent reading at or before that point in time.
     *
     * The series is expected to be in ascending timestamp order.
     *
     * @param timeSeries the readings to search
     * @param k the number of monthly values to extract
     * @return a list of k values, or an empty list when k is not positive or the series
     *         holds fewer than k readings
     */
    public ArrayList<TimeVal> extractKTimeValue(ArrayList<TimeVal> timeSeries, int k) {
        ArrayList<TimeVal> kTimeSeries = new ArrayList<TimeVal>();
        // k <= 0 previously slipped through the size check and ran off the end of the list.
        if (k <= 0 || k > timeSeries.size()) {
            return kTimeSeries;
        }

        int startIndex = findDensestWindowStart(timeSeries, k);
        TimeVal start = timeSeries.get(startIndex);
        Date startDate = start.getDate();

        TimeVal first = new TimeVal();
        first.setDate(startDate);
        first.setValue(start.getValue());
        kTimeSeries.add(first);

        for (int month = 1; month < k; month++) {
            // Offset from the start date each time: adding one month repeatedly to the same
            // calendar drifts once the day of month has been clamped (Jan 31 -> Feb 28 -> Mar 28).
            Date current = addMonths(startDate, month);
            TimeVal latest = latestReadingAtOrBefore(timeSeries, current);
            if (latest != null) {
                TimeVal tv = new TimeVal();
                tv.setDate(current);
                tv.setValue(latest.getValue());
                kTimeSeries.add(tv);
            }
        }
        return kTimeSeries;
    }

    /**
     * Returns the index of the reading whose k month window holds the most readings.
     * Ties keep the earliest index; 0 is returned when no window fits before the last reading.
     */
    private static int findDensestWindowStart(ArrayList<TimeVal> timeSeries, int k) {
        Date lastDate = timeSeries.get(timeSeries.size() - 1).getDate();
        int bestIndex = 0;
        int bestCount = 0;
        for (int start = 0; start < timeSeries.size(); start++) {
            Date windowStart = timeSeries.get(start).getDate();
            Date windowEnd = addMonths(windowStart, k);
            if (windowStart.after(lastDate) || windowEnd.after(lastDate)) {
                // The window reaches past the last reading, so the search ends here.
                break;
            }
            int count = 0;
            for (TimeVal reading : timeSeries) {
                Date date = reading.getDate();
                // Both window bounds are inclusive.
                if (!date.before(windowStart) && !date.after(windowEnd)) {
                    count++;
                }
            }
            if (count > bestCount) {
                bestCount = count;
                bestIndex = start;
            }
        }
        return bestIndex;
    }

    /** Returns a new date that lies the given number of calendar months after the given date. */
    private static Date addMonths(Date date, int months) {
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(date);
        calendar.add(Calendar.MONTH, months);
        return calendar.getTime();
    }

    /**
     * Searches from the end of the series for the first reading dated at or before the given
     * time; returns null when there is none.
     */
    private static TimeVal latestReadingAtOrBefore(ArrayList<TimeVal> timeSeries, Date when) {
        for (int j = timeSeries.size() - 1; j >= 0; j--) {
            TimeVal candidate = timeSeries.get(j);
            if (when.compareTo(candidate.getDate()) >= 0) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Writes the values of the given time series to a file as a single comma separated line
     * terminated by a newline. An empty series produces a line with no values.
     *
     * @param timeSeries the values to write
     * @param file path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    public void writeTimeSeries(ArrayList<TimeVal> timeSeries, String file) throws IOException {
        // Build the line before opening the file so a failure here leaves no stray file behind.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < timeSeries.size(); i++) {
            if (i > 0) {
                sb.append(",");
            }
            sb.append(String.valueOf(timeSeries.get(i).getValue()));
        }
        sb.append("\n");

        BufferedWriter bw = new BufferedWriter(new FileWriter(file));
        try {
            bw.write(sb.toString());
        } finally {
            // Close the writer even when the write fails.
            bw.close();
        }
    }
}