/*
* The HRT Project.
* This work is licensed under the
* Creative Commons Attribution-NonCommercial 3.0 Unported License.
* To view a copy of this license,
* visit http://creativecommons.org/licenses/by-nc/3.0/
* or send a letter to
* Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA.
*/
package org.hrva.capture;
import java.io.*;
import java.net.MalformedURLException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.text.MessageFormat;
import java.util.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.kohsuke.args4j.Argument;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
/**
* Reformats the tail of a GPS Log file to extract essential fields.
*
* <p>
* There are three formats.
* </p>
*
* <code>
* 07:04:42 02/15 V.1.2233 H.0.0 MT_LOCATION Lat/Lon:370620935/-763413842
* [Valid] Adher:-1 [Valid] Odom:2668 [Valid] DGPS:On FOM:2
* </code>
* <br/>
* <code>
* 07:04:42 02/15 V.1.3515 H.0.0 MT_TIMEPOINTCROSSING Time:07:04:37 Dwell:22
* Rte:65 Dir:2 TP:352 Stop:69 Svc:1 Blk:203 Lat/Lon:370425333/-764286136
* [Valid] Adher:-1 [Valid] Odom:1712 [Valid] DGPS:On FOM:2
* </code>
* <br/>
* <code>
* 07:04:42 02/15 V.1.2236 H.0.0 MT_TIMEPOINTCROSSING Time:07:04:36 Arrival
* Rte:4 Dir:2 TP:329 Stop:45 Svc:1 Blk:221 Lat/Lon:370315618/-763461352 [Valid]
* Adher:2 [Valid] Odom:1924 [Valid] DGPS:On FOM:2
* </code>
*
* <p>
* The output format is CSV
* </p>
*
* <code>
* Date,Time,Vehicle,Lat,Lon,Location Valid/Invalid,Adherence,Adherence
* Valid/Invalid,Route,Direction,Stop
* </code>
*
* <p>Typical use case</p>
*
* <code><pre>
* File target = new File(extract_filename);
* Writer wtr = new FileWriter(target, true);
* File source = new File(filename);
* Reader rdr = new FileReader(source);
* reformat(rdr, wtr);
* rdr.close();
* wtr.close();
* </pre></code>
*
* <p>At the command line, it might look like this.</p>
* <code><pre>
* java -cp LogCapture/dist/LogCapture.jar org.hrva.capture.Reformat -o extract.csv extract.txt
* </pre></code>
*
*
* @author slott
*/
public class Reformat {

    /** Properties for this application. */
    Properties global;

    /** Output file name. */
    @Option(name = "-o", usage = "Output file name.")
    String extract_filename = "hrtrtf.csv";

    /** Verbose debugging. */
    @Option(name = "-v", usage = "Verbose logging")
    boolean verbose = false;

    /** Command-line Arguments. */
    @Argument
    List<String> arguments = new ArrayList<String>();

    /** CSV Headings. */
    String[] headings = {
        "Date", "Time", "Vehicle", "Lat", "Lon", "Location Valid/Invalid",
        "Adherence", "Adherence Valid/Invalid", "Route", "Direction", "Stop"
    };

    /**
     * Supplies the year that {@link #get_date} adds to the year-less
     * dates of the log. Might be overridden for testability purposes.
     */
    Calendar now;

    /** Is a CSV header row required? Only if the file is new. */
    boolean include_header = true;

    /** Logger. */
    final Log logger = LogFactory.getLog(Reformat.class);

    /** Format of the time-of-day word that starts each log line. */
    final SimpleDateFormat time_fmt = new SimpleDateFormat("HH:mm:ss");

    /**
     * Format of the date word as it appears in the log.
     * No longer used by {@link #get_date}; retained so existing
     * references to this field keep compiling.
     */
    final SimpleDateFormat input_date_fmt = new SimpleDateFormat("MM/dd");

    /** Format of the date written to the CSV output. */
    final SimpleDateFormat output_date_fmt = new SimpleDateFormat("yyyy-MM-dd");

    /**
     * Format used to parse a log date once the year has been put in
     * front of it. Made non-lenient by the constructor.
     */
    private final SimpleDateFormat dated_input_fmt = new SimpleDateFormat("yyyy/MM/dd");

    /**
     * This row is invalid.
     */
    class InvalidRow extends Exception {

        public InvalidRow() {
            super();
        }

        public InvalidRow(String message) {
            super(message);
        }
    }

    /**
     * Command-line program to reformat log extract file(s) into CSV.
     * <p>All this does is read properties and invoke run_main</p>
     *
     * @param args arguments
     */
    public static void main(String[] args) {
        Log log = LogFactory.getLog(Reformat.class);
        File prop_file = new File("hrtail.properties");
        Properties config = new Properties();
        InputStream prop_stream = null;
        try {
            prop_stream = new FileInputStream(prop_file);
            config.load(prop_stream);
        } catch (IOException ex) {
            log.warn( "Can't find "+prop_file.getName(), ex );
            try {
                log.debug(prop_file.getCanonicalPath());
            } catch (IOException ex1) {
                // The path is only a debugging aid; the warning above
                // has already named the file.
                log.debug("Can't resolve the path of " + prop_file.getName(), ex1);
            }
        } finally {
            if (prop_stream != null) {
                try {
                    prop_stream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done about a failed close
                    // of a stream that was only read.
                }
            }
        }
        Reformat fmt = new Reformat(config);
        try {
            fmt.run_main(args);
        } catch (CmdLineException ex1) {
            log.fatal("Invalid Options", ex1);
        } catch (MalformedURLException ex2) {
            log.fatal("Invalid CouchDB URL", ex2);
        } catch (IOException ex3) {
            log.fatal(ex3);
        }
    }

    /**
     * Build the Reformat instance.
     *
     * @param global The hrtail.properties file
     */
    public Reformat(Properties global) {
        super();
        this.global = global;
        // Might be overridden or updated for testability purposes.
        now = Calendar.getInstance();
        // Impossible dates such as 02/30 must be rejected, not rolled over.
        dated_input_fmt.setLenient(false);
    }

    /**
     * Reformats log extract file(s).
     *
     * <p>Each file in the command-line arguments is opened, read, reformatted
     * and appended to the output CSV file. The header row is requested
     * only when the output file is empty or does not exist yet.
     * </p>
     *
     * @param args the command line arguments
     * @throws CmdLineException if the arguments cannot be parsed
     * @throws FileNotFoundException if an input file cannot be opened
     * @throws IOException if reading or writing fails
     */
    public void run_main(String[] args) throws CmdLineException, FileNotFoundException, IOException {
        CmdLineParser parser = new CmdLineParser(this);
        parser.parseArgument(args);
        File target = new File(extract_filename);
        // length() is 0 for a missing file as well as for an empty one.
        include_header = target.length() == 0;
        Writer wtr = new FileWriter(target, true);
        try {
            for (String filename : arguments) {
                Object[] details = { filename, extract_filename };
                logger.info( MessageFormat.format("Reformatting {0} to {1}",details));
                Reader rdr = new FileReader(new File(filename));
                try {
                    reformat(rdr, wtr);
                } finally {
                    // reformat closes the reader too; closing again is
                    // harmless and covers every failure path.
                    rdr.close();
                }
            }
        } finally {
            wtr.close();
        }
    }

    /**
     * Reformat a source reader to append to a target writer.
     *
     * <p>
     * This will apply the extract_fields function to each row
     * of the reader. If the row does not raise some kind of exception,
     * and is not filtered out, the resulting mapping is written to the
     * output CSV-format file. Invalid rows are logged and skipped.
     * The source is closed before this method returns; the target is
     * left open.
     * </p>
     *
     * @param source Reader for an input file.
     * @param target Writer for the Output file.
     * @throws IOException if reading or writing fails
     */
    public void reformat(Reader source, Writer target) throws IOException {
        // Note that the input file may be broken at a bad byte boundary...
        // Open input for reading and hope for the best
        BufferedReader rdr = new BufferedReader(source);
        try {
            CSVWriter csvwtr = new CSVWriter(target, headings);
            // Only needed once!
            if (include_header) {
                csvwtr.writeheading();
                include_header = false;
            }
            for (String line = rdr.readLine(); line != null; line = rdr.readLine()) {
                try {
                    Map<String, String> csv = extract_fields(line);
                    // null means the row was deliberately filtered out.
                    if (csv != null) {
                        csvwtr.writerow(csv);
                    }
                } catch (InvalidRow ex) {
                    logger.warn("Invalid '" + line + "'");
                    logger.debug("Rejected because: " + ex.getMessage());
                }
            }
        } finally {
            rdr.close();
        }
    }

    /**
     * Split the label from the value, and confirm
     * the label as well as a non-zero length value.
     *
     * @param word a "label:value" word; only the first colon separates
     * @param label the label the word is required to carry
     * @return the text after the first colon
     * @throws InvalidRow if there is no colon, the label differs,
     * or the value is empty
     */
    String label_value(String word, String label) throws InvalidRow {
        String[] lv = word.split(":", 2);
        if (lv.length != 2) {
            throw new InvalidRow("Expected '" + label + ":value' but found '" + word + "'");
        }
        if (!lv[0].equals(label)) {
            throw new InvalidRow("Expected label '" + label + "' but found '" + lv[0] + "'");
        }
        if (lv[1].length() == 0) {
            throw new InvalidRow("No value for '" + label + "'");
        }
        return lv[1];
    }

    /**
     * Get a time value.
     *
     * <p>The word must be exactly an HH:mm:ss time of day. It is parsed
     * and formatted back, and anything that does not survive the round
     * trip unchanged is rejected: out-of-range fields, trailing text,
     * or a leading digit lost where the input was cut.</p>
     *
     * @param word the time word of a log line
     * @return the word itself, once validated
     * @throws InvalidRow if the word is not a canonical HH:mm:ss time
     */
    String get_time(String word) throws InvalidRow {
        Date parsed;
        try {
            parsed = time_fmt.parse(word);
        } catch (ParseException ex) {
            throw new InvalidRow("Bad time '" + word + "'");
        }
        if (!time_fmt.format(parsed).equals(word)) {
            throw new InvalidRow("Bad time '" + word + "'");
        }
        return word;
    }

    /**
     * Get a date value, converting the year to be the current year.
     *
     * <p>The year of {@link #now} is put in front of the MM/dd word
     * before parsing, so that 02/29 is judged against that year and
     * not against the parser's default year. Dates that do not exist
     * in that year are rejected.</p>
     *
     * @param word the MM/dd date word of a log line
     * @return the date as yyyy-MM-dd
     * @throws InvalidRow if the word is not a valid date in the year of now
     */
    String get_date(String word) throws InvalidRow {
        String dated = now.get(Calendar.YEAR) + "/" + word;
        try {
            return output_date_fmt.format(dated_input_fmt.parse(dated));
        } catch (ParseException ex) {
            throw new InvalidRow("Bad date '" + word + "'");
        }
    }

    /**
     * Get the latitude portion of a lat/lon string.
     *
     * <p>A decimal point is inserted after the first two characters of
     * the part before the slash, e.g. 370620935 becomes 37.0620935.</p>
     *
     * @param lat_lon text such as 370620935/-763413842
     * @return the latitude with a decimal point inserted
     * @throws InvalidRow if the latitude part is shorter than two characters
     */
    String get_lat(String lat_lon) throws InvalidRow {
        return insert_point(lat_lon, 0, 2, "latitude");
    }

    /**
     * Get the longitude portion of a lat/lon string.
     *
     * <p>A decimal point is inserted after the first three characters
     * of the part after the slash, e.g. -763413842 becomes -76.3413842.
     * NOTE(review): the fixed width of three fits a sign plus two
     * digits, as in the sample data; other shapes would be split in
     * the wrong place. Confirm against the log specification.</p>
     *
     * @param lat_lon text such as 370620935/-763413842
     * @return the longitude with a decimal point inserted
     * @throws InvalidRow if there is no longitude part or it is shorter
     * than three characters
     */
    String get_lon(String lat_lon) throws InvalidRow {
        return insert_point(lat_lon, 1, 3, "longitude");
    }

    /**
     * Pick one slash-separated part of a lat/lon string and insert a
     * decimal point after its first {@code whole} characters.
     */
    private String insert_point(String lat_lon, int index, int whole, String what)
            throws InvalidRow {
        if (lat_lon == null) {
            throw new InvalidRow("No " + what);
        }
        String[] ll_item = lat_lon.split("/");
        if (ll_item.length <= index || ll_item[index].length() < whole) {
            throw new InvalidRow("No usable " + what + " in '" + lat_lon + "'");
        }
        String value = ll_item[index];
        return value.substring(0, whole) + "." + value.substring(whole);
    }

    /**
     * Translate a validity flag into a one-letter code.
     *
     * @param word the flag word of a log line
     * @return "V" if the word is exactly "[Valid]", otherwise "I"
     * @throws InvalidRow never thrown at present; declared so callers
     * treat this like the other field extractors
     */
    String get_valid(String word) throws InvalidRow {
        if (word.equals("[Valid]")) {
            return "V";
        }
        return "I";
    }

    /**
     * Extract individual fields from an input line, creating
     * a mapping from column title to string value.
     *
     * <p>The line is split on single whitespace characters. An
     * MT_LOCATION line must have exactly 13 words and an
     * MT_TIMEPOINTCROSSING line exactly 21. Of the time point
     * crossings only the "Arrival" flavor is kept; the other
     * flavor is filtered by returning null.</p>
     *
     * <p>Any invalid input throws an InvalidRow exception.</p>
     *
     * <p>The mapping also carries keys that are not in
     * {@link #headings} (such as "H" and "Odom"); what the CSV writer
     * does with them is not visible here.</p>
     *
     * <p>Examples</p>
     * <code>
     * 07:04:42 02/15 V.1.2233 H.0.0 MT_LOCATION Lat/Lon:370620935/-763413842
     * [Valid] Adher:-1 [Valid] Odom:2668 [Valid] DGPS:On FOM:2
     * </code>
     *
     * <code>
     * 07:04:42 02/15 V.1.2236 H.0.0 MT_TIMEPOINTCROSSING Time:07:04:36 Arrival
     * Rte:4 Dir:2 TP:329 Stop:45 Svc:1 Blk:221 Lat/Lon:370315618/-763461352
     * [Valid] Adher:2 [Valid] Odom:1924 [Valid] DGPS:On FOM:2
     * </code>
     *
     * @param line one line of the log extract
     * @return Map<String,String> from column title to value,
     * or null if the row is filtered out.
     * @throws InvalidRow if the line is not one of the known shapes
     * or one of its fields fails validation
     */
    public Map<String, String> extract_fields(String line) throws InvalidRow {
        String[] words = line.split("\\s");
        if (words.length < 5) {
            throw new InvalidRow("Only " + words.length + " words");
        }
        String message_type = words[4];
        if (message_type.equals("MT_LOCATION") && words.length == 13) {
            Map<String, String> row = start_row(words);
            put_position(row, words, 5);
            return row;
        }
        if (message_type.equals("MT_TIMEPOINTCROSSING") && words.length == 21) {
            // Two flavors -- keep Arrival. Drop Dwell.
            if (!words[6].equals("Arrival")) {
                return null;
            }
            Map<String, String> row = start_row(words);
            //Time:07:04:36 Arrival
            row.put("Arrival", label_value(words[5], "Time"));
            //Rte:4 Dir:2 TP:329 Stop:45 Svc:1 Blk:221
            row.put("Route", label_value(words[7], "Rte"));
            row.put("Direction", label_value(words[8], "Dir"));
            row.put("TP", label_value(words[9], "TP"));
            row.put("Stop", label_value(words[10], "Stop"));
            row.put("Svc", label_value(words[11], "Svc"));
            row.put("Blk", label_value(words[12], "Blk"));
            put_position(row, words, 13);
            return row;
        }
        throw new InvalidRow(
                "Unexpected " + message_type + " with " + words.length + " words");
    }

    /** Create a row holding the four leading fields shared by every message type. */
    private Map<String, String> start_row(String[] words) throws InvalidRow {
        Map<String, String> row = new TreeMap<String, String>();
        row.put("Time", get_time(words[0]));
        row.put("Date", get_date(words[1]));
        row.put("Vehicle", words[2]);
        row.put("H", words[3]);
        return row;
    }

    /** Add the eight position, adherence and odometer words that begin at words[start]. */
    private void put_position(Map<String, String> row, String[] words, int start)
            throws InvalidRow {
        String lat_lon = label_value(words[start], "Lat/Lon");
        row.put("Lat", get_lat(lat_lon));
        row.put("Lon", get_lon(lat_lon));
        row.put("Location Valid/Invalid", get_valid(words[start + 1]));
        row.put("Adherence", label_value(words[start + 2], "Adher"));
        row.put("Adherence Valid/Invalid", get_valid(words[start + 3]));
        row.put("Odom", label_value(words[start + 4], "Odom"));
        row.put("Odom Valid/Invalid", get_valid(words[start + 5]));
        row.put("DGPS", label_value(words[start + 6], "DGPS"));
        row.put("FOM", label_value(words[start + 7], "FOM"));
    }
}