package edu.umd.rhsmith.diads.meater.modules.tweater.queries.legacy;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import twitter4j.GeoLocation;

import com.mdimension.jchronic.Chronic;

import edu.umd.rhsmith.diads.meater.core.app.MEaterConfigurationException;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryFollow;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryItem;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryItemTime;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryLocation;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryPhrase;
import edu.umd.rhsmith.diads.meater.modules.tweater.queries.QueryTrack;
import edu.umd.rhsmith.diads.meater.util.Util;

/**
 * Reads query items from a CSV file.
 * <p>
 * Each non-empty line that does not start with <code>#</code> must hold
 * exactly four comma-separated (optionally double-quoted) fields:
 * <code>start, end, type, item</code>. Recognised types are
 * <code>phrase</code>; <code>keywords</code>/<code>keyword</code>/
 * <code>track</code>; <code>user</code>/<code>follow</code> (item is a numeric
 * id); and <code>location</code> (item is four semicolon-separated numbers in
 * the order <code>longSW;latSW;longNE;latNE</code>).
 *
 * @author dmonner
 */
public class CsvQuerySource extends QuerySource {

	/**
	 * The file from which to read the query
	 */
	private final File infile;

	/**
	 * Creates a source that reads from the file named by the initializer.
	 *
	 * @param init
	 *            initializer supplying the CSV filename
	 * @throws MEaterConfigurationException
	 *             propagated from the superclass constructor
	 */
	public CsvQuerySource(CsvQuerySourceInitializer init)
			throws MEaterConfigurationException {
		super(init);
		this.infile = new File(init.getFilename());
	}

	/**
	 * Parses the CSV file into timed query items. Malformed lines are logged
	 * as warnings and skipped; they do not abort the read.
	 *
	 * @return the list of query items built from the well-formed lines, or
	 *         <code>null</code> if the file could not be read
	 */
	@Override
	public List<QueryItemTime> getQueriesFromSource() {
		logFine(MSG_READING_FILE_FMT, infile);

		final List<QueryItemTime> all = new LinkedList<QueryItemTime>();
		BufferedReader in = null;

		try {
			// get reader for file
			in = new BufferedReader(new FileReader(infile));
			String line = null;
			int lineno = 0;

			// read all lines in file
			while ((line = in.readLine()) != null) {
				lineno++;
				line = line.trim();

				// skip comment and blank lines
				if (line.startsWith("#") || line.isEmpty()) {
					continue;
				}

				final String[] fields = Util.splitUnlessQuoted(line, ",", "\"");

				// be sure we have the correct number of fields
				if (fields.length != 4) {
					logWarning(MSG_ERR_NUMFIELDS_FMT, fields.length, lineno);
					continue;
				}

				// extract the base fields; the span bounds are scaled by 1000
				// (presumably seconds -> milliseconds; confirm against Chronic)
				// NOTE(review): if Chronic.parse can return null for text it
				// cannot interpret, the calls below would throw a
				// NullPointerException and abort the whole read -- confirm
				// and guard if so.
				final long start = Chronic.parse(
						Util.unquoteString(fields[0].trim())).getBegin() * 1000L;
				final long end = Chronic.parse(
						Util.unquoteString(fields[1].trim())).getEnd() * 1000L;
				final String type = Util.unquoteString(fields[2].trim());
				final String item = Util.unquoteString(fields[3].trim());

				// now try to build a query
				final QueryItem qitem = buildQueryItem(lineno, type, item);
				if (qitem != null) {
					all.add(new QueryItemTime(qitem, start, end));
				}
			}

			logFine(MSG_UPDATE_COMPLETE);
			return all;
		} catch (final IOException ex) {
			logSevere(MSG_ERR_IO_FMT, infile.getPath(), Util.traceMessage(ex));
		} finally {
			if (in != null) {
				try {
					in.close();
				} catch (final IOException ignored) {
					// best effort: nothing useful can be done if closing a
					// reader fails
				}
			}
		}

		logWarning(MSG_ERR_FAILED_FMT, infile);
		return null;
	}

	/**
	 * Builds the query item described by one CSV line, logging a warning when
	 * the type is unknown or the item text is malformed.
	 *
	 * @param lineno
	 *            1-based line number, used as the item's id and in warnings
	 * @param type
	 *            the (case-insensitive) query type field
	 * @param item
	 *            the query payload field
	 * @return the query item, or <code>null</code> if none could be built
	 */
	private QueryItem buildQueryItem(int lineno, String type, String item) {
		if (type.equalsIgnoreCase("phrase")) {
			return new QueryPhrase(lineno, item);
		}

		if (type.equalsIgnoreCase("keywords")
				|| type.equalsIgnoreCase("keyword")
				|| type.equalsIgnoreCase("track")) {
			return new QueryTrack(lineno, item);
		}

		if (type.equalsIgnoreCase("user") || type.equalsIgnoreCase("follow")) {
			try {
				return new QueryFollow(lineno, Long.parseLong(item));
			} catch (final NumberFormatException ex) {
				logWarning(MSG_ERR_USERID_FMT, item, lineno);
				return null;
			}
		}

		if (type.equalsIgnoreCase("location")) {
			return buildLocation(lineno, item);
		}

		logWarning(MSG_ERR_INVALID_TYPE_FMT, lineno);
		return null;
	}

	/**
	 * Builds a bounding-box location query from
	 * <code>longSW;latSW;longNE;latNE</code>.
	 *
	 * @param lineno
	 *            1-based line number, used as the item's id and in warnings
	 * @param item
	 *            the semicolon-separated coordinate text
	 * @return the location query, or <code>null</code> if the text is malformed
	 *         or rejected by the location types
	 */
	private QueryItem buildLocation(int lineno, String item) {
		final String[] coords = item.split(";");

		// explicit check instead of relying on an out-of-bounds exception;
		// extra trailing values are ignored
		if (coords.length < 4) {
			logWarning(MSG_ERR_LOCATION_FMT, item, lineno);
			return null;
		}

		try {
			// parse directly as double so no float rounding error is introduced
			final double longSW = Double.parseDouble(coords[0]);
			final double latSW = Double.parseDouble(coords[1]);
			final double longNE = Double.parseDouble(coords[2]);
			final double latNE = Double.parseDouble(coords[3]);

			final GeoLocation pointSW = new GeoLocation(latSW, longSW);
			final GeoLocation pointNE = new GeoLocation(latNE, longNE);
			return new QueryLocation(lineno, pointSW, pointNE);
		} catch (final NumberFormatException ex) {
			logWarning(MSG_ERR_LOCATION_FMT, item, lineno);
		} catch (final IllegalArgumentException ex) {
			// the format expects a description and a line number
			logWarning(MSG_ERR_ILLEGAL_FMT, ex.getMessage(), lineno);
		}
		return null;
	}

	/*
	 * --------------------------------
	 * Messages
	 * --------------------------------
	 */

	private static final String MSG_READING_FILE_FMT = "Beginning CSVQueryBuilder update; reading file %s... ";
	private static final String MSG_UPDATE_COMPLETE = "Completed CSVQueryBuilder update.";
	private static final String MSG_ERR_FAILED_FMT = "CSVQueryBuilder update FAILED from file %s!";
	private static final String MSG_ERR_NUMFIELDS_FMT = "Malformed input! Expected 4 fields, found %d, line number %d";
	private static final String MSG_ERR_INVALID_TYPE_FMT = "Malformed input! Type of query must be \"phrase\", \"keywords\", \"user\", or \"location\" at line number %d";
	private static final String MSG_ERR_USERID_FMT = "Malformed input! Expected a user id number (not \"%s\") on line number %d";
	private static final String MSG_ERR_ILLEGAL_FMT = "Illegal input! %s on line number %d";
	private static final String MSG_ERR_LOCATION_FMT = "Malformed input! Expected four semicolon-delimeted coordinates (not \"%s\") on line number %d";
	private static final String MSG_ERR_IO_FMT = "Problem reading input file \"%s\":\n%s";
}