package edu.cmu.test; import java.io.BufferedReader; import java.util.List; import twitter4j.Status; import twitter4j.json.DataObjectFactory; import edu.cmu.geolocator.GlobalParam; import edu.cmu.geolocator.coder.CoderFactory; import edu.cmu.geolocator.io.GetReader; import edu.cmu.geolocator.model.CandidateAndFeature; import edu.cmu.geolocator.model.LocEntityAnnotation; import edu.cmu.geolocator.model.LocGroupFeatures; import edu.cmu.geolocator.model.Tweet; import edu.cmu.geolocator.parser.ParserFactory; public class PipelineTest { public static void main(String args[]) throws Exception { // If you have changed where your GazIndex lies, you have to specify where GazIndex lies // by using the following: GlobalParam.setGazIndex("GazIndex"); // The path for the file that you want to parse, one line per tweet JSON file. String path = "D:\\Users\\Think\\workspace64\\geolocator-3.0\\SampleInput\\shortInputSample.txt"; BufferedReader br = GetReader.getUTF8FileReader(path); String line = null; while ((line = br.readLine()) != null) { // create Tweet Status from the JSON file. Status is the structure containing all the // information in tweet. Status status = DataObjectFactory.createStatus(line); // create the tweet object from the status. Tweet tweet = new Tweet(status); // If you are not reading JSON, and just parse the string, you should use the following to // wrap a sentence in a tweet: // Tweet tweet = new Tweet("sentence"); System.out.println("////////////////////////////////////////////////////////////////\n" + tweet.getText()); System.out.println("[MESSAGE]: " + tweet.getText()); System.out.println("[COORD]: " + tweet.getLatitude() + " " + tweet.getLongitude()); System.out.println("[USER LOCATION]:" + tweet.getUserLocation()); System.out.println("[PLACE FIELD]:" + tweet.getPlace()); // generate the parsed toponyms from the tweet. System.out.println("GEOPARSING... "); List<LocEntityAnnotation> topos = ParserFactory.getEnAggrParser().parse(tweet); tweet.setToponyms(topos); // print the extracted toponyms for (LocEntityAnnotation topo : topos) System.out.println(topo.getTokenString() + " [TYPE]: " + topo.getNEType() + " [PROB]:" + topo.getNETypeProb()); List<CandidateAndFeature> resolved = null; if (topos == null) System.out.println("NO TOPONYMS PARSED."); else { System.out.println("" + topos.size() + " TOPONYMS FOUND.\nGEOCODING..."); long previous = System.currentTimeMillis(); // resolve the place resolved = CoderFactory.getMaxPopGeoCoder().resolve(tweet, LocGroupFeatures.DEBUGMODE, LocGroupFeatures.FILTERZEROPOP); System.out.println("[TIME SPENT]:" + (System.currentTimeMillis() - previous)); if (resolved == null) System.out.println("[NO TOPONYMS RESOLVED]"); else { System.out.println("[RESOLVED RESULTS ARE]:"); // Note that we could output multiple results for one place. // This is for the user to decide which is the best they want. // We may improve this later to output only one result. for (CandidateAndFeature c : resolved) { System.out.println(c.getAsciiName() + " Country:" + c.getCountryCode() + " State:" + c.getAdm1Code() + " Latitude:" + c.getLatitude() + " Longitude:" + c.getLongitude() + " [Prob]:" + c.getProb()); } } } } } }