package edu.cmu.test;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.text.BreakIterator;
import java.util.List;
import java.util.Locale;
import edu.cmu.geolocator.GlobalParam;
import edu.cmu.geolocator.coder.CoderFactory;
import edu.cmu.geolocator.model.CandidateAndFeature;
import edu.cmu.geolocator.model.LocEntityAnnotation;
import edu.cmu.geolocator.model.LocGroupFeatures;
import edu.cmu.geolocator.model.Tweet;
import edu.cmu.geolocator.parser.ParserFactory;
/**
 * Interactive console driver for the geoparsing / geocoding pipeline.
 *
 * <p>Each non-empty line read from standard input is processed twice:
 * <ol>
 *   <li>sentence by sentence (sentences are found with a US-locale sentence
 *       {@link BreakIterator}), using the parser returned by
 *       {@code ParserFactory.getACENERParser()};</li>
 *   <li>as one whole text, using the parser returned by
 *       {@code ParserFactory.getEnAggrParser()}.</li>
 * </ol>
 * In both passes the toponyms found are attached to the {@code Tweet}, handed to
 * {@code CoderFactory.getENAggGeoCoder()} and the results are printed to standard output.
 * The loop ends when standard input reaches end-of-file.
 */
public class CmdLineTest {

    /** Gazetteer index location used when no path is supplied on the command line. */
    private static final String DEFAULT_GAZ_INDEX = "C:\\chenxu\\geolocator-3.0\\GazIndex";

    /**
     * Runs the read / geoparse / geocode / print loop.
     *
     * @param argv optional; when a non-empty first argument is present it is used as the
     *             gazetteer index path instead of {@link #DEFAULT_GAZ_INDEX}
     * @throws Exception if reading standard input, parsing or geocoding fails
     */
    public static void main(String[] argv) throws Exception {
        String gazIndex = DEFAULT_GAZ_INDEX;
        if (argv != null && argv.length > 0 && argv[0] != null && argv[0].length() > 0) {
            gazIndex = argv[0];
        }
        GlobalParam.setGazIndex(gazIndex);

        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        // One iterator is enough: setText() re-targets it at every new input line.
        BreakIterator boundary = BreakIterator.getSentenceInstance(Locale.US);

        System.out.println(">");
        String s;
        while ((s = br.readLine()) != null) {
            System.out.println(">");
            if (s.length() < 1) {
                System.out.println(">");
                continue;
            }

            // Pass 1: every sentence of the line on its own.
            boundary.setText(s);
            int start = boundary.first();
            for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
                String sentence = s.substring(start, end);
                Tweet sentenceTweet = new Tweet(sentence);
                System.out.println("geoparsing...");
                List<LocEntityAnnotation> sentenceTopos = ParserFactory.getACENERParser().parse(sentenceTweet);
                // The marker and the sentence echo are only printed after a complete report.
                if (geocodeAndReport(sentenceTweet, sentenceTopos)) {
                    System.out.println(">");
                    System.out.println(sentence);
                }
            }

            // Pass 2: the whole line as a single text.
            Tweet tweet = new Tweet(s);
            System.out.println("geoparsing...");
            List<LocEntityAnnotation> topos = ParserFactory.getEnAggrParser().parse(tweet);
            if (geocodeAndReport(tweet, topos)) {
                System.out.println(">");
            }
        }
    }

    /**
     * Attaches {@code topos} to {@code tweet}, runs the geocoder on it and prints the toponyms
     * followed by the resolved candidates.
     *
     * <p>The geocoder is invoked before the toponym list is checked, which keeps the call order
     * of the original inline code.
     *
     * @param tweet text being processed
     * @param topos toponyms produced by the parser for {@code tweet}; may be {@code null} or empty
     * @return {@code true} when both toponyms and candidates were printed; {@code false} when the
     *         report stopped early because there were no toponyms or the geocoder returned
     *         {@code null} (a message is written to standard error in either case)
     * @throws Exception if geocoding fails
     */
    private static boolean geocodeAndReport(Tweet tweet, List<LocEntityAnnotation> topos) throws Exception {
        tweet.setToponyms(topos);
        System.out.println("geocoding...");
        List<CandidateAndFeature> resolved = CoderFactory.getENAggGeoCoder().resolve(tweet,
                LocGroupFeatures.DEBUGMODE, LocGroupFeatures.FILTERLESS1000POP);

        if (topos == null || topos.size() == 0) {
            System.err.println("No resolved toponyms");
            return false;
        }
        // One line per toponym: token string, NE type, token start, token end, NE type probability.
        for (LocEntityAnnotation topo : topos) {
            System.out.println(topo.getTokenString() + " " + topo.getNEType() + " " + topo.getToksStart()
                    + " " + topo.getToksEnd() + " " + topo.getNETypeProb());
        }

        if (resolved == null) {
            System.err.println("No resolved coordinates");
            return false;
        }
        // Latitude and longitude are separated by a space so the two numbers stay distinguishable.
        for (CandidateAndFeature code : resolved) {
            System.out.println(code.getAsciiName() + " " + code.getCountryCode() + " "
                    + code.getLatitude() + " " + code.getLongitude() + "[Prob]:" + code.getProb());
        }
        return true;
    }
}