package tathya;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;

import tathya.semantics.Triple;
import tathya.semantics.TripletExtractor;
import tathya.semantics.Word;
import tathya.text.tokenizer.TwitterTokenizer;

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;

/**
 * Reads "\005"-delimited (id, text) records from an event file, parses each
 * tweet with the Stanford lexicalized parser, extracts
 * subject/predicate/object triples, and writes them as tab-separated records
 * to an output file.
 */
public class Main {

    public static void main(String[] args) {
        // Load the serialized English PCFG grammar and cap sentence length.
        LexicalizedParser lp = new LexicalizedParser(
                "/Users/akishore/Desktop/tathya/englishPCFG.ser.gz");
        lp.setOptionFlags(new String[] { "-maxLength", "80",
                "-retainTmpSubcategories" });

        TwitterTokenizer tok = new TwitterTokenizer();

        try {
            BufferedReader fr = new BufferedReader(new FileReader(
                    "/Users/akishore/Desktop/event.data"));
            BufferedWriter wr = new BufferedWriter(new FileWriter(
                    "/Users/akishore/Desktop/tuples"));

            String line;
            while ((line = fr.readLine()) != null) {
                // Each input record is "\005"-delimited: tokens[0] is the
                // record id, tokens[1] is the raw tweet text.
                String[] tokens = line.split("\005");

                // Split the tweet into sentences before parsing.
                List<String> sentences = tok.tokenize(tokens[1]);
                wr.write("\n" + tokens[0] + "\t" + tokens[1] + "\n");

                for (String s : sentences) {
                    s = s.trim();
                    String[] words = s.split(" ");

                    // Parse the sentence into a phrase-structure tree.
                    Tree parse = (Tree) lp.apply(Arrays.asList(words));

                    // Collapsed typed dependencies are computed for
                    // inspection only; they are not written to the output.
                    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
                    GrammaticalStructureFactory gsf = tlp
                            .grammaticalStructureFactory();
                    GrammaticalStructure gs = gsf
                            .newGrammaticalStructure(parse);
                    Collection tdl = gs.typedDependenciesCollapsed();

                    // Extract subject/predicate/object triples from the tree.
                    TripletExtractor extractor = new TripletExtractor();
                    ArrayList<Triple> triples = extractor.extract(parse);

                    for (Triple triple : triples) {
                        // Collect each constituent and its attribute words.
                        String subject = "";
                        String predicate = "";
                        String object = "";
                        StringBuilder subjectAttrs = new StringBuilder();
                        StringBuilder predicateAttrs = new StringBuilder();
                        StringBuilder objectAttrs = new StringBuilder();

                        if (triple.hasSubject()) {
                            subject = triple.getSubject().getText();
                            for (Word w : triple.getSubject().getAttributes()) {
                                subjectAttrs.append(w.getText()).append(" ");
                            }
                        }
                        if (triple.hasPredicate()) {
                            predicate = triple.getPredicate().getText();
                            for (Word w : triple.getPredicate().getAttributes()) {
                                predicateAttrs.append(w.getText()).append(" ");
                            }
                        }
                        if (triple.hasObject()) {
                            object = triple.getObject().getText();
                            for (Word w : triple.getObject().getAttributes()) {
                                objectAttrs.append(w.getText()).append(" ");
                            }
                        }

                        // One tab-separated record per triple; each
                        // constituent is followed by its own attributes.
                        wr.write(subject + "\t("
                                + subjectAttrs.toString().trim() + ")\t"
                                + predicate + "\t("
                                + predicateAttrs.toString().trim() + ")\t"
                                + object + "\t("
                                + objectAttrs.toString().trim()
                                + ")\tTweet: " + s + "\n");
                        wr.flush();
                    }
                }
            }
            wr.close();
            fr.close();
        } catch (Exception e) {
            e.printStackTrace();
            System.exit(1);
        }
    }
}
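/*
 * Minimal usage sketch (not part of the pipeline above): parse one
 * hard-coded sentence and print its triples. The model path and the example
 * sentence (taken from the sample tweets this class was tested on) are
 * placeholders; adjust them to the local setup.
 *
 *   LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz");
 *   String s = "Today Apple announced the Apple iPad";
 *   Tree parse = (Tree) lp.apply(Arrays.asList(s.split(" ")));
 *   for (Triple triple : new TripletExtractor().extract(parse)) {
 *       if (triple.hasSubject() && triple.hasPredicate()
 *               && triple.hasObject()) {
 *           System.out.println(triple.getSubject().getText() + " | "
 *                   + triple.getPredicate().getText() + " | "
 *                   + triple.getObject().getText());
 *       }
 *   }
 */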