package senna;

import java.util.ArrayList;
import java.util.regex.Pattern;

import tathya.semantics.Event;
import twitter.PreprocessTwitterData;

/**
 * Extracts noun phrases from column-formatted tagger output by reading the
 * word column (first column) and the chunk-tag column (third column) of
 * every row.
 *
 * <p>Only three chunk tags are interpreted, all matched case-insensitively:
 * {@code S-NP} (a one-word noun phrase), {@code B-NP} (first word of a
 * multi-word noun phrase) and {@code E-NP} (last word of a multi-word noun
 * phrase). Presumably this is SENNA's IOBES chunk encoding; verify against
 * the output actually produced by {@code RunSenna}.
 */
public class NounPhraseExtractor {

    /** Column separator: one or more spaces or tabs. Compiled once instead of per line. */
    private static final Pattern COLUMN_SEPARATOR = Pattern.compile("[ \t]+");

    /** Chunk tag marking a noun phrase that consists of a single word. */
    private static final String SINGLE_NP_TAG = "s-np";

    /** Chunk tag marking the first word of a multi-word noun phrase. */
    private static final String BEGIN_NP_TAG = "b-np";

    /** Chunk tag marking the last word of a multi-word noun phrase. */
    private static final String END_NP_TAG = "e-np";

    /**
     * Collects the noun phrases found in the given tagger output.
     *
     * <p>The input is split into rows on {@code '\n'}; each row is trimmed and
     * split into columns on runs of spaces/tabs. Rows with fewer than three
     * columns are ignored. Column 0 is taken as the word and column 2 as its
     * chunk tag.
     *
     * @param sennaOutput the raw multi-line tagger output; {@code null} is
     *                    treated as empty input
     * @return the noun phrases in order of appearance, with the words of a
     *         multi-word phrase joined by single spaces; never {@code null}
     */
    public ArrayList<String> getNounPhrases(String sennaOutput) {
        ArrayList<String> nounPhrases = new ArrayList<String>();
        if (sennaOutput == null) {
            return nounPhrases;
        }

        ArrayList<String> words = new ArrayList<String>();
        ArrayList<String> chunkerTokens = new ArrayList<String>();
        for (String line : sennaOutput.split("\n")) {
            String[] tokens = COLUMN_SEPARATOR.split(line.trim());
            if (tokens.length < 3) {
                continue;
            }
            words.add(tokens[0].trim());
            chunkerTokens.add(tokens[2].trim());
        }

        int rowCount = chunkerTokens.size();
        for (int i = 0; i < rowCount; i++) {
            String tag = chunkerTokens.get(i);
            if (tag.equalsIgnoreCase(SINGLE_NP_TAG)) {
                nounPhrases.add(words.get(i));
            } else if (tag.equalsIgnoreCase(BEGIN_NP_TAG)) {
                StringBuilder phrase = new StringBuilder();
                // Consume words up to (not including) the closing E-NP row. The
                // bounds check guards against a B-NP that is never closed, which
                // previously ran off the end of the list and threw
                // IndexOutOfBoundsException.
                while (i < rowCount && !chunkerTokens.get(i).equalsIgnoreCase(END_NP_TAG)) {
                    phrase.append(words.get(i)).append(' ');
                    i++;
                }
                if (i < rowCount) {
                    // i now points at the E-NP row; its word closes the phrase.
                    phrase.append(words.get(i));
                }
                // For an unterminated phrase the words gathered so far are still
                // emitted (best effort); trim() drops the trailing separator.
                nounPhrases.add(phrase.toString().trim());
            }
        }
        return nounPhrases;
    }

    /**
     * Small manual demo: cleans a hard-coded sample tweet, runs the tagger on
     * each line of the cleaned text and prints the noun phrases found.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        NounPhraseExtractor npe = new NounPhraseExtractor();
        try {
            PreprocessTwitterData ptd = new PreprocessTwitterData();
            RunSenna rs = new RunSenna();
            String tweet = "#lazyweb There are a lot of StackOverflow like sites for stats now. What is the best site for asking statistical questions? Quora?";
            String cleaned = ptd.cleanText(tweet);
            System.out.println("cleaned: "+cleaned);
            for (String s : cleaned.split("\n")) {
                String sennaOutput = rs.getSennaOutput(s.trim());
                ArrayList<String> nPhrases = npe.getNounPhrases(sennaOutput);
                System.out.println("NPs");
                for (String nPhrase : nPhrases) {
                    System.out.println(nPhrase);
                }
            }
        } catch (Exception e) {
            // Demo entry point: report any failure from the project helpers and exit.
            e.printStackTrace();
        }
    }
}