package edu.stanford.nlp.patterns.dep; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.patterns.CandidatePhrase; import edu.stanford.nlp.patterns.DataInstance; import edu.stanford.nlp.patterns.PatternFactory; import edu.stanford.nlp.patterns.PatternsAnnotations; import edu.stanford.nlp.patterns.surface.Token; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.trees.GrammaticalRelation; import edu.stanford.nlp.util.ArgumentParser; import edu.stanford.nlp.util.Pair; import java.util.*; import java.util.regex.Pattern; /** * Created by Sonal Gupta on 10/31/14. */ public class DepPatternFactory extends PatternFactory{ @ArgumentParser.Option(name="ignoreRels") static String ignoreRels = ""; @ArgumentParser.Option(name="upDepth") static int upDepth = 2; @ArgumentParser.Option(name="allowedTagsForTrigger") static String allowedTagsForTrigger = ".*"; static Set<Pattern> allowedTagPatternForTrigger = new HashSet<>(); static Set<GrammaticalRelation> ignoreRelsSet = new HashSet<>(); static public void setUp(Properties props){ ArgumentParser.fillOptions(DepPatternFactory.class, props); ArgumentParser.fillOptions(PatternFactory.class, props); for(String s: ignoreRels.split("[,;]")){ ignoreRelsSet.add(GrammaticalRelation.valueOf(s)); } for(String s: allowedTagsForTrigger.split("[,;]")){ allowedTagPatternForTrigger.add(Pattern.compile(s)); } } public static Map<Integer, Set<DepPattern>> getPatternsAroundTokens(DataInstance sent, Set<CandidatePhrase> stopWords) { return getPatternsForAllPhrases(sent, stopWords); } static Map<Integer, Set<DepPattern>> getPatternsForAllPhrases(DataInstance sent, Set<CandidatePhrase> commonWords) { SemanticGraph graph = ((DataInstanceDep)sent).getGraph(); Map<Integer, Set<DepPattern>> pats4Sent = new HashMap<>(); if (graph == null || graph.isEmpty()){ System.out.println("graph is empty or null!"); return null; } Set<IndexedWord> allNodes; try { allNodes = graph.getLeafVertices(); } catch (IllegalArgumentException i) { return null; } for (IndexedWord w : allNodes) { //because index starts at 1!!!! pats4Sent.put(w.index() -1, getContext(w, graph, commonWords, sent)); } return pats4Sent; } static public DepPattern patternToDepPattern(Pair<IndexedWord, GrammaticalRelation> p, DataInstance sent) { Token token = new Token(PatternFactory.PatternType.DEP); CoreLabel backingLabel = sent.getTokens().get(p.first().index() -1); assert backingLabel.containsKey(PatternsAnnotations.ProcessedTextAnnotation.class) : "the keyset are " + backingLabel.toString(CoreLabel.OutputFormat.ALL); token.addORRestriction(PatternsAnnotations.ProcessedTextAnnotation.class, backingLabel.get(PatternsAnnotations.ProcessedTextAnnotation.class)); return new DepPattern(token, p.second()); } private static boolean ifIgnoreRel(GrammaticalRelation rel) { if(ignoreRelsSet.contains(rel)) return true; else return false; } static Set<DepPattern> getContext(IndexedWord w, SemanticGraph graph, Set<CandidatePhrase> stopWords, DataInstance sent){ Set<DepPattern> patterns = new HashSet<>(); IndexedWord node = w; int depth = 1; while (depth <= upDepth) { IndexedWord parent = graph.getParent(node); if (parent == null) break; GrammaticalRelation rel = graph.reln(parent, node); for (Pattern tagPattern : allowedTagPatternForTrigger) { if (tagPattern.matcher(parent.tag()).matches()) { if (!ifIgnoreRel(rel) && !stopWords.contains(CandidatePhrase.createOrGet(parent.word())) && parent.word().length() > 1) { Pair<IndexedWord, GrammaticalRelation> pattern = new Pair<>(parent, rel); DepPattern patterndep = patternToDepPattern(pattern, sent); if (depth <= upDepth){ patterns.add(patterndep); } // if (depth <= maxDepth) { // Counter<String> phrasesForPattern = phrasesForPatternForSent.get(patternStr); // if (phrasesForPattern == null) // phrasesForPattern = new ClassicCounter<String>(); // phrasesForPattern.incrementCount(phrase); // phrasesForPatternForSent.put(patternStr, phrasesForPattern); // } // if (DEBUG >= 1) // System.out.println("for phrase " + phrase + " pattern is " + patternStr); } } } node = parent; depth++; } return patterns; } public static Set getContext(DataInstance sent, int i, Set<CandidatePhrase> stopWords) { SemanticGraph graph = ((DataInstanceDep)sent).getGraph(); //nodes are indexed from 1 -- so wrong!! try{ IndexedWord w = graph.getNodeByIndex(i+1); return getContext(w, graph, stopWords, sent);}catch(IllegalArgumentException e){ return Collections.emptySet(); } } }