package edu.stanford.nlp.trees.tregex;
import edu.stanford.nlp.util.Function;
import edu.stanford.nlp.trees.ParentalTreeWrapper;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Pair;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A {@link TregexPattern} that matches a single tree node, found via a
 * {@link Relation} from the node its matcher is started at, by one of:
 * <ul>
 * <li>a (possibly negated) regular expression applied to the node's value,</li>
 * <li>a backreference to an already named node (no description, not a link), or</li>
 * <li>a link: the node's value must equal the value of an already named node.</li>
 * </ul>
 * A pattern may have a single child pattern, which is matched starting from
 * each node this pattern accepts.
 */
class DescriptionPattern extends TregexPattern {

  /** Relation used to enumerate the candidate nodes for this pattern. */
  private final Relation rel;

  /** If true, a candidate is accepted when the description does NOT match. */
  private final boolean negDesc;

  /** Compiled description; {@code null} for a backreference or a link. */
  private final Pattern descPattern;

  /** The description as given to the constructor, or {@code " "} if there was none. */
  private final String stringDesc;

  /** The name to give the matched node */
  private final String name;

  /** If this pattern is a link, this is the node linked to */
  private final String linkedName;

  /** Whether this pattern is a link to the node named {@link #linkedName}. */
  private final boolean isLink;

  // todo: conceptually final, but we'd need to rewrite TregexParser
  // to make it so.
  private TregexPattern child;

  // also conceptually final, but it depends on the child
  /**
   * whether or not this node can change variables.  helps determine
   * which nodes to change when backtracking
   */
  private boolean changesVariables;

  /**
   * Specifies the groups in a regex that are captured as matcher-global
   * string variables: (regex group number, variable name) pairs.
   */
  private final List<Pair<Integer,String>> variableGroups;

  /**
   * Function applied to node values before they are compared, or
   * {@code null} if values are compared as-is.
   */
  private final Function<String, String> basicCatFunction;

  /**
   * Builds a description pattern.
   *
   * @param rel the relation used to find candidate nodes
   * @param negDesc whether the description is negated
   * @param desc the node description: {@code "__"} matches any value, a
   *     string of the form {@code /regex/} is compiled as the enclosed regex
   *     (and applied with {@code find()}), and anything else is compiled as
   *     {@code ^(desc)$}.  {@code null} makes this a backreference or a link.
   * @param name the name to give the matched node, or for a backreference
   *     the name of the node referred to; may be {@code null}
   * @param useBasicCat if true, {@code currentBasicCatFunction} is applied
   *     to node values before matching
   * @param variableGroups (regex group, variable name) pairs to capture;
   *     iterated on every regex match, so presumably never {@code null} -
   *     TODO confirm against the parser
   * @param isLink whether this pattern is a link
   * @param linkedName the name of the node linked to, if this is a link
   */
  public DescriptionPattern(Relation rel, boolean negDesc, String desc,
                            String name, boolean useBasicCat,
                            List<Pair<Integer,String>> variableGroups,
                            boolean isLink, String linkedName) {
    this.rel = rel;
    this.negDesc = negDesc;
    this.isLink = isLink;
    this.linkedName = linkedName;
    if (desc != null) {
      stringDesc = desc;
      if (desc.equals("__")) {
        descPattern = Pattern.compile(".*");
      } else if (desc.matches("/.*/")) {
        // strip the surrounding slashes
        descPattern = Pattern.compile(desc.substring(1, desc.length() - 1));
      } else { // raw description
        descPattern = Pattern.compile("^(" + desc + ")$");
      }
    } else {
      // Without a description this node must refer to something: either a
      // backreference (which needs a name) or a link (which needs only the
      // linked name).  The rest of this class explicitly copes with a link
      // whose name is null, so a link must not trip this assertion.
      assert name != null || isLink;
      stringDesc = " ";
      descPattern = null;
    }
    this.name = name;
    setChild(null);
    this.basicCatFunction = (useBasicCat ? currentBasicCatFunction : null);
    this.variableGroups = variableGroups;
  }

  /**
   * Returns a string for this node alone (relation, negation, basic-category
   * marker, description and name), without its child.
   */
  @Override
  public String localString() {
    return rel.toString() + ' ' + (negDesc ? "!" : "") + (basicCatFunction != null ? "@" : "") + stringDesc + (name == null ? "" : '=' + name);
  }

  /**
   * Returns a string for this node and, in parentheses, its child if it has one.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    if (isNegated()) {
      sb.append('!');
    }
    if (isOptional()) {
      sb.append('?');
    }
    sb.append(rel.toString());
    sb.append(' ');
    if (child != null) {
      sb.append('(');
    }
    if (negDesc) {
      sb.append('!');
    }
    if (basicCatFunction != null) {
      sb.append('@');
    }
    sb.append(stringDesc);
    if (isLink) {
      sb.append("~");
      sb.append(linkedName);
    }
    if (name != null) {
      sb.append('=');
      sb.append(name);
    }
    sb.append(' ');
    if (child != null) {
      sb.append(child.toString());
      sb.append(')');
    }
    return sb.toString();
  }

  /**
   * Sets the child pattern and recomputes whether this node changes
   * variables: it does if it names a node itself (it has a description or is
   * a link, and has a name) or if its child changes variables.
   *
   * @param n the new child, or {@code null} for none
   */
  public void setChild(TregexPattern n) {
    child = n;
    changesVariables = ((descPattern != null || isLink) && name != null);
    changesVariables = (changesVariables ||
                        (child != null && child.getChangesVariables()));
  }

  /** Returns the child as a singleton list, or an empty list if there is none. */
  @Override
  public List<TregexPattern> getChildren() {
    if (child == null) {
      return Collections.emptyList();
    } else {
      return Collections.singletonList(child);
    }
  }

  @Override
  boolean getChangesVariables() {
    return changesVariables;
  }

  /** Creates a matcher for this pattern started at {@code tree}. */
  @Override
  public TregexMatcher matcher(Tree root, Tree tree,
                               Map<String, Tree> namesToNodes,
                               VariableStrings variableStrings) {
    return new DescriptionMatcher(this, root, tree, namesToNodes,
                                  variableStrings);
  }

  // TODO: Why is this a static class with a pointer to the containing
  // class?  There seems to be no reason for such a thing
  /**
   * Matcher for a {@link DescriptionPattern}.  It walks the candidate nodes
   * produced by the pattern's relation, and for each candidate that the
   * description accepts, runs the child matcher (if any) from that node.
   */
  private static class DescriptionMatcher extends TregexMatcher {

    private Iterator<Tree> treeNodeMatchCandidateIterator;
    private final DescriptionPattern myNode;

    // a DescriptionMatcher only has a single child; if it is the left
    // side of multiple relations, a CoordinationMatcher is used.
    private TregexMatcher childMatcher;

    // the Tree node that this DescriptionMatcher node is trying to match on.
    private Tree nextTreeNodeMatchCandidate;

    // when finished = true, it means I have exhausted my potential
    // tree node match candidates.
    private boolean finished = false;

    // for a childless pattern: whether the current candidate has already
    // been reported as a match
    private boolean matchedOnce = false;

    // whether this matcher currently has its variable groups set in
    // variableStrings
    private boolean committedVariables = false;

    // universal: childMatcher is null if and only if
    // myNode.child == null OR resetChild has never been called

    public DescriptionMatcher(DescriptionPattern n, Tree root, Tree tree, Map<String, Tree> namesToNodes, VariableStrings variableStrings) {
      super(root, tree, namesToNodes, variableStrings);
      myNode = n;
      resetChildIter();
    }

    /**
     * Restarts this matcher: releases any variables and node name it holds,
     * obtains a fresh candidate iterator and resets the child matcher.
     */
    @Override
    void resetChildIter() {
      decommitVariableGroups();
      removeNamedNodes();
      treeNodeMatchCandidateIterator = myNode.rel.searchNodeIterator(tree, root);
      finished = false;
      nextTreeNodeMatchCandidate = null;
      if (childMatcher != null) {
        // need to tell the children to clean up any preexisting data
        childMatcher.resetChildIter();
      }
    }

    /**
     * Prepares the child for the current candidate: creates the child
     * matcher on first use, re-targets it afterwards, or, for a childless
     * pattern, clears {@code matchedOnce} so the candidate is reported once.
     */
    private void resetChild() {
      if (childMatcher == null) {
        if (myNode.child == null) {
          matchedOnce = false;
        } else {
          childMatcher = myNode.child.matcher(root, nextTreeNodeMatchCandidate, namesToNodes, variableStrings);
        }
      } else {
        childMatcher.resetChildIter(nextTreeNodeMatchCandidate);
      }
    }

    @Override
    boolean getChangesVariables() {
      return myNode.getChangesVariables();
    }

    /* goes to the next node in the tree that is a successful match to my description pattern.
     * This is the hotspot method in running tregex, but not clear how to make it faster. */
    // when finished = false; break; is called, it means I successfully matched.
    private void goToNextTreeNodeMatch() {
      decommitVariableGroups(); // make sure variable groups are free.
      removeNamedNodes(); // if we named a node, it should now be unnamed
      finished = true;
      Matcher m = null;
      while (treeNodeMatchCandidateIterator.hasNext()) {
        nextTreeNodeMatchCandidate = treeNodeMatchCandidateIterator.next();
        if (myNode.descPattern == null) {
          // this is a backreference or link
          if (myNode.isLink) {
            Tree otherTree = namesToNodes.get(myNode.linkedName);
            if (otherTree != null) {
              String otherValue = myNode.basicCatFunction == null ? otherTree.value() : myNode.basicCatFunction.apply(otherTree.value());
              String myValue = myNode.basicCatFunction == null ? nextTreeNodeMatchCandidate.value() : myNode.basicCatFunction.apply(nextTreeNodeMatchCandidate.value());
              if (otherValue.equals(myValue)) {
                finished = false;
                break;
              }
            }
          } else if (namesToNodes.get(myNode.name) == nextTreeNodeMatchCandidate) {
            // backreference: must be the very same node
            finished = false;
            break;
          }
        } else { // try to match the description pattern.
          // cdm: Nov 2006: a null value simply fails to match the description
          boolean found;
          String value = nextTreeNodeMatchCandidate.value();
          if (value == null) {
            found = false;
          } else {
            if (myNode.basicCatFunction != null) {
              value = myNode.basicCatFunction.apply(value);
            }
            m = myNode.descPattern.matcher(value);
            found = m.find();
          }
          if (found) {
            for (Pair<Integer,String> varGroup : myNode.variableGroups) { // if variables have been captured from a regex, they must match any previous matchings
              String thisVariable = varGroup.second();
              String thisVarString = variableStrings.getString(thisVariable);
              if (thisVarString != null && ! thisVarString.equals(m.group(varGroup.first()))) { // failed to match a variable
                found = false;
                break;
              }
            }
          }
          if (found != myNode.negDesc) {
            finished = false;
            break;
          }
        }
      }
      if (!finished) { // I successfully matched.
        resetChild(); // reset my unique TregexMatcher child based on the Tree node I successfully matched at.
        // cdm bugfix jul 2009: on next line need to check for descPattern not null, or else this is a backreference or a link to an already named node, and the map should _not_ be updated
        if ((myNode.descPattern != null || myNode.isLink) && myNode.name != null) {
          // note: have to fill in the map as we go for backreferencing
          namesToNodes.put(myNode.name, nextTreeNodeMatchCandidate);
        }
        // Only a positive regex match on this candidate has groups to
        // commit.  For a negated description the candidate was accepted
        // because the regex (or a variable check) failed, so m is null, a
        // failed match, or left over from an earlier candidate; for a
        // backreference or link m is always null.  Calling m.group() in
        // those cases would throw or capture the wrong text.
        if (m != null && !myNode.negDesc) {
          commitVariableGroups(m); // commit my variable groups.
        }
      }
      // finished is false exiting this if and only if
      // nextTreeNodeMatchCandidate exists and has a label, backreference
      // or link that matches (also the child will just have been reset)
    }

    /**
     * Stores the text of each variable group of {@code m} in
     * {@code variableStrings}.  {@code m} must hold a successful match.
     */
    private void commitVariableGroups(Matcher m) {
      committedVariables = true; // commit all my variable groups.
      for (Pair<Integer,String> varGroup : myNode.variableGroups) {
        String thisVarString = m.group(varGroup.first());
        variableStrings.setVar(varGroup.second(), thisVarString);
      }
    }

    /** Unsets this matcher's variable groups, if it has committed them. */
    private void decommitVariableGroups() {
      if (committedVariables) {
        for (Pair<Integer,String> varGroup : myNode.variableGroups) {
          variableStrings.unsetVar(varGroup.second());
        }
      }
      committedVariables = false;
    }

    /**
     * Removes this pattern's name from {@code namesToNodes}, but only if this
     * pattern is one that assigns the name (it has a description or is a
     * link); a backreference leaves the map alone.
     */
    private void removeNamedNodes() {
      if ((myNode.descPattern != null || myNode.isLink) &&
          myNode.name != null) {
        namesToNodes.remove(myNode.name);
      }
    }

    /* tries to match the unique child of the DescriptionPattern node to a Tree node.  Returns "true" if succeeds.*/
    private boolean matchChild() {
      // entering here (given that it's called only once in matches())
      // we know finished is false, and either
      // nextTreeNodeMatchCandidate == null (meaning goToNextTreeNodeMatch
      // has not been called) or it exists and has a label, backreference
      // or link that matches
      if (nextTreeNodeMatchCandidate == null) {  // I haven't been initialized yet, so my child certainly can't be matched yet.
        return false;
      }
      if (childMatcher == null) {
        // no child pattern: the candidate itself is the match, reported once
        if (!matchedOnce) {
          matchedOnce = true;
          return true;
        }
        return false;
      }
      return childMatcher.matches();
    }

    /**
     * Finds the next local match.  Alternates between trying the child on
     * the current candidate and advancing to the next accepted candidate
     * until one succeeds or the candidates are exhausted.
     *
     * @return for a plain node, whether another match was found; for a
     *     negated node, true exactly when no match exists; for an optional
     *     node, true once whether or not a match exists
     */
    @Override
    public boolean matches() {
      // this is necessary so that a negated/optional node matches only once
      if (finished) {
        return false;
      }
      while (!finished) {
        if (matchChild()) {
          if (myNode.isNegated()) {
            // negated node only has to fail once
            finished = true;
            return false; // cannot be optional and negated
          } else {
            if (myNode.isOptional()) {
              finished = true;
            }
            return true;
          }
        } else {
          goToNextTreeNodeMatch();
        }
      }
      if (myNode.isNegated()) { // couldn't match my relation/pattern, so succeeded!
        return true;
      } else { // couldn't match my relation/pattern, so failed!
        decommitVariableGroups();
        removeNamedNodes();
        nextTreeNodeMatchCandidate = null;
        // didn't match, but return true anyway if optional
        return myNode.isOptional();
      }
    }

    /**
     * Returns the backing tree of the node currently matched, or
     * {@code null} if there is no current candidate.
     *
     * @throws AssertionError if the current candidate is not a
     *     {@link ParentalTreeWrapper}
     */
    @Override
    public Tree getMatch() {
      if (nextTreeNodeMatchCandidate == null) {
        return null;
      }
      if (!(nextTreeNodeMatchCandidate instanceof ParentalTreeWrapper)) {
        throw new AssertionError();
      }
      return ((ParentalTreeWrapper) nextTreeNodeMatchCandidate).getBackingTree();
    }

  } // end class DescriptionMatcher

  private static final long serialVersionUID = 1179819056757295757L;

}