package edu.stanford.nlp.trees.tregex.tsurgeon;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Tsurgeon pattern that changes the label of the node produced by its single
 * child pattern.
 * <p>
 * The new-label argument given to the constructor is interpreted in one of
 * two ways (see {@link RelabelMode}):
 * <ul>
 * <li>{@code FIXED}: the node's label value is overwritten with a fixed
 *     string. The string may be written bare, or wrapped in a pair of
 *     slashes, in which case the slashes are dropped and backslash escapes
 *     inside are removed.</li>
 * <li>{@code REGEX}: the argument has the form {@code /regex/replacement/}.
 *     Every match of {@code regex} in the node's current label value is
 *     replaced by {@code replacement}, which may contain <code>={name}</code>
 *     (value of a named node) and <code>%{name}</code> (captured variable
 *     string) snippets in addition to ordinary text.</li>
 * </ul>
 *
 * @author Roger Levy (rog@stanford.edu)
 */
class RelabelNode extends TsurgeonPattern {

  // Overly complicated pattern to identify regexes surrounded by /,
  // possibly with / escaped inside the regex.
  // The purpose of the [^/]*[^/\\\\] is to match characters that
  // aren't / and to allow escaping of other characters.
  // The purpose of the \\\\/ is to allow escaped / inside the pattern.
  // The purpose of the \\\\\\\\ is to allow escaped \ at the end of
  // the pattern, so you can match, for example, /\\/.  There need to
  // be 8x\ because both java and regexes need escaping, resulting in 4x.
  static final String regexPatternString =
    "((?:(?:[^/]*[^/\\\\])|\\\\/)*(?:\\\\\\\\)*)";

  /** Matches a complete argument of the form {@code /text/}; group 1 is the text. */
  static final Pattern regexPattern =
    Pattern.compile("/" + regexPatternString + "/");

  /**
   * This pattern finds relabel snippets that use a named node,
   * written <code>={name}</code>.
   */
  static final String nodePatternString = "(=\\{[a-zA-Z0-9_]+\\})";
  static final Pattern nodePattern = Pattern.compile(nodePatternString);

  /**
   * This pattern finds relabel snippets that use a captured variable,
   * written <code>%{name}</code>.
   */
  static final String variablePatternString = "(%\\{[a-zA-Z0-9_]+\\})";
  static final Pattern variablePattern =
    Pattern.compile(variablePatternString);

  /**
   * Finds one chunk of a general relabel operation, either named node
   * or captured variable.
   */
  static final String oneGeneralReplacement =
    ("(" + nodePatternString + "|" + variablePatternString + ")");
  static final Pattern oneGeneralReplacementPattern =
    Pattern.compile(oneGeneralReplacement);

  /**
   * Identifies a relabel argument that uses the regex replacement strategy,
   * i.e. {@code /regex/replacement/}.  Group 1 is the regex, group 2 the
   * replacement text.
   */
  static final Pattern substPattern =
    Pattern.compile("/" + regexPatternString + "/(.*)/");

  /** How the new label is produced: a fixed string, or a regex substitution. */
  enum RelabelMode { FIXED, REGEX }

  private final RelabelMode mode;

  /** Label to assign in FIXED mode; {@code null} in REGEX mode. */
  private final String newLabel;

  /** Compiled regex applied to the old label in REGEX mode; {@code null} in FIXED mode. */
  private final Pattern labelRegex;

  /** Raw replacement text in REGEX mode (used by toString); {@code null} in FIXED mode. */
  private final String replacementString;

  /**
   * The replacement text in REGEX mode, cut into literal chunks and
   * <code>={name}</code> / <code>%{name}</code> snippets, in their original
   * order; {@code null} in FIXED mode.
   */
  private final List<String> replacementPieces;

  /**
   * Builds a relabel operation.
   *
   * @param child pattern whose evaluation yields the node to relabel
   * @param newLabel either a plain label, a label wrapped in slashes, or a
   *        {@code /regex/replacement/} substitution specification
   */
  public RelabelNode(TsurgeonPattern child, String newLabel) {
    super("relabel", new TsurgeonPattern[] { child });
    Matcher m1 = substPattern.matcher(newLabel);
    if (m1.matches()) {
      mode = RelabelMode.REGEX;
      String replacement = m1.group(2);
      this.labelRegex = Pattern.compile(m1.group(1));
      this.replacementString = replacement;
      this.replacementPieces = splitReplacement(replacement);
      this.newLabel = null;
    } else {
      mode = RelabelMode.FIXED;
      Matcher m2 = regexPattern.matcher(newLabel);
      if (m2.matches()) {
        // fixed relabel but surrounded by regex slashes: strip the slashes
        // and undo the backslash escaping of the text in between
        String escapedLabel = m2.group(1);
        this.newLabel = removeEscapeSlashes(escapedLabel);
      } else {
        // just a node name to relabel to
        this.newLabel = newLabel;
      }
      this.replacementString = null;
      this.replacementPieces = null;
      this.labelRegex = null;
    }
  }

  /**
   * Cuts a replacement string into an ordered list of pieces, where each
   * piece is either one <code>={name}</code> / <code>%{name}</code> snippet
   * or the literal text between two snippets.  Empty literal stretches are
   * not emitted.
   *
   * @param replacement the replacement part of a regex relabel argument
   * @return the pieces, in the order they appear in {@code replacement}
   */
  private static List<String> splitReplacement(String replacement) {
    List<String> pieces = new ArrayList<>();
    Matcher generalMatcher = oneGeneralReplacementPattern.matcher(replacement);
    int lastPosition = 0;
    while (generalMatcher.find()) {
      if (generalMatcher.start() > lastPosition) {
        pieces.add(replacement.substring(lastPosition, generalMatcher.start()));
      }
      // A match is never empty: the pattern needs at least the marker,
      // both braces and one name character.
      pieces.add(generalMatcher.group());
      lastPosition = generalMatcher.end();
    }
    if (lastPosition < replacement.length()) {
      pieces.add(replacement.substring(lastPosition));
    }
    return pieces;
  }

  /**
   * Removes one level of backslash escaping: a backslash is dropped and the
   * character after it is kept as is (so an escaped backslash yields a single
   * backslash).  A lone backslash at the very end of the input is kept.
   *
   * @param in the escaped text
   * @return the text with escaping backslashes removed
   */
  private static String removeEscapeSlashes(String in) {
    StringBuilder out = new StringBuilder();
    int len = in.length();
    boolean lastIsBackslash = false;
    for (int i = 0; i < len; i++) {
      char ch = in.charAt(i);
      if (ch == '\\') {
        if (lastIsBackslash || i == len - 1) {
          // second backslash of an escaped pair, or a trailing backslash
          // with nothing left to escape: emit it
          out.append(ch);
          lastIsBackslash = false;
        } else {
          // escaping backslash: swallow it and remember that we did
          lastIsBackslash = true;
        }
      } else {
        out.append(ch);
        lastIsBackslash = false;
      }
    }
    return out.toString();
  }

  @Override
  public TsurgeonMatcher matcher(Map<String,Tree> newNodeNames, CoindexationGenerator coindexer) {
    return new RelabelMatcher(newNodeNames, coindexer);
  }

  /** Matcher that performs the relabeling configured on the enclosing pattern. */
  private class RelabelMatcher extends TsurgeonMatcher {
    public RelabelMatcher(Map<String,Tree> newNodeNames, CoindexationGenerator coindexer) {
      super(RelabelNode.this, newNodeNames, coindexer);
    }

    /**
     * Evaluates the child matcher to obtain the node to relabel, sets that
     * node's label value according to the relabel mode, and returns
     * {@code tree}.
     */
    @Override
    public Tree evaluate(Tree tree, TregexMatcher tregex) {
      Tree nodeToRelabel = childMatcher[0].evaluate(tree, tregex);
      switch (mode) {
      case FIXED: {
        nodeToRelabel.label().setValue(newLabel);
        break;
      }
      case REGEX: {
        Matcher m = labelRegex.matcher(nodeToRelabel.label().value());
        StringBuilder label = new StringBuilder();
        for (String chunk : replacementPieces) {
          if (variablePattern.matcher(chunk).matches()) {
            // %{name}: strip the two-character prefix and the closing brace
            String name = chunk.substring(2, chunk.length() - 1);
            // quoted so the substituted text is taken literally by replaceAll
            label.append(Matcher.quoteReplacement(tregex.getVariableString(name)));
          } else if (nodePattern.matcher(chunk).matches()) {
            // ={name}: same stripping, then look the node up in the tregex match
            String name = chunk.substring(2, chunk.length() - 1);
            label.append(Matcher.quoteReplacement(tregex.getNode(name).value()));
          } else {
            // Literal text is deliberately NOT quoted, so group references
            // such as $1 in it are interpreted by replaceAll below.
            label.append(chunk);
          }
        }
        nodeToRelabel.label().setValue(m.replaceAll(label.toString()));
        break;
      }
      default:
        throw new AssertionError("Unsupported relabel mode " + mode);
      }
      return tree;
    }
  }

  /**
   * Renders the operation using the inherited label, the child pattern and
   * either the fixed new label or the regex plus its raw replacement text.
   */
  @Override
  public String toString() {
    switch (mode) {
    case FIXED:
      return label + '(' + children[0].toString() + ',' + newLabel + ')';
    case REGEX:
      return label + '(' + children[0].toString() + ',' + labelRegex.toString() + ',' + replacementString + ')';
    default:
      throw new AssertionError("Unsupported relabel mode " + mode);
    }
  }
}