package edu.stanford.nlp.patterns.surface;
import java.io.Serializable;
import java.util.*;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.patterns.Pattern;
import edu.stanford.nlp.patterns.PatternFactory;
import edu.stanford.nlp.patterns.PatternsAnnotations;
import edu.stanford.nlp.util.*;
/**
* Represents a surface pattern in more detail than TokenSequencePattern (an
* object of this class is eventually compiled into a TokenSequencePattern via
* its toString method). See {@link PatternToken} for more info on how matching
* of target phrases is done.
*
* Author: Sonal Gupta (sonalg@stanford.edu)
*/
public class SurfacePattern extends Pattern implements Serializable, Comparable<SurfacePattern>{
/**
 * Builds a fresh map and hands it, together with each context array, to
 * {@code getRelevantWordsBase}: first the next context, then the previous one.
 *
 * @return the map populated by the two {@code getRelevantWordsBase} calls
 */
@Override
public CollectionValuedMap<String, String> getRelevantWords() {
  final CollectionValuedMap<String, String> relevantWords = new CollectionValuedMap<>();
  // Keep the original processing order: next context before previous context.
  getRelevantWordsBase(getNextContext(), relevantWords);
  getRelevantWordsBase(getPrevContext(), relevantWords);
  return relevantWords;
}
/**
 * Generic entry point: casts {@code p} to {@link SurfacePattern} and delegates to
 * {@link #equalContext(SurfacePattern)}.
 *
 * @param p the pattern to compare with; must be a {@code SurfacePattern}
 * @return the result of {@link #equalContext(SurfacePattern)}
 * @throws ClassCastException if {@code p} is not a {@code SurfacePattern}
 */
@Override
public int equalContext(Pattern p) {
  final SurfacePattern other = (SurfacePattern) p;
  return equalContext(other);
}
// Serialization version id for this Serializable class.
private static final long serialVersionUID = 1L;
// Context tokens rendered before the target token in toString(); may be null.
public Token[] prevContext;
// Context tokens rendered after the target token in toString(); may be null.
public Token[] nextContext;
// String prevContextStr = "", nextContextStr = "";
// Description of the target token; compared in equals() and rendered in toString().
public PatternToken token;
// protected String[] originalPrev;
// protected String[] originalNext;
// protected String originalPrevStr = "";
// protected String originalNextStr = "";
// protected String toString;
// Hash value cached by the constructor from toString().hashCode(); returned by hashCode().
protected int hashcode;
// Genre supplied at construction; read and written through getGenre()/setGenre().
protected SurfacePatternFactory.Genre genre;
/**
 * @return the genre currently stored on this pattern
 */
public SurfacePatternFactory.Genre getGenre() {
  return this.genre;
}
/**
 * Replaces the genre stored on this pattern.
 *
 * @param genre the new genre value
 */
public void setGenre(SurfacePatternFactory.Genre genre) {
this.genre = genre;
}
public SurfacePattern(Token[] prevContext, PatternToken token, Token[] nextContext, SurfacePatternFactory.Genre genre) {
super(PatternFactory.PatternType.SURFACE);
this.setPrevContext(prevContext);
this.setNextContext(nextContext);
this.setToken(token);
this.genre = genre;
hashcode = toString().hashCode();
}
/**
 * Creates a new pattern with a copy of the target token ({@code token.copy()}).
 * The context arrays and the genre are passed on by reference, not copied.
 *
 * @return a new {@code SurfacePattern} sharing this pattern's context arrays
 */
public SurfacePattern copyNewToken(){
  final PatternToken tokenCopy = token.copy();
  return new SurfacePattern(prevContext, tokenCopy, nextContext, genre);
}
/**
 * Builds a SURFACE-type context {@link Token} carrying one OR-restriction: the
 * {@code ProcessedTextAnnotation} value read from {@code tokenj}.
 *
 * @param tokenj the label whose {@code ProcessedTextAnnotation} is used
 * @return the newly created context token
 */
public static Token getContextToken(CoreLabel tokenj) {
  final Token contextToken = new Token(PatternFactory.PatternType.SURFACE);
  final Class<PatternsAnnotations.ProcessedTextAnnotation> key = PatternsAnnotations.ProcessedTextAnnotation.class;
  contextToken.addORRestriction(key, tokenj.get(key));
  return contextToken;
}
// public static String getContextStr(CoreLabel tokenj, boolean useLemmaContextTokens, boolean lowerCaseContext) {
// String str = "";
//
// if (useLemmaContextTokens) {
// String tok = tokenj.lemma();
// if (lowerCaseContext)
// tok = tok.toLowerCase();
// str = "[{lemma:/" + Pattern.quote(tok.replaceAll("/", "\\\\/"))+ "/}] ";
// //str = "[{lemma:/\\Q" + tok.replaceAll("/", "\\\\/") + "\\E/}] ";
// } else {
// String tok = tokenj.word();
// if (lowerCaseContext)
// tok = tok.toLowerCase();
// str = "[{word:/" + Pattern.quote(tok.replaceAll("/", "\\\\/")) + "/}] ";
// //str = "[{word:/\\Q" + tok.replaceAll("/", "\\\\/") + "\\E/}] ";
//
// }
// return str;
// }
/**
 * Wraps a word in a bracketed, slash-delimited, regex-quoted token string.
 * Every {@code /} in {@code w} is first prefixed with a backslash. The result is then
 * quoted with {@link java.util.regex.Pattern#quote(String)} and placed between
 * {@code "[/"} and {@code "/] "} (note the trailing space).
 *
 * @param w the word to wrap; must not be null
 * @return the bracketed, quoted string for {@code w}
 */
public static String getContextStr(String w) {
  // Literal replacement: each "/" becomes a backslash followed by "/".
  final String slashEscaped = w.replace("/", "\\/");
  final StringBuilder out = new StringBuilder("[/");
  out.append(java.util.regex.Pattern.quote(slashEscaped));
  out.append("/] ");
  return out.toString();
}
/**
 * Renders the pattern as "previous context, target token, next context", separated by
 * single spaces and trimmed. A null context contributes an empty string.
 *
 * @param notAllowedClasses passed through to {@code getToken().getTokenStr(...)}
 * @return the trimmed string form of this pattern
 */
public String toString(List<String> notAllowedClasses) {
  final String before = prevContext == null ? "" : StringUtils.join(prevContext, " ");
  final String after = nextContext == null ? "" : StringUtils.join(nextContext, " ");
  final StringBuilder rendered = new StringBuilder(before);
  rendered.append(' ').append(getToken().getTokenStr(notAllowedClasses)).append(' ');
  rendered.append(after);
  return rendered.toString().trim();
}
/**
 * Same as {@link #toString(List)} but with extra pattern text inserted on either side
 * of the target token. The five parts are joined with single spaces in the order:
 * previous context, {@code morePreviousPattern}, target token, {@code moreNextPattern},
 * next context. The result is trimmed.
 *
 * @param morePreviousPattern text placed between the previous context and the target
 * @param moreNextPattern text placed between the target and the next context
 * @param notAllowedClasses passed through to {@code getToken().getTokenStr(...)}
 * @return the trimmed string form
 */
public String toString(String morePreviousPattern, String moreNextPattern, List<String> notAllowedClasses) {
  final String before = prevContext == null ? "" : StringUtils.join(prevContext, " ");
  final String after = nextContext == null ? "" : StringUtils.join(nextContext, " ");
  final StringBuilder rendered = new StringBuilder(before);
  rendered.append(' ').append(morePreviousPattern);
  rendered.append(' ').append(getToken().getTokenStr(notAllowedClasses));
  rendered.append(' ').append(moreNextPattern);
  rendered.append(' ').append(after);
  return rendered.toString().trim();
}
/**
 * @return the previous-context tokens joined with single spaces, or the empty string
 *         when there is no previous context
 */
public String getPrevContextStr() {
  return prevContext == null ? "" : StringUtils.join(prevContext, " ");
}
/**
 * @return the next-context tokens joined with single spaces, or the empty string
 *         when there is no next context
 */
public String getNextContextStr() {
  return nextContext == null ? "" : StringUtils.join(nextContext, " ");
}
/**
 * Compares the context and the target-token restrictions of this pattern with those
 * of {@code p}.
 *
 * <ul>
 * <li>Returns 0 when {@code p.equals(this)}.</li>
 * <li>Returns {@link Integer#MAX_VALUE} when the previous or the next context arrays
 * are not equal.</li>
 * <li>Otherwise returns this pattern's restriction score minus {@code p}'s score, so a
 * negative result means {@code p} has more restrictions. The difference can also be 0
 * for two unequal patterns with the same score.</li>
 * </ul>
 *
 * The restriction score of a token is the number of enabled flags among
 * {@code useTag}, {@code useNER} and {@code useTargetParserParentRestriction}, minus
 * the token's {@code numWordsCompound}.
 *
 * @param p the pattern to compare with; must not be null
 * @return 0, {@code Integer.MAX_VALUE}, or the restriction-score difference as described
 */
public int equalContext(SurfacePattern p) {
  if (p.equals(this)) {
    return 0;
  }
  final boolean sameContext = Arrays.equals(this.prevContext, p.getPrevContext())
      && Arrays.equals(this.nextContext, p.getNextContext());
  if (!sameContext) {
    return Integer.MAX_VALUE;
  }
  // Each side is scored from its OWN token. The previous code subtracted this token's
  // numWordsCompound from p's score, so p's value was never taken into account.
  return countTokenRestrictions(this.getToken()) - countTokenRestrictions(p.getToken());
}

/** Restriction score of one token: enabled restriction flags minus numWordsCompound. */
private static int countTokenRestrictions(PatternToken t) {
  int score = 0;
  if (t.useTag) {
    score++;
  }
  if (t.useNER) {
    score++;
  }
  if (t.useTargetParserParentRestriction) {
    score++;
  }
  score -= t.numWordsCompound;
  return score;
}
/**
 * Two surface patterns are equal when their target tokens are equal and both context
 * arrays are element-wise equal. A null context only equals another null context.
 * The genre is not part of the comparison.
 *
 * @param b the object to compare with
 * @return true if {@code b} is a {@code SurfacePattern} with equal token and contexts
 */
@Override
public boolean equals(Object b) {
  if (!(b instanceof SurfacePattern)) {
    return false;
  }
  final SurfacePattern other = (SurfacePattern) b;
  // Arrays.equals treats (null, null) as equal and (null, non-null) as different,
  // which covers the explicit null checks.
  return token.equals(other.token)
      && Arrays.equals(this.prevContext, other.prevContext)
      && Arrays.equals(this.nextContext, other.nextContext);
}
/**
 * Returns the hash value cached in the {@code hashcode} field, which the constructor
 * derives from {@code toString()}.
 * NOTE(review): the setters in this class do not refresh the cached value, so it may be
 * stale after the pattern is mutated; confirm callers do not rely on that.
 *
 * @return the cached hash code
 */
@Override
public int hashCode() {
  return this.hashcode;
}
/**
 * Default string form: {@link #toString(List)} called with a null list.
 *
 * @return the trimmed string form of this pattern
 */
@Override
public String toString() {
  final List<String> noClassFilter = null;
  return toString(noClassFilter);
}
/**
 * Produces a three-field form separated by {@code "##"}: the previous context string,
 * the token's {@code toStringToWrite()} value, and the next context string.
 *
 * @return the {@code "##"}-separated representation
 */
public String toStringToWrite() {
  final StringBuilder out = new StringBuilder();
  out.append(getPrevContextStr());
  out.append("##").append(getToken().toStringToWrite());
  out.append("##").append(getNextContextStr());
  return out.toString();
}
/**
 * @return the simple forms of the previous-context tokens (see
 *         {@link #getSimplerTokens(Token[])}), or null when there is no previous context
 */
public String[] getSimplerTokensPrev() {
  return getSimplerTokens(this.prevContext);
}
/**
 * @return the simple forms of the next-context tokens (see
 *         {@link #getSimplerTokens(Token[])}), or null when there is no next context
 */
public String[] getSimplerTokensNext() {
  return getSimplerTokens(this.nextContext);
}
// static Pattern p1 = Pattern.compile(Pattern.quote("[") + "\\s*" + Pattern.quote("{") + "\\s*(lemma|word)\\s*:\\s*/" + Pattern.quote("\\Q") + "(.*)"
// + Pattern.quote("\\E") + "/\\s*" + Pattern.quote("}") + "\\s*" + Pattern.quote("]"));
//
// static Pattern p2 = Pattern.compile(Pattern.quote("[") + "\\s*" + Pattern.quote("{") + "\\s*(.*)\\s*:\\s*(.*)\\s*" + Pattern.quote("}") + "\\s*"
// + Pattern.quote("]"));
/**
 * Maps each token to its {@code getSimple()} string, keeping positions.
 *
 * @param p the tokens to convert; may be null
 * @return an array of the same length holding each token's simple form, or null when
 *         {@code p} is null
 */
public String[] getSimplerTokens(Token[] p){
  if (p == null) {
    return null;
  }
  final String[] simplified = new String[p.length];
  int idx = 0;
  for (Token contextToken : p) {
    assert contextToken != null : "How is the any one " + Arrays.toString(p) + " null!";
    simplified[idx++] = contextToken.getSimple();
  }
  return simplified;
}
/*
public String[] getSimplerTokens(String[] p) {
if (p == null)
return null;
String[] sim = new String[p.length];
for (int i = 0; i < p.length; i++) {
assert p[i] != null : "How is the any one " + Arrays.toString(p) + " null!";
if (p1 == null)
throw new RuntimeException("how is p1 null");
Matcher m = p1.matcher(p[i]);
if (m.matches()) {
sim[i] = m.group(2);
} else {
Matcher m2 = p2.matcher(p[i]);
if (m2.matches()) {
sim[i] = m2.group(2);
} else if (p[i].startsWith("$FILLER"))
sim[i] = "FW";
else if (p[i].startsWith("$STOP"))
sim[i] = "SW";
else
throw new RuntimeException("Cannot understand " + p[i]);
}
}
return sim;
}
*/
/**
 * Produces a compact form with the target token wrapped in {@code <b>...</b>}:
 * the trimmed, space-joined simple previous tokens, then the token's
 * {@code toStringToWrite()} value in bold tags, then the trimmed simple next tokens.
 *
 * @return the simplified string
 */
public String toStringSimple() {
  final String[] simplePrev = getSimplerTokensPrev();
  final String[] simpleNext = getSimplerTokensNext();
  String before = "";
  if (simplePrev != null) {
    before = StringUtils.join(simplePrev, " ");
  }
  String after = "";
  if (simpleNext != null) {
    after = StringUtils.join(simpleNext, " ");
  }
  final StringBuilder out = new StringBuilder(before.trim());
  out.append(" <b>").append(getToken().toStringToWrite()).append("</b> ");
  out.append(after.trim());
  return out.toString();
}
/**
 * @return the previous-context array itself (not a copy); may be null
 */
public Token[] getPrevContext() {
  return this.prevContext;
}
/**
 * Stores the given array as the previous context (by reference, no copy).
 * NOTE(review): the cached {@code hashcode} is not recomputed here.
 *
 * @param prevContext the new previous-context tokens; may be null
 */
public void setPrevContext(Token[] prevContext) {
this.prevContext = prevContext;
}
/**
 * @return the next-context array itself (not a copy); may be null
 */
public Token[] getNextContext() {
  return this.nextContext;
}
/**
 * Stores the given array as the next context (by reference, no copy).
 * NOTE(review): the cached {@code hashcode} is not recomputed here.
 *
 * @param nextContext the new next-context tokens; may be null
 */
public void setNextContext(Token[] nextContext) {
this.nextContext = nextContext;
}
/**
 * @return the target-token description stored on this pattern
 */
public PatternToken getToken() {
  return this.token;
}
/**
 * Replaces the target-token description.
 * NOTE(review): the cached {@code hashcode} is not recomputed here.
 *
 * @param token the new target token
 */
public void setToken(PatternToken token) {
this.token = token;
}
// private String getOriginalPrevStr() {
// String originalPrevStr = "";
// if (originalPrev != null)
// originalPrevStr = StringUtils.join(originalPrev, " ");
//
// return originalPrevStr;
// }
// public void setOriginalPrevStr(String originalPrevStr) {
// this.originalPrevStr = originalPrevStr;
// }
// public String getOriginalNextStr() {
// String originalNextStr = "";
// if (originalNext != null)
// originalNextStr = StringUtils.join(originalNext, " ");
// return originalNextStr;
// }
// public void setOriginalNextStr(String originalNextStr) {
// this.originalNextStr = originalNextStr;
// }
// public String[] getOriginalPrev() {
// return originalPrev;
// }
//
// public void setOriginalPrev(String[] originalPrev) {
// this.originalPrev = originalPrev;
// }
//
// public String[] getOriginalNext() {
// return originalNext;
// }
//
// public void setOriginalNext(String[] originalNext) {
// this.originalNext = originalNext;
// }
/**
 * Tests whether two patterns carry equal genres, using {@code equals} on p1's genre.
 *
 * @param p1 first pattern; its genre must not be null
 * @param p2 second pattern
 * @return true if {@code p1.getGenre().equals(p2.getGenre())}
 */
public static boolean sameGenre(SurfacePattern p1, SurfacePattern p2) {
  final SurfacePatternFactory.Genre first = p1.getGenre();
  final SurfacePatternFactory.Genre second = p2.getGenre();
  return first.equals(second);
}
/**
 * True if {@code array1} contains {@code array2} as a contiguous run of equal elements.
 * Also true if both arrays are null. An empty {@code array2} is contained in every
 * non-null {@code array1}, including an empty one. Elements are compared with
 * {@link Objects#equals(Object, Object)}, so null elements match only null elements.
 *
 * @param array1 the array searched in; may be null
 * @param array2 the array searched for; may be null
 * @return true if both are null, or both are non-null and {@code array2} occurs
 *         contiguously inside {@code array1}; false otherwise
 */
static public boolean subsumesArray(Object[] array1, Object[] array2) {
  if (array1 == null && array2 == null) {
    return true;
  }
  // only one of them is null
  if (array1 == null || array2 == null) {
    return false;
  }
  // An empty array is trivially contained. Previously this case threw
  // ArrayIndexOutOfBoundsException on array2[0], or returned false for two empty arrays.
  if (array2.length == 0) {
    return true;
  }
  // Only start positions that leave room for all of array2 can match. This is negative,
  // so the loop is skipped, when array2 is longer than array1.
  final int lastStart = array1.length - array2.length;
  for (int start = 0; start <= lastStart; start++) {
    boolean matches = true;
    for (int j = 0; j < array2.length; j++) {
      if (!Objects.equals(array2[j], array1[start + j])) {
        matches = false;
        break;
      }
    }
    if (matches) {
      return true;
    }
  }
  return false;
}
/**
 * True if p1 subsumes p2: p1's next context contains p2's next context and p1's
 * previous context contains p2's previous context, both as decided by
 * {@link #subsumesArray(Object[], Object[])}.
 *
 * @param p1 the pattern expected to have the longer (containing) context
 * @param p2 the pattern expected to have the shorter (contained) context
 * @return true if both of p2's contexts are contained in p1's
 */
public static boolean subsumes(SurfacePattern p1, SurfacePattern p2) {
  // The next context is checked first; the previous context only if that succeeds.
  return subsumesArray(p1.getNextContext(), p2.getNextContext())
      && subsumesArray(p1.getPrevContext(), p2.getPrevContext());
}
/**
 * True if either pattern subsumes the other (see
 * {@link #subsumes(SurfacePattern, SurfacePattern)}).
 *
 * @param p1 first pattern
 * @param p2 second pattern
 * @return true if p1 subsumes p2 or p2 subsumes p1
 */
public static boolean subsumesEitherWay(SurfacePattern p1, SurfacePattern p2) {
  return subsumes(p1, p2) || subsumes(p2, p1);
}
/**
 * Tests whether the target tokens of two patterns are equal.
 *
 * @param p1 first pattern; its token must not be null
 * @param p2 second pattern
 * @return true if {@code p1.token.equals(p2.token)}
 */
public static boolean sameRestrictions(SurfacePattern p1, SurfacePattern p2) {
  return p1.token.equals(p2.token);
}
/**
 * Orders patterns by total context length (previous + next), longest first. Patterns
 * with the same total length are ordered by comparing their {@code toString()} values.
 *
 * @param o the pattern to compare with
 * @return -1 if this pattern has more context tokens, 1 if it has fewer, otherwise the
 *         result of the string comparison
 */
@Override
public int compareTo(SurfacePattern o) {
  final int thisTotal = this.getPreviousContextLen() + this.getNextContextLen();
  final int otherTotal = o.getPreviousContextLen() + o.getNextContextLen();
  if (thisTotal != otherTotal) {
    // A longer context sorts first.
    return thisTotal > otherTotal ? -1 : 1;
  }
  return this.toString().compareTo(o.toString());
}
/**
 * @return the number of previous-context tokens, or 0 when the previous context is null
 */
public int getPreviousContextLen() {
  return this.prevContext == null ? 0 : this.prevContext.length;
}
/**
 * @return the number of next-context tokens, or 0 when the next context is null
 */
public int getNextContextLen() {
  return this.nextContext == null ? 0 : this.nextContext.length;
}
/**
 * Tests whether two patterns have the same previous-context length and the same
 * next-context length (a null context counts as length 0).
 *
 * @param p1 first pattern
 * @param p2 second pattern
 * @return true if both context lengths match
 */
public static boolean sameLength(SurfacePattern p1, SurfacePattern p2) {
  final boolean samePrevLen = p1.getPreviousContextLen() == p2.getPreviousContextLen();
  return samePrevLen && p1.getNextContextLen() == p2.getNextContextLen();
}
/**
 * Writes {@code numWordsCompound} onto the target token's field of the same name.
 * NOTE(review): the parameter is a boxed Integer; if the token field is a primitive
 * int, passing null would fail on unboxing. Confirm against PatternToken.
 *
 * @param numWordsCompound the value to store on the token
 */
public void setNumWordsCompound(Integer numWordsCompound) {
token.numWordsCompound = numWordsCompound;
}
// public static SurfacePattern parse(String s) {
// String[] t = s.split("##", -1);
// String prev = t[0];
// PatternToken tok = PatternToken.parse(t[1]);
// String next = t[2];
// return new SurfacePattern(prev, tok, next);
// }
}