package edu.stanford.nlp.patterns.surface;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
/**
 * Class to represent a target phrase. Note that you can give additional negative constraints
 * in {@link #getTokenStr(List)}, but those are not used by {@link #toStringToWrite()},
 * {@link #hashCode()} and {@link #equals(Object)}.
 *
 * <p>Only the restrictions that are switched on ({@code useTag}, {@code useNER},
 * {@code useTargetParserParentRestriction}) take part in equality, hashing and the
 * string forms; the value of a switched-off restriction is ignored.
 *
 * Author: Sonal Gupta (sonalg@stanford.edu)
 */
public class PatternToken implements Serializable {

  private static final long serialVersionUID = 1L;

  /** Tag value; emitted as a {@code tag} restriction followed by {@code .*} when {@link #useTag} is set. */
  String tag;

  /** Whether the {@link #tag} restriction is active. */
  boolean useTag;

  /** Upper bound of the {@code {1,n}} repetition written by {@link #getTokenStr(List)}. */
  int numWordsCompound;

  /** Whether the {@link #nerTag} restriction is active. */
  boolean useNER = false;

  /** NER value; emitted as an {@code ner} restriction when {@link #useNER} is set. */
  String nerTag = null;

  /** Whether the {@link #grandparentParseTag} restriction is active. */
  boolean useTargetParserParentRestriction = false;

  /** Grandparent parse tag; only assigned by the constructor when the restriction is active. */
  String grandparentParseTag;

  /**
   * Creates a token description.
   *
   * @param tag tag value used when {@code useTag} is true
   * @param useTag whether to restrict on {@code tag}
   * @param getCompoundPhrases when false, the stored compound length is forced to 1
   * @param numWordsCompound compound length stored when {@code getCompoundPhrases} is true
   * @param nerTag NER value used when {@code useNER} is true
   * @param useNER whether to restrict on {@code nerTag}
   * @param useTargetParserParentRestriction whether to restrict on {@code grandparentParseTag}
   * @param grandparentParseTag grandparent parse tag; if null while the restriction is active,
   *        a message is logged and the literal string {@code "null"} is stored instead
   * @throws RuntimeException if {@code useNER} is true and {@code nerTag} is null
   */
  public PatternToken(String tag, boolean useTag, boolean getCompoundPhrases,
      int numWordsCompound, String nerTag, boolean useNER,
      boolean useTargetParserParentRestriction, String grandparentParseTag) {
    if (useNER && nerTag == null) {
      throw new RuntimeException("NER tag is null and using NER restriction is true. Check your data.");
    }
    this.tag = tag;
    this.useTag = useTag;
    this.numWordsCompound = getCompoundPhrases ? numWordsCompound : 1;
    this.nerTag = nerTag;
    this.useNER = useNER;
    this.useTargetParserParentRestriction = useTargetParserParentRestriction;

    // The grandparent tag is recorded only when its restriction is switched on.
    if (useTargetParserParentRestriction) {
      if (grandparentParseTag == null) {
        Redwood.log(ConstantsAndVariables.extremedebug, "Grand parent parse tag null ");
        this.grandparentParseTag = "null";
      } else {
        this.grandparentParseTag = grandparentParseTag;
      }
    }
  }

  /**
   * Returns the compact written form: {@code X}, followed by {@code :value} for each
   * active restriction (tag, NER, grandparent parse tag, in that order), followed by
   * <code>{n}</code> when the compound length is greater than 1.
   *
   * @return the compact string form of this token
   */
  public String toStringToWrite() {
    StringBuilder out = new StringBuilder("X");
    if (useTag) {
      out.append(':').append(tag);
    }
    if (useNER) {
      out.append(':').append(nerTag);
    }
    if (useTargetParserParentRestriction) {
      out.append(':').append(grandparentParseTag);
    }
    // Negative class constraints are not part of the written form.
    if (numWordsCompound > 1) {
      out.append('{').append(numWordsCompound).append('}');
    }
    return out.toString();
  }

  /**
   * Builds the string <code> (?$term [r1 &amp; r2 ...]{1,n})</code>, where the {@code r}
   * entries are the active restrictions plus one negated entry per element of
   * {@code notAllowedClasses}, and {@code n} is {@link #numWordsCompound}. The result is
   * passed through {@code StringUtils.toAscii} before being returned.
   *
   * @param notAllowedClasses names to add as negated restrictions; may be null or empty
   * @return the assembled string
   */
  String getTokenStr(List<String> notAllowedClasses) {
    List<String> restrictions = new ArrayList<>();
    if (useTag) {
      restrictions.add("{tag:/" + tag + ".*/}");
    }
    if (useNER) {
      restrictions.add("{ner:" + nerTag + "}");
    }
    if (useTargetParserParentRestriction) {
      restrictions.add("{grandparentparsetag:\"" + grandparentParseTag + "\"}");
    }
    if (notAllowedClasses != null) {
      for (String na : notAllowedClasses) {
        restrictions.add("!{" + na + ":" + na + "}");
      }
    }

    String str = " (?$term "
        + "[" + StringUtils.join(restrictions, " & ") + "]{1," + numWordsCompound + "}"
        + ")";
    return StringUtils.toAscii(str);
  }

  /**
   * Two tokens are equal when they have the same restriction switches, the same compound
   * length, and the same value for every restriction that is switched on. Values are
   * compared null-safely, so a null value on an active restriction no longer throws.
   */
  @Override
  public boolean equals(Object b) {
    if (this == b) {
      return true;
    }
    if (!(b instanceof PatternToken)) {
      return false;
    }
    PatternToken t = (PatternToken) b;
    if (this.useNER != t.useNER
        || this.useTag != t.useTag
        || this.useTargetParserParentRestriction != t.useTargetParserParentRestriction
        || this.numWordsCompound != t.numWordsCompound) {
      return false;
    }
    if (useTag && !Objects.equals(this.tag, t.tag)) {
      return false;
    }
    if (useNER && !Objects.equals(this.nerTag, t.nerTag)) {
      return false;
    }
    if (useTargetParserParentRestriction
        && !Objects.equals(this.grandparentParseTag, t.grandparentParseTag)) {
      return false;
    }
    return true;
  }

  /**
   * Hashes the string produced by {@link #getTokenStr(List)} with no negative constraints;
   * that string is built from exactly the fields {@link #equals(Object)} compares.
   */
  @Override
  public int hashCode() {
    return getTokenStr(null).hashCode();
  }

  /**
   * Creates a new token from this token's current field values.
   *
   * @return a copy of this token
   */
  public PatternToken copy() {
    // Pass getCompoundPhrases=true so the stored compound length is carried over verbatim;
    // deriving the flag from "numWordsCompound > 1" rewrote any value below 1 to 1.
    return new PatternToken(tag, useTag, true, numWordsCompound, nerTag, useNER,
        useTargetParserParentRestriction, grandparentParseTag);
  }
}