package edu.stanford.nlp.parser.lexparser;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Interner;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.ANY_WORD_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.ANY_TAG_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.STOP_WORD_INT;
import static edu.stanford.nlp.parser.lexparser.IntTaggedWord.STOP_TAG_INT;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.ObjectInputStream;
import java.util.Collection;
import java.util.Map;
/**
* An abstract base class for dependency grammars. The only thing you have
* to implement in a subclass is scoreTB (score a "tag binned" dependency
* in the tagProjection space). A subclass also has to either call
* super() in its constructor, or otherwise initialize the tagBin array.
* The call to initTagBins() (in the constructor) must be made after all
* keys have been entered into tagIndex.
*
* @author Galen Andrew
*/
public abstract class AbstractDependencyGrammar implements DependencyGrammar {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(AbstractDependencyGrammar.class);
protected TagProjection tagProjection;
protected final Index<String> tagIndex;
protected final Index<String> wordIndex;
protected int numTagBins;
protected int[] tagBin;
protected TreebankLanguagePack tlp;
protected boolean directional;
protected boolean useDistance;
protected boolean useCoarseDistance;
protected Lexicon lex;
protected final IntTaggedWord stopTW;
protected final IntTaggedWord wildTW;
protected transient Map<IntDependency,IntDependency> expandDependencyMap = Generics.newHashMap();
private static final boolean DEBUG = false;
protected int[] coarseDistanceBins = {0, 2, 5};
protected int[] regDistanceBins = {0, 1, 5, 10};
protected final Options op;
transient protected Interner<IntTaggedWord> itwInterner =
new Interner<>();
public AbstractDependencyGrammar(TreebankLanguagePack tlp, TagProjection tagProjection, boolean directional, boolean useDistance, boolean useCoarseDistance, Options op, Index<String> wordIndex, Index<String> tagIndex) {
this.tlp = tlp;
this.tagProjection = tagProjection;
this.directional = directional;
this.useDistance = useDistance;
this.useCoarseDistance = useCoarseDistance;
this.op = op;
this.wordIndex = wordIndex;
this.tagIndex = tagIndex;
stopTW = new IntTaggedWord(STOP_WORD_INT, STOP_TAG_INT);
wildTW = new IntTaggedWord(ANY_WORD_INT, ANY_TAG_INT);
initTagBins();
}
public void setLexicon(Lexicon lexicon) {
lex = lexicon;
}
/**
* Default is no-op.
*/
public void tune(Collection<Tree> trees) {
}
public int numTagBins() {
return numTagBins;
}
public int tagBin(int tag) {
if (tag < 0) {
return tag;
} else {
return tagBin[tag];
}
}
public boolean rootTW(IntTaggedWord rTW) {
// System.out.println("rootTW: checking if " + rTW.toString("verbose") +
// " == " + Lexicon.BOUNDARY_TAG + "[" +
// tagIndex.indexOf(Lexicon.BOUNDARY_TAG) + "]" + ": " +
// (rTW.tag == tagIndex.indexOf(Lexicon.BOUNDARY_TAG)));
return rTW.tag == tagIndex.indexOf(Lexicon.BOUNDARY_TAG);
}
protected short valenceBin(int distance) {
if (!useDistance) {
return 0;
}
if (distance < 0) {
return -1;
}
if (distance == 0) {
return 0;
}
return 1;
}
public int numDistBins() {
return useCoarseDistance ? 4 : 5;
}
public short distanceBin(int distance) {
if (!useDistance) {
return 0;
} else if (useCoarseDistance) {
return coarseDistanceBin(distance);
} else {
return regDistanceBin(distance);
}
}
public short regDistanceBin(int distance) {
for(short i=0; i<regDistanceBins.length; ++i)
if (distance <= regDistanceBins[i])
return i;
return (short) regDistanceBins.length;
}
public short coarseDistanceBin(int distance) {
for(short i=0; i<coarseDistanceBins.length; ++i)
if (distance <= coarseDistanceBins[i])
return i;
return (short) coarseDistanceBins.length;
}
void setCoarseDistanceBins(int[] bins) {
assert(bins.length == 3);
coarseDistanceBins = bins;
}
void setRegDistanceBins(int[] bins) {
assert(bins.length == 4);
regDistanceBins = bins;
}
protected void initTagBins() {
Index<String> tagBinIndex = new HashIndex<>();
if (DEBUG) {
log.info();
log.info("There are " + tagIndex.size() + " tags.");
}
tagBin = new int[tagIndex.size()];
for (int t = 0; t < tagBin.length; t++) {
String tagStr = tagIndex.get(t);
String binStr;
if (tagProjection == null) {
binStr = tagStr;
} else {
binStr = tagProjection.project(tagStr);
}
tagBin[t] = tagBinIndex.addToIndex(binStr);
if (DEBUG) {
log.info("initTagBins: Mapped " + tagStr + " (" + t +
") to " + binStr + " (" + tagBin[t] + ")");
}
}
numTagBins = tagBinIndex.size();
if (DEBUG) {
log.info("initTagBins: tags " + tagBin.length + " bins " +
numTagBins);
log.info("tagBins: " + tagBinIndex);
}
}
public double score(IntDependency dependency) {
return scoreTB(dependency.head.word, tagBin(dependency.head.tag), dependency.arg.word, tagBin(dependency.arg.tag), dependency.leftHeaded, dependency.distance);
}
// currently unused
public double score(int headWord, int headTag, int argWord, int argTag, boolean leftHeaded, int dist) {
IntDependency tempDependency = new IntDependency(headWord, headTag, argWord, argTag, leftHeaded, dist);
return score(tempDependency); // this method tag bins
}
public double scoreTB(int headWord, int headTag, int argWord, int argTag, boolean leftHeaded, int dist) {
IntDependency tempDependency = new IntDependency(headWord, headTag, argWord, argTag, leftHeaded, dist);
return scoreTB(tempDependency);
}
private void readObject(ObjectInputStream ois)
throws IOException, ClassNotFoundException
{
ois.defaultReadObject();
// reinitialize the transient objects
itwInterner = new Interner<>();
}
/**
* Default is to throw exception.
* @throws IOException
*/
public void readData(BufferedReader in) throws IOException {
throw new UnsupportedOperationException();
}
/**
* Default is to throw exception.
* @throws IOException
*/
public void writeData(PrintWriter out) throws IOException {
throw new UnsupportedOperationException();
}
/**
* This is a custom interner that simultaneously creates and interns
* an IntDependency.
*
* @return An interned IntDependency
*/
protected IntDependency intern(IntTaggedWord headTW, IntTaggedWord argTW, boolean leftHeaded, short dist) {
Map<IntDependency,IntDependency> map = expandDependencyMap;
IntDependency internTempDependency = new IntDependency(itwInterner.intern(headTW), itwInterner.intern(argTW), leftHeaded, dist);
IntDependency returnDependency = internTempDependency;
if (map != null) {
returnDependency = map.get(internTempDependency);
if (returnDependency == null) {
map.put(internTempDependency, internTempDependency);
returnDependency = internTempDependency;
}
}
return returnDependency;
}
private static final long serialVersionUID = 3L;
}