/**
* Copyright (c) 2009, Regents of the University of Colorado All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution. Neither the name of the University of Colorado at
* Boulder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package clear.parse;
import clear.decode.AbstractMultiDecoder;
import clear.dep.DepLib;
import clear.dep.DepNode;
import clear.dep.DepTree;
import clear.ftr.map.DepFtrMap;
import clear.ftr.xml.DepFtrXml;
import clear.util.tuple.JIntDoubleTuple;
import com.carrotsearch.hppc.IntArrayList;
/**
* Shift-pop dependency parser.
*
* @author Jinho D. Choi <b>Last update:</b> 4/12/2010
*/
public class ShiftPopParser extends ShiftEagerParser {
/**
* Label of Left-Pop transition
*/
static public final String LB_LEFT_POP = "LP";
private final String LB_LEFT_ARCPOP = LB_LEFT_ARC + "|" + LB_LEFT_POP;
/**
* {@link ShiftPopParser#FLAG_PRINT_TRANSITION} or {@link ShiftPopParser#FLAG_TRAIN_LEXICON}.
*/
public ShiftPopParser(byte flag, String filename) {
super(flag, filename);
}
/**
* {@link ShiftPopParser#FLAG_TRAIN_INSTANCE}.
*/
public ShiftPopParser(byte flag, DepFtrXml xml, String lexiconFile) {
super(flag, xml, lexiconFile);
}
/**
* {@link ShiftPopParser#FLAG_PREDICT} or {@link ShiftPopParser#FLAG_TRAIN_BOOST}.
*/
public ShiftPopParser(byte flag, DepFtrXml xml, DepFtrMap map, AbstractMultiDecoder decoder) {
super(flag, xml, map, decoder);
}
/**
* Parses the dependency tree.
*/
@Override
public void parse(DepTree tree) {
init(tree);
int size = tree.size();
while (i_beta < size) // beta is not empty
{
if (i_lambda == -1) // lambda_1 is empty: deterministic shift
{
shift(true);
continue;
} else if (tree.get(i_lambda).isSkip) {
i_lambda--;
continue;
} else if (i_flag == FLAG_PREDICT) {
predict();
} else if (i_flag == FLAG_TRAIN_BOOST) {
trainBoost();
} else {
train();
}
d_tree.n_trans++;
}
if (i_flag == FLAG_PRINT_TRANSITION) {
f_out.println();
} else if (i_flag == FLAG_PREDICT) {
postProcess(LB_LEFT_ARCPOP, LB_RIGHT_ARC);
} else if (i_flag == FLAG_TRAIN_BOOST) {
postProcessBoost();
}
}
/**
* Trains a dependency tree .
*/
private void train() {
DepNode lambda = d_tree.get(i_lambda);
DepNode beta = d_tree.get(i_beta);
if (lambda.headId == beta.id) {
if (isPop(d_tree)) {
leftPop(lambda, beta, lambda.deprel, 1d);
} else {
leftArc(lambda, beta, lambda.deprel, 1d);
}
} else if (lambda.id == beta.headId) {
rightArc(lambda, beta, beta.deprel, 1d);
} else if (isShift(d_tree)) {
shift(false);
} else {
noArc();
}
}
protected boolean isPop(DepTree tree) {
int i, size = tree.size();
for (i = i_beta + 1; i < size; i++) {
if (tree.get(i).headId == i_lambda) {
return false;
}
}
return true;
}
/**
* Predicts dependencies.
*/
private void predict() {
predictAux(getFeatureArray());
}
private void trainBoost() {
String gLabel = getGoldLabel(d_copy);
IntArrayList ftr = getFeatureArray();
saveInstance(gLabel, ftr);
predictAux(ftr);
}
private String predictAux(IntArrayList ftr) {
JIntDoubleTuple res;
res = c_dec.predict(ftr);
String label = (res.i < 0) ? LB_NO_ARC : t_map.indexToLabel(res.i);
int index = label.indexOf(LB_DELIM);
String trans = (index > 0) ? label.substring(0, index) : label;
String deprel = (index > 0) ? label.substring(index + 1) : "";
DepNode lambda = d_tree.get(i_lambda);
DepNode beta = d_tree.get(i_beta);
if (trans.equals(LB_LEFT_POP) && !d_tree.isAncestor(lambda, beta) && lambda.id != DepLib.ROOT_ID) {
leftPop(lambda, beta, deprel, res.d);
} else if (trans.equals(LB_LEFT_ARC) && !d_tree.isAncestor(lambda, beta) && lambda.id != DepLib.ROOT_ID) {
leftArc(lambda, beta, deprel, res.d);
} else if (trans.equals(LB_RIGHT_ARC) && !d_tree.isAncestor(beta, lambda)) {
rightArc(lambda, beta, deprel, res.d);
} else if (trans.equals(LB_SHIFT)) {
shift(false);
} else {
noArc();
}
return label;
}
private void postProcessBoost() {
int currId, n = d_tree.size();
DepNode curr;
for (currId = 1; currId < n; currId++) {
if (d_tree.get(currId).hasHead) {
continue;
}
curr = d_copy.get(currId);
i_lambda = currId - 1;
i_beta = currId;
if (isShift(d_copy)) {
saveInstance(LB_SHIFT, getFeatureArray());
}
if (currId < curr.headId) {
i_lambda = currId;
i_beta = curr.headId;
} else {
i_lambda = curr.headId;
i_beta = currId;
}
saveInstance(getGoldLabel(d_copy), getFeatureArray());
}
}
private String getGoldLabel(DepTree tree) {
DepNode lambda = tree.get(i_lambda);
DepNode beta = tree.get(i_beta);
if (lambda.headId == beta.id) {
if (isPop(tree)) {
return LB_LEFT_POP + LB_DELIM + lambda.deprel;
} else {
return LB_LEFT_ARC + LB_DELIM + lambda.deprel;
}
} else if (lambda.id == beta.headId) {
return LB_RIGHT_ARC + LB_DELIM + beta.deprel;
} else if (isShift(tree)) {
return LB_SHIFT;
} else {
return LB_NO_ARC;
}
}
protected void leftPop(DepNode lambda, DepNode beta, String deprel, double score) {
String label = LB_LEFT_POP + LB_DELIM + deprel;
trainInstance(label);
lambda.setHead(beta.id, deprel, score);
lambda.isSkip = true;
if (beta.leftMostDep == null || lambda.id < beta.leftMostDep.id) {
beta.leftMostDep = lambda;
}
i_lambda--;
prev_trans.add(label);
if (i_flag == FLAG_PRINT_TRANSITION) {
printTransition("LEFT-POP", lambda.id + " <-" + deprel + "- " + beta.id);
}
}
}