// AbstractSRLParser.java example
//
// Explorer
// dependency-parsing-toolbox-master
// - Source
/**
 * Copyright (c) 2009, Regents of the University of Colorado All rights
 * reserved.
 * 
* Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
* Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer. Redistributions in binary
 * form must reproduce the above copyright notice, this list of conditions and
 * the following disclaimer in the documentation and/or other materials provided
 * with the distribution. Neither the name of the University of Colorado at
 * Boulder nor the names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior written
 * permission.
 * 
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package clear.parse;

import clear.decode.AbstractDecoder;
import clear.decode.OneVsAllDecoder;
import clear.dep.DepNode;
import clear.dep.DepTree;
import clear.dep.srl.SRLArg;
import clear.ftr.map.SRLFtrMap;
import clear.ftr.xml.FtrToken;
import clear.ftr.xml.SRLFtrXml;
import clear.util.tuple.JObjectObjectTuple;
import com.carrotsearch.hppc.IntArrayList;
import java.util.ArrayList;
import java.util.regex.Matcher;

/**
 * Abstract base class for SRL parsers (the class name and the
 * predicate/argument fields below suggest semantic role labeling).
 *
 * <p>The parser keeps one feature map, one decoder and one training array per
 * parsing direction: slot {@code 0} is used when {@link #i_dir} equals
 * {@link #DIR_LEFT}, slot {@code 1} otherwise.
 *
 * @author Jinho D. Choi <b>Last update:</b> 11/6/2010
 */
public abstract class AbstractSRLParser extends AbstractParser {

    /**
     * Direction code: parse from the predicate to the left.
     */
    public static final byte DIR_LEFT = -1;
    /**
     * Direction code: parse from the predicate to the right.
     */
    public static final byte DIR_RIGHT = 1;
    /**
     * Feature templates.
     */
    protected SRLFtrXml t_xml;
    /**
     * Feature mappings, one per direction (see {@link #getFtrMap()}).
     */
    protected SRLFtrMap[] t_map;
    /**
     * ML decoders, one per direction (see {@link #getDecoder()}).
     */
    protected OneVsAllDecoder[] c_dec;
    /**
     * Dependency tree currently being processed.
     */
    protected DepTree d_tree;
    /**
     * Index of the argument.
     */
    protected int i_lambda;
    /**
     * Index of the predicate.
     */
    protected int i_beta;
    /**
     * Current direction: {@link AbstractSRLParser#DIR_LEFT} or
     * {@link AbstractSRLParser#DIR_RIGHT}.
     */
    protected byte i_dir;
    /**
     * Sequence of all arguments.
     */
    protected ArrayList<SRLArg> ls_args;
    /**
     * Sequence of core arguments; read by the "argn" feature in
     * {@link #getField(FtrToken)}.
     */
    protected ArrayList<String> ls_argn;

//	=============================== Constructors ===============================
    /**
     * Constructor documented for {@link AbstractSRLParser#FLAG_TRAIN_LEXICON}.
     * Loads the feature templates from {@code xmlFile} and creates two
     * feature maps from them.
     *
     * @param flag    value stored in {@code i_flag}
     * @param xmlFile feature template file passed to {@link SRLFtrXml}
     */
    public AbstractSRLParser(byte flag, String xmlFile) {
        i_flag = flag;
        t_xml = new SRLFtrXml(xmlFile);
        t_map = new SRLFtrMap[]{new SRLFtrMap(t_xml), new SRLFtrMap(t_xml)};
    }

    /**
     * Constructor documented for {@link AbstractSRLParser#FLAG_TRAIN_INSTANCE}.
     * Builds one feature map per lexicon file, then initializes as many
     * training arrays as there are feature maps.
     *
     * @param flag        value stored in {@code i_flag}
     * @param xml         feature templates
     * @param lexiconFile lexicon files, one per feature map
     */
    public AbstractSRLParser(byte flag, SRLFtrXml xml, String[] lexiconFile) {
        i_flag = flag;
        t_xml = xml;

        final int count = lexiconFile.length;
        t_map = new SRLFtrMap[count];

        int slot = 0;
        for (String file : lexiconFile) {
            t_map[slot] = new SRLFtrMap(file);
            slot++;
        }

        initTrainArrays(count);
    }

    /**
     * Constructor documented for {@link AbstractSRLParser#FLAG_PREDICT} or
     * {@link AbstractSRLParser#FLAG_TRAIN_BOOST}. Every decoder is cast to
     * {@link OneVsAllDecoder}; training arrays are initialized only when
     * {@code flag} equals {@code FLAG_TRAIN_BOOST}.
     *
     * @param flag    value stored in {@code i_flag}
     * @param xml     feature templates
     * @param map     feature maps (stored as given, not copied)
     * @param decoder decoders; each element must be a {@link OneVsAllDecoder}
     */
    public AbstractSRLParser(byte flag, SRLFtrXml xml, SRLFtrMap[] map, AbstractDecoder[] decoder) {
        i_flag = flag;
        t_xml = xml;
        t_map = map;

        final int count = decoder.length;
        c_dec = new OneVsAllDecoder[count];

        int slot = 0;
        for (AbstractDecoder each : decoder) {
            c_dec[slot] = (OneVsAllDecoder) each;
            slot++;
        }

        if (flag == FLAG_TRAIN_BOOST) {
            initTrainArrays(count);
        }
    }

//	=============================== External methods ===============================
    /**
     * @return the feature templates held by this parser
     */
    public SRLFtrXml getSRLFtrXml() {
        return t_xml;
    }

    /**
     * @return the feature map array held by this parser (the array itself,
     * not a copy)
     */
    public SRLFtrMap[] getSRLFtrMap() {
        return t_map;
    }

    /**
     * @return the feature map for the current direction: slot 0 for
     * {@link #DIR_LEFT}, slot 1 otherwise
     */
    protected SRLFtrMap getFtrMap() {
        if (i_dir == DIR_LEFT) {
            return t_map[0];
        }
        return t_map[1];
    }

    /**
     * @return the decoder for the current direction: slot 0 for
     * {@link #DIR_LEFT}, slot 1 otherwise
     */
    protected OneVsAllDecoder getDecoder() {
        if (i_dir == DIR_LEFT) {
            return c_dec[0];
        }
        return c_dec[1];
    }

    /**
     * @return the training array for the current direction: element 0 of
     * {@code a_trans} for {@link #DIR_LEFT}, element 1 otherwise
     */
    protected JObjectObjectTuple<IntArrayList, ArrayList<int[]>> getTrainArray() {
        if (i_dir == DIR_LEFT) {
            return a_trans.get(0);
        }
        return a_trans.get(1);
    }

    /**
     * Adds lexica (via {@link #addLexica(SRLFtrMap)}) and then the given
     * label to the feature map of the current direction.
     *
     * @param label label to register
     */
    protected void addTags(String label) {
        SRLFtrMap current = getFtrMap();

        addLexica(current);
        current.addLabel(label);
    }

    /**
     * Saves every feature map in {@link AbstractSRLParser#t_map} to the file
     * at the same position in <code>lexiconFile</code>.
     *
     * @param lexiconFile output files, indexed in parallel with the feature maps
     */
    public void saveTags(String[] lexiconFile) {
        int slot = 0;
        for (SRLFtrMap map : t_map) {
            map.save(t_xml, lexiconFile[slot]);
            slot++;
        }
    }

    /**
     * Saves a training instance for argument classification. The instance is
     * dropped silently when the current feature map returns a negative index
     * for {@code label}.
     *
     * @param label label of the instance
     * @param arr   feature indices of the instance
     */
    protected void saveInstance(String label, IntArrayList arr) {
        final int labelIndex = getFtrMap().labelToIndex(label);

        if (labelIndex >= 0) {
            JObjectObjectTuple<IntArrayList, ArrayList<int[]>> instances = getTrainArray();

            // o1 collects label indices, o2 the matching feature arrays.
            instances.o1.add(labelIndex);
            instances.o2.add(arr.toArray());
        }
    }

//	=============================== Lexica ===============================
    /**
     * Adds n-gram lexica to the feature map, using this parser's feature
     * templates.
     */
    protected void addNgramLexica(SRLFtrMap map) {
        addNgramLexica(t_xml, map);
    }

    /**
     * Adds n-gram features, using this parser's feature templates.
     */
    protected void addNgramFeatures(IntArrayList arr, int[] idx, SRLFtrMap tmap) {
        addNgramFeatures(arr, idx, t_xml, tmap);
    }

    /**
     * Retrieves the field described by <code>token</code>.
     *
     * <p>The token is anchored at {@link #i_lambda} or {@link #i_beta}
     * (depending on its source) and shifted by its offset. {@code null} is
     * returned when the resulting index is out of the tree's range, when a
     * lambda token lands on the predicate index or a beta token lands on the
     * argument index, when the requested related node does not exist, or
     * when the field name is not recognized.
     *
     * @return field retrieved from <code>token</code>, or {@code null}
     */
    @Override
    protected String getField(FtrToken token) {
        final boolean fromLambda = (token.source == SRLFtrXml.LAMBDA);

        int index = fromLambda ? i_lambda : i_beta;
        index += token.offset;

        if (!d_tree.isRange(index)) {
            return null;
        }
        if (fromLambda && index == i_beta) {
            return null;
        }
        if (token.source == SRLFtrXml.BETA && index == i_lambda) {
            return null;
        }

        DepNode node = resolveSrlNode(token, index);
        if (node == null) {
            return null;
        }

        return extractSrlField(token, node);
    }

    /**
     * Picks the node a token refers to: the node at {@code index} itself when
     * the token has no relation, otherwise the node reached through the
     * token's relation. Returns {@code null} for an unrecognized relation.
     */
    private DepNode resolveSrlNode(FtrToken token, int index) {
        if (token.relation == null) {
            return d_tree.get(index);
        }
        if (token.isRelation(SRLFtrXml.R_HD)) {
            return d_tree.getHead(index);
        }
        if (token.isRelation(SRLFtrXml.R_LM)) {
            return d_tree.getLeftMostDependent(index);
        }
        if (token.isRelation(SRLFtrXml.R_RM)) {
            return d_tree.getRightMostDependent(index);
        }
        if (token.isRelation(SRLFtrXml.R_LS)) {
            return d_tree.getLeftSibling(index);
        }
        if (token.isRelation(SRLFtrXml.R_RS)) {
            return d_tree.getRightSibling(index);
        }
        if (token.isRelation(SRLFtrXml.R_VC)) {
            return d_tree.getHighestVC(index);
        }
        return null;
    }

    /**
     * Reads the field named by the token from {@code node}. Plain fields are
     * checked first, then the pattern-based ones in the order feat, subcat,
     * path, argn. Returns {@code null} when nothing matches.
     */
    private String extractSrlField(FtrToken token, DepNode node) {
        if (token.isField(SRLFtrXml.F_FORM)) {
            return node.form;
        }
        if (token.isField(SRLFtrXml.F_LEMMA)) {
            return node.lemma;
        }
        if (token.isField(SRLFtrXml.F_POS)) {
            return node.pos;
        }
        if (token.isField(SRLFtrXml.F_DEPREL)) {
            return node.getDeprel();
        }

        Matcher m = SRLFtrXml.P_FEAT.matcher(token.field);
        if (m.find()) {
            return node.getFeat(m.group(1));
        }

        m = SRLFtrXml.P_SUBCAT.matcher(token.field);
        if (m.find()) {
            byte arg = Byte.parseByte(m.group(2));
            return d_tree.getSubcat(m.group(1), node.id, arg);
        }

        m = SRLFtrXml.P_PATH.matcher(token.field);
        if (m.find()) {
            byte arg = Byte.parseByte(m.group(2));

            // Diagnostic dump for a node id beyond the tree size; the path
            // lookup below is still attempted afterwards.
            if (node.id > d_tree.size()) {
                System.out.println(node.toString());
                System.out.println(d_tree.toString());
            }
            return d_tree.getPath(m.group(1), node.id, i_beta, arg);
        }

        m = SRLFtrXml.P_ARGN.matcher(token.field);
        if (m.find()) {
            // Counts back from the end of ls_argn: 0 selects the last entry.
            int position = ls_argn.size() - Integer.parseInt(m.group(1)) - 1;
            if (position < 0) {
                return null;
            }
            return ls_argn.get(position);
        }

        // Unrecognized field names are ignored without any message.
        return null;
    }

    /**
     * Parses the given dependency tree; the behavior is defined by subclasses.
     */
    public abstract void parse(DepTree tree);

    /**
     * Adds lexica to the given feature map; invoked by
     * {@link #addTags(String)} with the map of the current direction.
     */
    protected abstract void addLexica(SRLFtrMap map);
}