/** * Copyright (c) 2009, Regents of the University of Colorado All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. Redistributions in binary * form must reproduce the above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or other materials provided * with the distribution. Neither the name of the University of Colorado at * Boulder nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ package clear.parse; import clear.decode.AbstractDecoder; import clear.decode.OneVsAllDecoder; import clear.dep.DepNode; import clear.dep.DepTree; import clear.dep.srl.SRLArg; import clear.ftr.map.SRLFtrMap; import clear.ftr.xml.FtrToken; import clear.ftr.xml.SRLFtrXml; import clear.util.tuple.JObjectObjectTuple; import com.carrotsearch.hppc.IntArrayList; import java.util.ArrayList; import java.util.regex.Matcher; /** * Shift-eager dependency parser. * * @author Jinho D. Choi <b>Last update:</b> 11/6/2010 */ abstract public class AbstractSRLParser extends AbstractParser { /** * Parse from predicate to the left */ static public final byte DIR_LEFT = -1; /** * Parse from predicate to the right */ static public final byte DIR_RIGHT = +1; /** * Feature templates */ protected SRLFtrXml t_xml; /** * Feature mappings */ protected SRLFtrMap[] t_map; /** * ML decoder */ protected OneVsAllDecoder[] c_dec; /** * Current dependency tree */ protected DepTree d_tree; /** * Index of argument */ protected int i_lambda; /** * Index of predicate */ protected int i_beta; /** * {@link AbstractSRLParser#DIR_LEFT} or {@link AbstractSRLParser#DIR_RIGHT} */ protected byte i_dir; /** * List of all arguments sequence */ protected ArrayList<SRLArg> ls_args; /** * List of core arguments sequence */ protected ArrayList<String> ls_argn; // =============================== Constructors =============================== /** * {@link AbstractSRLParser#FLAG_TRAIN_LEXICON}. */ public AbstractSRLParser(byte flag, String xmlFile) { i_flag = flag; t_xml = new SRLFtrXml(xmlFile); t_map = new SRLFtrMap[2]; for (int i = 0; i < t_map.length; i++) { t_map[i] = new SRLFtrMap(t_xml); } } /** * {@link AbstractSRLParser#FLAG_TRAIN_INSTANCE}. */ public AbstractSRLParser(byte flag, SRLFtrXml xml, String[] lexiconFile) { i_flag = flag; t_xml = xml; t_map = new SRLFtrMap[lexiconFile.length]; for (int i = 0; i < t_map.length; i++) { t_map[i] = new SRLFtrMap(lexiconFile[i]); } initTrainArrays(t_map.length); } /** * {@link AbstractSRLParser#FLAG_PREDICT} or {@link AbstractSRLParser#FLAG_TRAIN_BOOST}. */ public AbstractSRLParser(byte flag, SRLFtrXml xml, SRLFtrMap[] map, AbstractDecoder[] decoder) { i_flag = flag; t_xml = xml; t_map = map; c_dec = new OneVsAllDecoder[decoder.length]; for (int i = 0; i < decoder.length; i++) { c_dec[i] = (OneVsAllDecoder) decoder[i]; } if (flag == FLAG_TRAIN_BOOST) { initTrainArrays(decoder.length); } } // =============================== External methods =============================== public SRLFtrXml getSRLFtrXml() { return t_xml; } public SRLFtrMap[] getSRLFtrMap() { return t_map; } protected SRLFtrMap getFtrMap() { return (i_dir == DIR_LEFT) ? t_map[0] : t_map[1]; } protected OneVsAllDecoder getDecoder() { return (i_dir == DIR_LEFT) ? c_dec[0] : c_dec[1]; } protected JObjectObjectTuple<IntArrayList, ArrayList<int[]>> getTrainArray() { return (i_dir == DIR_LEFT) ? a_trans.get(0) : a_trans.get(1); } /** * Adds a label and lexica to {@link AbstractSRLParser#t_map}. */ protected void addTags(String label) { SRLFtrMap map = getFtrMap(); addLexica(map); map.addLabel(label); } /** * Saves tags from {@link AbstractSRLParser#t_map} to * <code>lexiconFile</code>. */ public void saveTags(String[] lexiconFile) { for (int i = 0; i < t_map.length; i++) { t_map[i].save(t_xml, lexiconFile[i]); } } /** * Saves a training instance for argument classification. */ protected void saveInstance(String label, IntArrayList arr) { int index = getFtrMap().labelToIndex(label); if (index < 0) { return; } JObjectObjectTuple<IntArrayList, ArrayList<int[]>> yx; yx = getTrainArray(); yx.o1.add(index); yx.o2.add(arr.toArray()); } // =============================== Lexica =============================== /** * Add n-gram lexica to the feature map. */ protected void addNgramLexica(SRLFtrMap map) { addNgramLexica(t_xml, map); } /** * Adds n-gram features. */ protected void addNgramFeatures(IntArrayList arr, int[] idx, SRLFtrMap tmap) { addNgramFeatures(arr, idx, t_xml, tmap); } /** * @return field retrieved from * <code>token</code> */ @Override protected String getField(FtrToken token) { int index = (token.source == SRLFtrXml.LAMBDA) ? i_lambda : i_beta; index += token.offset; if (!d_tree.isRange(index) || (token.source == SRLFtrXml.LAMBDA && index == i_beta) || (token.source == SRLFtrXml.BETA && index == i_lambda)) { return null; } DepNode node = null; if (token.relation == null) { node = d_tree.get(index); } else if (token.isRelation(SRLFtrXml.R_HD)) { node = d_tree.getHead(index); } else if (token.isRelation(SRLFtrXml.R_LM)) { node = d_tree.getLeftMostDependent(index); } else if (token.isRelation(SRLFtrXml.R_RM)) { node = d_tree.getRightMostDependent(index); } else if (token.isRelation(SRLFtrXml.R_LS)) { node = d_tree.getLeftSibling(index); } else if (token.isRelation(SRLFtrXml.R_RS)) { node = d_tree.getRightSibling(index); } else if (token.isRelation(SRLFtrXml.R_VC)) { node = d_tree.getHighestVC(index); } if (node == null) { return null; } Matcher m; if (token.isField(SRLFtrXml.F_FORM)) { return node.form; } else if (token.isField(SRLFtrXml.F_LEMMA)) { return node.lemma; } else if (token.isField(SRLFtrXml.F_POS)) { return node.pos; } else if (token.isField(SRLFtrXml.F_DEPREL)) { return node.getDeprel(); } else if ((m = SRLFtrXml.P_FEAT.matcher(token.field)).find()) { return node.getFeat(m.group(1)); } else if ((m = SRLFtrXml.P_SUBCAT.matcher(token.field)).find()) { byte idx = Byte.parseByte(m.group(2)); return d_tree.getSubcat(m.group(1), node.id, idx); } else if ((m = SRLFtrXml.P_PATH.matcher(token.field)).find()) { byte idx = Byte.parseByte(m.group(2)); if (node.id > d_tree.size()) { System.out.println(node.toString()); System.out.println(d_tree.toString()); } return d_tree.getPath(m.group(1), node.id, i_beta, idx); } else if ((m = SRLFtrXml.P_ARGN.matcher(token.field)).find()) { int idx = ls_argn.size() - Integer.parseInt(m.group(1)) - 1; return (idx < 0) ? null : ls_argn.get(idx); } // System.err.println("Error: unspecified feature '"+token.field+"'"); return null; } abstract public void parse(DepTree tree); abstract protected void addLexica(SRLFtrMap map); }