/** * Copyright (c) 2009, Regents of the University of Colorado All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. Redistributions in binary * form must reproduce the above copyright notice, this list of conditions and * the following disclaimer in the documentation and/or other materials provided * with the distribution. Neither the name of the University of Colorado at * Boulder nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ package clear.engine; import clear.decode.OneVsAllDecoder; import clear.ftr.map.DepFtrMap; import clear.ftr.map.SRLFtrMap; import clear.ftr.xml.DepFtrXml; import clear.ftr.xml.SRLFtrXml; import clear.parse.*; import clear.reader.AbstractReader; import java.io.*; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.kohsuke.args4j.Option; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.SAXException; /** * <b>Last update:</b> 12/15/2010 * * @author Jinho D. Choi */ abstract public class AbstractCommon { @Option(name = "-c", usage = "configuration file", required = true, metaVar = "REQUIRED") protected String s_configFile = null; protected final String TAG_COMMON = "common"; protected final String TAG_COMMON_LANGUAGE = "language"; protected final String TAG_COMMON_FORMAT = "format"; protected final String TAG_COMMON_PARSER = "parser"; protected final String TAG_PREDICT = "predict"; protected final String TAG_PREDICT_TOK_MODEL = "tok_model"; protected final String TAG_PREDICT_POS_MODEL = "pos_model"; protected final String TAG_PREDICT_DEP_MODEL = "dep_model"; protected final String TAG_PREDICT_MORPH_DICT = "morph_dict"; static protected final String ENTRY_PARSER = "parser"; static protected final String ENTRY_LEXICA = "lexica"; static protected final String ENTRY_MODEL = "model"; static protected final String ENTRY_FEATURE = "feature"; /** * Language */ protected String s_language = AbstractReader.LANG_EN; /** * Input format */ protected String s_format = AbstractReader.FORMAT_DEP; /** * Dependency parsing algorithm */ protected String s_depParser = AbstractDepParser.ALG_SHIFT_POP; /** * Configuration element */ protected Element e_config; public static PrintStream out = System.out; abstract protected void initElements(); public void init() { DocumentBuilderFactory dFactory = DocumentBuilderFactory.newInstance(); try { DocumentBuilder builder = dFactory.newDocumentBuilder(); Document doc = builder.parse(new File(s_configFile)); e_config = doc.getDocumentElement(); initCommonElements(); initElements(); } catch (ParserConfigurationException | SAXException | IOException e) { e.printStackTrace(); System.exit(1); } } /** * Initializes <common> elements. */ protected void initCommonElements() { Element eCommon = getElement(e_config, TAG_COMMON); Element element; if ((element = getElement(eCommon, TAG_COMMON_LANGUAGE)) != null) { s_language = element.getTextContent().trim(); } if ((element = getElement(eCommon, TAG_COMMON_FORMAT)) != null) { s_format = element.getTextContent().trim(); } if ((element = getElement(eCommon, TAG_COMMON_PARSER)) != null) { s_depParser = element.getTextContent().trim(); } } protected Element getElement(Element parent, String name) { NodeList list = parent.getElementsByTagName(name); return (list.getLength() > 0) ? (Element) list.item(0) : null; } protected AbstractDepParser getDepParser(String modelFile) throws Exception { ZipInputStream zin = new ZipInputStream(new FileInputStream(modelFile)); ZipEntry zEntry; String algorithm = AbstractDepParser.ALG_SHIFT_POP; DepFtrXml xml = null; DepFtrMap map = null; OneVsAllDecoder decoder = null; while ((zEntry = zin.getNextEntry()) != null) { switch (zEntry.getName()) { case ENTRY_FEATURE: { out.println("- loading feature template"); BufferedReader reader = new BufferedReader(new InputStreamReader(zin)); StringBuilder build = new StringBuilder(); String string; while ((string = reader.readLine()) != null) { build.append(string); build.append("\n"); } xml = new DepFtrXml(new ByteArrayInputStream(build.toString().getBytes())); break; } case ENTRY_LEXICA: out.println("- loading lexica"); map = new DepFtrMap(new BufferedReader(new InputStreamReader(zin))); break; case ENTRY_MODEL: out.println("- loading model"); decoder = new OneVsAllDecoder(new BufferedReader(new InputStreamReader(zin))); break; case ENTRY_PARSER: { BufferedReader reader = new BufferedReader(new InputStreamReader(zin)); algorithm = reader.readLine().trim(); break; } } } switch (algorithm) { case AbstractDepParser.ALG_SHIFT_EAGER: return new ShiftEagerParser(AbstractDepParser.FLAG_PREDICT, xml, map, decoder); case AbstractDepParser.ALG_SHIFT_POP: return new ShiftPopParser(AbstractDepParser.FLAG_PREDICT, xml, map, decoder); } return null; } protected AbstractSRLParser getSRLabeler(String modelFile) throws Exception { ZipInputStream zin = new ZipInputStream(new FileInputStream(modelFile)); ZipEntry zEntry; String entry; SRLFtrXml xml = null; SRLFtrMap[] map = new SRLFtrMap[2]; OneVsAllDecoder[] decoder = new OneVsAllDecoder[2]; while ((zEntry = zin.getNextEntry()) != null) { if (zEntry.getName().equals(ENTRY_FEATURE)) { out.println("- loading feature template"); BufferedReader reader = new BufferedReader(new InputStreamReader(zin)); StringBuilder build = new StringBuilder(); String string; while ((string = reader.readLine()) != null) { build.append(string); build.append("\n"); } xml = new SRLFtrXml(new ByteArrayInputStream(build.toString().getBytes())); } else if ((entry = zEntry.getName()).startsWith(ENTRY_LEXICA)) { int i = Integer.parseInt(entry.substring(entry.lastIndexOf(".") + 1)); out.println("- loading lexica"); map[i] = new SRLFtrMap(new BufferedReader(new InputStreamReader(zin))); } else if (zEntry.getName().startsWith(ENTRY_MODEL)) { int i = Integer.parseInt(entry.substring(entry.lastIndexOf(".") + 1)); out.println("- loading model"); decoder[i] = new OneVsAllDecoder(new BufferedReader(new InputStreamReader(zin))); } } AbstractSRLParser labeler = new SRLParser(AbstractSRLParser.FLAG_PREDICT, xml, map, decoder); labeler.setLanguage(s_language); return labeler; } }