/** * Copyright 2014, Emory University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.emory.clir.clearnlp.component.utils; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.util.HashMap; import java.util.Map; import java.util.Set; import org.tukaani.xz.XZInputStream; import edu.emory.clir.clearnlp.collection.tree.PrefixTree; import edu.emory.clir.clearnlp.component.mode.dep.AbstractDEPParser; import edu.emory.clir.clearnlp.component.mode.dep.DEPConfiguration; import edu.emory.clir.clearnlp.component.mode.dep.DefaultDEPParser; import edu.emory.clir.clearnlp.component.mode.dep.EnglishDEPParser; import edu.emory.clir.clearnlp.component.mode.morph.AbstractMPAnalyzer; import edu.emory.clir.clearnlp.component.mode.morph.DefaultMPAnalyzer; import edu.emory.clir.clearnlp.component.mode.morph.EnglishMPAnalyzer; import edu.emory.clir.clearnlp.component.mode.ner.AbstractNERecognizer; import edu.emory.clir.clearnlp.component.mode.ner.DefaultNERecognizer; import edu.emory.clir.clearnlp.component.mode.ner.EnglishNERecognizer; import edu.emory.clir.clearnlp.component.mode.pos.AbstractPOSTagger; import edu.emory.clir.clearnlp.component.mode.pos.DefaultPOSTagger; import edu.emory.clir.clearnlp.component.mode.pos.EnglishPOSTagger; import edu.emory.clir.clearnlp.component.mode.srl.AbstractSRLabeler; import edu.emory.clir.clearnlp.component.mode.srl.EnglishSRLabeler; import edu.emory.clir.clearnlp.component.mode.srl.SRLConfiguration; import edu.emory.clir.clearnlp.conversion.AbstractC2DConverter; import edu.emory.clir.clearnlp.conversion.EnglishC2DConverter; import edu.emory.clir.clearnlp.conversion.headrule.HeadRuleMap; import edu.emory.clir.clearnlp.ner.NERInfoSet; import edu.emory.clir.clearnlp.tokenization.AbstractTokenizer; import edu.emory.clir.clearnlp.tokenization.EnglishTokenizer; import edu.emory.clir.clearnlp.util.BinUtils; import edu.emory.clir.clearnlp.util.IOUtils; import edu.emory.clir.clearnlp.util.lang.TLanguage; /** * @since 3.0.0 * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */ public class NLPUtils { private NLPUtils() {} /** @param in the inputstream for a headrule file. */ static public AbstractC2DConverter getC2DConverter(TLanguage language, InputStream in) { HeadRuleMap headrules = new HeadRuleMap(in); return new EnglishC2DConverter(headrules); } static public AbstractTokenizer getTokenizer(TLanguage language) { return new EnglishTokenizer(); } static public AbstractMPAnalyzer getMPAnalyzer(TLanguage language) { switch (language) { case ENGLISH: return new EnglishMPAnalyzer(); default : return new DefaultMPAnalyzer(); } } static public AbstractPOSTagger getPOSTagger(TLanguage language, ObjectInputStream in) { BinUtils.LOG.info("Loading part-of-speech tagging models.\n"); switch (language) { case ENGLISH: return new EnglishPOSTagger(in); default : return new DefaultPOSTagger(in); } } static public AbstractPOSTagger getPOSTagger(TLanguage language, String modelPath) { return getPOSTagger(language, getObjectInputStream(modelPath)); } static public AbstractDEPParser getDEPParser(TLanguage language, ObjectInputStream in, DEPConfiguration configuration) { BinUtils.LOG.info("Loading dependency parsing models.\n"); switch (language) { case ENGLISH: return new EnglishDEPParser(configuration, in); default : return new DefaultDEPParser(configuration, in); } } static public AbstractDEPParser getDEPParser(TLanguage language, String modelPath, DEPConfiguration configuration) { return getDEPParser(language, getObjectInputStream(modelPath), configuration); } static public AbstractSRLabeler getSRLabeler(TLanguage language, ObjectInputStream in, SRLConfiguration configuration) { BinUtils.LOG.info("Loading semantic role labeling models.\n"); switch (language) { case ENGLISH: return new EnglishSRLabeler(configuration, in); default : return null; } } static public AbstractSRLabeler getSRLabeler(TLanguage language, String modelPath, SRLConfiguration configuration) { return getSRLabeler(language, getObjectInputStream(modelPath), configuration); } static public AbstractNERecognizer getNERecognizer(TLanguage language, ObjectInputStream in) { BinUtils.LOG.info("Loading named entity recognition models.\n"); switch (language) { case ENGLISH: return new EnglishNERecognizer(in); default : return new DefaultNERecognizer(in); } } static public AbstractNERecognizer getNERecognizer(TLanguage language, String modelPath) { return getNERecognizer(language, getObjectInputStream(modelPath)); } @SuppressWarnings("unchecked") static public PrefixTree<String,NERInfoSet> getNERDictionary(ObjectInputStream in) { BinUtils.LOG.info("Loading named entity gazetteers.\n"); PrefixTree<String,NERInfoSet> tree = null; try { tree = (PrefixTree<String,NERInfoSet>)in.readObject(); } catch (Exception e) {e.printStackTrace();} return tree; } static public PrefixTree<String,NERInfoSet> getNERDictionary(String modelPath) { return getNERDictionary(NLPUtils.getObjectInputStream(modelPath)); } @SuppressWarnings("unchecked") static public Map<String,Set<String>> getDistributionalSemantics(ObjectInputStream in) { BinUtils.LOG.info("Loading distributional semantics.\n"); Map<String,Set<String>> map = null; try { map = (HashMap<String,Set<String>>)in.readObject(); } catch (Exception e) {e.printStackTrace();} return map; } static public Map<String,Set<String>> getDistributionalSemantics(String modelPath) { return getDistributionalSemantics(getObjectInputStream(modelPath)); } static public ObjectInputStream getObjectInputStream(String modelPath) { try { return new ObjectInputStream(new XZInputStream(new BufferedInputStream(IOUtils.getInputStreamsFromClasspath(modelPath)))); } catch (IOException e) {e.printStackTrace();} return null; } }