package edu.isistan.uima.unified.analysisengines.wordnet; import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.StringWriter; import java.util.HashMap; import java.util.Locale; import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.SAXException; import net.didion.jwnl.JWNL; import net.didion.jwnl.JWNLException; import net.didion.jwnl.data.Adjective; import net.didion.jwnl.data.FileDictionaryElementFactory; import net.didion.jwnl.data.POS; import net.didion.jwnl.data.PointerType; import net.didion.jwnl.data.VerbFrame; import net.didion.jwnl.dictionary.Dictionary; import net.didion.jwnl.dictionary.FileBackedDictionary; import net.didion.jwnl.dictionary.MorphologicalProcessor; import net.didion.jwnl.dictionary.file_manager.FileManager; import net.didion.jwnl.dictionary.file_manager.FileManagerImpl; import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor; import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation; import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation; import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation; import net.didion.jwnl.dictionary.morph.Operation; import net.didion.jwnl.dictionary.morph.TokenizerOperation; import net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory; import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile; import net.didion.jwnl.util.ResourceBundleSet; public class JWNLInitialization { private static Dictionary dictionary; private static MorphologicalProcessor morphologicalProcessor; public static boolean isInit() { return JWNL.isInitialized(); } public static void init(String jwnlPath, String wordnetPath) throws Exception { //initManual(wordnetPath); initAutomatic(jwnlPath, wordnetPath); dictionary = Dictionary.getInstance(); morphologicalProcessor = dictionary.getMorphologicalProcessor(); } @SuppressWarnings({ "rawtypes", "unchecked", "unused"}) private static void initManual(String wordnetPath) throws IOException { ResourceBundleSet bundle = (ResourceBundleSet) JWNL.getResourceBundle(); bundle.setLocale(new Locale("en", "")); bundle.addResource("PrincetonResource"); PointerType.initialize(); Adjective.initialize(); VerbFrame.initialize(); Map suffixMap = new HashMap(); suffixMap.put(POS.NOUN, new String[][] { { "s", "" }, { "ses", "s" }, { "xes", "x" }, { "zes", "z" }, { "ches", "ch" }, { "shes", "sh" }, { "men", "man" }, { "ies", "y" } }); suffixMap.put(POS.VERB, new String[][] { { "s", "" }, { "ies", "y" }, { "es", "e" }, { "es", "" }, { "ed", "e" }, { "ed", "" }, { "ing", "e" }, { "ing", "" } }); suffixMap.put(POS.ADJECTIVE, new String[][] { { "er", "" }, { "est", "" }, { "er", "e" }, { "est", "e" } }); DetachSuffixesOperation tokDso = new DetachSuffixesOperation(suffixMap); tokDso.addDelegate(DetachSuffixesOperation.OPERATIONS, new Operation[] { new LookupIndexWordOperation(), new LookupExceptionsOperation() }); TokenizerOperation tokOp = new TokenizerOperation(new String[] { " ", "-" }); tokOp.addDelegate(TokenizerOperation.TOKEN_OPERATIONS, new Operation[] { new LookupIndexWordOperation(), new LookupExceptionsOperation(), tokDso }); DetachSuffixesOperation morphDso = new DetachSuffixesOperation(suffixMap); morphDso.addDelegate(DetachSuffixesOperation.OPERATIONS, new Operation[] { new LookupIndexWordOperation(), new LookupExceptionsOperation() }); Operation[] operations = { new LookupExceptionsOperation(), morphDso, tokOp }; morphologicalProcessor = new DefaultMorphologicalProcessor(operations); FileManager manager = new FileManagerImpl(wordnetPath, PrincetonRandomAccessDictionaryFile.class); FileDictionaryElementFactory factory = new PrincetonWN17FileDictionaryElementFactory(); FileBackedDictionary.install(manager, morphologicalProcessor, factory, true); } private static void initAutomatic(String jwnlPath, String wordnetPath) throws SAXException, IOException, ParserConfigurationException, XPathExpressionException, JWNLException, TransformerConfigurationException, TransformerException, TransformerFactoryConfigurationError { InputStream propertiesStream = new FileInputStream(jwnlPath); DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance(); fact.setNamespaceAware(true); DocumentBuilder parser = fact.newDocumentBuilder(); Document doc = parser.parse(propertiesStream); XPathFactory factory = XPathFactory.newInstance(); XPath xpath = factory.newXPath(); XPathExpression expr = xpath.compile("//jwnl_properties/dictionary/param[@name='file_manager']/param[@name='dictionary_path']"); Object result = expr.evaluate(doc, XPathConstants.NODE); Node node = (Node) result; Element param = (Element) node; Attr value = param.getAttributeNode("value"); value.setValue(wordnetPath); StringWriter out = new StringWriter(); TransformerFactory.newInstance().newTransformer().transform(new DOMSource(doc), new StreamResult(out)); InputStream modifiedPropertiesStream = new ByteArrayInputStream(out.toString().getBytes()); JWNL.initialize(modifiedPropertiesStream); } public static Dictionary getDictionary() { return dictionary; } public static MorphologicalProcessor getMorphologicalProcessor() { return morphologicalProcessor; } public static void main(String[] args) throws Exception { JWNLInitialization.init(System.getenv("MODELS_PATH") + "jwnl/jwnl-properties.xml", System.getenv("MODELS_PATH") + "wordnet/win/2.0/dict/"); } }