/** * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.tudarmstadt.ukp.uby.ubycreate; import java.io.File; import java.io.IOException; import javax.xml.stream.XMLStreamException; import net.sf.extjwnl.JWNLException; import net.sf.extjwnl.dictionary.Dictionary; import org.apache.commons.io.IOUtils; import org.dom4j.DocumentException; import org.xml.sax.SAXException; import de.tudarmstadt.ukp.lmf.model.core.LexicalResource; import de.tudarmstadt.ukp.lmf.transform.DBConfig; import de.tudarmstadt.ukp.lmf.transform.LMFXmlWriter; import de.tudarmstadt.ukp.lmf.transform.XMLToDBTransformer; import de.tudarmstadt.ukp.lmf.transform.wordnet.WNConverter; public class WordNetCreator implements Creator { private static final String dtdPath = "ubyLmfDTD_1.0.dtd"; private static final String dtdVersion = "1_0"; /** * This method converts wordNet into ubyXML,stores it into a temporary file * and then migrates the data from the temporary file into DB * */ @Override public void lexicon2DB(final DBConfig dbConfig, String source) throws IOException, XMLStreamException, SAXException, DocumentException, JWNLException { String lexicalName = "WordNet"; File lmfXML = File.createTempFile("tempfile", ".tmp"); lmfXML = lexicon2XML(source, lmfXML); /* Persisting lmfXML into DB */ XMLToDBTransformer xmlToDB = new XMLToDBTransformer(dbConfig); xmlToDB.transform(lmfXML, lexicalName); System.out.println("DB Operation DONE"); lmfXML.deleteOnExit(); } @Override public File lexicon2XML(String source, File lmfXML) throws IOException, XMLStreamException, SAXException, DocumentException, JWNLException { String lexicalResourceName = "WordNet_3.0_eng"; /* Dumping lexical into a file */ LexicalResource lexicalResource = null; File wnPath = new File(source); Dictionary extWordnet; extWordnet = Dictionary .getInstance(IOUtils .toInputStream("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<jwnl_properties language=\"en\">" + " <version publisher=\"Princeton\" number=\"3.0\" language=\"en\"/>" + " <dictionary class=\"net.sf.extjwnl.dictionary.FileBackedDictionary\">" + " <param name=\"morphological_processor\" value=\"net.sf.extjwnl.dictionary.morph.DefaultMorphologicalProcessor\">" + " <param name=\"operations\">" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupExceptionsOperation\"/>" + " <param value=\"net.sf.extjwnl.dictionary.morph.DetachSuffixesOperation\">" + " <param name=\"noun\" value=\"|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|\"/>" + " <param name=\"verb\" value=\"|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|\"/>" + " <param name=\"adjective\" value=\"|er=|est=|er=e|est=e|\"/>" + " <param name=\"operations\">" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupIndexWordOperation\"/>" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupExceptionsOperation\"/>" + " </param>" + " </param>" + " <param value=\"net.sf.extjwnl.dictionary.morph.TokenizerOperation\">" + " <param name=\"delimiters\">" + " <param value=\" \"/>" + " <param value=\"-\"/>" + " </param>" + " <param name=\"token_operations\">" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupIndexWordOperation\"/>" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupExceptionsOperation\"/>" + " <param value=\"net.sf.extjwnl.dictionary.morph.DetachSuffixesOperation\">" + " <param name=\"noun\" value=\"|s=|ses=s|xes=x|zes=z|ches=ch|shes=sh|men=man|ies=y|\"/>" + " <param name=\"verb\" value=\"|s=|ies=y|es=e|es=|ed=e|ed=|ing=e|ing=|\"/>" + " <param name=\"adjective\" value=\"|er=|est=|er=e|est=e|\"/>" + " <param name=\"operations\">" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupIndexWordOperation\"/>" + " <param value=\"net.sf.extjwnl.dictionary.morph.LookupExceptionsOperation\"/>" + " </param>" + " </param>" + " </param>" + " </param>" + " </param>" + " </param>" + " <param name=\"dictionary_element_factory\"" + " value=\"net.sf.extjwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory\"/>" + " <param name=\"file_manager\" value=\"net.sf.extjwnl.dictionary.file_manager.FileManagerImpl\">" + " <param name=\"file_type\" value=\"net.sf.extjwnl.princeton.file.PrincetonRandomAccessDictionaryFile\">" + " <!--<param name=\"write_princeton_header\" value=\"true\"/>-->" + " <!--<param name=\"encoding\" value=\"UTF-8\"/>-->" + " </param>" + " <!--<param name=\"cache_use_count\" value=\"true\"/>-->" + " <param name=\"dictionary_path\" value=\"" + wnPath.getAbsolutePath() + "\"/>" + " </param>" + " </dictionary>" + " <resource class=\"net.sf.extjwnl.princeton.PrincetonResource\"/>" + "</jwnl_properties>")); WNConverter converterWN = new WNConverter(wnPath, extWordnet, new LexicalResource(), lexicalResourceName, dtdVersion); converterWN.toLMF(); lexicalResource = converterWN.getLexicalResource(); LMFXmlWriter xmlWriter = new LMFXmlWriter(lmfXML.getAbsolutePath(), dtdPath); xmlWriter.writeElement(lexicalResource); xmlWriter.writeEndDocument(); System.out.println("temp file saved: " + lmfXML.getAbsolutePath()); return lmfXML; } }