/** * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.tudarmstadt.ukp.lmf.transform.wordnet; import java.io.File; import java.io.InputStream; import java.util.LinkedList; import net.sf.extjwnl.JWNLException; import net.sf.extjwnl.dictionary.Dictionary; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import de.tudarmstadt.ukp.lmf.model.core.GlobalInformation; import de.tudarmstadt.ukp.lmf.model.core.LexicalResource; import de.tudarmstadt.ukp.lmf.model.core.Lexicon; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; /** * * Instance of this class converts * <a href="URL#https://wordnet.princeton.edu/wordnet/">WordNet 3.0</a> * to LMF-format * @author Zijad Maksuti * @author Judith Eckle-Kohler * */ public class WNConverter { private final Dictionary extWordnet; // extWordNet Dictionary protected File dictionaryPath; private final LexicalResource lexicalResource; private InputStream subcatStream; // subcat mapping file private final String dtd_version; private final String resourceVersion; private final Log LOG = LogFactory.getLog(getClass()); /** * Constructs a {@link WNConverter} based on the consumed parameters * @param dictionaryPath the path of the WordNet dictionary files * @param wordNet initialized WordNet's {@link Dictionary} object * @param lexicalResource initialized object of {@link LexicalResource}, which will be filled with WordNet's data * @param resourceVersion Version of this resource * @param dtd_version specifies the version of the .dtd which will be written to lexicalResource * @param exMappingPath path of the file containing manually entered mappings of lexemes and example sentences */ public WNConverter(final File dictionaryPath, final Dictionary wordNet, final LexicalResource lexicalResource, final String resourceVersion, final String dtd) { this.dictionaryPath = dictionaryPath; this.extWordnet = wordNet; this.lexicalResource = lexicalResource; this.resourceVersion = resourceVersion; this.dtd_version = dtd; try { this.subcatStream = getClass().getClassLoader().getResource("WordNetSubcatMappings/wnFrameMapping.txt").openStream(); } catch (Exception e) { LOG.error("Unable to load subcat mapping file. Aborting all operations"); System.exit(1); } } /** @deprecated Use alternative constructor instead! */ @Deprecated public WNConverter(File dictionaryPath, Dictionary wordNet, LexicalResource lexicalResource, String resourceVersion, String dtd, String exMappingPath) { this(dictionaryPath, wordNet, lexicalResource, resourceVersion, dtd); /*try { File exMapping = new File(exMappingPath); } catch (Exception e) { LOG.error( "Unable to load the file containing manually entered mappings of example sentences. Aborting all operations"); System.exit(1); }*/ } /** * Converts the informations provided by the initialized WordNet-{@link Dictionary} instance to LMF-format. <br> * The result of the conversion can be obtained by calling {@link WNConverter#getLexicalResource()} */ public void toLMF() { try { LOG.info("Started converting WordNet to LMF..."); SubcategorizationFrameExtractor subcategorizationFrameExtractor = new SubcategorizationFrameExtractor(subcatStream); // Setting attributes of LexicalResource lexicalResource.setName("WordNet"); lexicalResource.setDtdVersion(dtd_version); // *** Setting GlobalInformation *** // GlobalInformation globalInformation = new GlobalInformation(); globalInformation.setLabel("LMF representation of WordNet 3.0"); lexicalResource.setGlobalInformation(globalInformation); //*** Setting Lexicon (only one since WordNet is monolingual)***// Lexicon lexicon = new Lexicon(); lexicon.setLanguageIdentifier(ELanguageIdentifier.ENGLISH); lexicon.setId("WN_Lexicon_0"); lexicon.setName("WordNet"); LinkedList<Lexicon> lexicons = new LinkedList<Lexicon>(); lexicons.add(lexicon); lexicalResource.setLexicons(lexicons); // *** Creating Synsets *** // LOG.info("Generating Synsets..."); SynsetGenerator synsetGenerator = new SynsetGenerator(extWordnet, resourceVersion); synsetGenerator.initialize(); // Setting Synsets lexicon.setSynsets(synsetGenerator.getSynsets()); LOG.info("Generating Synsets done"); // *** Creating LexicalEntries *** // LOG.info("Generating LexicalEntries..."); LexicalEntryGenerator lexicalEntryGenerator = new LexicalEntryGenerator(dictionaryPath, extWordnet, synsetGenerator, subcategorizationFrameExtractor, resourceVersion); lexicon.setLexicalEntries(lexicalEntryGenerator.getLexicalEntries()); LOG.info("Generating LexicalEntries done"); // *** Creating SynsetRelations *** // LOG.info("Generating SynsetRelations..."); SynsetRelationGenerator synsetRelationGenerator = new SynsetRelationGenerator(synsetGenerator, lexicalEntryGenerator); // Update the relatios of previously extracted (and generated) Synsets synsetRelationGenerator.updateSynsetRelations(); LOG.info("Generating SynsetRelations done"); // *** Creating RelatedForms of LexicalEntries *** // LOG.info("Generating RelatedForms..."); RelatedFormGenerator relatedFormGenerator = new RelatedFormGenerator(lexicalEntryGenerator); relatedFormGenerator.updateRelatedForms(); LOG.info("Generating RelatedForms done"); // *** Creating SenseRelations *** // LOG.info("Generating SenseRelations..."); SenseRelationGenerator senseRelationGenerator = new SenseRelationGenerator(lexicalEntryGenerator); senseRelationGenerator.updateSenseRelations(); LOG.info("Generating SenseRelations done"); // *** Setting SubcategorizationFrames ***// lexicon.setSubcategorizationFrames(subcategorizationFrameExtractor.getSubcategorizationFrames()); // setting SemanticPredicates lexicon.setSemanticPredicates(subcategorizationFrameExtractor.getSemanticPredicates()); // setting SynSemCorrespondences lexicon.setSynSemCorrespondences(subcategorizationFrameExtractor.getSynSemCorrespondences()); } catch (JWNLException e) { throw new RuntimeException("UBY-LMF creation failed", e); } } /** * Returns the {@link LexicalResource} object, which contains the results of the conversion * @return an instance of LexicalResource, which contains the results of the conversion */ public LexicalResource getLexicalResource() { return this.lexicalResource; } }