/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.lmf.transform.omegawiki; import java.io.UnsupportedEncodingException; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import de.tudarmstadt.ukp.lmf.model.core.Definition; import de.tudarmstadt.ukp.lmf.model.core.TextRepresentation; import de.tudarmstadt.ukp.lmf.model.semantics.MonolingualExternalRef; import de.tudarmstadt.ukp.lmf.model.semantics.Synset; import de.tudarmstadt.ukp.omegawiki.api.DefinedMeaning; import de.tudarmstadt.ukp.omegawiki.api.OmegaWiki; import de.tudarmstadt.ukp.omegawiki.api.TranslatedContent; import de.tudarmstadt.ukp.omegawiki.exception.OmegaWikiException; /** * This class generates Synset and all it's underlying children * * @author matuschek * */ public class SynsetGenerator { public final static String DEFINED_MEANING = "definedMeaning"; private final String resourceVersion; private int GlobalLanguage; private String GlobalLanguageLMF; private final OmegaWiki omegawiki; // Omegawiki Dictionary private int LMFSynsetNumber = 0; // This is the running number used for creating IDs of LMFSynsets // Mappings between LMF-Synsets and OW-DefinedMeaning private final HashMap<de.tudarmstadt.ukp.lmf.model.semantics.Synset, DefinedMeaning> LMFSynsetOWSynsetMappings = new HashMap<de.tudarmstadt.ukp.lmf.model.semantics.Synset, DefinedMeaning>(); // Mappings between OW-Synsets and LMF-DefinedMeaning private static HashMap<DefinedMeaning, de.tudarmstadt.ukp.lmf.model.semantics.Synset> OWSynsetLMFSynsetMappings = new HashMap<DefinedMeaning, de.tudarmstadt.ukp.lmf.model.semantics.Synset>(); private boolean initialized = false; // true only if SynsetGenerator is already initialized /** * This Method Constructs a SynsetGenerator based on consumed OmegaWiki Dictionary * @param omegawiki OmegaWiki Dictionary * @param language * @param resourceVersion Version of the resource * @return SynsetGenerator */ public SynsetGenerator(OmegaWiki omegawiki, int language, String resourceVersion){ this.GlobalLanguage = language; this.GlobalLanguageLMF = OmegaWikiLMFMap.mapLanguage(language); this.resourceVersion = resourceVersion; this.omegawiki = omegawiki; } /** * This method initializes the SynsetGenerator * @throws OmegaWikiException * @throws UnsupportedEncodingException */ public void initialize() throws UnsupportedEncodingException, OmegaWikiException { if(!initialized){ //Iterate over all DefinedMeanings in the specified language Iterator<DefinedMeaning> dmIter = null; int overall = 0; int current = 0; try { dmIter = omegawiki.getAllDefinedMeanings(this.GlobalLanguage).iterator(); overall = omegawiki.getAllDefinedMeanings(this.GlobalLanguage).size(); } catch (OmegaWikiException e) { e.printStackTrace(); } while(dmIter.hasNext() ) {//&& i++<=100 ){ DefinedMeaning dm = dmIter.next(); if(current++ % 1000 == 0) { System.out.println("Generating Synsets... " + ((current * 100) / overall) + "%"); } Synset lmfSynset = new Synset(); lmfSynset.setId(getNewID()); LMFSynsetOWSynsetMappings.put(lmfSynset, dm); OWSynsetLMFSynsetMappings.put(dm, lmfSynset); // Generating Definition(s) of the Synset Definition definition = new Definition(); TextRepresentation textRepresentation = new TextRepresentation(); textRepresentation.setLanguageIdentifier(OmegaWikiLMFMap.mapLanguage(GlobalLanguage)); if(dm.getGlosses(GlobalLanguage).size()>0) { textRepresentation.setWrittenText(((TranslatedContent)dm.getGlosses(GlobalLanguage).toArray()[0]).getGloss()); } List<TextRepresentation> textRepresentations = new LinkedList<TextRepresentation>(); textRepresentations.add(textRepresentation); definition.setTextRepresentations(textRepresentations); List<Definition> definitions = new LinkedList<Definition>(); definitions.add(definition); lmfSynset.setDefinitions(definitions); // *** Creating MonolingualExternalRef ***// MonolingualExternalRef monolingualExternalRef = new MonolingualExternalRef(); // Generating MonolingualExternalRef ID monolingualExternalRef.setExternalSystem(resourceVersion + "_" + DEFINED_MEANING); StringBuffer sb = new StringBuffer(16); sb.append(dm.getDefinedMeaningId()); monolingualExternalRef.setExternalReference(sb.toString()); List<MonolingualExternalRef> monolingualExternalRefs = new LinkedList<MonolingualExternalRef>(); monolingualExternalRefs.add(monolingualExternalRef); lmfSynset.setMonolingualExternalRefs(monolingualExternalRefs); } initialized = true; } } /** * This method returns a list of LMF-Synsets */ public List<de.tudarmstadt.ukp.lmf.model.semantics.Synset> getSynsets(){ List<de.tudarmstadt.ukp.lmf.model.semantics.Synset> result = new LinkedList<de.tudarmstadt.ukp.lmf.model.semantics.Synset>(); result.addAll(LMFSynsetOWSynsetMappings.keySet()); Collections.sort(result); return result; } /** * This method generates a Synset-ID */ private String getNewID() { StringBuffer sb = new StringBuffer(64); sb.append("OW_"+GlobalLanguageLMF+"_Synset_").append(Integer.toString(LMFSynsetNumber)); LMFSynsetNumber++; return sb.toString(); } /** * This method consumes a OW DM returns It's corresponding Uby-LMFSynset * * Evoke this method only after SynsetGenerator has been initialized! * * @param dn DefinedMeaning * @return Uby-LMFSynset that corresponds to dm */ public de.tudarmstadt.ukp.lmf.model.semantics.Synset getLMFSynset(DefinedMeaning dm){ return OWSynsetLMFSynsetMappings.get(dm); } /** * @return the OWSynsetLMFSynsetMappings */ public HashMap<DefinedMeaning, de.tudarmstadt.ukp.lmf.model.semantics.Synset> getOWSynsetLMFSynsetMappings() { return OWSynsetLMFSynsetMappings; } public int getGlobalLanguage() { return GlobalLanguage; } public String getGlobalLanguageLMF() { return GlobalLanguageLMF; } /** * @return the OmegaWiki */ public OmegaWiki getOmegaWiki() { return omegawiki; } }