/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.uby.resource; import java.util.HashMap; import java.util.List; import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech; /** * * This class provides helper methods to access UBY senses and UBY sense links. * - simple word sense disambiguation (most frequent sense heuristics) * - mapping between DKPro-Core POS tags and UBY POS tags * TODO - enrichment of resource-specific senses by following the sense links in UBY * * @author Judith Eckle-Kohler * */ public class UbyResourceUtils { /** * @param lexicalEntries * a list of lexical entries that share a common lemma form and POS * @return * the most frequent sense of the first lexical entry in the list */ public static Sense getMostFrequentSense(List<LexicalEntry> lexicalEntries) { Sense resultSense = null; // WordNet contains MFS information, since the senses are ordered by decreasing frequency in SemCor: // in UBY, the sense with index = 1 is the MFS for (LexicalEntry lexicalEntry : lexicalEntries) { for (Sense sense : lexicalEntry.getSenses()) { if (sense.getIndex() == 1) { resultSense = sense; } } } return resultSense; } /** * @param corePosValue * the String value of a POS type in DKPro-Core * @return * the corresponding POS enumeration value in UBY */ public static EPartOfSpeech[] corePosToUbyPos(String corePosValue) { // covers only UBY POS values that are used as POS values of common nouns EPartOfSpeech UbyCommonNounPOS[] = { EPartOfSpeech.noun, EPartOfSpeech.nounCommon }; // covers only UBY POS values that are used as POS values of main verbs EPartOfSpeech UbyMainVerbPOS[] = { EPartOfSpeech.verb, EPartOfSpeech.verbMain }; EPartOfSpeech UbyAdjectivePOS[] = { EPartOfSpeech.adjective }; EPartOfSpeech EmptyPosList[] = {}; HashMap<String, EPartOfSpeech[]> posMap = new HashMap<String, EPartOfSpeech[]>(); posMap.put("NN", UbyCommonNounPOS); posMap.put("N", UbyCommonNounPOS); // universal POS tags collapse common noun and proper noun distinction posMap.put("V", UbyMainVerbPOS); // universal POS tags collapse main verb and auxiliary / modal verb distinction posMap.put("ADJ", UbyAdjectivePOS); if (posMap.containsKey(corePosValue)) { return posMap.get(corePosValue); } else { return EmptyPosList; } } }