/*
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.dictionaryannotator.semantictagging;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.fit.component.Resource_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceSpecifier;
import de.tudarmstadt.ukp.dkpro.core.api.resources.ResourceUtils;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
/**
* This shared resource can be added as an ExternalResource to analysis engines
* that annotate tokens with semantic tags looked up in a key-value map,
* e.g., to annotate common nouns with semantic field information from WordNet.
*/
public class SemanticTagResource
    extends Resource_ImplBase
    implements SemanticTagProvider
{
    /** Name of the configuration parameter that holds the location of the tag dictionary. */
    public final static String PARAM_RESOURCE_PATH = "resourcePath";

    /** Tag returned when the looked-up key has no entry in the dictionary. */
    private static final String UNKNOWN_TAG = "UNKNOWN";

    /**
     * Encoding used to decode the dictionary. Fixed explicitly so that the loaded keys do not
     * depend on the platform default charset of the machine running the pipeline.
     */
    private static final String RESOURCE_ENCODING = "UTF-8";

    @ConfigurationParameter(name = PARAM_RESOURCE_PATH, mandatory = true)
    // TODO add default like: defaultValue = "classpath:de/tudarmstadt/ukp/dkpro/core/decompounding/lib/spelling/de/igerman98/de_DE_igerman98.dic"
    private String resourcePath;

    /** Maps a key (first column of the dictionary) to its semantic tag (second column). */
    private final Map<String, String> keySemanticTagMap = new HashMap<String, String>();

    /**
     * Initializes the resource and loads the tab-separated dictionary found at
     * {@link #PARAM_RESOURCE_PATH} into memory.
     *
     * @param aSpecifier
     *            the resource specifier, passed on to the superclass.
     * @param aAdditionalParams
     *            additional parameters, passed on to the superclass.
     * @return {@code false} if the superclass initialization returned {@code false} (the
     *         dictionary is not loaded in that case), {@code true} otherwise.
     * @throws ResourceInitializationException
     *             if the dictionary cannot be resolved, read, or contains a malformed line.
     */
    @Override
    public boolean initialize(ResourceSpecifier aSpecifier, Map aAdditionalParams)
        throws ResourceInitializationException
    {
        if (!super.initialize(aSpecifier, aAdditionalParams)) {
            return false;
        }

        try {
            final URL uri = ResourceUtils.resolveLocation(resourcePath, this, null);
            final BufferedReader reader = new BufferedReader(
                    new InputStreamReader(uri.openStream(), RESOURCE_ENCODING));
            try {
                readFileToMap(reader);
            }
            finally {
                // Closing the outermost reader also releases the underlying URL stream.
                reader.close();
            }
        }
        catch (IOException e) {
            throw new ResourceInitializationException(e);
        }

        return true;
    }

    /**
     * Returns the semantic tag registered for the lemma of the given token.
     *
     * @param token
     *            the token whose lemma value is used as lookup key.
     * @return the tag stored for the lemma, or {@code "UNKNOWN"} if there is none.
     * @throws ResourceAccessException
     *             if the lemma cannot be read from the token, e.g. because the token is
     *             {@code null} or carries no lemma.
     */
    @Override
    public String getSemanticTag(Token token)
        throws ResourceAccessException
    {
        try {
            return lookup(token.getLemma().getValue());
        }
        catch (Exception e) {
            // Broad catch kept on purpose: any failure while reading the annotation is
            // reported to the caller through the declared checked exception.
            throw new ResourceAccessException(e);
        }
    }

    /**
     * Returns the semantic tag registered for a multi-token expression. The lookup key is built
     * by joining the lemma values of the tokens with single spaces, in list order.
     *
     * @param tokens
     *            the tokens forming the expression.
     * @return the tag stored for the joined lemmas, or {@code "UNKNOWN"} if there is none.
     * @throws ResourceAccessException
     *             if a lemma cannot be read, e.g. because the list or one of its tokens is
     *             {@code null} or a token carries no lemma.
     */
    @Override
    public String getSemanticTag(List<Token> tokens)
        throws ResourceAccessException
    {
        try {
            // Collecting the lemmas happens inside the try block so that a missing lemma is
            // reported exactly like in the single-token variant.
            List<String> lemmas = new ArrayList<String>();
            for (Token token : tokens) {
                lemmas.add(token.getLemma().getValue());
            }
            return lookup(StringUtils.join(lemmas, " "));
        }
        catch (Exception e) {
            throw new ResourceAccessException(e);
        }
    }

    /**
     * Looks up a key in the dictionary with a single map access.
     *
     * @param key
     *            the lookup key, may be {@code null}.
     * @return the stored tag, or {@code "UNKNOWN"} if the key has no entry.
     */
    private String lookup(String key)
    {
        // Stored values are never null (they come from String.split), so a null result
        // always means "no entry".
        String tag = keySemanticTagMap.get(key);
        return tag != null ? tag : UNKNOWN_TAG;
    }

    /**
     * Reads {@code key<TAB>tag} lines from the reader into the dictionary. Columns after the
     * second one are ignored; lines that are empty or contain only whitespace are skipped. If a
     * key occurs several times, the last occurrence wins.
     *
     * @param bufferedReader
     *            the reader to consume; it is not closed by this method.
     * @throws IOException
     *             if reading fails or a non-blank line has fewer than two columns.
     */
    private void readFileToMap(BufferedReader bufferedReader)
        throws IOException
    {
        int lineNumber = 0;
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            lineNumber++;

            if (line.trim().length() == 0) {
                continue;
            }

            String[] columns = line.split("\t");
            if (columns.length < 2) {
                // Fail with a checked, descriptive error instead of an
                // ArrayIndexOutOfBoundsException without any context.
                throw new IOException("Malformed entry in [" + resourcePath + "] at line "
                        + lineNumber + ": expected <key>TAB<tag> but found [" + line + "]");
            }

            keySemanticTagMap.put(columns[0], columns[1]);
        }
    }
}