/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.nlp4j.internal;
import java.util.List;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import de.tudarmstadt.ukp.dkpro.core.api.io.BilouDecoder;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
import edu.emory.mathcs.nlp.component.template.node.NLPNode;
/**
 * Static helpers that copy the analysis results stored in EmoryNLP {@link NLPNode}s (part-of-speech
 * tags, dependency relations, named entity tags) into UIMA annotations.
 * <p>
 * All methods skip the node at array index 0 and treat node {@code i} as belonging to token
 * {@code i - 1} of the supplied token list.
 */
public class EmoryNlp2Uima
{
    /**
     * Creates one {@link POS} annotation per token from the part-of-speech tag of the
     * corresponding node and attaches it to the token.
     *
     * @param aCas
     *            the CAS in which the annotations are created.
     * @param aTokens
     *            the tokens to annotate; token at list position {@code k} is paired with
     *            {@code aNodes[k + 1]}.
     * @param aNodes
     *            the EmoryNLP nodes; index 0 is not used.
     * @param aMappingProvider
     *            resolves a tag to the UIMA type of the annotation to create.
     * @param internStrings
     *            whether the tag stored as POS value is interned. The coarse value is always
     *            interned, independent of this flag.
     */
    public static void convertPos(CAS aCas, List<Token> aTokens, NLPNode[] aNodes,
            MappingProvider aMappingProvider, boolean internStrings)
    {
        // EmoryNLP tokens start at 1
        int nodeIndex = 1;
        for (Token token : aTokens) {
            String tag = aNodes[nodeIndex].getPartOfSpeechTag();

            // Convert the tag produced by the tagger to an UIMA type, create an annotation
            // of this type, and add it to the document.
            Type posTag = aMappingProvider.getTagType(tag);
            POS posAnno = (POS) aCas.createAnnotation(posTag, token.getBegin(), token.getEnd());

            // To save memory, we typically intern() tag strings
            posAnno.setPosValue(internStrings ? tag.intern() : tag);

            // Only annotations of a POS subclass get a coarse value (the short name of the
            // mapped type); a plain POS annotation has none.
            posAnno.setCoarseValue(posAnno.getClass().equals(POS.class) ? null
                    : posTag.getShortName().intern());
            posAnno.addToIndexes();

            // Connect the POS annotation to the respective token annotation
            token.setPos(posAnno);
            nodeIndex++;
        }
    }

    /**
     * Creates one {@link Dependency} annotation per node (starting at index 1) and adds it to the
     * indexes.
     * <p>
     * A node whose dependency head has ID 0 becomes a {@link ROOT} relation in which the token is
     * its own governor. Any other node becomes a relation of the type the mapping provider returns
     * for its label, governed by the token of its head. Each relation spans the dependent token and
     * has flavor {@link DependencyFlavor#BASIC}.
     *
     * @param aJCas
     *            the JCas in which the annotations are created.
     * @param aTokens
     *            the tokens; a node with ID {@code n} is resolved to {@code aTokens.get(n - 1)}.
     * @param aNodes
     *            the EmoryNLP nodes; index 0 is not used.
     * @param aMappingProvider
     *            resolves a dependency label to the UIMA type of the relation to create.
     * @param aInternTags
     *            whether the dependency label is interned before it is stored. This applies to
     *            ROOT relations as well as to all other relations.
     */
    public static void convertDependencies(JCas aJCas, List<Token> aTokens, NLPNode[] aNodes,
            MappingProvider aMappingProvider, boolean aInternTags)
    {
        for (int i = 1; i < aNodes.length; i++) {
            NLPNode depNode = aNodes[i];
            NLPNode govNode = depNode.getDependencyHead();
            String label = depNode.getDependencyLabel();

            // Resolve the stored label once so that both kinds of relation honour aInternTags.
            // A null label is passed through untouched instead of failing on intern().
            String depType = (aInternTags && label != null) ? label.intern() : label;
            Token dependent = aTokens.get(depNode.getID() - 1);

            // FIXME Also extract the semantic heads and store them with dependency flavor
            // ENHANCED
            Dependency dep;
            Token governor;
            if (govNode.getID() != 0) {
                Type depRel = aMappingProvider.getTagType(label);
                dep = (Dependency) aJCas.getCas().createFS(depRel);
                governor = aTokens.get(govNode.getID() - 1);
            }
            else {
                // The head is the node with ID 0: the token governs itself in a ROOT relation
                dep = new ROOT(aJCas);
                governor = dependent;
            }

            dep.setDependencyType(depType);
            dep.setDependent(dependent);
            dep.setGovernor(governor);
            dep.setBegin(dependent.getBegin());
            dep.setEnd(dependent.getEnd());
            dep.setFlavor(DependencyFlavor.BASIC);
            dep.addToIndexes();
        }
    }

    /**
     * Collects the named entity tag of every node (starting at index 1) and hands the tags
     * together with the tokens to a {@link BilouDecoder}, which is configured to write the
     * {@code value} feature of {@link NamedEntity}.
     *
     * @param aCas
     *            the CAS passed to the decoder; its type system must contain the
     *            {@link NamedEntity} type.
     * @param aTokens
     *            the tokens passed to the decoder.
     * @param aNodes
     *            the EmoryNLP nodes; index 0 is not used, so {@code aNodes.length - 1} tags are
     *            collected.
     * @param aMappingProvider
     *            the mapping provider passed to the decoder.
     * @param aInternTags
     *            forwarded to {@link BilouDecoder#setInternTags(boolean)}.
     */
    public static void convertNamedEntities(CAS aCas, List<Token> aTokens, NLPNode[] aNodes,
            MappingProvider aMappingProvider, boolean aInternTags)
    {
        Type neType = aCas.getTypeSystem().getType(NamedEntity.class.getName());
        Feature valueFeat = neType.getFeatureByBaseName("value");

        // Shift by one: the tag of node i is stored at position i - 1
        String[] neTags = new String[aNodes.length - 1];
        for (int i = 1; i < aNodes.length; i++) {
            neTags[i - 1] = aNodes[i].getNamedEntityTag();
        }

        BilouDecoder decoder = new BilouDecoder(aCas, valueFeat, aMappingProvider);
        decoder.setInternTags(aInternTags);
        decoder.decode(aTokens, neTags);
    }
}