/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.core.udpipe.internal;
import java.util.Collection;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import cz.cuni.mff.ufal.udpipe.Sentence;
import cz.cuni.mff.ufal.udpipe.Word;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
public class UDPipe2DKPro
{
public static void convertPosLemmaMorph(Sentence sentence, Collection<Token> tokens, JCas aJCas,
MappingProvider mappingProvider, boolean internTags)
{
CAS cas = aJCas.getCas();
int i = 1; // the first tag is <root>
for (Token t : tokens) {
Word w = sentence.getWords().get(i);
String xtag = w.getXpostag();
String utag = w.getUpostag();
// For Norwegian xtag is not provided. It is a blank string.
// So the value of Utag is used as an replacement.
if (xtag.length() == 0 && utag.length() > 0)
xtag = utag;
// Convert the tag produced by the tagger to an UIMA type, create an annotation
// of this type, and add it to the document.
Type posTag = mappingProvider.getTagType(xtag);
POS posAnno = (POS) cas.createAnnotation(posTag, t.getBegin(), t.getEnd());
// To save memory, we typically intern() tag strings
posAnno.setPosValue(internTags ? xtag.intern() : xtag);
if (utag == null) {
posAnno.setCoarseValue(posAnno.getClass().equals(POS.class) ? null
: posAnno.getType().getShortName().intern());
}
else {
posAnno.setCoarseValue(internTags ? utag.intern() : utag);
}
posAnno.addToIndexes();
// Connect the POS annotation to the respective token annotation
t.setPos(posAnno);
if (StringUtils.isNotBlank(w.getLemma())) {
Lemma lemma = new Lemma(aJCas, t.getBegin(), t.getEnd());
lemma.setValue(w.getLemma());
lemma.addToIndexes();
t.setLemma(lemma);
}
if (StringUtils.isNotBlank(w.getForm())) {
MorphologicalFeatures morph = new MorphologicalFeatures(aJCas, t.getBegin(), t.getEnd());
morph.setValue(w.getFeats());
morph.addToIndexes();
t.setMorph(morph);
}
i++;
}
}
public static void convertParse(Sentence sentence, List<Token> tokens, JCas aJCas,
MappingProvider mappingProvider, boolean internTags)
{
for (int i = 1; i < sentence.getWords().size(); i++) {
Word w = sentence.getWords().get(i);
if (StringUtils.isNotBlank(w.getDeprel())) {
int depId = w.getId();
int govId = w.getHead();
// Model the root as a loop onto itself
makeDependency(mappingProvider, aJCas, govId, depId, w.getDeprel(),
DependencyFlavor.BASIC, tokens);
}
if (StringUtils.isNotBlank(w.getDeps())) {
// list items separated by vertical bar
String[] items = w.getDeps().split("\\|");
for (String item : items) {
String[] sItem = item.split(":");
int depId = w.getId();
int govId = Integer.valueOf(sItem[0]);
makeDependency(mappingProvider, aJCas, govId, depId, sItem[1],
DependencyFlavor.ENHANCED, tokens);
}
}
}
}
private static Dependency makeDependency(MappingProvider mappingProvider, JCas aJCas, int govId,
int depId, String label, String flavor, List<Token> tokens)
{
// write dependency information as annotation to JCas
Type depRel = mappingProvider.getTagType(label);
Dependency rel;
if (govId == 0) {
rel = new ROOT(aJCas);
rel.setGovernor(tokens.get(depId - 1));
rel.setDependent(tokens.get(depId - 1));
}
else {
rel = (Dependency) aJCas.getCas().createFS(depRel);
rel.setGovernor(tokens.get(govId - 1));
rel.setDependent(tokens.get(depId - 1));
}
rel.setDependencyType(label);
rel.setFlavor(flavor);
rel.setBegin(rel.getDependent().getBegin());
rel.setEnd(rel.getDependent().getEnd());
rel.addToIndexes();
return rel;
}
}