/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.lbj.internal;
import java.util.List;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.jcas.JCas;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.resources.MappingProvider;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
public class ConvertToUima
{
public static void convertPOSs(JCas aJCas, List<Token> casTokens, TextAnnotation document,
MappingProvider mappingProvider, boolean internStrings)
{
CAS cas = aJCas.getCas();
List<Constituent> pos = document.getView(ViewNames.POS).getConstituents();
int i = 0;
for (Constituent p : pos) {
String tag = p.getLabel();
// Convert tagger output to CAS
Type posTag = mappingProvider.getTagType(tag);
POS posAnno = (POS) cas.createAnnotation(posTag, p.getStartCharOffset(),
p.getEndCharOffset());
posAnno.setPosValue(internStrings ? tag.intern() : tag);
posAnno.setCoarseValue(posAnno.getClass().equals(POS.class) ? null
: posAnno.getType().getShortName().intern());
posAnno.addToIndexes();
casTokens.get(i).setPos(posAnno);
i++;
}
}
public static void convertChunks(JCas aJCas, List<Token> casTokens, TextAnnotation document,
MappingProvider mappingProvider, boolean internStrings)
{
CAS cas = aJCas.getCas();
List<Constituent> pos = document.getView(ViewNames.SHALLOW_PARSE).getConstituents();
for (Constituent p : pos) {
String tag = p.getLabel();
// Convert tagger output to CAS
Type chunkTag = mappingProvider.getTagType(tag);
Chunk chunkAnno = (Chunk) cas.createAnnotation(chunkTag, p.getStartCharOffset(),
p.getEndCharOffset());
chunkAnno.setChunkValue(internStrings ? tag.intern() : tag);
chunkAnno.addToIndexes();
}
}
public static void convertNamedEntity(JCas aJCas, TextAnnotation document,
MappingProvider mappingProvider, boolean internStrings)
{
CAS cas = aJCas.getCas();
List<Constituent> ne = document.getView(ViewNames.NER_CONLL).getConstituents();
for (Constituent p : ne) {
String tag = p.getLabel();
// Convert tagger output to CAS
Type neTag = mappingProvider.getTagType(tag);
NamedEntity neAnno = (NamedEntity) cas.createAnnotation(neTag, p.getStartCharOffset(),
p.getEndCharOffset());
neAnno.setValue(internStrings ? tag.intern() : tag);
neAnno.addToIndexes();
}
}
public static void convertLemma(JCas aJCas, List<Token> casTokens, TextAnnotation document)
{
List<Constituent> lemma = document.getView(ViewNames.LEMMA).getConstituents();
int i = 0;
for (Constituent l : lemma) {
Lemma casLemma = new Lemma(aJCas, l.getStartCharOffset(), l.getEndCharOffset());
casLemma.setValue(l.getLabel());
casLemma.addToIndexes();
casTokens.get(i).setLemma(casLemma);
i++;
}
}
}