/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.gate.internal;
import static de.tudarmstadt.ukp.dkpro.core.io.gate.internal.GateAnnieConstants.FEAT_LEMMA;
import static de.tudarmstadt.ukp.dkpro.core.io.gate.internal.GateAnnieConstants.FEAT_STEM;
import static gate.creole.ANNIEConstants.SENTENCE_ANNOTATION_TYPE;
import static gate.creole.ANNIEConstants.TOKEN_ANNOTATION_TYPE;
import static gate.creole.ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME;
import org.apache.uima.jcas.JCas;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import gate.Annotation;
import gate.AnnotationSet;
import gate.Document;
import gate.FeatureMap;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
/**
 * Transfers the text and the sentence/token annotations of a GATE {@link Document} into a UIMA
 * {@link JCas}, using the DKPro Core {@link Sentence}, {@link Token}, {@link POS}, {@link Lemma}
 * and {@link Stem} types.
 */
public class Gate2DKPro
{
    /**
     * Sets the document text of {@code jcas} from {@code doc} and then walks the annotations
     * returned by {@code doc.getAnnotations()}. Sentence annotations become {@link Sentence}s,
     * token annotations become {@link Token}s (with optional POS, lemma and stem), and any other
     * annotation type is reported on {@code System.err} and otherwise ignored.
     *
     * @param doc
     *            the GATE document to read from.
     * @param jcas
     *            the CAS that receives the document text and the converted annotations.
     */
    public void convert(Document doc, JCas jcas)
    {
        jcas.setDocumentText(doc.getContent().toString());

        // Ids of the annotations that were already looked at; an id seen twice is skipped.
        IntOpenHashSet handledIds = new IntOpenHashSet();
        for (Annotation gateAnno : doc.getAnnotations()) {
            Integer id = gateAnno.getId();
            if (!handledIds.contains(id)) {
                String type = gateAnno.getType();
                if (SENTENCE_ANNOTATION_TYPE.equals(type)) {
                    addSentence(jcas, gateAnno);
                }
                else if (TOKEN_ANNOTATION_TYPE.equals(type)) {
                    addToken(jcas, gateAnno);
                }
                else {
                    System.err.printf("Don't know how to handle type: %s%n", type);
                }
                handledIds.add(id);
            }
        }
    }

    /** Creates and indexes a {@link Sentence} spanning the given GATE annotation. */
    private static void addSentence(JCas jcas, Annotation gateSentence)
    {
        int begin = beginOf(gateSentence);
        int end = endOf(gateSentence);
        new Sentence(jcas, begin, end).addToIndexes(jcas);
    }

    /**
     * Creates and indexes a {@link Token} spanning the given GATE annotation. For each of the
     * category, lemma and stem features that is present, a matching annotation over the same
     * span is indexed and attached to the token.
     */
    private static void addToken(JCas jcas, Annotation gateToken)
    {
        FeatureMap features = gateToken.getFeatures();
        int begin = beginOf(gateToken);
        int end = endOf(gateToken);
        Token token = new Token(jcas, begin, end);

        String posTag = (String) features.get(TOKEN_CATEGORY_FEATURE_NAME);
        if (posTag != null) {
            POS posAnno = new POS(jcas, begin, end);
            posAnno.setPosValue(posTag);
            posAnno.addToIndexes(jcas);
            token.setPos(posAnno);
        }

        String lemmaText = (String) features.get(FEAT_LEMMA);
        if (lemmaText != null) {
            Lemma lemmaAnno = new Lemma(jcas, begin, end);
            lemmaAnno.setValue(lemmaText);
            lemmaAnno.addToIndexes(jcas);
            token.setLemma(lemmaAnno);
        }

        String stemText = (String) features.get(FEAT_STEM);
        if (stemText != null) {
            Stem stemAnno = new Stem(jcas, begin, end);
            stemAnno.setValue(stemText);
            stemAnno.addToIndexes(jcas);
            token.setStem(stemAnno);
        }

        // The token is indexed last, after its optional sub-annotations have been attached.
        token.addToIndexes(jcas);
    }

    /** Returns the start offset of the annotation, narrowed to {@code int}. */
    private static int beginOf(Annotation gateAnno)
    {
        return gateAnno.getStartNode().getOffset().intValue();
    }

    /** Returns the end offset of the annotation, narrowed to {@code int}. */
    private static int endOf(Annotation gateAnno)
    {
        return gateAnno.getEndNode().getOffset().intValue();
    }
}