/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.gate.internal;
import static gate.creole.ANNIEConstants.*;
import static de.tudarmstadt.ukp.dkpro.core.io.gate.internal.GateAnnieConstants.FEAT_LEMMA;
import static de.tudarmstadt.ukp.dkpro.core.io.gate.internal.GateAnnieConstants.FEAT_STEM;
import static org.apache.uima.fit.util.JCasUtil.selectAll;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import gate.AnnotationSet;
import gate.Document;
import gate.FeatureMap;
import gate.corpora.DocumentContentImpl;
import gate.util.GateException;
import gate.util.SimpleFeatureMapImpl;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
/**
 * Copies the text and selected annotations of a DKPro Core {@link JCas} into a GATE
 * {@link Document}.
 * <p>
 * Tokens and sentences are converted to GATE annotations. The part-of-speech tag, lemma and stem
 * of a token are stored as features on the GATE token annotation rather than as annotations of
 * their own. Any other feature structure is reported on standard output and otherwise ignored.
 */
public class DKPro2Gate
{
    /**
     * Converts DKPro to GATE using the default unnamed annotation set (kept for backward
     * compatibility).
     *
     * @param aSource
     *            the CAS to read the document text and annotations from.
     * @param aTarget
     *            the GATE document to fill; its content is replaced.
     * @return the document passed in as {@code aTarget}.
     * @throws GateException
     *             if GATE fails to add an annotation to the target document.
     */
    public Document convert(JCas aSource, Document aTarget)
        throws GateException
    {
        return convert(aSource, aTarget, null);
    }

    /**
     * Converts DKPro to GATE, optionally writing into a named annotation set.
     *
     * @param aSource
     *            the CAS to read the document text and annotations from.
     * @param aTarget
     *            the GATE document to fill; its content is replaced.
     * @param annotationSetName
     *            name of the GATE annotation set to write to; if {@code null} or empty, the
     *            default unnamed annotation set is used.
     * @return the document passed in as {@code aTarget}.
     * @throws GateException
     *             if GATE fails to add an annotation to the target document.
     */
    public Document convert(JCas aSource, Document aTarget, String annotationSetName)
        throws GateException
    {
        aTarget.setContent(new DocumentContentImpl(aSource.getDocumentText()));

        AnnotationSet as;
        if (annotationSetName == null || annotationSetName.isEmpty()) {
            as = aTarget.getAnnotations();
        }
        else {
            as = aTarget.getAnnotations(annotationSetName);
        }

        // First pass: everything reachable from a token (POS, lemma, stem) is exported as a
        // feature of the GATE token, so mark it as processed up front. Without this, a stem
        // would be reported as an unknown type although its value has been exported.
        IntOpenHashSet processed = new IntOpenHashSet();
        for (TOP fs : selectAll(aSource)) {
            if (fs instanceof Token) {
                Token token = (Token) fs;
                markProcessed(processed, token.getPos());
                markProcessed(processed, token.getLemma());
                markProcessed(processed, token.getStem());
            }
        }

        // Second pass: the actual conversion.
        for (TOP fs : selectAll(aSource)) {
            if (processed.contains(fs.getAddress())) {
                continue;
            }

            if (fs instanceof Token) {
                Token token = (Token) fs;
                as.add(Long.valueOf(token.getBegin()), Long.valueOf(token.getEnd()),
                        TOKEN_ANNOTATION_TYPE, tokenFeatures(token));
            }
            else if (fs instanceof Lemma || fs instanceof POS) {
                // Not attached to any token, so there is no GATE token to carry the value.
                // Skipped silently, as before.
            }
            else if (fs instanceof Sentence) {
                Sentence sentence = (Sentence) fs;
                as.add(Long.valueOf(sentence.getBegin()), Long.valueOf(sentence.getEnd()),
                        SENTENCE_ANNOTATION_TYPE, new SimpleFeatureMapImpl());
            }
            else {
                System.out.printf("Don't know how to handle type: %s%n", fs.getType().getName());
            }

            processed.add(fs.getAddress());
        }

        return aTarget;
    }

    /**
     * Builds the GATE feature map for a token: string, length and, where present, the
     * part-of-speech value, lemma and stem.
     */
    private static FeatureMap tokenFeatures(Token aToken)
    {
        String text = aToken.getCoveredText();

        FeatureMap features = new SimpleFeatureMapImpl();
        features.put(TOKEN_LENGTH_FEATURE_NAME, text.length());
        features.put(TOKEN_STRING_FEATURE_NAME, text);
        if (aToken.getPos() != null) {
            features.put(TOKEN_CATEGORY_FEATURE_NAME, aToken.getPos().getPosValue());
        }
        if (aToken.getLemma() != null) {
            features.put(FEAT_LEMMA, aToken.getLemma().getValue());
        }
        if (aToken.getStem() != null) {
            features.put(FEAT_STEM, aToken.getStem().getValue());
        }
        return features;
    }

    /**
     * Records the address of the given feature structure in the set, ignoring {@code null}.
     */
    private static void markProcessed(IntOpenHashSet aProcessed, TOP aFs)
    {
        if (aFs != null) {
            aProcessed.add(aFs.getAddress());
        }
    }
}