/*
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.lbj.internal;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.selectCovered;
import java.util.ArrayList;
import java.util.List;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk;
import edu.illinois.cs.cogcomp.core.datastructures.IntPair;
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation;
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView;
/**
 * Converts the contents of a {@link JCas} (sentences, tokens, lemmas, part-of-speech tags, named
 * entities and chunks) into an Illinois {@link TextAnnotation}.
 */
public class ConvertToIllinois
{
    /**
     * Builds a {@link TextAnnotation} from the given CAS.
     * <p>
     * Tokens are collected sentence by sentence; the position of a token in that sequence is its
     * token index in the resulting document. Tokens that are not covered by any {@link Sentence}
     * are not part of the resulting document. A {@link ViewNames#LEMMA} view and a
     * {@link ViewNames#POS} view are only added if at least one token carries a lemma or POS
     * respectively. The {@link ViewNames#NER_CONLL} and {@link ViewNames#SHALLOW_PARSE} views are
     * always added.
     *
     * @param aJCas
     *            the CAS to convert.
     * @return the converted document. Corpus id and document id are {@code null}.
     */
    public TextAnnotation convert(JCas aJCas)
    {
        String corpusId = null;
        String id = null;
        String text = aJCas.getDocumentText();

        // Collect the tokens exactly once, in sentence order. The very same list is used below
        // for the lemma/POS views, so the token index used there always matches the position
        // in the token arrays handed to the TextAnnotation - even if the CAS contains tokens
        // which are not covered by any sentence.
        List<Sentence> sentenceList = new ArrayList<>(select(aJCas, Sentence.class));
        List<Token> tokenList = new ArrayList<>();
        int[] sentences = new int[sentenceList.size()];
        for (int is = 0; is < sentences.length; is++) {
            tokenList.addAll(selectCovered(Token.class, sentenceList.get(is)));
            // Index one past the last token of the sentence
            sentences[is] = tokenList.size();
        }

        IntPair[] tokens = new IntPair[tokenList.size()];
        String[] sTokens = new String[tokens.length];
        for (int it = 0; it < tokens.length; it++) {
            Token t = tokenList.get(it);
            tokens[it] = new IntPair(t.getBegin(), t.getEnd());
            sTokens[it] = t.getCoveredText();
        }

        TextAnnotation document = new TextAnnotation(corpusId, id, text, tokens, sTokens,
                sentences);

        // Lemmas & POS
        TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, null, document, 1.0);
        TokenLabelView posView = new TokenLabelView(ViewNames.POS, null, document, 1.0);
        for (int i = 0; i < tokenList.size(); i++) {
            Token t = tokenList.get(i);

            Lemma lemma = t.getLemma();
            if (lemma != null) {
                Constituent lemmaConstituent = new Constituent(lemma.getValue(), ViewNames.LEMMA,
                        document, i, i + 1);
                lemmaView.addConstituent(lemmaConstituent);
            }

            POS pos = t.getPos();
            if (pos != null) {
                Constituent posConstituent = new Constituent(pos.getPosValue(), ViewNames.POS,
                        document, i, i + 1);
                posView.addConstituent(posConstituent);
            }
        }

        if (lemmaView.count() > 0) {
            document.addView(ViewNames.LEMMA, lemmaView);
        }

        if (posView.count() > 0) {
            document.addView(ViewNames.POS, posView);
        }

        convertSpanLabelView(document, ViewNames.NER_CONLL, aJCas, tokens, NamedEntity.class,
                "value");

        convertSpanLabelView(document, ViewNames.SHALLOW_PARSE, aJCas, tokens, Chunk.class,
                "chunkValue");

        return document;
    }

    /**
     * Adds a {@link SpanLabelView} to the document which contains one labelled span per
     * annotation of the given type.
     * <p>
     * The annotations are expected to be aligned with token boundaries and not to overlap each
     * other: the search for the first token of an annotation resumes after the last token of the
     * previous annotation.
     *
     * @param document
     *            the document to which the view is added.
     * @param aView
     *            the name of the view to create.
     * @param aJCas
     *            the CAS from which the annotations are read.
     * @param tokens
     *            the character offsets of the tokens of the document, in token order.
     * @param type
     *            the annotation type to convert.
     * @param aFeature
     *            the base name of the string feature which holds the span label.
     * @throws IllegalStateException
     *             if no token can be found for the begin or the end of an annotation.
     */
    private void convertSpanLabelView(TextAnnotation document, String aView, JCas aJCas,
            IntPair[] tokens, Class<?> type, String aFeature)
    {
        SpanLabelView view = new SpanLabelView(aView, document);
        int t = 0;
        Type uimaType = CasUtil.getType(aJCas.getCas(), type);
        Feature valueFeat = uimaType.getFeatureByBaseName(aFeature);
        for (AnnotationFS chunk : CasUtil.select(aJCas.getCas(), uimaType)) {
            // Locate the first token of the annotation
            int begin = t;
            while (begin < tokens.length && tokens[begin].getFirst() < chunk.getBegin()) {
                begin++;
            }
            if (begin >= tokens.length) {
                throw new IllegalStateException("No token found for the begin of the ["
                        + uimaType.getName() + "] annotation at [" + chunk.getBegin() + "-"
                        + chunk.getEnd() + "]");
            }
            assert tokens[begin].getFirst() == chunk.getBegin();

            // Locate the last token of the annotation
            int end = begin;
            while (end < tokens.length && tokens[end].getSecond() < chunk.getEnd()) {
                end++;
            }
            if (end >= tokens.length) {
                throw new IllegalStateException("No token found for the end of the ["
                        + uimaType.getName() + "] annotation at [" + chunk.getBegin() + "-"
                        + chunk.getEnd() + "]");
            }
            assert tokens[end].getSecond() == chunk.getEnd();

            // "end" is the index of the last token covered by the annotation, but the span end
            // is exclusive (cf. the "i, i + 1" spans of the lemma and POS constituents), so the
            // span has to end one past the last covered token.
            view.addSpanLabel(begin, end + 1, chunk.getStringValue(valueFeat), 1.0);

            t = end + 1;
        }
        document.addView(aView, view);
    }
}