package de.berlin.hu.uima.util;
import banner.types.Token;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.u_compare.shared.syntactic.Sentence;
import org.uimafit.util.JCasUtil;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Static helpers for collecting U-Compare tokens from a CAS and for copying
 * them into BANNER sentences.
 *
 * @author Tim Rocktäschel
 *
 */
public class Util {

    /**
     * Collects the tokens that lie completely inside a character span.
     *
     * <p>A token is returned only if
     * {@code sentenceBegin <= token.begin < sentenceEnd} and
     * {@code token.end <= sentenceEnd}. Tokens are returned in the order in
     * which the annotation index iterates over them.
     *
     * @param aJCas         the CAS whose token annotation index is searched
     * @param sentenceBegin offset of the first character of the sentence
     * @param sentenceEnd   end offset of the sentence
     * @return all tokens that occur within the sentence; never {@code null},
     *         empty if there is none
     */
    public static List<org.u_compare.shared.syntactic.Token> getTokens(JCas aJCas,
            int sentenceBegin, int sentenceEnd) {
        List<org.u_compare.shared.syntactic.Token> tokensInSentence = new ArrayList<org.u_compare.shared.syntactic.Token>();
        FSIndex<Annotation> tokenIndex = aJCas.getAnnotationIndex(org.u_compare.shared.syntactic.Token.type);

        // Probe annotation used only to position the iterator; this method
        // never adds it to the indexes.
        // NOTE(review): the probe sits one character before the sentence,
        // presumably so that tokens beginning exactly at sentenceBegin are not
        // skipped by the index ordering -- confirm against the UIMA annotation
        // index sort order.
        org.u_compare.shared.syntactic.Token dummyToken = new org.u_compare.shared.syntactic.Token(aJCas);
        dummyToken.setBegin(sentenceBegin - 1);
        dummyToken.setEnd(sentenceBegin - 1);

        Iterator<Annotation> tokenIterator = tokenIndex.iterator(dummyToken);
        while (tokenIterator.hasNext()) {
            org.u_compare.shared.syntactic.Token currentToken = (org.u_compare.shared.syntactic.Token) tokenIterator.next();
            int currentTokenBegin = currentToken.getBegin();
            int currentTokenEnd = currentToken.getEnd();

            if (currentTokenBegin >= sentenceEnd) {
                // Like the original early exit, this assumes the iterator
                // yields tokens by ascending begin offset, so nothing after
                // this token can start inside the sentence.
                break;
            }

            // Skip (rather than stop at) tokens that start before the sentence
            // or run past its end: a later token may still fit completely.
            if (currentTokenBegin >= sentenceBegin && currentTokenEnd <= sentenceEnd) {
                tokensInSentence.add(currentToken);
            }
        }
        return tokensInSentence;
    }

    /**
     * Collects the tokens that uimaFIT's {@code JCasUtil.iterator} yields for
     * the given sentence.
     *
     * @param aJCas    not used by this method; kept so the signature matches
     *                 the offset-based overload
     * @param sentence the sentence annotation whose tokens are requested
     * @return the tokens in iteration order; never {@code null}
     */
    public static List<org.u_compare.shared.syntactic.Token> getTokens(JCas aJCas,
            Sentence sentence) {
        List<org.u_compare.shared.syntactic.Token> tokensInSentence = new ArrayList<org.u_compare.shared.syntactic.Token>();
        // NOTE(review): the two boolean arguments are presumably uimaFIT's
        // "ambiguous" and "strict" flags -- confirm against the JCasUtil docs.
        Iterator<org.u_compare.shared.syntactic.Token> tokenIterator = JCasUtil.iterator(sentence, org.u_compare.shared.syntactic.Token.class, true, true);
        while (tokenIterator.hasNext()) {
            tokensInSentence.add(tokenIterator.next());
        }
        return tokensInSentence;
    }

    /**
     * Adds one BANNER token to {@code bannerSentence} for every U-Compare
     * token in {@code tokensInSentence}.
     *
     * <p>Token offsets are converted from document coordinates to
     * sentence-relative coordinates by subtracting the begin offset of the
     * first token in the list. An empty list leaves the sentence untouched.
     * A token for which BANNER throws an {@link IllegalArgumentException} is
     * reported on standard output and skipped. An
     * {@link IndexOutOfBoundsException} raised while adding tokens is reported
     * on standard output and ends the conversion without propagating.
     *
     * @param bannerSentence   the BANNER sentence that receives the tokens
     * @param tokensInSentence the tokens of that sentence, carrying
     *                         document-level offsets
     * @throws NullPointerException if {@code tokensInSentence} is {@code null}
     */
    public static void tokenizeBannerSentence(banner.types.Sentence bannerSentence,
            List<org.u_compare.shared.syntactic.Token> tokensInSentence) {
        // Explicit guard instead of relying on get(0) throwing for an empty list.
        if (tokensInSentence.isEmpty()) {
            return;
        }

        // Absolute document position of the first token in the list.
        int offset = tokensInSentence.get(0).getBegin();
        try {
            for (org.u_compare.shared.syntactic.Token token : tokensInSentence) {
                // Create a new token with the relative position in the sentence.
                try {
                    Token bannerToken = new Token(bannerSentence, token.getBegin() - offset, token.getEnd() - offset);
                    bannerSentence.addToken(bannerToken);
                } catch (IllegalArgumentException e) {
                    System.out.println(e);
                    System.out.println(bannerSentence.getText());
                    System.out.println("tokentext: [" + token.getCoveredText() + "]");
                }
            }
        } catch (IndexOutOfBoundsException e) {
            // Best effort, as before: stop converting this sentence without
            // throwing, but report the failure instead of hiding it.
            System.out.println(e);
            System.out.println(bannerSentence.getText());
        }
    }
}