//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.uima.data;
import java.util.Collection;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import uk.gov.dstl.baleen.exceptions.BaleenRuntimeException;
import uk.gov.dstl.baleen.types.language.Text;
/**
 * A view onto a region of text within a JCas: either the span covered by a single {@link Text}
 * annotation, or the entire document.
 *
 * <p>The begin and end offsets of the block are captured when the block is constructed. Offsets
 * passed to the annotation helpers are relative to the start of the block and are translated to
 * document offsets with {@link #toDocumentOffset(int)}.
 */
public class TextBlock {

  private final JCas jCas;

  /** The annotation this block represents, or null when the block represents the whole JCas. */
  private final Text text;

  private final int blockBegin;

  private final int blockEnd;

  /**
   * Instantiates a new text block which represents a text annotation.
   *
   * @param jCas the jCas
   * @param text the text annotation whose begin and end define this block
   */
  public TextBlock(final JCas jCas, final Text text) {
    this.jCas = jCas;
    this.text = text;
    this.blockBegin = text.getBegin();
    this.blockEnd = text.getEnd();
  }

  /**
   * Instantiates a new text block which represents the entire JCas.
   *
   * <p>If the JCas has no document text the block is empty (begin and end are both 0).
   *
   * @param jCas the jCas
   */
  public TextBlock(final JCas jCas) {
    this.jCas = jCas;
    this.text = null;
    this.blockBegin = 0;
    this.blockEnd = documentLength(jCas);
  }

  /**
   * Length of the document text of the JCas, treating a missing document text as empty.
   *
   * @param jCas the jCas
   * @return the number of characters in the document text, or 0 if there is none
   */
  private static int documentLength(final JCas jCas) {
    // The document text is null until it has been set on the CAS, so guard before measuring it.
    final String documentText = jCas.getDocumentText();
    return documentText == null ? 0 : documentText.length();
  }

  /**
   * Checks if is whole document (ie the JCas vs a Text annotation).
   *
   * <p>Note that if a text annotation covers the entire document this will still be true.
   *
   * @return true, if is whole document
   */
  public boolean isWholeDocument() {
    return text == null || (text.getBegin() == 0 && text.getEnd() == documentLength(jCas));
  }

  /**
   * Gets the text annotation.
   *
   * @return the text (null if this is a JCas)
   */
  public Text getText() {
    return text;
  }

  /**
   * Gets the jCas.
   *
   * @return the jCas
   */
  public JCas getJCas() {
    return jCas;
  }

  /**
   * Gets the begin offset.
   *
   * @return the begin (0 if this block was created for the whole JCas)
   */
  public int getBegin() {
    return blockBegin;
  }

  /**
   * Gets the end offset.
   *
   * @return the end (the length of the document text if this block was created for the whole
   *     JCas)
   */
  public int getEnd() {
    return blockEnd;
  }

  /**
   * Gets the covered text.
   *
   * @return the document text if this block is the whole document, otherwise the text covered by
   *     the text annotation
   */
  public String getCoveredText() {
    if (isWholeDocument()) {
      return jCas.getDocumentText();
    } else {
      return text.getCoveredText();
    }
  }

  /**
   * Gets the JCas document text.
   *
   * @return the document text
   */
  public String getDocumentText() {
    return jCas.getDocumentText();
  }

  // JCasUtil helpers

  /**
   * Helper function providing same functionality as JCasUtil.select, restricted to this block.
   *
   * <p>For a whole-document block this selects from the entire JCas; otherwise it selects the
   * annotations covered by this block's begin and end offsets.
   *
   * @param <T> the generic type
   * @param type the annotation type to select
   * @return the collection of matching annotations
   */
  public <T extends Annotation> Collection<T> select(final Class<T> type) {
    if (isWholeDocument()) {
      return JCasUtil.select(jCas, type);
    } else {
      return JCasUtil.selectCovered(jCas, type, getBegin(), getEnd());
    }
  }

  // Creating annotation helpers

  /**
   * Create a new annotation, correcting the begin and end to be document offsets rather than
   * offsets within this text block.
   *
   * <p>Note this uses reflection, so may not be as performant as simply new Type().
   *
   * @param <T> the generic type
   * @param type the type, which must expose a (JCas, int, int) constructor
   * @param begin the begin offset within this text block
   * @param end the end offset within this text block
   * @return the annotation
   * @throws BaleenRuntimeException if the constructor cannot be found or invoked
   */
  public <T extends Annotation> T newAnnotation(
      final Class<T> type, final int begin, final int end) {
    try {
      return type.getConstructor(JCas.class, int.class, int.class)
          .newInstance(jCas, toDocumentOffset(begin), toDocumentOffset(end));
    } catch (final Exception e) {
      // Any failure (reflective or thrown by the constructor itself) is reported uniformly.
      throw new BaleenRuntimeException("Required type not found", e);
    }
  }

  /**
   * Sets the begin and end of the annotation against the document (rather than this block).
   *
   * @param <T> the generic type
   * @param annotation the annotation
   * @param begin the begin offset within this text block
   * @param end the end offset within this text block
   * @return the annotation (with begin and end set to the document offsets)
   */
  public <T extends Annotation> T setBeginAndEnd(
      final T annotation, final int begin, final int end) {
    annotation.setBegin(toDocumentOffset(begin));
    annotation.setEnd(toDocumentOffset(end));
    return annotation;
  }

  /**
   * Convert an offset within this text span to a document offset.
   *
   * @param blockOffset the block offset
   * @return the document offset
   */
  public int toDocumentOffset(final int blockOffset) {
    return blockOffset + getBegin();
  }

  /**
   * Convert an offset within the document to an offset within this text span.
   *
   * @param documentOffset the document offset
   * @return the block offset
   * @throws IllegalArgumentException if the documentOffset is outside of this text block
   */
  public int toBlockOffset(final int documentOffset) {
    // The end offset itself is accepted, so an exclusive end position can be converted.
    if (documentOffset < getBegin() || documentOffset > getEnd()) {
      throw new IllegalArgumentException("documentOffset is outside block");
    }
    return documentOffset - getBegin();
  }
}