package edu.stanford.nlp.process;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetEndAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
/**
* Constructs {@link CoreLabel}s from Strings, optionally with
* beginning and ending (character after the end) offset positions in
* an original text. The makeToken method will put the token in the
* CurrentAnnotation AND TextAnnotation keys (2 places!),
* and optionally records the
* begin offset and the offset just past the end in CharacterOffsetBeginAnnotation and
* CharacterOffsetEndAnnotation. If the tokens are built in PTBTokenizer with
* an "invertible" tokenizer, you will also get a BeforeAnnotation and for
* the last token an AfterAnnotation. You can also get an empty CoreLabel token.
*
* @author Anna Rafferty
* @author Sonal Gupta (now implements CoreTokenFactory, you can make tokens using many options)
*/
public class CoreLabelTokenFactory implements CoreTokenFactory<CoreLabel>, LexedTokenFactory<CoreLabel> {

  /**
   * When true, {@link #makeToken(String, int, int)} also stores the
   * character offsets on the token it builds.
   */
  final boolean addIndices;

  /**
   * Creates a factory that records character offsets on its tokens.
   * Equivalent to {@code new CoreLabelTokenFactory(true)}.
   */
  public CoreLabelTokenFactory() {
    this(true);
  }

  /**
   * Creates a factory, letting the caller choose whether character
   * offsets are recorded on the tokens it builds.
   *
   * @param addIndices if true, {@link #makeToken(String, int, int)} sets
   *                   CharacterOffsetBeginAnnotation and
   *                   CharacterOffsetEndAnnotation on each token
   *                   (the no-argument constructor passes true)
   */
  public CoreLabelTokenFactory(boolean addIndices) {
    this.addIndices = addIndices;
  }

  /**
   * Builds a CoreLabel for the given string. The string is stored as the
   * label's word, under TextAnnotation, and as its "current" value. It is
   * used as given; no substring is taken.
   *
   * When this factory was created with {@code addIndices == true}, the
   * token additionally gets CharacterOffsetBeginAnnotation set to
   * {@code begin} and CharacterOffsetEndAnnotation set to
   * {@code begin + length}.
   *
   * @param str    the token text
   * @param begin  offset of the token's first character
   * @param length number of characters the token covers; the end offset
   *               is computed as {@code begin + length}
   * @return the newly constructed token
   */
  public CoreLabel makeToken(String str, int begin, int length) {
    // With offsets there are more keys to hold, so ask for a larger label
    // up front (original author's note: saves a reallocation).
    final CoreLabel token = addIndices ? new CoreLabel(8) : new CoreLabel();

    // Same setter order as before: word, then text, then current.
    token.setWord(str);
    token.set(TextAnnotation.class, str);
    token.setCurrent(str);

    if (addIndices) {
      final int end = begin + length;
      token.set(CharacterOffsetBeginAnnotation.class, begin);
      token.set(CharacterOffsetEndAnnotation.class, end);
    }
    return token;
  }

  /**
   * Builds a token using CoreLabel's no-argument constructor; nothing is
   * set on it here.
   *
   * @return a freshly constructed CoreLabel
   */
  public CoreLabel makeToken() {
    return new CoreLabel();
  }

  /**
   * Builds a token by handing the two arrays straight to the
   * {@code CoreLabel(String[], String[])} constructor.
   *
   * @param keys   passed through as the constructor's first argument
   * @param values passed through as the constructor's second argument
   *               (presumably parallel to {@code keys}; see CoreLabel)
   * @return the newly constructed token
   */
  public CoreLabel makeToken(String[] keys, String[] values) {
    return new CoreLabel(keys, values);
  }

  /**
   * Builds a token from an existing label via CoreLabel's
   * copy-style constructor.
   *
   * @param labelToBeCopied the label handed to {@code new CoreLabel(CoreLabel)}
   * @return a new CoreLabel constructed from {@code labelToBeCopied}
   */
  public CoreLabel makeToken(CoreLabel labelToBeCopied) {
    return new CoreLabel(labelToBeCopied);
  }
}