package edu.stanford.nlp.process;
import java.io.Serializable;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
/**
* Constructs {@link CoreLabel}s from Strings, optionally with
* beginning and ending (character after the end) offset positions in
* an original text. The makeToken method will put the token in the
* OriginalTextAnnotation AND TextAnnotation keys (2 places!), as well as
* setting it as the label's value, and optionally records the
* begin and position-after-the-end offsets in CharacterOffsetBeginAnnotation
* and CharacterOffsetEndAnnotation. If the tokens are built in PTBTokenizer with
* an "invertible" tokenizer, you will also get a BeforeAnnotation and for
* the last token an AfterAnnotation. You can also get an empty CoreLabel token.
*
* @author Anna Rafferty
* @author Sonal Gupta (now implements CoreTokenFactory, you can make tokens using many options)
*/
public class CoreLabelTokenFactory implements CoreTokenFactory<CoreLabel>, LexedTokenFactory<CoreLabel>, Serializable {

  private static final long serialVersionUID = 4L;

  /** Whether character offset annotations are written onto the tokens built from text. */
  private final boolean addIndices;

  /**
   * Creates a factory that records character offsets on its tokens;
   * equivalent to {@code new CoreLabelTokenFactory(true)}.
   */
  public CoreLabelTokenFactory() {
    this(true);
  }

  /**
   * Creates a factory, letting the caller decide whether character offset
   * annotations are recorded on the tokens built from text.
   *
   * @param addIndices if true, CharacterOffsetBeginAnnotation and
   *     CharacterOffsetEndAnnotation are set on each token built by the
   *     text-based makeToken methods (this is what the no-arg constructor uses)
   */
  public CoreLabelTokenFactory(boolean addIndices) {
    this.addIndices = addIndices;
  }

  /**
   * Builds a token whose text and original text are both {@code tokenText}.
   * The string is used as given; no substring is taken from it.
   *
   * @param tokenText the token string
   * @param begin character offset at which the token starts
   * @param length number of characters covered; the recorded end offset is {@code begin + length}
   * @return the newly built label
   */
  @Override
  public CoreLabel makeToken(String tokenText, int begin, int length) {
    return makeToken(tokenText, tokenText, begin, length);
  }

  /**
   * Builds a token whose original text may differ from its token text.
   * Both strings are used as given; no substring is taken from them.
   *
   * @param tokenText string stored as the label's value and word
   * @param originalText string stored as the label's original text
   * @param begin character offset at which the token starts
   * @param length number of characters covered; the recorded end offset is {@code begin + length}
   * @return the newly built label; it carries offset annotations only if this
   *     factory was created with {@code addIndices} set to true
   */
  public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
    final CoreLabel token;
    if (addIndices) {
      // Five entries are written below in this case (value, word, original text,
      // begin, end); the argument presumably pre-sizes the label - confirm
      // against CoreLabel(int).
      token = new CoreLabel(5);
    } else {
      token = new CoreLabel();
    }

    token.setValue(tokenText);
    token.setWord(tokenText);
    token.setOriginalText(originalText);

    if (!addIndices) {
      return token;
    }

    final int end = begin + length;
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
    return token;
  }

  /**
   * Builds a label using CoreLabel's no-argument constructor, with nothing set on it by this factory.
   *
   * @return a new CoreLabel
   */
  @Override
  public CoreLabel makeToken() {
    return new CoreLabel();
  }

  /**
   * Builds a label by handing the two arrays to the
   * {@code CoreLabel(String[], String[])} constructor.
   *
   * @param keys the keys passed through to the CoreLabel constructor
   * @param values the values passed through to the CoreLabel constructor
   * @return a new CoreLabel
   */
  @Override
  public CoreLabel makeToken(String[] keys, String[] values) {
    return new CoreLabel(keys, values);
  }

  /**
   * Builds a label by handing an existing one to the {@code CoreLabel(CoreLabel)} constructor.
   *
   * @param labelToBeCopied the label to build the new one from
   * @return a new CoreLabel
   */
  @Override
  public CoreLabel makeToken(CoreLabel labelToBeCopied) {
    return new CoreLabel(labelToBeCopied);
  }

}