package org.cogroo.tools.headfinder;
import org.cogroo.tools.chunker2.TokenTag;
import opennlp.tools.util.SequenceValidator;
public class HeadFinderSequenceValidator implements SequenceValidator<TokenTag>{
@Override
public boolean validSequence(int i, TokenTag[] inputSequence,
String[] outcomesSequence, String outcome) {
int size = inputSequence.length;
String[] chunkTags = new String[size];
String[] posTags = new String[size];
String[] lexemes = new String[size];
TokenTag.extract(inputSequence, lexemes, posTags, chunkTags);
// if it is boundary, accept any
if(isBoundary(chunkTags[i])) {
return true;
}
boolean isHead = isHead(outcome);
// only chunks has head
if(chunkTags[i].equals("O") && isHead) {
return false;
}
if(isIntermediate(chunkTags[i]) && isHead) {
// only if it is the only head...
boolean foundBoundary = false;
for(int j = i - 1; j >= 0 && !foundBoundary; j--) {
if(isHead(outcomesSequence[j])) {
return false;
}
foundBoundary = isBoundary(chunkTags[j]);
}
}
return true;
}
private boolean isIntermediate(String tag) {
return tag.startsWith("I-");
}
private boolean isBoundary(String tag) {
return tag.startsWith("B-");
}
private boolean isHead(String outcome) {
return !outcome.equals("O");
}
}