package edu.cmu.minorthird.text.learn.experiments;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import edu.cmu.minorthird.text.AbstractTextBase;
import edu.cmu.minorthird.text.Document;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.TextBase;
import edu.cmu.minorthird.text.Tokenizer;
/**
* A read-only TextBase which is a subset of another TextBase.
*
*
* @author William Cohen
*/
public class SubTextBase extends AbstractTextBase{
private Set<Span> validDocumentSpans;
private TextBase base;
public static class UnknownDocumentException extends Exception{
static final long serialVersionUID=20080314L;
public UnknownDocumentException(String s){
super(s);
}
}
public SubTextBase(TextBase base,Iterator<Span> documentSpanIterator)
throws UnknownDocumentException{
super(base.getTokenizer());
this.base=base;
validDocumentSpans=new TreeSet<Span>();
while(documentSpanIterator.hasNext()){
Span span=documentSpanIterator.next();
if(base.documentSpan(span.getDocumentId())==null){
throw new UnknownDocumentException("documentId not in textBase: "+
span.getDocumentId());
}
validDocumentSpans.add(span);
}
}
/** True if a span is contained by this TextBase */
public boolean contains(Span span){
return validDocumentSpans.contains(span.documentSpan());
}
//
// Implementations of abstract methods from AbstractTextBase
@Override
public Tokenizer getTokenizer(){
return base.getTokenizer();
}
@Override
public Document getDocument(String documentId){
return base.getDocument(documentId);
}
@Override
public int size(){
return validDocumentSpans.size();
}
@Override
public Iterator<Span> documentSpanIterator(){
return validDocumentSpans.iterator();
}
@Override
public Span documentSpan(String documentId){
Span span=base.documentSpan(documentId);
return validDocumentSpans.contains(span)?span:null;
}
}