package edu.jhu.agiga;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * StreamingDocumentReader iterates over the {@link AgigaDocument} objects
 * read from a single input file. The AgigaDocument class gives access to the
 * coreference resolution annotations (via AgigaCoref objects) and to the
 * sentences (via AgigaSentence objects).
 *
 * <p>The streaming itself is inherited from {@link StreamingVtdXmlReader};
 * this class only supplies the document-specific pieces: the iterator that
 * is built for each byte array and the per-document sentence count.
 *
 * @author mgormley
 *
 */
public class StreamingDocumentReader extends StreamingVtdXmlReader<AgigaDocument> {

    private static final Logger log = Logger.getLogger(StreamingDocumentReader.class.getName());

    /** Preferences handed to every AgigaDocumentReader created by this reader. */
    private final AgigaPrefs prefs;

    /**
     * Creates a reader over the given input file.
     *
     * @param inputFile the input file, passed unchanged to the superclass constructor
     * @param prefs the preferences passed on to each {@link AgigaDocumentReader}
     */
    public StreamingDocumentReader(String inputFile, AgigaPrefs prefs) {
        super(inputFile);
        this.prefs = prefs;
    }

    /**
     * Builds the document iterator for one byte array.
     *
     * @param b the bytes to read documents from (presumably a chunk of the
     *          input XML supplied by the superclass; confirm against
     *          StreamingVtdXmlReader)
     * @return a new {@link AgigaDocumentReader} over {@code b} using this reader's prefs
     */
    @Override
    protected Iterator<AgigaDocument> getIteratorInstance(byte[] b) {
        return new AgigaDocumentReader(b, prefs);
    }

    /**
     * Reads every file named on the command line with default
     * {@link AgigaPrefs} and logs the number of documents reported by the
     * reader for each file.
     *
     * @param args the input files to read
     * @throws Exception if initializing logging or reading a file fails
     */
    public static void main(String[] args) throws Exception {
        // Must be Level.FINER for debug logging
        Util.initializeLogging(Level.FINE);

        // Parse each file provided on the command line.
        for (String inputFile : args) {
            StreamingDocumentReader reader = new StreamingDocumentReader(inputFile, new AgigaPrefs());
            log.info("Parsing XML");
            for (AgigaDocument doc : reader) {
                // Do nothing: the loop only exists to drain the reader
                // before the document count is queried below.
            }
            log.info("Number of docs: " + reader.getNumDocs());
        }
    }

    /**
     * Returns the number of sentences in the given document.
     *
     * @param doc the document whose sentences are counted
     * @return the size of {@code doc.getSents()}
     */
    @Override
    protected int getNumSents(AgigaDocument doc) {
        return doc.getSents().size();
    }
}