package de.berlin.hu.uima.cr.xml;
import java.io.File;
import java.io.IOException;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Base class for collection readers that iterate over a list of XML files and parse each one
 * into a DOM {@link Document}.
 *
 * <p>Subclasses decide which files are read (via {@link #getfiles(String)}) and how a parsed
 * document is transferred into a CAS (via {@link #getNext(CAS)}); this class keeps track of the
 * read position and performs the XML parsing.
 */
public abstract class XMLCollectionReader extends CollectionReader_ImplBase {
  /**
   * Name of the configuration parameter that must be set to the path of a directory containing
   * input files.
   */
  public static final String PARAM_INPUTDIR = "InputDirectory";

  /** Value of the {@link #PARAM_INPUTDIR} parameter as read during {@link #initialize()}. */
  protected String inputDirectory = null;

  /** Number of files returned by {@link #getfiles(String)} during {@link #initialize()}. */
  protected int numDocuments = 0;

  /** Index into {@link #files} of the next file to be parsed. */
  protected int currentDocument = 0;

  /** Files to read, as supplied by {@link #getfiles(String)}. */
  protected List<File> files = null;

  /**
   * Supplies the files this reader iterates over. Called once from {@link #initialize()}.
   *
   * @param inputDir the value of the {@link #PARAM_INPUTDIR} configuration parameter
   *     (NOTE(review): presumably {@code null} when the parameter is not configured - confirm)
   * @return the files to read, in reading order; must not be {@code null}
   */
  protected abstract List<File> getfiles(String inputDir);

  /**
   * Reads the input directory parameter, collects the files to process and resets the read
   * position to the first file.
   *
   * @throws ResourceInitializationException if {@link #getfiles(String)} returns {@code null}
   */
  @Override
  public void initialize() throws ResourceInitializationException {
    inputDirectory = (String) getConfigParameterValue(PARAM_INPUTDIR);
    files = getfiles(inputDirectory);
    if (files == null) {
      // Fail with the declared initialization exception instead of an anonymous
      // NullPointerException on files.size().
      throw new ResourceInitializationException(
          new IllegalStateException(
              "getfiles returned null for " + PARAM_INPUTDIR + "=" + inputDirectory));
    }
    numDocuments = files.size();
    currentDocument = 0;
  }

  /**
   * Parses the next file of the list into a DOM document and advances the read position.
   *
   * <p>The position is advanced before parsing, so a file that fails to parse is not retried by
   * a subsequent call.
   *
   * @return the parsed document
   * @throws CollectionException if all files have already been consumed
   * @throws IOException if the file cannot be read, the parser cannot be created, or the file
   *     is not well-formed XML
   */
  protected Document getNextDocument() throws CollectionException, IOException {
    if (currentDocument >= numDocuments) {
      throw new CollectionException(
          new IndexOutOfBoundsException(
              "No more documents: position " + currentDocument + " of " + numDocuments));
    }
    File file = files.get(currentDocument++);
    // NOTE(review): the parser uses default JAXP settings. If input files can come from
    // untrusted sources, consider disabling DTDs / external entities (XXE) here.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    try {
      DocumentBuilder builder = factory.newDocumentBuilder();
      return builder.parse(file);
    } catch (ParserConfigurationException e) {
      throw new IOException("Cannot create XML parser for " + file, e);
    } catch (SAXException e) {
      throw new IOException("Cannot parse XML file " + file, e);
    }
  }

  /**
   * Fills the given CAS with the next document of the collection.
   *
   * @param aCAS the CAS to populate
   * @throws IOException if the underlying file cannot be read or parsed
   * @throws CollectionException if the collection cannot supply another document
   */
  @Override
  public abstract void getNext(CAS aCAS) throws IOException, CollectionException;

  /**
   * Tells whether there are files left that have not been handed out yet.
   *
   * @return {@code true} if the read position is before the end of the file list
   */
  @Override
  public boolean hasNext() {
    return currentDocument < numDocuments;
  }

  /**
   * Reports how many of the files have been consumed so far.
   *
   * @return a single-element array with the current position and the total number of files,
   *     measured in {@link Progress#ENTITIES}
   */
  @Override
  public Progress[] getProgress() {
    return new Progress[] {new ProgressImpl(currentDocument, numDocuments, Progress.ENTITIES)};
  }

  /**
   * Releases resources held by this reader; this implementation holds none.
   *
   * @throws IOException never thrown by this implementation
   */
  @Override
  public void close() throws IOException {
    // nothing to do
  }
}