/**
*
*/
package tml.storage;
import java.io.IOException;
import org.apache.log4j.Logger;
import tml.annotators.Annotator;
/**
* @author jorge
*
*/
public class DocumentAnnotator implements Runnable {
private static Logger logger = Logger.getLogger(DocumentAnnotator.class);
private Repository repository;
public DocumentAnnotator(Repository repo) {
this.repository = repo;
}
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
@Override
public void run() {
int total = 0;
String[][] docs = this.repository.getDbConnection().getUnannotatedDocument();
if(docs == null) {
logger.debug("No documents to annotate");
return;
}
for(String[] doc : docs) {
String externalid = doc[0];
String type = doc[1];
String content = null;
try {
content = this.repository.getDocumentField(externalid, this.repository.getLuceneContentField());
} catch (IOException e) {
e.printStackTrace();
logger.error("No content found in Lucene index for document " + externalid);
return;
}
for (Annotator annotator : this.repository.getAnnotators()) {
String metadata = null;
if (annotator.getTypes().contains(type)) {
metadata = annotator.getAnnotations(content);
} else {
metadata = "Not available";
}
this.repository.getDbConnection().setAnnotation(externalid, annotator.getFieldName(), metadata);
}
total++;
}
if(total > 0)
logger.info("Annotated " + total + " documents");
else
logger.debug("Nothing to annotate");
}
}