package com.bigdata.rdf.load;
import java.io.File;
import java.net.MalformedURLException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.log4j.Logger;
import org.openrdf.model.Statement;
import org.openrdf.rio.RDFFormat;
import com.bigdata.rdf.rio.RDFParserOptions;
import com.bigdata.rdf.store.AbstractTripleStore;
/**
* Factory for tasks for loading RDF resources into a database or validating
* RDF resources against a database.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*
* @todo report the #of resources processed in each case.
*/
public class AbstractRDFTaskFactory<S extends Statement,T extends Runnable> implements
        ITaskFactory<T> {

    /**
     * Logger shared with subclasses.
     * <p>
     * NOTE(review): the logger category is {@link RDFLoadTaskFactory} rather
     * than this class. It is left unchanged here because existing log4j
     * configurations may key on that category - confirm whether this is
     * intentional.
     */
    protected static final Logger log = Logger
            .getLogger(RDFLoadTaskFactory.class);

    /**
     * The database on which the data will be written.
     */
    final protected AbstractTripleStore db;

    /**
     * The timestamp set when {@link #notifyStart()} is invoked (zero until
     * then).
     */
    private long beginTime;

    /**
     * The timestamp set when {@link #notifyEnd()} is invoked. A value of
     * zero means that {@link #notifyEnd()} has not been invoked since the
     * last {@link #notifyStart()}.
     */
    private long endTime;

    /**
     * Notify that the factory will begin running tasks. This sets the
     * {@link #beginTime} used by {@link #elapsed()} to report the run time
     * of the tasks and clears any {@link #endTime} left over from a
     * previous run.
     */
    public void notifyStart() {

        endTime = 0L;

        beginTime = System.currentTimeMillis();

    }

    /**
     * Notify that the factory is done running tasks (for now). This
     * places a cap on the time reported by {@link #elapsed()}.
     *
     * @todo Once we are done loading data the client should be told to
     *       flush its counters to the load balancer so that we have the
     *       final state snapshot once it is ready.
     */
    public void notifyEnd() {

        endTime = System.currentTimeMillis();

        assert beginTime <= endTime;

    }

    /**
     * The elapsed time in milliseconds, counting only the time between
     * {@link #notifyStart()} and {@link #notifyEnd()}. While the factory is
     * still running (no {@link #notifyEnd()} yet) the value is measured
     * against the current time.
     * <p>
     * Note: if {@link #notifyStart()} was never invoked then
     * {@link #beginTime} is zero and the reported value is measured from the
     * epoch rather than from the start of a run.
     *
     * @return The elapsed time in milliseconds.
     */
    public long elapsed() {

        if (endTime == 0L) {

            // Still running.
            return System.currentTimeMillis() - beginTime;

        } else {

            // Done.
            final long elapsed = endTime - beginTime;

            assert elapsed >= 0L;

            return elapsed;

        }

    }

    /**
     * An attempt will be made to determine the interchange syntax using
     * {@link RDFFormat}. If no determination can be made then the loader
     * will presume that the files are in the format specified by this
     * parameter (if any). Files whose format can not be determined will be
     * logged as errors.
     */
    final public RDFFormat fallback;

    /**
     * RDFParser options.
     */
    final public RDFParserOptions parserOptions;

    /**
     * Delete files after successful processing when <code>true</code>.
     */
    final public boolean deleteAfter;

    /**
     * The statement buffer factory handed to each task created by
     * {@link #newTask(String)}.
     * <p>
     * NOTE(review): declared as a raw type; {@link #getBufferFactory()}
     * exposes it as <code>IStatementBufferFactory&lt;S&gt;</code> through an
     * unchecked conversion.
     */
    final IStatementBufferFactory bufferFactory;

    /**
     * The statement buffer factory given to the constructor.
     *
     * @return The {@link IStatementBufferFactory} (may be <code>null</code>
     *         if <code>null</code> was given to the constructor).
     */
    @SuppressWarnings("unchecked") // raw field exposed with the class's type parameter.
    public IStatementBufferFactory<S> getBufferFactory() {

        return bufferFactory;

    }

    /**
     * #of told triples loaded into the database by successfully completed
     * {@link SingleResourceReaderTask}s. The counter is passed to each task
     * created by {@link #newTask(String)}.
     */
    final AtomicLong toldTriples = new AtomicLong(0);

    /**
     * Guess at the {@link RDFFormat}.
     *
     * @param filename
     *            Some filename.
     *
     * @return The {@link RDFFormat} -or- <code>null</code> iff
     *         {@link #fallback} is <code>null</code> and no format was
     *         recognized for the <i>filename</i>
     */
    public RDFFormat getRDFFormat(String filename) {

        final RDFFormat rdfFormat = //
        fallback == null //
                ? RDFFormat.forFileName(filename) //
                : RDFFormat.forFileName(filename, fallback)//
        ;

        return rdfFormat;

    }

    /**
     * @param db
     *            The database on which the data will be written.
     * @param parserOptions
     *            The RDFParser options passed to each task.
     * @param deleteAfter
     *            Delete files after successful processing when
     *            <code>true</code>.
     * @param fallback
     *            The format to presume when the interchange syntax can not
     *            be determined from the file name (optional).
     * @param bufferFactory
     *            The statement buffer factory passed to each task.
     */
    protected AbstractRDFTaskFactory(AbstractTripleStore db,
            final RDFParserOptions parserOptions, final boolean deleteAfter,
            RDFFormat fallback, IStatementBufferFactory bufferFactory) {

        this.db = db;

        this.parserOptions = parserOptions;

        this.deleteAfter = deleteAfter;

        this.fallback = fallback;

        this.bufferFactory = bufferFactory;

    }

    /**
     * Create a {@link SingleResourceReaderTask} for a resource. The resource
     * is interpreted as a file path; its <code>file:</code> URL is handed
     * to the task as the base URL.
     *
     * @param resource
     *            The path of the file to be processed.
     *
     * @return The new task.
     *
     * @throws RuntimeException
     *             if the interchange syntax could not be determined (see
     *             {@link #getRDFFormat(String)}).
     * @throws RuntimeException
     *             if the file path could not be converted to a URL (the
     *             {@link MalformedURLException} is the cause).
     */
    @SuppressWarnings("unchecked") // the caller's T must be compatible with SingleResourceReaderTask.
    public T newTask(final String resource) throws Exception {

        if (log.isInfoEnabled())
            log.info("resource=" + resource);

        final RDFFormat rdfFormat = getRDFFormat(resource);

        if (rdfFormat == null) {

            throw new RuntimeException(
                    "Could not determine interchange syntax - skipping : file="
                            + resource);

        }

        // Convert the file path to a URL.
        //
        // NOTE(review): File#toURL() is deprecated because it does not
        // escape characters that are illegal in URLs. It is retained so
        // that the base URL reported for existing paths does not change.
        final String baseURL;
        try {

            baseURL = new File(resource).toURL().toString();

        } catch (MalformedURLException e) {

            // Preserve the cause so the underlying problem is not lost.
            throw new RuntimeException("resource=" + resource, e);

        }

        return (T) new SingleResourceReaderTask(resource, baseURL, rdfFormat,
                parserOptions, deleteAfter, bufferFactory, toldTriples);

    }

}