package eu.fbk.knowledgestore.triplestore.virtuoso; import java.io.IOException; import java.sql.SQLException; import javax.annotation.Nullable; import javax.sql.ConnectionPoolDataSource; import com.google.common.base.MoreObjects; import com.google.common.base.Preconditions; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.openrdf.repository.RepositoryException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import virtuoso.jdbc4.VirtuosoConnectionPoolDataSource; import virtuoso.sesame2.driver.VirtuosoRepository; import virtuoso.sesame2.driver.VirtuosoRepositoryConnection; import eu.fbk.knowledgestore.runtime.DataCorruptedException; import eu.fbk.knowledgestore.triplestore.SynchronizedTripleStore; import eu.fbk.knowledgestore.triplestore.TripleStore; import eu.fbk.knowledgestore.triplestore.TripleTransaction; /** * A {@code TripleStore} implementation accessing an external OpenLink Virtuoso server. * <p> * This class stores and access triples in an external Virtuoso triple store, communicating to it * via the Virtuoso Sesame driver. Data modification is performed without relying on Virtuoso * transactions, in order to support bulk loading. When writing data in a read-write * {@code TripleTransaction}, a Virtuoso transaction is thus not created; a marker file is instead * stored and later removed upon successful 'commit' of the {@code TripleTransaction}; in case of * failure, the marker file remain on disk and signals that the triplestore is in a potentially * corrupted state, triggering repopulation starting from master data. Given this mechanism, it is * thus important for the component to be wrapped in a {@link SynchronizedTripleStore} that allows * at most a write transaction at a time, preventing simultaneous read transactions * (synchronization N:WX). Note that configuration, startup, shutdown and management in general of * the Virtuoso server are a responsibility of the user, with the {@code VirtuosoTripleStore} * component limiting to access Virtuoso for reading and writing triples. * </p> */ public final class VirtuosoTripleStore implements TripleStore { // see also the following resources for reference: // - https://newsreader.fbk.eu/trac/wiki/TripleStoreNotes // - http://docs.openlinksw.com/sesame/ (Virtuoso javadoc) // - http://www.openlinksw.com/vos/main/Main/VirtSesame2Provider private static final String DEFAULT_HOST = "localhost"; private static final int DEFAULT_PORT = 1111; private static final String DEFAULT_USERNAME = "dba"; private static final String DEFAULT_PASSWORD = "dba"; private static final boolean DEFAULT_POOLING = false; private static final int DEFAULT_BATCH_SIZE = 5000; private static final int DEFAULT_FETCH_SIZE = 200; private static final String DEFAULT_MARKER_FILENAME = "virtuoso.bulk.transaction"; private static final Logger LOGGER = LoggerFactory.getLogger(VirtuosoTripleStore.class); private final VirtuosoRepository virtuoso; private final FileSystem fileSystem; private final Path markerPath; /** * Creates a new instance based on the supplied most relevant properties. * * @param fileSystem * the file system where to store the marker file * @param host * the name / IP address of the host where virtuoso is running; if null defaults to * localhost * @param port * the port Virtuoso is listening to; if null defaults to 1111 * @param username * the username to login into Virtuoso; if null defaults to dba * @param password * the password to login into Virtuoso; if null default to dba */ public VirtuosoTripleStore(final FileSystem fileSystem, @Nullable final String host, @Nullable final Integer port, @Nullable final String username, @Nullable final String password) { this(fileSystem, host, port, username, password, null, null, null, null); } /** * Creates a new instance based the supplied complete set of configuration properties. * * @param fileSystem * the file system where to store the marker file * @param host * the name / IP address of the host where virtuoso is running; if null defaults to * localhost * @param port * the port Virtuoso is listening to; if null defaults to 1111 * @param username * the username to login into Virtuoso; if null defaults to dba * @param password * the password to login into Virtuoso; if null default to dba * @param pooling * true if connection pooling should be used (impact on performances is * negligible); if null defaults to false * @param batchSize * the number of added/removed triples to buffer on the client before sending them * to Virtuoso as a single chunk; if null defaults to 5000 * @param fetchSize * the number of results (solutions, triples, ...) to fetch from Virtuoso in a * single operation when query results are iterated; if null defaults to 200 * @param markerFilename * the name of the marker file created to signal Virtuoso is being used in a * non-transactional mode; if null defaults to virtuoso.bulk.transaction */ public VirtuosoTripleStore(final FileSystem fileSystem, @Nullable final String host, @Nullable final Integer port, @Nullable final String username, @Nullable final String password, @Nullable final Boolean pooling, @Nullable final Integer batchSize, @Nullable final Integer fetchSize, @Nullable final String markerFilename) { // Apply default values final String actualMarkerFilename = MoreObjects.firstNonNull(markerFilename, DEFAULT_MARKER_FILENAME); final String actualHost = MoreObjects.firstNonNull(host, DEFAULT_HOST); final int actualPort = MoreObjects.firstNonNull(port, DEFAULT_PORT); final String actualUsername = MoreObjects.firstNonNull(username, DEFAULT_USERNAME); final String actualPassword = MoreObjects.firstNonNull(password, DEFAULT_PASSWORD); final boolean actualPooling = MoreObjects.firstNonNull(pooling, DEFAULT_POOLING); final int actualBatchSize = MoreObjects.firstNonNull(batchSize, DEFAULT_BATCH_SIZE); final int actualFetchSize = MoreObjects.firstNonNull(fetchSize, DEFAULT_FETCH_SIZE); // Check parameters Preconditions.checkArgument(actualPort > 0 && actualPort < 65536); Preconditions.checkArgument(actualBatchSize > 0); Preconditions.checkArgument(actualFetchSize > 0); // Instantiate the VirtuosoRepository if (actualPooling) { // Pooling (see http://docs.openlinksw.com/virtuoso/VirtuosoDriverJDBC.html, section // 7.4.4.2) doesn't seem to affect performances. We keep this implementation: perhaps // things may change with future versions of Virtuoso. final VirtuosoConnectionPoolDataSource source = new VirtuosoConnectionPoolDataSource(); source.setServerName(actualHost); source.setPortNumber(actualPort); source.setUser(actualUsername); source.setPassword(actualPassword); this.virtuoso = new VirtuosoRepository((ConnectionPoolDataSource) source, "sesame:nil", true); } else { final String url = String.format("jdbc:virtuoso://%s:%d", actualHost, actualPort); this.virtuoso = new VirtuosoRepository(url, actualUsername, actualPassword, "sesame:nil", true); } // Further configure the VirtuosoRepository this.virtuoso.setBatchSize(actualBatchSize); this.virtuoso.setFetchSize(actualFetchSize); // Setup marker variables this.fileSystem = Preconditions.checkNotNull(fileSystem); this.markerPath = new Path(actualMarkerFilename).makeQualified(fileSystem); // Log relevant information LOGGER.info("VirtuosoTripleStore URL: {}", actualHost + ":" + actualPort); LOGGER.info("VirtuosoTripleStore marker: {}", this.markerPath); } @Override public void init() throws IOException { try { this.virtuoso.initialize(); // looking at Virtuoso code this seems a NOP } catch (final RepositoryException ex) { throw new IOException("Failed to initialize Virtuoso driver", ex); } } @Override public TripleTransaction begin(final boolean readOnly) throws DataCorruptedException, IOException { // Check if there was an interrupted transaction. if (existsTransactionMarker()) { throw new DataCorruptedException("The triple store performed a bulk operation " + "that didn't complete successfully."); } return new VirtuosoTripleTransaction(this, readOnly); } @Override public void reset() throws IOException { VirtuosoRepositoryConnection connection = null; try { connection = (VirtuosoRepositoryConnection) this.virtuoso.getConnection(); connection.getQuadStoreConnection().prepareCall("RDF_GLOBAL_RESET ()").execute(); } catch (final RepositoryException ex) { throw new IOException("Could not connect to Virtuoso server", ex); } catch (final SQLException e) { throw new IOException("Something went wrong while invoking stored procedure.", e); } finally { if (connection != null) { try { connection.close(); } catch (final RepositoryException re) { throw new IOException("Error while closing connection.", re); } } } final boolean removedTransactionMarker = removeTransactionMarker(); LOGGER.info("Database reset. Transaction marker removed: " + removedTransactionMarker); } @Override public void close() { // no need to terminate pending transactions: this is done externally try { this.virtuoso.shutDown(); // looking at Virtuoso code this should be a NOP } catch (final RepositoryException ex) { LOGGER.error("Failed to shutdown Virtuoso driver", ex); } } @Override public String toString() { return getClass().getSimpleName(); } VirtuosoRepository getVirtuoso() { return this.virtuoso; } /** * Checks if the transaction file exists. * * @return <code>true</code> if the marker is present, <code>false</code> otherwise. */ boolean existsTransactionMarker() throws IOException { // try { // return this.fileSystem.exists(this.markerPath) // } catch (final IOException ioe) { // throw new IOException("Error while checking virtuoso transaction file.", ioe); // } return false; // TODO disabled so not to depend on HDFS } /** * Adds the transaction file. * * @return <code>true</code> if the marker was not present, <code>false</code> otherwise. */ boolean addTransactionMarker() throws IOException { // try { // return this.fileSystem.createNewFile(this.markerPath); // } catch (final IOException ioe) { // throw new IOException("Error while adding virtuoso transaction file.", ioe); // } return false; // TODO disabled so not to depend on HDFS } /** * Removes the transaction file. * * @return <code>true</code> if the marker was present, <code>false</code> otherwise. */ boolean removeTransactionMarker() throws IOException { // try { // return this.fileSystem.delete(this.markerPath, false); // } catch (final IOException ioe) { // throw new IOException("Error while adding virtuoso transaction file.", ioe); // } return false; // TODO disabled so not to depend on HDFS } }