package com.bigdata.rdf.sparql.ast.service.history;

import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KVO;
import com.bigdata.journal.IIndexManager;
import com.bigdata.rdf.changesets.ChangeAction;
import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.changesets.IChangeRecord;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.sail.BigdataSail;
import com.bigdata.rdf.sail.BigdataSail.BigdataSailConnection;
import com.bigdata.rdf.sparql.ast.eval.CustomServiceFactoryBase;
import com.bigdata.rdf.sparql.ast.service.BigdataNativeServiceOptions;
import com.bigdata.rdf.sparql.ast.service.IServiceOptions;
import com.bigdata.rdf.sparql.ast.service.ServiceCall;
import com.bigdata.rdf.sparql.ast.service.ServiceCallCreateParams;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.relation.AbstractRelation;

/**
 * This service tracks KB updates via an {@link IChangeLog} and is responsible
 * for maintaining an ordered index over the assertions that have been added to
 * or removed from a KB instance.
 * 
 * @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/607"> History
 *      Service</a>
 * 
 * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
 */
public class HistoryServiceFactory extends CustomServiceFactoryBase {

    static private transient final Logger log = Logger
            .getLogger(HistoryServiceFactory.class);

    private final BigdataNativeServiceOptions serviceOptions;

    public HistoryServiceFactory() {

        serviceOptions = new BigdataNativeServiceOptions();

        /*
         * TODO Review the decision to make this a runFirst service. The
         * rationale is that this service can only apply a very limited set of
         * restrictions during query, so it will often make sense to run it
         * first. However, the fromTime and toTime could be bound by the query,
         * and the service can filter some things more efficiently internally
         * than if we generated a bunch of intermediate solutions for those
         * things.
         */
        serviceOptions.setRunFirst(true);

    }

    @Override
    public IServiceOptions getServiceOptions() {

        return serviceOptions;

    }
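    /*
     * Registration sketch (illustrative only): custom service factories are
     * typically registered against a service URI so that SPARQL queries can
     * invoke them via a SERVICE clause. The URI shown is hypothetical (this
     * class does not define one), and the ServiceRegistry call is an
     * assumption about the registry API in this package, not something this
     * file establishes:
     * 
     *   final URI serviceURI = new URIImpl("http://www.bigdata.com/rdf/history#service");
     *   ServiceRegistry.getInstance().add(serviceURI, new HistoryServiceFactory());
     * 
     * A query could then reference it as:
     * 
     *   SERVICE <http://www.bigdata.com/rdf/history#service> { ... }
     */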
    /**
     * TODO Implement: The query should support an index scan of a date range
     * with optional filters on the (s,p,o,c) and the add/remove flags. It
     * might make more sense to index in (POS) order rather than (SPO) order
     * so we can more efficiently scan a specific predicate within some date
     * range using an advancer pattern.
     * <p>
     * The restrictions that this service COULD apply to the index scan are:
     * <dl>
     * <dt>fromTime</dt>
     * <dd>Inclusive lower bound.</dd>
     * <dt>toTime</dt>
     * <dd>Exclusive upper bound (e.g., the first commit point NOT to be
     * reported).</dd>
     * <dt>P</dt>
     * <dd>The {@link IV} for the predicate (this is the first statement key
     * component in the history index for both triples and quads mode
     * KBs).</dd>
     * </dl>
     * In addition, it could filter on the remaining fields (that is, skip
     * over tuples that fail a filter):
     * <dl>
     * <dt>S, O [, C]</dt>
     * <dd>The {@link IV}s for the subject, the object, and (in quads mode)
     * the context.</dd>
     * <dt>action</dt>
     * <dd>The {@link ChangeAction}.</dd>
     * <dt>type</dt>
     * <dd>The {@link StatementTypeEnum}.</dd>
     * </dl>
     */
    @Override
    public ServiceCall<?> create(final ServiceCallCreateParams params) {

        throw new UnsupportedOperationException();

    }

    /**
     * Register an {@link IChangeLog} listener that will handle the
     * maintenance of the history index.
     */
    @Override
    public void startConnection(final BigdataSailConnection conn) {

//        final Properties properties = conn.getProperties();

        final AbstractTripleStore tripleStore = conn.getTripleStore();

        if (Boolean.valueOf(tripleStore.getProperty(
                BigdataSail.Options.HISTORY_SERVICE,
                BigdataSail.Options.DEFAULT_HISTORY_SERVICE))) {

            conn.addChangeLog(new HistoryChangeLogListener(conn));

        }

    }

    /**
     * Handles maintenance of the history index.
     * 
     * @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan
     *         Thompson</a>
     */
    static private class HistoryChangeLogListener implements IChangeLog {

        /** The vector size for updates. */
        private static final int threshold = 10000;

        /** The connection. */
        private final BigdataSailConnection conn;

        /** The KB instance. */
        private final AbstractTripleStore tripleStore;

        /**
         * The head of the index is pruned on update to remove entries that
         * are older than this age (milliseconds).
         */
        private final long minReleaseAge;

        /**
         * The first timestamp that WILL NOT be released.
         */
        private final long releaseTime;

        /**
         * The timestamp that will be associated with the {@link IChangeLog}
         * events in the index.
         */
        private volatile long revisionTimestamp;

        /** The set of change records to be flushed (lazily instantiated). */
        private Map<ISPO, IChangeRecord> changeSet;

        /** The history index. */
        private IIndex ndx = null;

        HistoryChangeLogListener(final BigdataSailConnection conn) {

            this.conn = conn;

            this.tripleStore = conn.getTripleStore();

            this.revisionTimestamp = getRevisionTimestamp(tripleStore);

            this.minReleaseAge = Long
                    .valueOf(tripleStore
                            .getProperty(
                                    BigdataSail.Options.HISTORY_SERVICE_MIN_RELEASE_AGE,
                                    BigdataSail.Options.DEFAULT_HISTORY_SERVICE_MIN_RELEASE_AGE));

            /*
             * TODO We should be able to reach the timestamp service from the
             * index manager. We want to use the globally agreed on clock for
             * the current time when making the decision to prune the head of
             * the index.
             */
            releaseTime = (System.currentTimeMillis() - minReleaseAge) + 1;

            if (log.isInfoEnabled()) {

                log.info("minReleaseAge=" + minReleaseAge + ", releaseTime="
                        + releaseTime);

            }

        }
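        /*
         * Worked example of the pruning boundary computed in the constructor
         * above (illustrative numbers, not from the source): with
         * minReleaseAge = 3600000 (one hour) and a current time t, the
         * releaseTime is (t - 3600000) + 1, i.e., the first revision
         * timestamp that will NOT be pruned. pruneHistory() forms its
         * exclusive upper bound key from this value alone, which works
         * because the revision time is the leading component of the history
         * index key.
         */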
        /**
         * Return the revision time that will be used for all changes written
         * onto the history index by this {@link IChangeLog} listener.
         * 
         * @see HistoryChangeRecord#getRevisionTime()
         */
        static private long getRevisionTimestamp(
                final AbstractTripleStore tripleStore) {

            final IIndexManager indexManager = tripleStore.getIndexManager();

            /*
             * Note: One timestamp past the last commit time. Alternatives
             * that were considered include asking the local transaction
             * manager of an IJournal, or the transaction service of an
             * IBigdataFederation, for the next timestamp.
             */
            final long revisionTimestamp = indexManager.getLastCommitTime() + 1;

            return revisionTimestamp;

        }

        @Override
        public void transactionBegin() {

            this.revisionTimestamp = getRevisionTimestamp(tripleStore);

        }

        @Override
        public void transactionPrepare() {

            flush();

        }

        /**
         * Vectors updates against the history index.
         */
        @Override
        public void changeEvent(final IChangeRecord record) {

            if (changeSet == null) {

                // Lazy instantiation.
                changeSet = new HashMap<ISPO, IChangeRecord>();

                // Resolve the history index.
                ndx = getHistoryIndex(tripleStore);

                if (minReleaseAge > 0) {

                    // Prune entries that have aged out.
                    pruneHistory();

                }

            }

            final ISPO spo = record.getStatement();

            changeSet.put(spo, record);

            if (changeSet.size() > threshold) {

                // Incremental flush of the buffered change records.
                flush();

            }

        }

        /**
         * Return the pre-existing history index.
         * 
         * @param tripleStore
         *            The KB.
         * 
         * @return The history index and never <code>null</code>.
         * 
         * @throws IllegalStateException
         *             if the index was not configured / does not exist.
         */
        private IIndex getHistoryIndex(final AbstractTripleStore tripleStore) {

            final SPORelation spoRelation = tripleStore.getSPORelation();

            final String fqn = AbstractRelation.getFQN(spoRelation,
                    SPORelation.NAME_HISTORY);

            final IIndex ndx = spoRelation.getIndex(fqn);

            if (ndx == null)
                throw new IllegalStateException("Index not found: " + fqn);

            return ndx;

        }

        /**
         * Prune the head of the history index.
         * <p>
         * Note: Either this should be done as the first action or you must
         * make a note of the effective release time as the first action and
         * then apply that effective release time later. If you instead
         * compute and apply the effective release time later on, then there
         * is the possibility that you could prune out entries from the
         * current transaction!
         */
        private void pruneHistory() {

            final IKeyBuilder keyBuilder = ndx.getIndexMetadata()
                    .getKeyBuilder().reset();

            keyBuilder.append(releaseTime);

            final byte[] toKey = keyBuilder.getKey();

            long n = 0;

            // REMOVEALL: the iterator deletes each tuple that it visits.
            final ITupleIterator<?> itr = ndx.rangeIterator(null/* fromKey */,
                    toKey, 0/* capacity */, IRangeQuery.REMOVEALL/* flags */,
                    null/* filterCtor */);

            while (itr.hasNext()) {

                itr.next();

                n++;

            }

            if (n > 0 && log.isInfoEnabled()) {

                log.info("pruned history: nremoved=" + n + ", minReleaseAge="
                        + minReleaseAge + ", releaseTime=" + releaseTime);

            }

        }

        @Override
        public void transactionCommited(final long commitTime) {

            flush();

        }

        @Override
        public void transactionAborted() {

            reset();

        }

        /**
         * See {@link IChangeLog#close()}.
         */
        @Override
        public void close() {

            reset();

        }

        /** Reset the buffer. */
        private void reset() {

            changeSet = null;

        }
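        /*
         * Read-side sketch (hedged): create() is not implemented above, but a
         * date-range scan of the history index for [fromTime, toTime) could
         * be written along the following lines, using the same key layout
         * that pruneHistory() relies on (the revision time is the leading key
         * component). The fromTime/toTime variables are assumptions of this
         * sketch, not parameters defined by this class:
         * 
         *   final IKeyBuilder keyBuilder = ndx.getIndexMetadata().getKeyBuilder();
         *   final byte[] fromKey = keyBuilder.reset().append(fromTime).getKey();
         *   final byte[] toKey = keyBuilder.reset().append(toTime).getKey();
         *   final ITupleIterator<?> itr = ndx.rangeIterator(fromKey, toKey);
         *   while (itr.hasNext()) {
         *       // Materialize the record via the index's tuple serializer.
         *       final Object rec = itr.next().getObject();
         *   }
         */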
        /**
         * Incremental flush of the buffered change records onto the history
         * index.
         */
        private void flush() {

            if (changeSet != null) {

                final int size = changeSet.size();

                @SuppressWarnings("unchecked")
                final KVO<HistoryChangeRecord>[] b = new KVO[size];

                {

                    // Extract the new change records into an array.
                    final IChangeRecord[] a = changeSet.values().toArray(
                            new IChangeRecord[size]);

                    final HistoryIndexTupleSerializer tupSer = (HistoryIndexTupleSerializer) ndx
                            .getIndexMetadata().getTupleSerializer();

                    // Wrap each one with the revision time.
                    for (int i = 0; i < size; i++) {

                        final IChangeRecord r = a[i];

                        // Attach the revision time.
                        final HistoryChangeRecord s = new HistoryChangeRecord(
                                r, revisionTimestamp);

                        final byte[] key = tupSer.serializeKey(s);

                        final byte[] val = tupSer.serializeVal(s);

                        b[i] = new KVO<HistoryChangeRecord>(key, val, s);

                    }

                }

                // Sort to improve the index locality of the writes.
                java.util.Arrays.sort(b);

                // Write on the index.
                for (int i = 0; i < size; i++) {

                    final KVO<HistoryChangeRecord> r = b[i];

                    ndx.insert(r.key, r.val);

                }

                reset();

            }

        }

    } // class HistoryChangeLogListener

} // class HistoryServiceFactory
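/*
 * Configuration sketch (illustrative only): the property names are the
 * BigdataSail.Options constants referenced above; the sail setup shown is the
 * usual pattern but is an assumption of this sketch, not something defined in
 * this file:
 * 
 *   final Properties props = new Properties();
 *   props.setProperty(BigdataSail.Options.HISTORY_SERVICE, "true");
 *   props.setProperty(BigdataSail.Options.HISTORY_SERVICE_MIN_RELEASE_AGE,
 *           Long.toString(7 * 24 * 60 * 60 * 1000L)); // one week, in ms
 *   final BigdataSail sail = new BigdataSail(props);
 *   sail.initialize();
 */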