/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on May 21, 2007
*/
package com.bigdata.rdf.store;
import java.lang.ref.WeakReference;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.log4j.Logger;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.Statement;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.OWL;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.rio.rdfxml.RDFXMLParser;
import com.bigdata.bop.BOp;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IPredicate;
import com.bigdata.bop.IVariable;
import com.bigdata.bop.NV;
import com.bigdata.bop.Var;
import com.bigdata.bop.bindingSet.ListBindingSet;
import com.bigdata.bop.joinGraph.IEvaluationPlanFactory;
import com.bigdata.bop.joinGraph.fast.DefaultEvaluationPlanFactory2;
import com.bigdata.btree.AbstractBTree;
import com.bigdata.btree.BTree;
import com.bigdata.btree.IIndex;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.IndexSegment;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.journal.IConcurrencyManager;
import com.bigdata.journal.IIndexManager;
import com.bigdata.journal.IResourceLock;
import com.bigdata.journal.ITx;
import com.bigdata.journal.TimestampUtility;
import com.bigdata.rdf.ServiceProviderHook;
import com.bigdata.rdf.axioms.Axioms;
import com.bigdata.rdf.axioms.BaseAxioms;
import com.bigdata.rdf.axioms.NoAxioms;
import com.bigdata.rdf.axioms.OwlAxioms;
import com.bigdata.rdf.changesets.IChangeLog;
import com.bigdata.rdf.inf.IJustificationIterator;
import com.bigdata.rdf.inf.Justification;
import com.bigdata.rdf.inf.JustificationIterator;
import com.bigdata.rdf.internal.DefaultExtensionFactory;
import com.bigdata.rdf.internal.IDatatypeURIResolver;
import com.bigdata.rdf.internal.IExtension;
import com.bigdata.rdf.internal.IExtensionFactory;
import com.bigdata.rdf.internal.IInlineURIFactory;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.InlineURIFactory;
import com.bigdata.rdf.internal.NotMaterializedException;
import com.bigdata.rdf.internal.VTE;
import com.bigdata.rdf.internal.constraints.RangeBOp;
import com.bigdata.rdf.internal.impl.BlobIV;
import com.bigdata.rdf.internal.impl.extensions.XSDStringExtension;
import com.bigdata.rdf.lexicon.BigdataSubjectCentricFullTextIndex;
import com.bigdata.rdf.lexicon.BigdataValueCentricFullTextIndex;
import com.bigdata.rdf.lexicon.ITermIndexCodes;
import com.bigdata.rdf.lexicon.ITextIndexer;
import com.bigdata.rdf.lexicon.IValueCentricTextIndexer;
import com.bigdata.rdf.lexicon.LexiconKeyOrder;
import com.bigdata.rdf.lexicon.LexiconRelation;
import com.bigdata.rdf.lexicon.TermIdEncoder;
import com.bigdata.rdf.model.BigdataResource;
import com.bigdata.rdf.model.BigdataStatement;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
import com.bigdata.rdf.rio.IStatementBuffer;
import com.bigdata.rdf.rio.StatementBuffer;
import com.bigdata.rdf.rules.BaseClosure;
import com.bigdata.rdf.rules.FastClosure;
import com.bigdata.rdf.rules.FullClosure;
import com.bigdata.rdf.rules.InferenceEngine;
import com.bigdata.rdf.rules.MatchRule;
import com.bigdata.rdf.rules.RDFJoinNexusFactory;
import com.bigdata.rdf.rules.RuleContextEnum;
import com.bigdata.rdf.sail.RDRHistory;
import com.bigdata.rdf.sparql.ast.optimizers.ASTBottomUpOptimizer;
import com.bigdata.rdf.spo.BulkCompleteConverter;
import com.bigdata.rdf.spo.BulkFilterConverter;
import com.bigdata.rdf.spo.ExplicitSPOFilter;
import com.bigdata.rdf.spo.ISPO;
import com.bigdata.rdf.spo.JustificationWriter;
import com.bigdata.rdf.spo.SPO;
import com.bigdata.rdf.spo.SPOKeyOrder;
import com.bigdata.rdf.spo.SPOPredicate;
import com.bigdata.rdf.spo.SPORelation;
import com.bigdata.rdf.spo.SPOTupleSerializer;
import com.bigdata.rdf.spo.StatementWriter;
import com.bigdata.rdf.spo.XXXCShardSplitHandler;
import com.bigdata.rdf.vocab.BaseVocabulary;
import com.bigdata.rdf.vocab.NoVocabulary;
import com.bigdata.rdf.vocab.Vocabulary;
import com.bigdata.rdf.vocab.VocabularyDecl;
import com.bigdata.rdf.vocab.core.BigdataCoreVocabulary_v20160317;
import com.bigdata.relation.AbstractResource;
import com.bigdata.relation.IDatabase;
import com.bigdata.relation.IMutableDatabase;
import com.bigdata.relation.IRelation;
import com.bigdata.relation.RelationSchema;
import com.bigdata.relation.accesspath.ElementFilter;
import com.bigdata.relation.accesspath.EmptyAccessPath;
import com.bigdata.relation.accesspath.IAccessPath;
import com.bigdata.relation.accesspath.IElementFilter;
import com.bigdata.relation.locator.DefaultResourceLocator;
import com.bigdata.relation.locator.IResourceLocator;
import com.bigdata.relation.rule.IProgram;
import com.bigdata.relation.rule.IRule;
import com.bigdata.relation.rule.Program;
import com.bigdata.relation.rule.Rule;
import com.bigdata.relation.rule.eval.ActionEnum;
import com.bigdata.relation.rule.eval.DefaultRuleTaskFactory;
import com.bigdata.relation.rule.eval.IJoinNexus;
import com.bigdata.relation.rule.eval.IJoinNexusFactory;
import com.bigdata.relation.rule.eval.IRuleTaskFactory;
import com.bigdata.relation.rule.eval.ISolution;
import com.bigdata.search.FullTextIndex;
import com.bigdata.service.IBigdataFederation;
import com.bigdata.service.geospatial.GeoSpatialConfig;
import com.bigdata.service.geospatial.GeoSpatialConfigOptions;
import com.bigdata.sparse.GlobalRowStoreUtil;
import com.bigdata.striterator.ChunkedArrayIterator;
import com.bigdata.striterator.ChunkedConvertingIterator;
import com.bigdata.striterator.ChunkedWrappedIterator;
import com.bigdata.striterator.DelegateChunkedIterator;
import com.bigdata.striterator.EmptyChunkedIterator;
import com.bigdata.striterator.IChunkedIterator;
import com.bigdata.striterator.IChunkedOrderedIterator;
import com.bigdata.striterator.IKeyOrder;
import com.bigdata.util.BytesUtil;
import com.bigdata.util.InnerCause;
import com.bigdata.util.PropertyUtil;
import cutthecrap.utils.striterators.ICloseableIterator;
/**
* Abstract base class that implements logic for the {@link ITripleStore}
* interface that is invariant across the choice of the backing store.
* <p>
* By default, this class supports RDFS inference plus optional support for
* <code>owl:sameAs</code>, <code>owl:equivalentProperty</code>, and
* <code>owl:equivalentClass</code>. The {@link IRule}s are declarative, and
* it is easy to write new rules. Those {@link IRule}s can be introduced using
* custom {@link BaseClosure} implementations. See {@link Options#CLOSURE_CLASS}.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
abstract public class AbstractTripleStore extends
AbstractResource<IDatabase<AbstractTripleStore>> implements ITripleStore,
IRawTripleStore, IMutableDatabase<AbstractTripleStore> {
final static private Logger log = Logger.getLogger(AbstractTripleStore.class);
final protected static boolean INFO = log.isInfoEnabled();
final protected static boolean DEBUG = log.isDebugEnabled();
/*
* TODO I have held back this change until I can test it on a cluster. I am
* not sure what effect this would have. You can always force registration
* of the embedded services using "ServiceRegistry.getInstance()".
*/
// /**
// * Make sure that any services are registered.
// */
// static {
//
// ServiceRegistry.getInstance();
//
// }
/**
* This is used to conditionally enable the logic to retract justifications
* when the corresponding statement is retracted.
*/
final private boolean justify;
/**
* True iff justification chains are being recorded for entailments and used
* to support truth maintenance.
* <p>
* Note: This is the same value that is reported by {@link SPORelation#justify}.
*
* @see Options#JUSTIFY
*/
final public boolean isJustify() {
return justify;
}
/**
* This is used to conditionally disable the lexicon support, principally in
* conjunction with a {@link TempTripleStore}.
*/
final protected boolean lexicon;
/**
* The #of internal values in the key for a statement index (3 for a
* triple store, 4 for a quad store).
*
* @see Options#QUADS
*/
final private int spoKeyArity;
/**
* <code>true</code> iff this is a quad store.
*/
final private boolean quads;
/**
* Indicates whether this is a triple store or a quad store (3 for a
* triple store, 4 for a quad store).
*
* @return The #of elements in the key for the {@link SPORelation} (3 or 4).
*/
final public int getSPOKeyArity() {
return spoKeyArity;
}
/**
* Return <code>true</code> iff this is a quad store.
*
* @see Options#QUADS
*/
@Override
final public boolean isQuads() {
return quads;
}
/**
* When <code>true</code> the database will support statement identifiers.
* A statement identifier is a unique 64-bit integer taken from the same
* space as the term identifiers and which uniquely identifies a statement
* in the database regardless of the graph in which that statement appears.
* The purpose of statement identifiers is to allow statements about
* statements without recourse to RDF style reification.
*
* @see Options#STATEMENT_IDENTIFIERS
*/
final public boolean isStatementIdentifiers() {
return statementIdentifiers;
}
final private boolean statementIdentifiers;
/**
* The {@link Axioms} class.
*
* @see com.bigdata.rdf.store.AbstractTripleStore.Options#AXIOMS_CLASS
*/
final private Class<? extends BaseAxioms> axiomClass;
/**
* The {@link Vocabulary} class.
*
* @see Options#VOCABULARY_CLASS
*/
final private Class<? extends BaseVocabulary> vocabularyClass;
/**
* The {@link BaseClosure} class.
*
* @see Options#CLOSURE_CLASS
*/
final private Class<? extends BaseClosure> closureClass;
/**
* @see Options#CONSTRAIN_XXXC_SHARDS
*/
final private boolean constrainXXXCShards;
/**
* @see Options#BOTTOM_UP_EVALUATION
*/
final private boolean bottomUpEvaluation;
/**
* @see Options#COMPUTE_CLOSURE_FOR_SIDS
*/
final private boolean computeClosureForSids;
/**
* The {@link RDRHistory} class.
*
* @see Options#RDR_HISTORY_CLASS
*/
final private Class<? extends RDRHistory> rdrHistoryClass;
/**
* Return an instance of the class that is used to compute the closure of
* the database.
*/
public BaseClosure getClosureInstance() {
try {
final Constructor<? extends BaseClosure> ctor = closureClass
.getConstructor(new Class[] { AbstractTripleStore.class });
return ctor.newInstance(this);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Return an instance of the {@link RDRHistory} class.
*/
public RDRHistory getRDRHistoryInstance() {
if (!isRDRHistory()) {
throw new RuntimeException("RDR history not enabled");
}
try {
final Constructor<? extends RDRHistory> ctor = rdrHistoryClass
.getConstructor(new Class[] { AbstractTripleStore.class });
return ctor.newInstance(this);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Return true if the RDR history feature is enabled (experimental).
*/
public boolean isRDRHistory() {
return rdrHistoryClass != null;
}
/**
* Return <code>true</code> iff the fully bound statement is an axiom.
*
* @param s
* The internal value ({@link IV}) for the subject position.
* @param p
* The internal value ({@link IV}) for the predicate position.
* @param o
* The internal value ({@link IV}) for the object position.
*/
public boolean isAxiom(final IV s, final IV p, final IV o) {
return getAxioms().isAxiom(s, p, o);
}
/**
* When <code>true</code> the database will support statement identifiers.
* <p>
* A statement identifier is a unique 64-bit integer taken from the same
* space as the term identifiers and which uniquely identifies a statement
* in the database regardless of the graph in which that statement appears.
* The purpose of statement identifiers is to allow statements about
* statements without recourse to RDF style reification.
* <p>
* Only explicit statements will have a statement identifier. Statements
* made about statements using their statement identifiers will
* automatically be retracted if a statement they describe is retracted (a
* micro form of truth maintenance that is always enabled when statement
* identifiers are enabled).
*/
public boolean getStatementIdentifiers() {
return statementIdentifiers;
}
/**
* Returns <code>true</code> when the database is in inline terms mode. In
* this mode, certain types of terms (numerics in particular) are inlined
* into the statement indices rather than being mapped to and from term
* identifiers in the lexicon.
*/
public boolean isInlineLiterals() {
return getLexiconRelation().isInlineLiterals();
}
/**
* @see Options#CONSTRAIN_XXXC_SHARDS
*/
final public boolean isConstrainXXXCShards() {
return constrainXXXCShards;
}
/**
* @see Options#BOTTOM_UP_EVALUATION
*/
final public boolean isBottomUpEvaluation() {
return bottomUpEvaluation;
}
/**
* The {@link BigdataValueFactoryImpl} for the namespace of the
* {@link LexiconRelation} associated with this {@link AbstractTripleStore}.
*
* @throws UnsupportedOperationException
* if there is no associated lexicon.
*
* @todo allow a {@link TempTripleStore} to specify another db's lexicon?
*/
final public BigdataValueFactory getValueFactory() {
if (valueFactory == null) {
if (!lexicon) {
throw new UnsupportedOperationException();
}
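// Double-checked locking: valueFactory is volatile, so a non-null
// reference observed by the unsynchronized read above is fully published.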
synchronized (this) {
if (valueFactory == null) {
valueFactory = getLexiconRelation().getValueFactory();
}
}
}
return valueFactory;
}
private volatile BigdataValueFactory valueFactory;
/*
* IDatabase, ILocatableResource
*/
public Iterator<IRelation> relations() {
return Collections.unmodifiableList(Arrays.asList(new IRelation[] { //
getSPORelation(), //
getLexiconRelation() //
})).iterator();
}
/**
* Configuration options.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*/
public static interface Options extends AbstractResource.Options,
InferenceEngine.Options, com.bigdata.journal.Options,
KeyBuilder.Options, DataLoader.Options, FullTextIndex.Options,
GeoSpatialConfigOptions.Options {
/**
* Boolean option (default <code>true</code>) enables support for the
* lexicon (the forward and backward term indices). When
* <code>false</code>, the lexicon indices are not registered. This can
* be safely turned off for the {@link TempTripleStore} when only the
* statement indices are to be used.
* <p>
* You can control how the triple store will interpret the RDF URIs and
* literals using the {@link KeyBuilder.Options}. For example:
*
* <pre>
* // Force ASCII key comparisons.
* properties.setProperty(Options.COLLATOR, CollatorEnum.ASCII.toString());
* </pre>
*
* or
*
* <pre>
* // Force identical unicode comparisons (assuming default COLLATOR setting).
* properties.setProperty(Options.STRENGTH, StrengthEnum.IDENTICAL.toString());
* </pre>
*
* @see LexiconRelation
* @see KeyBuilder.Options
*/
String LEXICON = AbstractTripleStore.class.getName() + ".lexicon";
String DEFAULT_LEXICON = "true";
/**
* Boolean option (default {@value #DEFAULT_STORE_BLANK_NODES}) controls
* whether or not we store blank nodes in the forward mapping of the
* lexicon (this is also known as the "told bnodes" mode).
* <p>
* When <code>false</code> blank node semantics are enforced, you CAN
* NOT unify blank nodes based on their IDs in the lexicon, and
* {@link AbstractTripleStore#getBNodeCount()} is disabled.
* <p>
* When <code>true</code>, you are able to violate blank node semantics
* and force unification of blank nodes by assigning the ID from the RDF
* interchange syntax to the blank node. RIO has an option that will
* allow you to do this. When this option is also <code>true</code>,
* then you will in fact be able to resolve pre-existing blank nodes
* using their identifiers. The tradeoff is time and space: if you have
* a LOT of documents using blank nodes then you might want to disable
* this option in order to spend less time writing the forward lexicon
* index (and it will also take up less space).
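* <p>
* For example, a minimal sketch of enabling the told bnodes mode via the
* standard {@link Properties} pathway:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.STORE_BLANK_NODES, "true");
* </pre>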
*/
String STORE_BLANK_NODES = AbstractTripleStore.class.getName() + ".storeBlankNodes";
String DEFAULT_STORE_BLANK_NODES = "false";
/**
* This option affects how evenly the assigned term identifiers are
* distributed, which has a pronounced effect on the ID2TERM and statement indices
* for <em>scale-out deployments</em>. The default for a scale-out
* deployment is {@value #DEFAULT_TERMID_BITS_TO_REVERSE}. This option
* is ignored for a standalone deployment.
* <p>
* For the scale-out triple store, the term identifiers are formed by
* placing the index partition identifier in the high word and the local
* counter for the index partition into the low word. The effect of this
* option is to cause the low N bits of the local counter value to be
* reversed and written into the high N bits of the term identifier (the
* other bits are shifted down to make room for this). Regardless of the
* configured value for this option, all bits of both the partition
* identifier and the local counter are preserved.
* <p>
* Normally, the low bits of a sequential counter will vary the most
* rapidly. By reversing the localCounter and placing some of the
* reversed bits into the high bits of the term identifier we cause the
* term identifiers to be uniformly (but not randomly) distributed. This
* is much like using a hash function without collisions or a random
* number generator that does not produce duplicates. When the value of
* this option is ZERO (0), no bits are reversed so the high bits of the
* term identifiers directly reflect the partition identifier and the
* low bits are assigned sequentially by the local counter within each
* TERM2ID index partition.
* <p>
* The use of a non-zero value for this option can easily cause the
* write load on the index partitions for the ID2TERM and statement
* indices to be perfectly balanced. However, using too many bits has
* some negative consequences on locality of operations <em>within</em>
* an index partition (since the distribution of the keys will be
* approximately uniform, leading to poor cache
* performance, more copy-on-write for the B+Tree, and both more IO and
* faster growth in the journal for writes (since there will be more
* leaves made dirty on average by each bulk write)).
* <p>
* The use of a non-zero value for this option also directly affects the
* degree of scatter for bulk read or write operations. As more bits are
* used, it becomes increasingly likely that each bulk read or write
* operation will on average touch all index partitions. This is because
* the #of low-order local counter bits reversed and rotated into the high
* bits of the term identifier places an approximate bound on the #of
* index partitions of the ID2TERM or a statement index that will be
* touched by a scattered read or write. However, that number will
* continue to grow slowly over time as new partition identifiers are
* introduced (the partition identifiers appear next in the encoded term
* identifier and therefore determine the degree of locality or scatter
* once the quickly varying high bits have had their say).
* <p>
* The "right" value really depends on the expected scale of the
* knowledge base. If you estimate that you will have 50 x 200M index
* partitions for the statement indices, then SQRT(50) =~ 7 would be a
* good choice.
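* <p>
* For example, a sketch for the 50-partition estimate above:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.TERMID_BITS_TO_REVERSE, "7");
* </pre>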
*
* @see TermIdEncoder
*/
String TERMID_BITS_TO_REVERSE = AbstractTripleStore.class.getName() + ".termIdBitsToReverse";
String DEFAULT_TERMID_BITS_TO_REVERSE = "6";
/**
* Integer option whose value is the capacity of the term cache. This
* cache provides fast lookup of frequently used RDF {@link Value}s by
* their term identifier.
*/
String TERM_CACHE_CAPACITY = AbstractTripleStore.class.getName()
+ ".termCache.capacity";
String DEFAULT_TERM_CACHE_CAPACITY = "10000";//"50000";
/**
* The name of the class that will establish the pre-defined
* {@link Vocabulary} for the database (default
* {@value #DEFAULT_VOCABULARY_CLASS}). The class MUST extend
* {@link BaseVocabulary}. This option is ignored if the lexicon is
* disabled.
* <p>
* The {@link Vocabulary} is initialized by
* {@link AbstractTripleStore#create()}. Its state is stored in the
* global row store under the {@link TripleStoreSchema#VOCABULARY}
* property. The named {@link Vocabulary} class will be used to
* instantiate a consistent vocabulary mapping each time a view of the
* {@link AbstractTripleStore} is materialized. This depends on the
* named {@link Vocabulary} class having a stable behavior. Thus the
* {@link BaseVocabulary} class builds in protection against version
* changes and will refuse to materialize a view of the
* {@link AbstractTripleStore} if the {@link Vocabulary} would not be
* consistent.
* <p>
* The {@link BaseVocabulary} class is designed for easy and modular
* extension. You can trivially define a concrete instance of this class
* which provides any (reasonable) number of {@link VocabularyDecl}
* instances. Each {@link VocabularyDecl} declares the namespace(s) and
* the {@link URI}s for some ontology. A number of such classes have
* been created and are combined by the
* {@link #DEFAULT_VOCABULARY_CLASS}. You can create your own
* {@link VocabularyDecl} classes and combine them within your own
* {@link Vocabulary}, but it must extend {@link BaseVocabulary}.
* <p>
* Note: There is an interaction between the {@link Vocabulary} and
* {@link IExtension}s. The {@link IDatatypeURIResolver} requires that
* URIs used by an {@link IExtension} are pre-declared by the
* {@link Vocabulary}.
*/
String VOCABULARY_CLASS = AbstractTripleStore.class.getName() + ".vocabularyClass";
/**
* Note: The default {@link Vocabulary} class may be changed from time
* to time as additional {@link VocabularyDecl} are created and bundled
* into a new default {@link Vocabulary}. However, a deployed concrete
* instance of the default {@link Vocabulary} class MUST NOT be modified
* since that could introduce inconsistencies into the URI to IV mapping
* which it provides for {@link AbstractTripleStore}s created using that
* class.
*/
String DEFAULT_VOCABULARY_CLASS = BigdataCoreVocabulary_v20160317.class.getName();
/**
* The {@link Axioms} model that will be used (default
* {@value Options#DEFAULT_AXIOMS_CLASS}). The value is the name of the
* class that will be instantiated by
* {@link AbstractTripleStore#create()}. The class must extend
* {@link BaseAxioms}. This option is ignored if the lexicon is
* disabled. Use {@link NoAxioms} to disable inference.
*/
String AXIOMS_CLASS = AbstractTripleStore.class.getName() + ".axiomsClass";
String DEFAULT_AXIOMS_CLASS = OwlAxioms.class.getName();
/**
* The name of the {@link BaseClosure} class that will be used (default
* {@value Options#DEFAULT_CLOSURE_CLASS}). The value is the name of
* the class that will be used to generate the {@link Program} that
* computes the closure of the database. The class must extend
* {@link BaseClosure}. This option is ignored if the inference is
* disabled.
* <p>
* There are two pre-defined "programs" used to compute and maintain
* closure. The {@link FullClosure} program is a simple fix point of the
* RDFS+ entailments, except for the
* <code> foo rdf:type rdfs:Resource</code> entailments which are
* normally generated at query time. The {@link FastClosure} program
* breaks nearly all cycles in the RDFS rules and runs nearly entirely
* as a sequence of {@link IRule}s, including several custom rules.
* <p>
* It is far easier to modify the {@link FullClosure} program since any
* new rules can just be dropped into place. Modifying the
* {@link FastClosure} program requires careful consideration of the
* entailments computed at each stage in order to determine where a new
* rule would fit in.
* <p>
* Note: When support for <code>owl:sameAs</code>, etc. processing is
* enabled, some of the entailments are computed by rules run during
* forward closure and some of the entailments are computed by rules run
* at query time. Both {@link FastClosure} and {@link FullClosure} are
* aware of this and handle it correctly (e.g., as configured).
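* <p>
* For example, a sketch that selects the simpler fix-point program in
* place of the default:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.CLOSURE_CLASS,
*         FullClosure.class.getName());
* </pre>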
*/
String CLOSURE_CLASS = AbstractTripleStore.class.getName() + ".closureClass";
String DEFAULT_CLOSURE_CLASS = FastClosure.class.getName();
/**
* Boolean option (default <code>false</code>) disables all but a
* single statement index (aka access path).
* <p>
* Note: The main purpose of the option is to make it possible to turn
* off the other access paths for special bulk load purposes. The use of
* this option is NOT compatible with either the application of the
* {@link InferenceEngine} or high-level query.
* <p>
* Note: You may want to explicitly enable or disable the bloom filter
* for this. Normally a single access path (SPO) is used for a temporary
* store. Temporary stores tend to be smaller, so if you will also be
* doing point tests on the temporary store then you probably want to
* use the {@link #BLOOM_FILTER}. Otherwise it may be turned off to
* realize some (minimal) performance gain.
*/
String ONE_ACCESS_PATH = AbstractTripleStore.class.getName() + ".oneAccessPath";
String DEFAULT_ONE_ACCESS_PATH = "false";
/**
* Optional property controls whether or not a bloom filter is
* maintained for the SPO statement index. The bloom filter is effective
* up to ~ 2M entries per index (partition). For scale-up, the bloom
* filter is automatically disabled after its error rate would be too
* large given the #of index entries. For scale-out, as the index grows
* we keep splitting it into more and more index partitions, and those
* index partitions are comprised of views onto one or more
* {@link AbstractBTree}s. While the mutable {@link BTree}s might
* occasionally grow too large to support a bloom filter, data is
* periodically migrated onto immutable {@link IndexSegment}s which have
* perfect fit bloom filters. This means that the bloom filter
* scales-out, but not up.
* <p>
* Note: The SPO access path is used any time we have an access path
* that corresponds to a point test. Therefore this is the only index
* for which it makes sense to maintain a bloom filter.
* <p>
* If you are going to do a lot of small commits, then please DO NOT
* enable the bloom filter for the {@link AbstractTripleStore}. The
* bloom filter takes 1 MB each time you commit on the SPO/SPOC index.
* The bloom filter has limited value in any case for scale-up since its
* nominal error rate will be exceeded at ~2M triples. This concern does
* not apply for scale-out, where the bloom filter is always a good
* idea.
*
* @see IndexMetadata#getBloomFilterFactory()
*
* @todo Review the various temp triple stores that are created and see
* which of them would benefit from the SPO bloom filter (TM,
* backchainers, SIDs fixed point, etc).
*/
String BLOOM_FILTER = AbstractTripleStore.class.getName() + ".bloomFilter";
String DEFAULT_BLOOM_FILTER = "true";
/**
* When <code>true</code> (default {@value Options#DEFAULT_JUSTIFY}),
* proof chains for entailments generated by forward chaining are stored
* in the database. This option is required for truth maintenance when
* retracting assertions.
* <p>
* If you will not be retracting statements from the database then you
* can specify <code>false</code> for a significant performance boost
* during writes and a smaller profile on the disk.
* <p>
* This option does not affect query performance since the
* justifications are maintained in a distinct index and are only used
* when retracting assertions.
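* <p>
* For example, a sketch that trades truth maintenance for write
* throughput when statements will never be retracted:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.JUSTIFY, "false");
* </pre>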
*/
String JUSTIFY = AbstractTripleStore.class.getName() + ".justify";
String DEFAULT_JUSTIFY = "true";
/**
* Boolean option (default {@value #DEFAULT_STATEMENT_IDENTIFIERS})
* enables support for statement identifiers. A statement identifier is
* a unique identifier for a <em>triple</em> in the database. Statement
* identifiers may be used to make statements about statements without
* using RDF style reification.
* <p>
* Statement identifiers are assigned consistently when {@link Statement}
* s are mapped into the database. This is done using an extension of
* the <code>term:id</code> index to map the statement as if it were a
* term onto a unique statement identifier. While the statement
* identifier is assigned canonically by the <code>term:id</code> index,
* it is stored redundantly in the value position for each of the
* statement indices. While the statement identifier is, in fact, a term
* identifier, the reverse mapping is NOT stored in the id:term index
* and you CAN NOT translate from a statement identifier back to the
* original statement.
* <p>
* bigdata supports an RDF/XML interchange extension for the interchange
* of <em>triples</em> with statement identifiers that may be used as
* blank nodes to make statements about statements. See {@link BD} and
* {@link RDFXMLParser}.
* <p>
* Statement identifiers add some latency when loading data since it
* increases the size of the writes on the terms index (and also its
* space requirements since all statements are also replicated in the
* terms index). However, if you are doing concurrent data load then the
* added latency is nicely offset by the parallelism.
* <p>
* The main benefit for statement identifiers is that they provide a
* mechanism for statement level provenance. This is critical for some
* applications.
* <p>
* An alternative approach to provenance within RDF is to use the
* concatenation of the subject, predicate, and object (or a hash of
* their concatenation) as the value in the context position. While this
* approach can be used with any quad store, it is less transparent and
* requires <em>twice</em> the amount of data on the disk since you need
* an additional three statement indices to cover the quad access paths.
* <p>
* The provenance mode (SIDs) IS NOT compatible with the {@link #QUADS}
* mode. You may use either one, but not both in the same KB instance.
* <p>
* There are examples for using the provenance mode online.
*/
String STATEMENT_IDENTIFIERS = AbstractTripleStore.class.getName() + ".statementIdentifiers";
String DEFAULT_STATEMENT_IDENTIFIERS = "false";
/**
* Boolean option determines whether the KB instance will be a quad
* store or a triple store. For a triple store only, the
* {@link #STATEMENT_IDENTIFIERS} option determines whether or not the
* provenance mode is enabled.
*/
String QUADS = AbstractTripleStore.class.getName() + ".quads";
String DEFAULT_QUADS = "false";
/**
* Set up database in triples mode, no provenance. This is equivalent
* to setting the following options:
* <p>
* <ul>
* <li>{@link AbstractTripleStore.Options#QUADS}
* = <code>false</code></li>
* <li>{@link AbstractTripleStore.Options#STATEMENT_IDENTIFIERS}
* = <code>false</code></li>
* </ul>
*/
String TRIPLES_MODE = AbstractTripleStore.class.getName() + ".triplesMode";
String DEFAULT_TRIPLES_MODE = "false";
/**
* Set up database in triples mode with provenance. This is equivalent
* to setting the following options:
* <p>
* <ul>
* <li>{@link AbstractTripleStore.Options#QUADS}
* = <code>false</code></li>
* <li>{@link AbstractTripleStore.Options#STATEMENT_IDENTIFIERS}
* = <code>true</code></li>
* </ul>
*/
String TRIPLES_MODE_WITH_PROVENANCE = AbstractTripleStore.class
.getName() + ".triplesModeWithProvenance";
String DEFAULT_TRIPLES_MODE_WITH_PROVENANCE = "false";
/**
* Set up database in quads mode. Quads mode means no provenance,
* no inference. This is equivalent to setting the following options:
* <p>
* <ul>
* <li>{@link AbstractTripleStore.Options#QUADS}
* = <code>true</code></li>
* <li>{@link AbstractTripleStore.Options#STATEMENT_IDENTIFIERS}
* = <code>false</code></li>
* <li>{@link AbstractTripleStore.Options#AXIOMS_CLASS}
* = <code>com.bigdata.rdf.store.AbstractTripleStore.NoAxioms</code></li>
* </ul>
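* <p>
* For example, a sketch of creating a quad store KB using this shorthand:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.QUADS_MODE, "true");
* </pre>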
*/
String QUADS_MODE = AbstractTripleStore.class.getName() + ".quadsMode";
String DEFAULT_QUADS_MODE = "false";
/**
* The name of the {@link BigdataValueFactory} class. The implementation
* MUST declare a method with the following signature which will be used
* as a canonicalizing factory for the instances of that class.
*
* <pre>
* public static BigdataValueFactory getInstance(final String namespace)
* </pre>
*
* @see #DEFAULT_VALUE_FACTORY_CLASS
*/
String VALUE_FACTORY_CLASS = AbstractTripleStore.class.getName()
+ ".valueFactoryClass";
String DEFAULT_VALUE_FACTORY_CLASS = BigdataValueFactoryImpl.class
.getName();
/*
* Full text index options.
*/
/**
* Boolean option (default {@value #DEFAULT_TEXT_INDEX}) enables support
* for a full text index that may be used to lookup literals by tokens
* found in the text of those literals.
*
* @see #TEXT_INDEXER_CLASS
* @see #TEXT_INDEX_DATATYPE_LITERALS
* @see #INLINE_TEXT_LITERALS
* @see #MAX_INLINE_TEXT_LENGTH
*/
String TEXT_INDEX = AbstractTripleStore.class.getName() + ".textIndex";
String DEFAULT_TEXT_INDEX = "true";
/**
* Boolean option (default {@value #DEFAULT_SUBJECT_CENTRIC_TEXT_INDEX})
* enables support for a subject-centric full text index that may be used
* to lookup literals by tokens found in the text of those literals.
*
* @see #TEXT_INDEXER_CLASS
* @see #TEXT_INDEX_DATATYPE_LITERALS
* @see #INLINE_TEXT_LITERALS
* @see #MAX_INLINE_TEXT_LENGTH
* @deprecated Feature was never completed due to scalability issues.
* See BLZG-1548, BLZG-563.
*/
@Deprecated
String SUBJECT_CENTRIC_TEXT_INDEX = AbstractTripleStore.class.getName() + ".subjectCentricTextIndex";
@Deprecated
String DEFAULT_SUBJECT_CENTRIC_TEXT_INDEX = "false";
/**
* Boolean option enables support for a full text index that may be used
* to lookup datatype literals by tokens found in the text of those
* literals (default {@value #DEFAULT_TEXT_INDEX_DATATYPE_LITERALS}).
* Enabling this option will cause ALL datatype literals to be presented
* to the full text indexer, including <code>xsd:string</code>,
* <code>xsd:int</code>, etc. If disabled, only plain literals,
* <code>xsd:string</code> literals, and <code>rdf:langString</code>
* (language-tagged) literals will be indexed.<br>
* Note: as of <a href="https://jira.blazegraph.com/browse/BLZG-1928">BLZG-1928</a>,
* literals are text indexed regardless of the inlining configuration.
*/
String TEXT_INDEX_DATATYPE_LITERALS = AbstractTripleStore.class
.getName() + ".textIndex.datatypeLiterals";
String DEFAULT_TEXT_INDEX_DATATYPE_LITERALS = "true";
/**
* List of datatypes that will be included in the full text index even if
* {@link #TEXT_INDEX_DATATYPE_LITERALS} is not enabled
* (default {@value #DEFAULT_DATATYPES_TO_TEXT_INDEX}).
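* <p>
* For example, a sketch that text indexes <code>xsd:int</code> literals
* even when datatype literal indexing is otherwise disabled (shown with a
* single datatype URI):
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.DATATYPES_TO_TEXT_INDEX,
*         "http://www.w3.org/2001/XMLSchema#int");
* </pre>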
*/
String DATATYPES_TO_TEXT_INDEX = AbstractTripleStore.class
.getName() + ".textIndex.datatypes";
String DEFAULT_DATATYPES_TO_TEXT_INDEX = "";
/**
* The name of the {@link IValueCentricTextIndexer} class. The implementation MUST
* declare a method with the following signature which will be used to
* locate instances of that class.
*
* <pre>
* static public ITextIndexer getInstance(final IIndexManager indexManager,
* final String namespace, final Long timestamp,
* final Properties properties)
* </pre>
*
* @see #DEFAULT_TEXT_INDEXER_CLASS
*/
String TEXT_INDEXER_CLASS = AbstractTripleStore.class.getName()
+ ".textIndexerClass";
String DEFAULT_TEXT_INDEXER_CLASS = BigdataValueCentricFullTextIndex.class
.getName();
/**
* The name of the {@link ITextIndexer} class. The implementation MUST
* declare a method with the following signature which will be used to
* locate instances of that class.
*
* <pre>
* static public ITextIndexer getInstance(final IIndexManager indexManager,
* final String namespace, final Long timestamp,
* final Properties properties)
* </pre>
*
* @see #DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS
*/
String SUBJECT_CENTRIC_TEXT_INDEXER_CLASS = AbstractTripleStore.class.getName()
+ ".subjectCentricTextIndexerClass";
String DEFAULT_SUBJECT_CENTRIC_TEXT_INDEXER_CLASS = BigdataSubjectCentricFullTextIndex.class
.getName();
/*
* Inlining options.
*/
/**
* The threshold (in character length) at which an RDF {@link Value}
* will be inserted into the {@link LexiconKeyOrder#BLOBS} index rather
* than the {@link LexiconKeyOrder#TERM2ID} and
* {@link LexiconKeyOrder#ID2TERM} indices (default
* {@value #DEFAULT_BLOBS_THRESHOLD}).
* <p>
* The {@link LexiconKeyOrder#BLOBS} index is capable of storing very
* large literals but has more IO scatter due to the hash code component
* of the key for that index. Therefore smaller RDF {@link Value}s
* should be inserted into the {@link LexiconKeyOrder#TERM2ID} and
* {@link LexiconKeyOrder#ID2TERM} indices while very large RDF
* {@link Value}s MUST be inserted into the
* {@link LexiconKeyOrder#BLOBS} index.
* <p>
* The {@link LexiconKeyOrder#TERM2ID} index keys are Unicode sort codes
* based on the RDF {@link Value}s. This threshold essentially limits
* the maximum length of the keys in the {@link LexiconKeyOrder#TERM2ID}
* index.
* <p>
* Note: The BLOBS index MAY be disabled entirely by setting this
* property to {@link #BLOBS_THRESHOLD_DISABLE}
* ({@value #BLOBS_THRESHOLD_DISABLE}). However, this is generally not
* advised since it implies that large literals will appear as keys
* in the TERM2ID index.
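* <p>
* For example, a sketch that routes only values of 1024 characters or
* more to the BLOBS index:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.BLOBS_THRESHOLD, "1024");
* </pre>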
*
* @see <a href="https://github.com/SYSTAP/bigdata-gpu/issues/25">
* Disable BLOBS indexing completely for GPU </a>
*/
String BLOBS_THRESHOLD = AbstractTripleStore.class.getName()
+ ".blobsThreshold";
String DEFAULT_BLOBS_THRESHOLD = "256";
/**
* The constant that may be used to disable the BLOBS index.
*
* @see #BLOBS_THRESHOLD
*
* @see <a href="https://github.com/SYSTAP/bigdata-gpu/issues/25">
* Disable BLOBS indexing completely for GPU </a>
*/
String BLOBS_THRESHOLD_DISABLE = Integer.toString(Integer.MAX_VALUE);
/**
* Set up database to inline XSD datatype literals corresponding to
* primitives (boolean) and numerics (byte, short, int, etc) directly
* into the statement indices (default
* {@value #DEFAULT_INLINE_XSD_DATATYPE_LITERALS}).
* <p>
* Note: <code>xsd:dateTime</code> inlining is controlled by a distinct
* option. See {@link #INLINE_DATE_TIMES}.
* <p>
* Note: <code>xsd:string</code> inlining and the inlining of non-xsd
* literals are controlled by {@link #INLINE_TEXT_LITERALS} and
* {@link #MAX_INLINE_TEXT_LENGTH}.
*/
String INLINE_XSD_DATATYPE_LITERALS = AbstractTripleStore.class
.getName() + ".inlineXSDDatatypeLiterals";
String DEFAULT_INLINE_XSD_DATATYPE_LITERALS = "true";
/**
* Inline ANY literal having fewer than {@link #MAX_INLINE_TEXT_LENGTH}
* characters (default {@value #DEFAULT_INLINE_TEXT_LITERALS}).
* <p>
* Note: This option exists mainly to support a scale-out design in
* which everything is inlined into the statement indices. This design
* is similar to the YARS2 system with its ISAM files and has the
* advantage that little or nothing is stored within the lexicon.
* <p>
* Inlining of large literals via this option is NOT compatible with
* {@link #TEXT_INDEX}. The problem is that we need to index literals
* which are inlined as well as those which are not inlined. While the
* full text index does support this, indexing fully inline literals
* only makes sense for reasonably short literals. This is because the
* {@link IV} of the inlined literal (a) embeds its (compressed) Unicode
* representation; and (b) is replicated for each token within that
* literal. For large literals, this causes a substantial expansion in
* the full text index.
*/
String INLINE_TEXT_LITERALS = AbstractTripleStore.class.getName()
+ ".inlineTextLiterals";
String DEFAULT_INLINE_TEXT_LITERALS = "false";
/**
* The maximum length of a String value which may be inlined into the
* statement indices (default {@value #DEFAULT_MAX_INLINE_STRING_LENGTH}
* ). Depending on the configuration, this may apply to the literal label
* (and datatypeURI or language code), URI local names, full URIs, blank
* node IDs, etc. The {@link XSDStringExtension} is registered by the
* {@link DefaultExtensionFactory} when GT ZERO (0).
* <p>
* Note: URIs may be readily inlined using this mechanism without
* causing an interaction with the full text index since they are not
* indexed by the full text index. However, inlining literals in this
* manner causes the Unicode representation of the literal to be
* duplicated within the full text index for each token in that literal.
* See {@link #TEXT_INDEX} and {@link #INLINE_TEXT_LITERALS}.
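* <p>
* For example, a sketch that inlines values of up to 64 characters:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.MAX_INLINE_TEXT_LENGTH, "64");
* </pre>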
*
* @see DefaultExtensionFactory
*/
String MAX_INLINE_TEXT_LENGTH = AbstractTripleStore.class.getName()
+ ".maxInlineTextLength";
/**
* Note that there is an interaction between this option and the full
* text indexer. When we inline a non-datatype literal, the literal
* is ALSO inlined into the full text index for each keyword in
* that literal. That can produce quite a bit of duplication. Therefore
* the full text index does not play well with inlining large literals
* into the statement indices.
*/
String DEFAULT_MAX_INLINE_STRING_LENGTH = "0";
/**
* Set up database to inline bnodes directly into the statement indices
* rather than using the lexicon to map them to term identifiers and
* back. This is only compatible with told bnodes mode.
* <p>
* See {@link Options#STORE_BLANK_NODES}.
*/
String INLINE_BNODES = AbstractTripleStore.class.getName()
+ ".inlineBNodes";
String DEFAULT_INLINE_BNODES = "true";
/**
* Set up database to inline date/times directly into the statement
* indices rather than using the lexicon to map them to term identifiers
* and back (default {@value #DEFAULT_INLINE_DATE_TIMES}). Date times
* will be converted to UTC, then stored as milliseconds since the
* epoch. Thus if you inline date/times you will lose the canonical
* representation of the date/time. This has two consequences: (1) you
* will not be able to recover the original time zone of the date/time;
* and (2) greater than millisecond precision will be lost.
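* <p>
* For example, a sketch that disables date/time inlining in order to
* preserve the original lexical form of <code>xsd:dateTime</code> values:
*
* <pre>
* properties.setProperty(AbstractTripleStore.Options.INLINE_DATE_TIMES, "false");
* </pre>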
*
* @see #INLINE_DATE_TIMES_TIMEZONE
*/
String INLINE_DATE_TIMES = AbstractTripleStore.class.getName()
+ ".inlineDateTimes";
String DEFAULT_INLINE_DATE_TIMES = "true";
/**
* The default time zone to be used to a) encode inline xsd:datetime
* literals that do not have a time zone specified and b) decode
* xsd:datetime literals from the statement indices where they are
* stored as UTC milliseconds since the epoch (default
* {@value #DEFAULT_INLINE_DATE_TIMES_TIMEZONE}).
*
* @see #INLINE_DATE_TIMES
*/
String INLINE_DATE_TIMES_TIMEZONE = AbstractTripleStore.class.getName()
+ ".inlineDateTimesTimezone";
/**
* @see #INLINE_DATE_TIMES_TIMEZONE
*/
String DEFAULT_INLINE_DATE_TIMES_TIMEZONE =
// TimeZone.getDefault().getID();
"GMT";
/**
* The name of the {@link IExtensionFactory} class. The implementation
* MUST declare a constructor that accepts an
* {@link IDatatypeURIResolver} as its only argument. The
* {@link IExtension}s constructed by the factory need a resolver to
* resolve datatype URIs to term identifiers in the database.
*
* @see #DEFAULT_EXTENSION_FACTORY_CLASS
*/
String EXTENSION_FACTORY_CLASS = AbstractTripleStore.class.getName()
+ ".extensionFactoryClass";
String DEFAULT_EXTENSION_FACTORY_CLASS = DefaultExtensionFactory.class
.getName();
/**
* When <code>true</code> AND {@link #INLINE_XSD_DATATYPE_LITERALS} is
* <code>true</code>, literals having an xsd datatype URI which can not
* be validated against that datatype will be rejected (default
* {@link #DEFAULT_REJECT_INVALID_XSD_VALUES}). For example, when
* <code>true</code> <code>abc^^xsd:int</code> would be rejected. When
* <code>false</code> the literal will be accepted, but it will not be
* inlined with the rest of the literals for that value space and will
* typically encounter a SPARQL type error during query evaluation.
*/
String REJECT_INVALID_XSD_VALUES = AbstractTripleStore.class.getName()
+ ".rejectInvalidXSDValues";
String DEFAULT_REJECT_INVALID_XSD_VALUES = "false";
/*
* Options for shard split behavior.
*/
/**
* Boolean option determines whether or not an
* {@link XXXCShardSplitHandler} is applied (scale-out only, default
* {@value #DEFAULT_CONSTRAIN_XXXC_SHARDS}).
* <p>
* When <code>true</code>, shards whose {@link SPOKeyOrder} name ends
* with "C" are constrained such that all quads for the same triple will
* be co-located on the same shard. This constraint allows certain
* optimizations for default graph handling.
* <p>
* This constraint may be used if you do not expect to have more than
* ~200MB worth of distinct graphs within which the same triple may be
* asserted. This is a soft constraint as larger shards are permitted,
* but performance will degrade if this constraint forces some shards to
* be many times larger than their nominal capacity.
*
* @see XXXCShardSplitHandler
*/
String CONSTRAIN_XXXC_SHARDS = (AbstractTripleStore.class.getName() + ".constrainXXXCShards")
.intern();
String DEFAULT_CONSTRAIN_XXXC_SHARDS = "true";
/*
* History Service.
*/
/**
* When <code>true</code> a HISTORY SERVICE and its associated index
* will be maintained.
*
* @see <a href="https://sourceforge.net/apps/trac/bigdata/ticket/607">
* HISTORY SERVICE </a>
*/
public static String HISTORY_SERVICE = AbstractTripleStore.class
.getName() + ".historyService";
public static String DEFAULT_HISTORY_SERVICE = "false";
/**
* The minimum amount of history (in milliseconds) that will be retained
* by the {@link #HISTORY_SERVICE} (default
* {@value #DEFAULT_HISTORY_SERVICE_MIN_RELEASE_AGE}). The head of the
* index will be pruned during update to remove tuples associated with
* older commit points.
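* <p>
* For example, a sketch that retains one week of history (the value is
* milliseconds, per the definition above):
*
* <pre>
* properties.setProperty(
*         AbstractTripleStore.Options.HISTORY_SERVICE_MIN_RELEASE_AGE,
*         Long.toString(java.util.concurrent.TimeUnit.DAYS.toMillis(7)));
* </pre>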
*/
public static String HISTORY_SERVICE_MIN_RELEASE_AGE = AbstractTripleStore.class
.getName() + ".historyService.minReleaseAge";
public static String DEFAULT_HISTORY_SERVICE_MIN_RELEASE_AGE = Long
.toString(Long.MAX_VALUE);
/**
* When <code>false</code>, the {@link ASTBottomUpOptimizer} is disabled.
*
* @see ASTBottomUpOptimizer
*/
public static String BOTTOM_UP_EVALUATION = AbstractTripleStore.class
.getName() + ".bottomUpEvaluation";
public static String DEFAULT_BOTTOM_UP_EVALUATION = "true";
/**
* The name of the {@link IInlineURIFactory} class.
*
* @see #DEFAULT_INLINE_URI_FACTORY_CLASS
*/
String INLINE_URI_FACTORY_CLASS = AbstractTripleStore.class.getName()
+ ".inlineURIFactory";
String DEFAULT_INLINE_URI_FACTORY_CLASS = InlineURIFactory.class
.getName();
/**
* The name of the {@link RDRHistory} class. Null by default.
*/
String RDR_HISTORY_CLASS = AbstractTripleStore.class.getName()
+ ".rdrHistoryClass";
/**
* If this option is set to <code>false</code>, closure is not computed for SIDs.
*/
public static String COMPUTE_CLOSURE_FOR_SIDS = AbstractTripleStore.class
.getName() + ".computeClosureForSids";
public static String DEFAULT_COMPUTE_CLOSURE_FOR_SIDS = "true";
}
protected Class determineAxiomClass() {
// axiomsClass
{
/*
* Note: axioms may not be defined unless the lexicon is enabled
* since the axioms require the lexicon in order to resolve
* their expression as Statements into their expression as SPOs.
*/
final String className = getProperty(Options.AXIOMS_CLASS,
Options.DEFAULT_AXIOMS_CLASS);
final Class cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Bad option: "
+ Options.AXIOMS_CLASS, e);
}
if (!BaseAxioms.class.isAssignableFrom(cls)) {
throw new RuntimeException(Options.AXIOMS_CLASS
+ ": Must extend: " + BaseAxioms.class.getName());
}
if (cls != NoAxioms.class && quads) {
throw new UnsupportedOperationException(Options.QUADS
+ " does not support inference ("
+ Options.AXIOMS_CLASS + ")");
}
return cls;
}
}
protected Class determineVocabularyClass() {
// vocabularyClass
{
final String className = getProperty(Options.VOCABULARY_CLASS,
Options.DEFAULT_VOCABULARY_CLASS);
final Class cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Bad option: "
+ Options.VOCABULARY_CLASS, e);
}
if (!BaseVocabulary.class.isAssignableFrom(cls)) {
throw new RuntimeException(Options.VOCABULARY_CLASS
+ ": Must extend: "
+ BaseVocabulary.class.getName());
}
return cls;
}
}
/**
* Ctor specified by {@link DefaultResourceLocator}.
*
* @see Options
*/
@SuppressWarnings("unchecked")
protected AbstractTripleStore(final IIndexManager indexManager,
final String namespace, final Long timestamp,
final Properties properties) {
super(indexManager, namespace, timestamp, properties);
ServiceProviderHook.forceLoad();
/*
* Reads off the property for the inference engine that tells us whether
* or not the justification index is being used. This is used to
* conditionally enable the logic to retract justifications when the
* corresponding statement is retracted.
*/
this.lexicon = Boolean.parseBoolean(getProperty(Options.LEXICON,
Options.DEFAULT_LEXICON));
DatabaseMode mode = null;
if (Boolean.parseBoolean(getProperty(Options.TRIPLES_MODE,
Options.DEFAULT_TRIPLES_MODE))) {
mode = DatabaseMode.TRIPLES;
}
// Note: these are separate if-blocks (not else-if) so that selecting
// more than one mode is detected and rejected below.
if (Boolean.parseBoolean(getProperty(Options.TRIPLES_MODE_WITH_PROVENANCE,
Options.DEFAULT_TRIPLES_MODE_WITH_PROVENANCE))) {
if (mode != null) {
throw new UnsupportedOperationException(
"please select only one of triples, provenance, or quads modes");
}
mode = DatabaseMode.PROVENANCE;
}
if (Boolean.parseBoolean(getProperty(Options.QUADS_MODE,
Options.DEFAULT_QUADS_MODE))) {
if (mode != null) {
throw new UnsupportedOperationException(
"please select only one of triples, provenance, or quads modes");
}
mode = DatabaseMode.QUADS;
}
if (lexicon) {
this.vocabularyClass = determineVocabularyClass();
} else {
this.vocabularyClass = NoVocabulary.class;
}
properties.setProperty(Options.VOCABULARY_CLASS, vocabularyClass.getName());
if (mode != null) {
switch (mode) {
case TRIPLES: {
this.quads = false;
this.statementIdentifiers = false;
this.axiomClass = determineAxiomClass();
properties.setProperty(Options.QUADS, "false");
properties.setProperty(Options.STATEMENT_IDENTIFIERS, "false");
break;
}
case PROVENANCE: {
this.quads = false;
this.statementIdentifiers = true;
this.axiomClass = determineAxiomClass();
properties.setProperty(Options.QUADS, "false");
properties.setProperty(Options.STATEMENT_IDENTIFIERS, "true");
break;
}
case QUADS: {
this.quads = true;
this.statementIdentifiers = false;
this.axiomClass = NoAxioms.class;
properties.setProperty(Options.QUADS, "true");
properties.setProperty(Options.STATEMENT_IDENTIFIERS, "false");
properties.setProperty(Options.AXIOMS_CLASS, NoAxioms.class.getName());
break;
}
default:
throw new AssertionError();
}
} else {
this.quads = Boolean.valueOf(getProperty(Options.QUADS,
Options.DEFAULT_QUADS));
this.statementIdentifiers = Boolean.parseBoolean(getProperty(
Options.STATEMENT_IDENTIFIERS,
Options.DEFAULT_STATEMENT_IDENTIFIERS));
if (lexicon) {
axiomClass = determineAxiomClass();
// vocabularyClass = determineVocabularyClass();
} else {
/*
* no axioms if no lexicon (the lexicon is required to write the
* axioms).
*/
axiomClass = NoAxioms.class;
// vocabularyClass = NoVocabulary.class;
}
}
this.justify = Boolean.parseBoolean(getProperty(Options.JUSTIFY,
Options.DEFAULT_JUSTIFY));
this.spoKeyArity = quads ? 4 : 3;
if (statementIdentifiers && quads) {
throw new UnsupportedOperationException(Options.QUADS
+ " does not support the provenance mode ("
+ Options.STATEMENT_IDENTIFIERS + ")");
}
// closureClass
{
final String className = getProperty(Options.CLOSURE_CLASS,
Options.DEFAULT_CLOSURE_CLASS);
final Class cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Bad option: "
+ Options.CLOSURE_CLASS, e);
}
if (!BaseClosure.class.isAssignableFrom(cls)) {
throw new RuntimeException(Options.CLOSURE_CLASS
+ ": Must extend: "
+ BaseClosure.class.getName());
}
closureClass = cls;
}
this.constrainXXXCShards = Boolean.valueOf(getProperty(
Options.CONSTRAIN_XXXC_SHARDS,
Options.DEFAULT_CONSTRAIN_XXXC_SHARDS));
this.bottomUpEvaluation = Boolean.valueOf(getProperty(
Options.BOTTOM_UP_EVALUATION,
Options.DEFAULT_BOTTOM_UP_EVALUATION));
{ // RDR History class
final String className = getProperty(Options.RDR_HISTORY_CLASS, null);
if (className != null && className.length() > 0) {
if (!statementIdentifiers) {
throw new RuntimeException("statement identifiers must be enabled for RDR history");
}
final Class cls;
try {
cls = Class.forName(className);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Bad option: "
+ Options.RDR_HISTORY_CLASS, e);
}
if (!RDRHistory.class.isAssignableFrom(cls)) {
throw new RuntimeException(Options.RDR_HISTORY_CLASS
+ ": Must extend: "
+ RDRHistory.class.getName());
}
rdrHistoryClass = cls;
} else {
rdrHistoryClass = null;
}
}
this.computeClosureForSids = Boolean.valueOf(getProperty(
Options.COMPUTE_CLOSURE_FOR_SIDS,
Options.DEFAULT_COMPUTE_CLOSURE_FOR_SIDS));
/*
* Setup namespace mapping for serialization utility methods.
*
* TODO Why not leverage the defined Vocabulary?
*/
addNamespace(RDF.NAMESPACE, "rdf");
addNamespace(RDFS.NAMESPACE, "rdfs");
addNamespace(OWL.NAMESPACE, "owl");
addNamespace(XMLSchema.NAMESPACE, "xsd");
}
/**
* Return <code>true</code> iff the store is safe for concurrent readers
* and writers. This property depends primarily on the concurrency
* control mechanisms (if any) that are used to prevent concurrent access to
* an unisolated index while a thread is writing on that index. Stores based
* on the {@link IBigdataFederation} automatically inherit the
* appropriate concurrency controls as would a store whose index access was
* intermediated by the executor service of an {@link IConcurrencyManager}.
* <p>
* Note: if {@link #isConcurrent()} returns <code>true</code> then the
* database will provide concurrency control for write tasks submitted
* against the same index. However, concurrent writers are always supported
* on distinct indices and concurrent readers on an index are always
* supported IF there is no concurrent writer on the index. If
* {@link #isConcurrent()} is <code>false</code> then you need to avoid
* submitting a write task concurrently with ANY other task for the same
* index (concurrent reads or concurrent writes will both cause problems
* with a write task in the absence of concurrency controls).
* <p>
* The main place where this is an issue is rule execution, where the
* entailments are being written back onto the database while reads are
* proceeding concurrently. The new rule execution layer finesses this by
* having a read view and a write view, so an {@link IProgram} will read from
* an historical state and write on the {@link ITx#UNISOLATED} indices. This
* also works for closure - each round of closure updates the read-behind
* point so that all writes from the last round become visible during the
* next round.
* <p>
* Note that closure must read against a stable state of the database and
* that concurrent writes on the database NOT related to the closure
* operation are disallowed. With these guidelines, you can force the
* application to use a single thread for all mutation operations on the
* {@link AbstractTripleStore} (readers from historical states may be
* concurrent) and concurrency problems will not arise.
*/
abstract public boolean isConcurrent();
/**
* <strong>DO NOT INVOKE FROM APPLICATION CODE</strong> - this method
* deletes the KB instance and destroys the backing database instance. It is
* used to help tear down unit tests.
*/
final public void __tearDownUnitTest() {
if(isOpen())
destroy();
getIndexManager().destroy();
}
public boolean isOpen() {
return open;
}
public void close() {
shutdown();
}
/**
* Default is a NOP - invoked by {@link #close()} and
* {@link #__tearDownUnitTest()}
*/
final protected void shutdown() {
open = false;
}
private boolean open = true;
/**
* True iff the backing store is stable (exists on disk somewhere and may be
* closed and re-opened).
* <p>
* Note: This is mainly used by the test suites.
*/
abstract public boolean isStable();
@Override
public AbstractTripleStore init() {
super.init();
return this;
}
@Override
public void create() {
if (INFO)
log.info(toString());
assertWritable();
final IResourceLock resourceLock = acquireExclusiveLock();
try {
final Properties tmp = PropertyUtil.flatCopy(getProperties());
// set property that will let the contained relations locate their container.
tmp.setProperty(RelationSchema.CONTAINER, getNamespace());
if (Boolean.valueOf(tmp.getProperty(Options.TEXT_INDEX,
Options.DEFAULT_TEXT_INDEX))) {
/*
* If the text index is enabled for a new kb instance, then disable
* the fieldId component of the full text index key since it is not
* used by the RDF database and will just waste space in the index.
*
* Note: Also see below where this is set on the global row store.
*/
tmp.setProperty(FullTextIndex.Options.FIELDS_ENABLED, "false");
}
/**
* We must not write the properties onto the global row store until
* they have been fully initialized.
*
* @see <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/617">
* Concurrent KB create fails with "No axioms defined?" </a>
*/
// super.create();
final String SPO_NAMESPACE = getNamespace() + "."
+ SPORelation.NAME_SPO_RELATION;
final String LEXICON_NAMESPACE = lexicon ? getNamespace() + "."
+ LexiconRelation.NAME_LEXICON_RELATION : null;
if (lexicon) {
/*
* Setup the vocabulary.
*/
{
assert vocabRef.get() == null;
try {
final Constructor<? extends BaseVocabulary> ctor = vocabularyClass
.getConstructor(new Class[] { String.class });
// save reference.
vocabRef.set(ctor
.newInstance(new Object[] { LEXICON_NAMESPACE }));
} catch (Exception ex) {
throw new RuntimeException(ex);
}
// initialize.
((BaseVocabulary) vocabRef.get()).init();
}
/*
* For performance reasons, we also store the geospatial configuration
* in the global row store, in case geospatial is enabled.
*/
{
assert geoSpatialConfigRef.get() == null;
final boolean geoSpatial = Boolean.parseBoolean(getProperty(
AbstractTripleStore.Options.GEO_SPATIAL,
AbstractTripleStore.Options.DEFAULT_GEO_SPATIAL));
if (geoSpatial) { // geospatial enabled
final Boolean geoSpatialIncludeBuiltinDatatypes = Boolean.parseBoolean(getProperty(
AbstractTripleStore.Options.GEO_SPATIAL_INCLUDE_BUILTIN_DATATYPES,
AbstractTripleStore.Options.DEFAULT_GEO_SPATIAL_INCLUDE_BUILTIN_DATATYPES));
final String geoSpatialDefaultDatatype = getProperty(
AbstractTripleStore.Options.GEO_SPATIAL_DEFAULT_DATATYPE,
AbstractTripleStore.Options.DEFAULT_GEO_SPATIAL_DEFAULT_DATATYPE);
// initialize the geospatial configuration (geospatial is enabled here).
/**
* We have configuration strings of the form
* - [AbstractTripleStore.Options.GEO_SPATIAL_DATATYPE_CONFIG].0 = ...
* - [AbstractTripleStore.Options.GEO_SPATIAL_DATATYPE_CONFIG].1 = ...
* - [AbstractTripleStore.Options.GEO_SPATIAL_DATATYPE_CONFIG].2 = ...
* ...
*
* We read this configuration up to the first index that is not defined.
* If no explicit configuration is provided, we fallback on our single
* latitude-longitude-time default.
*/
final List<String> geoSpatialDatatypeConfigs = new LinkedList<String>();
boolean finished = false;
for (int i=0; !finished; i++) {
final String curId = AbstractTripleStore.Options.GEO_SPATIAL_DATATYPE_CONFIG + "." + i;
final String curVal = getProperty(curId, null /* fallback */);
if (curVal!=null) {
if (INFO)
log.info("Adding geospatial datatype #" + i);
geoSpatialDatatypeConfigs.add(curVal);
} else {
finished = true; // we're done
}
}
// also register built-in datatypes, if enabled
if (geoSpatialIncludeBuiltinDatatypes) {
if (INFO)
log.info("Adding geospatial built-in datatype v1/LAT+LON");
geoSpatialDatatypeConfigs.add(
AbstractTripleStore.Options.GEO_SPATIAL_LITERAL_V1_LAT_LON_CONFIG);
if (INFO)
log.info("Adding geospatial built-in datatype v1/LAT+LON+TIME");
geoSpatialDatatypeConfigs.add(
AbstractTripleStore.Options.GEO_SPATIAL_LITERAL_V1_LAT_LON_TIME_CONFIG);
}
final GeoSpatialConfig geoSpatialConfig =
new GeoSpatialConfig(geoSpatialDatatypeConfigs, geoSpatialDefaultDatatype);
geoSpatialConfigRef.compareAndSet(null, geoSpatialConfig);
}
}
lexiconRelation = new LexiconRelation(this/* container */,
getIndexManager(), LEXICON_NAMESPACE, getTimestamp(),
new Properties(tmp)// Note: Must wrap properties!
);
lexiconRelation.create();//assignedSplits);
valueFactory = lexiconRelation.getValueFactory();
}
spoRelationRef.set(new SPORelation(this/* container */,
getIndexManager(), SPO_NAMESPACE, getTimestamp(),
new Properties(tmp)// Note: must wrap properties!
));
spoRelationRef.get().create();
/*
* The axioms require both the lexicon and the SPORelation to
* pre-exist.
*/
if(lexicon) {
/*
* Setup the axiom model.
*/
{
assert axioms == null;
try {
final Constructor<? extends BaseAxioms> ctor = axiomClass
.getConstructor(new Class[] { String.class });
// save reference.
axioms = ctor.newInstance(new Object[] { LEXICON_NAMESPACE });
} catch (Exception ex) {
throw new RuntimeException(ex);
}
// initialize (writes on the lexicon and statement indices).
((BaseAxioms) axioms).init(this);
}
}
/**
* Write on the global row store. We atomically set all
* properties, including the axioms and the vocabulary objects.
*
* @see <a
* href="https://sourceforge.net/apps/trac/bigdata/ticket/617">
* Concurrent KB create fails with "No axioms defined?"
* </a>
*/
{
/*
* Convert the Properties to a Map.
*/
final Map<String, Object> map = GlobalRowStoreUtil.convert(tmp);
// primary key.
map.put(RelationSchema.NAMESPACE, getNamespace());
if (axioms != null) {
// axioms.
map.put(TripleStoreSchema.AXIOMS, axioms);
// setProperty(TripleStoreSchema.AXIOMS,axioms);
}
if (vocabRef.get() != null) {
// vocabulary.
map.put(TripleStoreSchema.VOCABULARY, vocabRef.get());
// setProperty(TripleStoreSchema.VOCABULARY,vocab);
}
if (geoSpatialConfigRef.get() != null) {
// geospatial config.
map.put(TripleStoreSchema.GEO_SPATIAL_CONFIG, geoSpatialConfigRef.get());
// setProperty(TripleStoreSchema.GEO_SPATIAL_CONFIG,geoSpatialConfig)
}
/*
* Note: This will now be false automatically since the [map] is
* based on the Properties object [tmp] and we have already set
* this property to [false] in tmp.
*/
// if (lexiconRelation.isTextIndex()) {
// /*
// * Per the logic and commentary at the top of create(),
// * disable this option on the global row store.
// */
// map.put(FullTextIndex.Options.FIELDS_ENABLED, "false");
// }
// Write the map on the row store.
final Map<String, Object> afterMap = getIndexManager()
.getGlobalRowStore()
.write(RelationSchema.INSTANCE, map);
if(log.isDebugEnabled()) {
log.debug("Properties after write: " + afterMap);
}
/*
* Note: A commit is required in order for a read-committed view
* to have access to the registered indices.
*
* @todo have the caller do this? It does not really belong here
* since you can not make a large operation atomic if you do a
* commit here.
*/
commit();
/*
* Add this instance to the locator cache, but NOT before we
* have committed the changes to the global row store.
*
* Note: Normally, the instances are created by the locator
* cache itself. In general the only time the application
* creates an instance directly is when it is going to attempt
* to create the relation. This takes advantage of that pattern
* to notify the locator that it should cache this instance.
*/
((DefaultResourceLocator) getIndexManager()
.getResourceLocator()).putInstance(this);
}
} catch (Throwable t) {
if (!InnerCause.isInnerCause(t, InterruptedException.class)) {
log.error(t, t);
}
throw new RuntimeException(t);
} finally {
unlock(resourceLock);
}
}
@Override
final public void destroy() {
assertWritable();
// Conditionally destroyed below. See #948.
final BigdataValueFactory tmp = valueFactory;
// FIXME unit tests fail here during tear down if the federation has
// already been disconnected/destroyed since they can not reach the
// lock service. The code should handle this better.
final IResourceLock resourceLock = acquireExclusiveLock();
try {
if (lexicon) {
{
final LexiconRelation lex = getLexiconRelation();
if (lex != null
&& lex.getIndexManager() == getIndexManager()) {
/*
* Destroy the lexicon, but only if it is backed by the
* same index manager. (This prevents a lexicon on the
* database from being destroyed when a statements-only
* instance on a temporary store is destroyed.)
*/
lex.destroy();
}
}
lexiconRelation = null;
valueFactory = null;
axioms = null;
vocabRef.set(null);
}
{
final SPORelation spo = getSPORelation();
if (spo != null)
spo.destroy();
}
spoRelationRef.set(null/* clearRef */);
super.destroy();
/**
* Discard the value factory for the lexicon's namespace.
*
* Note: The LexiconRelation already does this. However, the
* AtomicDelete operation on the GRS winds up de-serializing the
* Vocabulary class as part of the delete of the declaration of
* the KB instance. This causes the Vocabulary object to be re-created
* within the BigdataValueFactoryImpl cache. So we need to wipe it out
* again here.
*
* @see #948
*/
if(lexicon && tmp != null)
tmp.remove();
} finally {
unlock(resourceLock);
}
}
/**
* The configured axioms. This is stored in the global row store and set
* automatically if it is found in the {@link Properties}. Otherwise it is
* set by {@link #create()}.
*
* @throws IllegalStateException
* if there is no lexicon.
*
* @see Options#LEXICON
* @see com.bigdata.rdf.store.AbstractTripleStore.Options#AXIOMS_CLASS
*/
final public Axioms getAxioms() {
if (!lexicon)
throw new IllegalStateException();
if (axioms == null) {
synchronized (this) {
if (axioms == null) {
/*
* The axioms are stored in the properties for the triple
* store instance in the global row store. However, we
* pre-materialize those properties so we can directly
* retrieve the axioms from the materialized properties.
*/
axioms = (Axioms) getBareProperties().get(
TripleStoreSchema.AXIOMS);
// axioms = (Axioms) getIndexManager().getGlobalRowStore()
// .get(RelationSchema.INSTANCE, getNamespace(),
// TripleStoreSchema.AXIOMS);
if (axioms == null)
throw new RuntimeException("No axioms defined? : "
+ this);
if (INFO)
log.info("read axioms: " + axioms.getClass().getName()
+ ", size=" + axioms.size());
}
}
}
return axioms;
}
private volatile Axioms axioms;
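/*
 * Usage sketch (illustrative only): the configured Axioms object identifies
 * the entailment regime of the KB instance. The isNone()/isRdfSchema()/
 * isOwlSameAs() accessors are assumed from the Axioms API.
 *
 * final Axioms axioms = getAxioms();
 * if (axioms.isNone()) {
 *     // No axioms (e.g., NoAxioms - no entailments are maintained).
 * } else if (axioms.isOwlSameAs()) {
 *     // OWL axioms (includes the RDFS axioms).
 * } else if (axioms.isRdfSchema()) {
 *     // RDFS axioms only.
 * }
 */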
/**
* Return the configured {@link Vocabulary}. This consists of
* {@link BigdataValue}s of interest that have been pre-evaluated against
* the lexicon and are associated with their correct term identifiers.
*
* @return The predefined vocabulary.
*
* @throws IllegalStateException
* if there is no lexicon.
*
* @see Options#LEXICON
* @see Options#VOCABULARY_CLASS
*/
final public Vocabulary getVocabulary() {
if (!lexicon)
throw new IllegalStateException();
Vocabulary vocab = vocabRef.get();
if (vocab == null) {
synchronized (vocabRef) {
vocab = vocabRef.get();
if (vocab == null) {
/*
* The vocabulary is stored in properties for the triple
* store instance in the global row store. However, we
* pre-materialize those properties so we can directly
* retrieve the vocabulary from the materialized properties.
*/
vocab = (Vocabulary) getBareProperties().get(
TripleStoreSchema.VOCABULARY);
// vocab = (Vocabulary) getIndexManager().getGlobalRowStore().get(
// RelationSchema.INSTANCE, getNamespace(),
// TripleStoreSchema.VOCABULARY);
if (vocab == null)
throw new RuntimeException("No vocabulary defined? : "
+ this);
if (INFO)
log.info("read vocabulary: "
+ vocab.getClass().getName() + ", size="
+ vocab.size());
if (!this.vocabRef.compareAndSet(null/* expect */, vocab)) {
throw new AssertionError();
}
}
}
}
return vocab;
}
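/*
 * Usage sketch (illustrative only): the Vocabulary pre-resolves well-known
 * Values against the lexicon, so frequently used terms can be converted to
 * IVs without touching the indices. Vocabulary#get(Value) is assumed here.
 *
 * final Vocabulary vocab = getVocabulary();
 * final IV rdfType = vocab.get(RDF.TYPE); // no index read required.
 */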
/**
* Return the configured {@link GeoSpatialConfig}. The GeoSpatialConfig
* defines the structure and storage details of registered geospatial
* datatypes.
*
* @return the geospatial configuration
*
* @throws IllegalStateException
* if there is no lexicon.
*
* @see GeoSpatialConfigOptions.Options#GEO_SPATIAL
* @see GeoSpatialConfigOptions.Options#GEO_SPATIAL_DATATYPE_CONFIG
* @see GeoSpatialConfigOptions.Options#GEO_SPATIAL_DEFAULT_DATATYPE
* @see GeoSpatialConfigOptions.Options#GEO_SPATIAL_INCLUDE_BUILTIN_DATATYPES
*/
final public GeoSpatialConfig getGeoSpatialConfig() {
if (!lexicon)
throw new IllegalStateException();
GeoSpatialConfig geoSpatialConfig = geoSpatialConfigRef.get();
if (geoSpatialConfig == null) {
synchronized (geoSpatialConfigRef) {
geoSpatialConfig = geoSpatialConfigRef.get();
if (geoSpatialConfig == null) {
/*
* The geospatial configuration is stored in the properties for
* the triple store instance in the global row store. However,
* we pre-materialize those properties so we can directly
* retrieve the configuration from the materialized properties.
*/
geoSpatialConfig = (GeoSpatialConfig) getBareProperties().get(
TripleStoreSchema.GEO_SPATIAL_CONFIG);
if (geoSpatialConfig == null)
throw new RuntimeException("No geospatial config defined? : " + this);
if (!this.geoSpatialConfigRef.compareAndSet(null/* expect */, geoSpatialConfig)) {
throw new AssertionError();
}
}
}
}
return geoSpatialConfig;
}
/**
* Note: This is used both as a monitor object and as an atomic reference.
*
* @see #getVocabulary()
*/
private final AtomicReference<Vocabulary> vocabRef = new AtomicReference<Vocabulary>();
/**
* The geospatial configuration -- if null, geospatial is disabled.
*
* @see #getGeoSpatialConfig()
*/
private final AtomicReference<GeoSpatialConfig> geoSpatialConfigRef =
new AtomicReference<GeoSpatialConfig>();
/**
* The {@link SPORelation} (triples and their access paths).
*/
final public SPORelation getSPORelation() {
if (spoRelationRef.get() == null) {
/*
* Note: double-checked locking pattern (mostly non-blocking). Only
* synchronized if not yet resolved. The AtomicReference is reused
* as the monitor to serialize the resolution of the SPORelation in
* order to have that operation not contend with any other part of
* the API.
*/
synchronized (this) {
if (spoRelationRef.get() == null) {
spoRelationRef.set((SPORelation) getIndexManager()
.getResourceLocator().locate(
getNamespace() + "."
+ SPORelation.NAME_SPO_RELATION,
getTimestamp()));
}
}
}
return spoRelationRef.get();
}
private final AtomicReference<SPORelation> spoRelationRef = new AtomicReference<SPORelation>();
/**
* The {@link LexiconRelation} handles all things related to the indices
* mapping RDF {@link Value}s onto internal 64-bit term identifiers.
*/
final synchronized public LexiconRelation getLexiconRelation() {
if (lexiconRelation == null && lexicon) {
long t = getTimestamp();
if (TimestampUtility.isReadWriteTx(t)) {
/*
* A read-write tx must use the unisolated view of the lexicon.
*/
t = ITx.UNISOLATED;
}
lexiconRelation = (LexiconRelation) getIndexManager()
.getResourceLocator().locate(
getNamespace() + "."
+ LexiconRelation.NAME_LEXICON_RELATION,
t);
}
return lexiconRelation;
}
private LexiconRelation lexiconRelation;
// Note: Use LexiconRelation#getSearchEngine().
// /**
// * Full text information retrieval for RDF essentially treats the RDF
// * Literals as "documents." The literals are broken down into "token"s to
// * obtain a "token frequency distribution" for that literal/document. The
// * full text index contains the indexed token data.
// *
// * @return The object managing the text search indices or <code>null</code>
// * iff text search is not enabled.
// *
// * @see Options#TEXT_INDEX
// * @see Options#TEXT_INDEX_DATATYPE_LITERALS
// */
// final public ITextIndexer getSearchEngine() {
//
// if (!lexicon)
// return null;
//
// return getLexiconRelation().getSearchEngine();
// }
@Override
final public long getNamedGraphCount() {
if(!isQuads())
throw new UnsupportedOperationException();
final Iterator<?> itr = getSPORelation().distinctTermScan(
SPOKeyOrder.CSPO);
long n = 0;
while(itr.hasNext()) {
itr.next();
n++;
}
return n;
}
@Override
final public long getStatementCount() {
return getStatementCount(null/* c */, false/* exact */);
}
@Override
final public long getStatementCount(final boolean exact) {
return getStatementCount(null/*c*/, exact);
}
@Override
final public long getStatementCount(final Resource c) {
return getStatementCount(c, false/* exact */);
}
/**
* {@inheritDoc}
* <p>
* Core implementation.
*/
@Override
final public long getStatementCount(final Resource c, final boolean exact) {
// Note: the same access path handles both the exact and the fast count.
return getAccessPath(null/* s */, null/* p */, null/* o */, c)
.rangeCount(exact);
}
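/*
 * Usage sketch (illustrative only): exact=false returns a fast estimate
 * (an upper bound which may include deleted tuples), while exact=true
 * performs a key-range scan and can be expensive on a large store.
 *
 * final long fast = getStatementCount(null, false); // fast upper bound.
 * final long exactCount = getStatementCount(null, true); // range scan.
 */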
/**
* The #of explicit statements in the database (exact count based on
* key-range scan).
* <p>
* Note: In order to get the #of explicit statements in the repository we
* have to actually do a range scan and figure out for each statement
* whether or not it is explicit.
*
* @param c
* The context (optional). When not given, the count is reported
* across all named graphs.
*/
public long getExplicitStatementCount(final Resource c) {
return getAccessPath(null/* s */, null/* p */, null/* o */, c,
ExplicitSPOFilter.INSTANCE, null).rangeCount(true/* exact */);
}
/**
* Clears hard references to any indices, relations, etc. MUST be extended
* to discard write sets for impls with live indices.
*
* @throws IllegalStateException
* if the view is read only.
*/
@SuppressWarnings("unchecked")
synchronized public void abort() {
if (isReadOnly())
throw new IllegalStateException();
/*
* Relations have hard references to indices so they must be discarded.
* Not only do we need to release our hard references to those
* relations, but we must also inform the relation locator that they should
* be discarded from its cache as well. Otherwise the same objects will
* be returned from the cache and buffered writes on the indices for
* those relations (if they are local index objects) will still be
* visible.
*/
@SuppressWarnings("rawtypes")
final IResourceLocator locator = getIndexManager()
.getResourceLocator();
if (lexiconRelation != null) {
locator.discard(lexiconRelation, false/*destroyed*/);
lexiconRelation = null;
}
final SPORelation tmp = spoRelationRef.getAndSet(null/* clearRef */);
if (tmp != null) {
locator.discard(tmp, false/* destroyed */);
}
}
/**
* {@inheritDoc}
* <p>
* Note: This method MUST be extended to perform commit for implementations
* with live indices.
*
* @throws IllegalStateException
* if the view is read only.
*/
@Override
public long commit() {
if (isReadOnly())
throw new IllegalStateException();
return 0L;
}
final public long getJustificationCount() {
if (justify) {
return getSPORelation().getJustificationIndex().rangeCount();
}
return 0L;
}
@Override
final public long getTermCount() {
long rangeCount = 0L;
rangeCount += getLexiconRelation().getTerm2IdIndex().rangeCount();
IIndex blobsIndex = null;
try {
blobsIndex = getLexiconRelation().getBlobsIndex();
rangeCount += blobsIndex.rangeCount();
} catch(IllegalStateException ex) {
// No blobs index. fall through
}
// rangeCount += getLexiconRelation().getBlobsIndex().rangeCount();
return rangeCount;
}
@Override
final public long getURICount() {
long rangeCount = 0L;
{
final byte[] fromKey = new byte[] {
// KeyBuilder.encodeByte(
ITermIndexCodes.TERM_CODE_URI
// )
};
final byte[] toKey = new byte[] {
// KeyBuilder.encodeByte(
(byte) (ITermIndexCodes.TERM_CODE_URI + 1)
// )
};
rangeCount += getLexiconRelation().getTerm2IdIndex().rangeCount(
fromKey, toKey);
}
{
final byte[] fromKey = new byte[] {//
KeyBuilder.encodeByte(BlobIV.toFlags(VTE.URI)) //
};
final byte[] toKey = SuccessorUtil.successor(fromKey.clone());
IIndex blobsIndex = null;
try {
blobsIndex = getLexiconRelation().getBlobsIndex();
rangeCount += blobsIndex.rangeCount(fromKey, toKey);
} catch(IllegalStateException ex) {
// No blobs index. fall through
}
}
return rangeCount;
}
@Override
final public long getLiteralCount() {
long rangeCount = 0L;
{
// Note: the first of the kinds of literals (plain).
final byte[] fromKey = new byte[] {
// KeyBuilder.encodeByte(
ITermIndexCodes.TERM_CODE_LIT
// )
};
// Note: spans the last of the kinds of literals.
final byte[] toKey = new byte[] {
// KeyBuilder.encodeByte(
(byte) (ITermIndexCodes.TERM_CODE_DTL + 1)
// ))
};
rangeCount += getLexiconRelation().getTerm2IdIndex().rangeCount(
fromKey, toKey);
}
{
final byte[] fromKey = new byte[] {//
KeyBuilder.encodeByte(BlobIV.toFlags(VTE.LITERAL)) //
};
final byte[] toKey = SuccessorUtil.successor(fromKey.clone());
IIndex blobsIndex = null;
try {
blobsIndex = getLexiconRelation().getBlobsIndex();
rangeCount += blobsIndex.rangeCount(fromKey, toKey);
} catch(IllegalStateException ex) {
// No blobs index. fall through
}
}
return rangeCount;
}
/**
* {@inheritDoc}
* <p>
* Note: Will always return zero (0) if {@value Options#STORE_BLANK_NODES}
* is <code>false</code>.
*/
@Override
final public long getBNodeCount() {
if (!getLexiconRelation().isStoreBlankNodes())
return 0L;
long rangeCount = 0L;
{
final byte[] fromKey = new byte[] {
// KeyBuilder.encodeByte(
ITermIndexCodes.TERM_CODE_BND
// )
};
final byte[] toKey = new byte[] {
// KeyBuilder.encodeByte(
(byte) (ITermIndexCodes.TERM_CODE_BND + 1)
// )
};
rangeCount += getLexiconRelation().getTerm2IdIndex().rangeCount(
fromKey, toKey);
}
{
final byte[] fromKey = new byte[] {//
KeyBuilder.encodeByte(BlobIV.toFlags(VTE.BNODE)) //
};
final byte[] toKey = SuccessorUtil.successor(fromKey.clone());
IIndex blobsIndex = null;
try {
blobsIndex = getLexiconRelation().getBlobsIndex();
rangeCount += blobsIndex.rangeCount(fromKey, toKey);
} catch(IllegalStateException ex) {
// No blobs index. fall through
}
}
return rangeCount;
}
/*
* term index
*/
public IV addTerm(final Value value) {
final BigdataValue[] terms = new BigdataValue[] {//
getValueFactory().asValue(value) //
};
getLexiconRelation().addTerms(terms, 1, false/* readOnly */);
return terms[0].getIV();
}
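/*
 * Usage sketch (illustrative only; the URI is hypothetical): register a
 * Value with the lexicon and obtain its internal identifier (IV).
 *
 * final BigdataValue term = getValueFactory()
 *         .createURI("http://www.example.com/x");
 * final IV iv = addTerm(term);
 */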
/**
* This method is extremely inefficient for scale-out as it does one RMI per
* request!
*
* @return the corresponding {@link BigdataValue} if found and
* <code>null</code> if not found or if the {@link LexiconRelation}
* is not available (e.g., a {@link TempTripleStore}).
*/
final public BigdataValue getTerm(final IV iv) {
final LexiconRelation r = getLexiconRelation();
if (r == null) {
// Note: No LexiconRelation.
return null;
}
return r.getTerm(iv);
}
/**
* <strong>WARNING DO NOT USE OUTSIDE OF THE UNIT TESTS: </strong> This
* method is extremely inefficient for scale-out as it does one RMI per
* request!
*
* @deprecated Not even the unit tests should be doing this.
*/
final public IV getIV(final Value value) {
return getLexiconRelation().getIV(value);
}
@Override
public void addTerms(final BigdataValue[] terms) {
getLexiconRelation().addTerms(terms, terms.length, false/*readOnly*/);
}
/*
* singletons.
*/
private volatile InferenceEngine inferenceEngine = null;
@Override
final public InferenceEngine getInferenceEngine() {
synchronized (this) {
if(inferenceEngine == null) {
inferenceEngine = new InferenceEngine(this);
}
}
return inferenceEngine;
}
private WeakReference<DataLoader> dataLoaderRef = null;
@Override
final public DataLoader getDataLoader() {
synchronized (this) {
DataLoader dataLoader = dataLoaderRef == null ? null
: dataLoaderRef.get();
if (dataLoader == null) {
dataLoader = new DataLoader(this);
dataLoaderRef = new WeakReference<DataLoader>(dataLoader);
}
return dataLoader;
}
}
/*
* Sesame integration.
*/
@Override
final public void addStatement(final Resource s, final URI p, final Value o) {
addStatement(s, p, o, null);
}
@Override
final public void addStatement(final Resource s, final URI p,
final Value o, final Resource c) {
if (quads && c == null) {
/*
* The context position MUST be bound for a quad store.
*/
throw new UnsupportedOperationException();
}
/*
* Note: This uses the batch API.
*/
final IStatementBuffer<Statement> buffer = new StatementBuffer<Statement>(null/* focusStore */, this,
1/* capacity */, 0/* queueCapacity */);
buffer.add(s, p, o, c);
buffer.flush();
}
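/*
 * Usage sketch (illustrative only; "statements" is a hypothetical
 * collection): when adding many statements, allocate one StatementBuffer
 * with a larger capacity and flush once, rather than invoking
 * addStatement() per statement (which allocates a capacity-one buffer on
 * every call).
 *
 * final IStatementBuffer<Statement> buf = new StatementBuffer<Statement>(
 *         null, this, 10000, 10); // focusStore=null, capacity, queueCapacity.
 * for (Statement stmt : statements) {
 *     buf.add(stmt.getSubject(), stmt.getPredicate(), stmt.getObject(),
 *             stmt.getContext());
 * }
 * buf.flush();
 */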
final public ISPO getStatement(final IV s, final IV p, final IV o) {
return getStatement(s, p, o, null/* c */);
}
final public ISPO getStatement(final IV s, final IV p, final IV o,
final IV c) {
if (s == null || p == null || o == null
|| (c == null && spoKeyArity == 4)) {
throw new IllegalArgumentException();
}
final SPO spo = spoKeyArity == 4 ?
new SPO(s, p, o, c) : new SPO(s, p, o);
final IIndex ndx = getSPORelation().getPrimaryIndex();
final SPOTupleSerializer tupleSer = (SPOTupleSerializer) ndx
.getIndexMetadata().getTupleSerializer();
final byte[] key = tupleSer.serializeKey(spo);
final byte[] val = ndx.lookup(key);
if (val == null) {
// The statement is not in the database.
return null;
}
/*
* Decode the value, set it on the SPO, and return the SPO.
*
* Note: If SIDs are enabled, then this will set the statement
* identifier on the SID if it is an explicit statement.
*/
return tupleSer.decodeValue(spo, val);
}
/**
* Return true if the triple pattern matches any statement(s) in the store
* (non-batch API).
* <p>
* Note: This method does not verify whether or not the statement is
* explicit.
*
* @param s
* @param p
* @param o
*
* @deprecated by {@link #hasStatement(IV, IV, IV, IV)}
*/
final public boolean hasStatement(final IV s, final IV p, final IV o) {
return hasStatement(s, p, o, null/* c */);
}
/**
* Return true if the statement pattern matches any statement(s) in the store
* (non-batch API).
* <p>
* Note: This method does not verify whether or not the statement is
* explicit.
*
* @param s
* @param p
* @param o
* @param c
*/
final public boolean hasStatement(final IV s, final IV p, final IV o,
final IV c) {
if (s != null && p != null && o != null && (!quads || c != null)) {
/*
* Point test.
*/
final IIndex ndx = getSPORelation().getPrimaryIndex();
final SPO spo = new SPO(s, p, o, c);
final byte[] key = ndx.getIndexMetadata().getTupleSerializer()
.serializeKey(spo);
final boolean found = ndx.contains(key);
if(DEBUG) {
log.debug(spo + " : found=" + found + ", key="
+ BytesUtil.toString(key));
}
return found;
}
/*
* This uses a range scan over the statement pattern to see if there are
* any matches.
*/
return !getSPORelation().getAccessPath(s, p, o, c).isEmpty();
}
/**
* This method is extremely inefficient for scale-out as it does one RMI per
* request!
*/
@Override
final public boolean hasStatement(final Resource s, final URI p,
final Value o) {
return hasStatement(s, p, o, null/* c */);
}
/**
* {@inheritDoc}
* <p>
* This method is extremely inefficient for scale-out as it does multiple
* RMIs per request (one for each Value and one or more for the statement
* indices)!
*/
@Override
final public boolean hasStatement(Resource s, URI p, Value o, Resource c) {
/*
* convert other Value object types to our object types.
*/
final BigdataValueFactory valueFactory = getValueFactory();
s = (Resource) valueFactory.asValue(s);
p = (URI) valueFactory.asValue(p);
o = valueFactory.asValue(o);
c = valueFactory.asValue(c);
/*
* Convert our object types to internal identifiers.
*
* Note: If a value was specified and it is not in the terms index then
* the statement can not exist in the KB.
*/
final IV _s = getIV(s);
if (_s == null && s != null) {
return false;
}
final IV _p = getIV(p);
if (_p == null && p != null) {
return false;
}
final IV _o = getIV(o);
if (_o == null && o != null) {
return false;
}
final IV _c = getIV(c);
if (_c == null && c != null) {
return false;
}
final boolean found = hasStatement(_s, _p, _o, _c);
// if(log.isDebugEnabled()) {
//
// log.debug("<" + s + "," + p + "," + o + "> : found=" + found);
//
// }
return found;
}
@Override
final public long removeStatements(final Resource s, final URI p,
final Value o) {
return removeStatements(s, p, o, null/* c */);
}
@Override
final public long removeStatements(final Resource s, final URI p,
final Value o, final Resource c) {
return getAccessPath(s, p, o, c).removeAll();
}
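/*
 * Usage sketch (illustrative only): unbound positions act as wildcards, so
 * the call below would remove every rdf:type statement across all named
 * graphs.
 *
 * final long nremoved = removeStatements(null, RDF.TYPE, null, null);
 */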
final public BigdataStatement getStatement(final Statement s) {
return getStatement(s.getSubject(), s.getPredicate(),
s.getObject(), s.getContext());
}
@Override
final public BigdataStatement getStatement(final Resource s, final URI p,
final Value o) {
return getStatement(s, p, o, null/* c */);
}
@Override
final public BigdataStatement getStatement(final Resource s, final URI p,
final Value o, final Resource c) {
if (s == null || p == null || o == null || (quads && c == null)) {
throw new IllegalArgumentException();
}
final BigdataStatementIterator itr = getStatements(s, p, o, c);
try {
if (!itr.hasNext()) {
return null;
}
return itr.next();
} finally {
itr.close();
}
}
@Override
final public BigdataStatementIterator getStatements(final Resource s,
final URI p, final Value o) {
return getStatements(s, p, o, null/* c */);
}
@Override
final public BigdataStatementIterator getStatements(final Resource s,
final URI p, final Value o, final Resource c) {
return asStatementIterator(getAccessPath(s, p, o, c).iterator());
}
/**
* Efficient batched, streaming resolution of triple patterns to statements
* spanned by those triple patterns that are present in the data.
* <p>
* Note: If the input contains triple patterns that have a high cardinality
* in the data, then a large number of statements may be returned.
*
* @param triplePatterns
* A collection of triple patterns or fully bound statements.
*
* @return An iterator from which the materialized statements spanned by
* those triple patterns may be read.
*
* @see <a href="http://trac.blazegraph.com/ticket/866" > Efficient batch
* remove of a collection of triple patterns </a>
*/
public BigdataStatementIterator getStatements(
final IChunkedOrderedIterator<BigdataTriplePattern> triplePatterns) {
return asStatementIterator(new ChunkedWrappedIterator<ISPO>(
new BigdataTriplePatternMaterializer(this, triplePatterns)
.start(getExecutorService())));
}
@Override
final public BigdataValue asValue(final Value value) {
return getValueFactory().asValue(value);
}
@Override
public BigdataStatement asStatement(final ISPO spo) {
/*
* Use batch API to resolve the term identifiers.
*/
final List<IV<?,?>> ivs = new ArrayList<IV<?,?>>(4);
ivs.add(spo.s());
ivs.add(spo.p());
ivs.add(spo.o());
final IV<?,?> c = spo.c();
if (c != null) {
ivs.add(c);
}
final Map<IV<?,?>, BigdataValue> terms = getLexiconRelation()
.getTerms(ivs);
/*
* Expose as a Sesame compatible Statement object.
*/
return getValueFactory().createStatement(//
(BigdataResource) terms.get(spo.s()),//
(BigdataURI) terms.get(spo.p()),//
(BigdataValue) terms.get(spo.o()),//
(BigdataResource) (c != null ? terms.get(c) : null),//
spo.getStatementType(),//
spo.getUserFlag());
}
@Override
public BigdataStatementIterator asStatementIterator(
final IChunkedOrderedIterator<ISPO> src) {
return new BigdataStatementIteratorImpl(this, src)
.start(getExecutorService());
}
@Override
public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p,
final Value o) {
return getAccessPath(s, p, o, null/*c*/, null/* filter */, null/* range */);
}
public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p,
final Value o, final IElementFilter<ISPO> filter) {
return getAccessPath(s, p, o, null/* c */, filter, null/* range */);
}
@Override
final public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p,
final Value o, Resource c) {
return getAccessPath(s, p, o, c, null/* filter */, null/* range */);
}
final public IAccessPath<ISPO> getAccessPath(final Resource s, final URI p,
final Value o, final Resource c, final IElementFilter<ISPO> filter,
final RangeBOp range) {
/*
* Convert other Value object types to our object types.
*
* Note: the value factory is not requested unless we need to translate
* some value. This hack allows temporary stores without a lexicon to
* use the same entry points as those with one. Without this methods
* such as getStatementCount(c,exact) would throw exceptions when the
* lexicon was not associated with the store.
*/
final BigdataValueFactory valueFactory = (s != null || p != null || o != null
|| c != null) ? getValueFactory() : null;
final BigdataResource _s = valueFactory == null ? null : valueFactory
.asValue(s);
final BigdataURI _p = valueFactory == null ? null : valueFactory
.asValue(p);
final BigdataValue _o = valueFactory == null ? null : valueFactory
.asValue(o);
// Note: _c is null unless quads.
final BigdataValue _c = quads ? valueFactory == null ? null
: valueFactory.asValue(c) : null;
/*
* Batch resolve all non-null values to get their term identifiers.
*/
int nnonNull = 0;
final BigdataValue[] values = new BigdataValue[spoKeyArity];
{
if (s != null)
values[nnonNull++] = _s;
if (p != null)
values[nnonNull++] = _p;
if (o != null)
values[nnonNull++] = _o;
if (c != null && quads)
values[nnonNull++] = _c;
if (nnonNull > 0)
getLexiconRelation()
.addTerms(values, nnonNull, true/* readOnly */);
}
/*
* If any value was given but is not known to the lexicon then use an
* empty access path since no statements can exist for the given
* statement pattern.
*/
if (s != null && _s.getIV() == null)
return new EmptyAccessPath<ISPO>();
if (p != null && _p.getIV() == null)
return new EmptyAccessPath<ISPO>();
if (o != null && _o.getIV() == null)
return new EmptyAccessPath<ISPO>();
if (quads && c != null && _c.getIV() == null)
return new EmptyAccessPath<ISPO>();
// /*
// * Convert our object types to internal identifiers.
// *
// * Note: If a value was specified and it is not in the terms index then
// * the statement can not exist in the KB.
// */
// final long _s = getTermId(s);
//
// if (_s == NULL && s != null)
// return new EmptyAccessPath<ISPO>();
//
// final long _p = getTermId(p);
//
// if (_p == NULL && p != null)
// return new EmptyAccessPath<ISPO>();
//
// final long _o = getTermId(o);
//
// if (_o == NULL && o != null)
// return new EmptyAccessPath<ISPO>();
/*
* Return the access path.
*/
return getSPORelation().getAccessPath(//
s == null ? null : _s.getIV(), //
p == null ? null : _p.getIV(),//
o == null ? null : _o.getIV(),//
(c == null || !quads) ? null : _c.getIV(),//
filter, range//
);
}
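/*
 * Usage sketch (illustrative only; "p" is a hypothetical predicate URI):
 * an access path is a statement-pattern view over the appropriate index.
 *
 * final IAccessPath<ISPO> ap = getAccessPath(null, p, null, null);
 * final long estimate = ap.rangeCount(false); // fast range count.
 * final IChunkedOrderedIterator<ISPO> itr = ap.iterator();
 * try {
 *     while (itr.hasNext()) {
 *         final ISPO spo = itr.next();
 *         // ... process the statement ...
 *     }
 * } finally {
 *     itr.close();
 * }
 */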
public IPredicate<ISPO> getPredicate(final Resource s, final URI p,
final Value o) {
return getPredicate(s, p, o, null/*c*/, null/* filter */, null/* range */);
}
public IPredicate<ISPO> getPredicate(final Resource s, final URI p,
final Value o, final Resource c) {
return getPredicate(s, p, o, c, null/* filter */, null/* range */);
}
/**
* Convert a Sesame Value-based triple pattern into a bigdata Predicate.
* Will return null if any of the Sesame Values are not present in the
* database.
*/
final public IPredicate<ISPO> getPredicate(final Resource s, final URI p,
final Value o, final Resource c, final IElementFilter<ISPO> filter,
final RangeBOp range) {
/*
* Convert other Value object types to our object types.
*
* Note: the value factory is not requested unless we need to translate
* some value. This hack allows temporary stores without a lexicon to
* use the same entry points as those with one. Without this methods
* such as getStatementCount(c,exact) would throw exceptions when the
* lexicon was not associated with the store.
*/
final BigdataValueFactory valueFactory = (s != null || p != null || o != null
|| c != null) ? getValueFactory() : null;
final BigdataResource _s = valueFactory == null ? null : valueFactory
.asValue(s);
final BigdataURI _p = valueFactory == null ? null : valueFactory
.asValue(p);
final BigdataValue _o = valueFactory == null ? null : valueFactory
.asValue(o);
// Note: _c is null unless quads.
final BigdataValue _c = quads ? valueFactory == null ? null
: valueFactory.asValue(c) : null;
/*
* Batch resolve all non-null values to get their term identifiers.
*/
int nnonNull = 0;
final BigdataValue[] values = new BigdataValue[spoKeyArity];
{
if (s != null)
values[nnonNull++] = _s;
if (p != null)
values[nnonNull++] = _p;
if (o != null)
values[nnonNull++] = _o;
if (c != null && quads)
values[nnonNull++] = _c;
if (nnonNull > 0)
getLexiconRelation()
.addTerms(values, nnonNull, true/* readOnly */);
}
/*
* If any value was given but is not known to the lexicon then use an
* empty access path since no statements can exist for the given
* statement pattern.
*/
if (s != null && _s.getIV() == null)
return null; // new EmptyAccessPath<ISPO>();
if (p != null && _p.getIV() == null)
return null; // new EmptyAccessPath<ISPO>();
if (o != null && _o.getIV() == null)
return null; // new EmptyAccessPath<ISPO>();
if (quads && c != null && _c.getIV() == null)
return null; // new EmptyAccessPath<ISPO>();
// /*
// * Convert our object types to internal identifiers.
// *
// * Note: If a value was specified and it is not in the terms index then
// * the statement can not exist in the KB.
// */
// final long _s = getTermId(s);
//
// if (_s == NULL && s != null)
// return new EmptyAccessPath<ISPO>();
//
// final long _p = getTermId(p);
//
// if (_p == NULL && p != null)
// return new EmptyAccessPath<ISPO>();
//
// final long _o = getTermId(o);
//
// if (_o == NULL && o != null)
// return new EmptyAccessPath<ISPO>();
/*
* Return the access path.
*/
return getSPORelation().getPredicate(//
s == null ? null : _s.getIV(), //
p == null ? null : _p.getIV(),//
o == null ? null : _o.getIV(),//
(c == null || !quads) ? null : _c.getIV(),//
filter, range//
);
}
final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p,
final IV o) {
return getSPORelation()
.getAccessPath(s, p, o, null/* c */);
}
final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p,
final IV o, final IElementFilter<ISPO> filter) {
return getSPORelation().getAccessPath(s, p, o, null/* c */, filter);
}
final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p,
final IV o,final IV c) {
return getSPORelation()
.getAccessPath(s, p, o, c);
}
final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p,
final IV o,final IV c, final RangeBOp range) {
return getSPORelation()
.getAccessPath(s, p, o, c, range);
}
final public IAccessPath<ISPO> getAccessPath(final IV s, final IV p,
final IV o, final IV c, final IElementFilter<ISPO> filter) {
return getSPORelation().getAccessPath(s, p, o, c, filter);
}
final public IAccessPath<ISPO> getAccessPath(final IKeyOrder<ISPO> keyOrder) {
return getAccessPath(keyOrder, null/* filter */);
}
/**
* Return the access path for the statement index associated with the
* given key order, with an optional filter.
*
* @param keyOrder
* The key order identifying the statement index to be used.
* @param filter
* The filter will be incorporated as a constraint on the
* {@link IPredicate} for the {@link IAccessPath} and will be
* evaluated close to the data.
*
* @return The access path.
*/
// @SuppressWarnings("unchecked")
final public IAccessPath<ISPO> getAccessPath(
final IKeyOrder<ISPO> keyOrder, final IElementFilter<ISPO> filter) {
final SPORelation r = getSPORelation();
final SPOPredicate p = new SPOPredicate(
quads ?
new BOp[]{//
Var.var("s"),//
Var.var("p"),//
Var.var("o"),//
Var.var("c")//
}: new BOp[]{
Var.var("s"),//
Var.var("p"),//
Var.var("o"),//
},//
NV.asMap(new NV[] {//
new NV(IPredicate.Annotations.RELATION_NAME,
new String[] { r.getNamespace() }),//
// new NV(IPredicate.Annotations.KEY_ORDER,
// keyOrder),//
new NV(IPredicate.Annotations.INDEX_LOCAL_FILTER,
ElementFilter.newInstance(filter)),//
new NV(SPOPredicate.Annotations.INCLUDE_HISTORY, true),
}));
// final SPOPredicate p = new SPOPredicate(//
// new String[] { r.getNamespace() },//
// -1, // partitionId
// Var.var("s"),//
// Var.var("p"),//
// Var.var("o"),//
// quads ? Var.var("c") : null,//
// false, // optional
// filter,//
// null // expander
// );
return r.getAccessPath(keyOrder, p);
}
/*
* statement externalization serialization stuff.
*/
// namespace to prefix
private final Map<String, String> uriToPrefix = new LinkedHashMap<String, String>();
/**
* Defines a transient mapping from a URI to a namespace prefix that will be
* used for that URI by {@link #toString()}.
*
* @param namespace
*
* @param prefix
*/
final public void addNamespace(final String namespace, final String prefix) {
uriToPrefix.put(namespace, prefix);
}
/**
* Return an unmodifiable view of the mapping from namespaces to namespace
* prefixes.
* <p>
* Note: this is NOT a persistent map. It is used by {@link #toString(IV)}
* when externalizing URIs.
*/
final public Map<String, String> getNamespaces() {
return Collections.unmodifiableMap(uriToPrefix);
}
/**
* Return the namespace for the given prefix.
*
* @param prefix
* The prefix.
*
* @return The associated namespace -or- <code>null</code> if no namespace
* was mapped to that prefix.
*/
final public String getNamespace(final String prefix) {
// Note: this is not an efficient operation.
final Iterator<Map.Entry<String/* namespace */, String/* prefix */>> itr = uriToPrefix
.entrySet().iterator();
while (itr.hasNext()) {
final Map.Entry<String/* namespace */, String/* prefix */> entry = itr
.next();
if (entry.getValue().equals(prefix)) {
return entry.getKey();
}
}
return null;
}
/**
* Removes the namespace associated with the prefix.
*
* @param prefix
* The prefix.
* @return The namespace associated with that prefix (if any) and
* <code>null</code> otherwise.
*/
final public String removeNamespace(final String prefix) {
final Iterator<Map.Entry<String/* namespace */, String/* prefix */>> itr = uriToPrefix
.entrySet().iterator();
while (itr.hasNext()) {
final Map.Entry<String/* namespace */, String/* prefix */> entry = itr
.next();
if (entry.getValue().equals(prefix)) {
itr.remove();
return entry.getKey();
}
}
return null;
}
/**
* Clears the namespace map.
*/
final public void clearNamespaces() {
uriToPrefix.clear();
}
final public String toString(final IV s, final IV p, final IV o) {
return toString(s, p, o, null);
}
final public String toString(final IV s, final IV p, final IV o,
final IV c) {
return ("< " + toString(s) + ", " + toString(p) + ", " + toString(o)
+ ", " + toString(c) + " >");
}
final public String toString(final ISPO spo) {
return toString(spo.s(), spo.p(), spo.o(), spo.c());
}
final public String toString(final IV iv) {
if (iv == null)
return IRawTripleStore.NULLSTR;
// if(iv.isStatement()){
//
// // Note: SIDs are not stored in the reverse lexicon.
// return Long.toString(iv.getTermId()) + "S";
//
// }
String s = "";
try {
final BigdataValue v = getTerm(iv);
if (v == null)
return "<NOT_FOUND#" + iv + ">";
s = (v instanceof URI ? abbrev((URI) v) : v.toString());
} catch (NotMaterializedException ex) {
// Ignore: emit just the IV when the Value is not materialized.
}
return s + ("(" + iv + ")");
}
// private final String TERM_NOT_FOUND = "<NOT_FOUND>";
/**
* Substitutes in well-known namespaces (rdf, rdfs, etc).
*/
final private String abbrev(final URI uri) {
final String uriString = uri.toString();
// final int index = uriString.lastIndexOf('#');
//
// if(index==-1) return uriString;
//
// final String namespace = uriString.substring(0, index);
final String namespace = uri.getNamespace();
final String prefix = uriToPrefix.get(namespace);
if (prefix != null) {
return prefix + ":" + uri.getLocalName();
}
return uriString;
}
// final public StringBuilder predicateUsage() {
//
// return predicateUsage(this);
//
// }
//
// /**
// * Dumps the #of statements using each predicate in the kb (tab delimited,
// * unordered).
// *
// * @param resolveTerms
// * Used to resolve term identifiers to terms (you can use this to
// * dump a {@link TempTripleStore} that is using the term
// * dictionary of the main database).
// *
// * @see SD, which can now compute and report both the predicate partition
// * usage and the class partition usage.
// */
// final public StringBuilder predicateUsage(
// final AbstractTripleStore resolveTerms) {
//
// if (getSPORelation().oneAccessPath) {
//
// // The necessary index (POS or POCS) does not exist.
// throw new UnsupportedOperationException();
//
// }
//
// // visit distinct term identifiers for the predicate position.
// final IChunkedIterator<IV> itr = getSPORelation().distinctTermScan(
// quads ? SPOKeyOrder.POCS : SPOKeyOrder.POS);
//
// // resolve term identifiers to terms efficiently during iteration.
// final BigdataValueIterator itr2 = new BigdataValueIteratorImpl(
// resolveTerms, itr);
//
// try {
//
// final StringBuilder sb = new StringBuilder();
//
// while (itr2.hasNext()) {
//
// final BigdataValue term = itr2.next();
//
// final IV p = term.getIV();
//
// final long n = getSPORelation().getAccessPath(null, p, null,
// null).rangeCount(false/* exact */);
//
// /*
// * FIXME do efficient term resolution for scale-out. This will
// * require an expander pattern where we feed one iterator into
// * another and both are chunked.
// */
// sb.append(n + "\t" + resolveTerms.toString(p) + "\n");
//
// }
//
// return sb;
//
// } finally {
//
// itr2.close();
//
// }
//
// }
/**
* Utility method dumps the statements in the store using the SPO index
* (subject order).
*/
final public StringBuilder dumpStore() {
return dumpStore(true, true, true);
}
final public StringBuilder dumpStore(final boolean explicit,
final boolean inferred, final boolean axioms) {
return dumpStore(this, explicit, inferred, axioms);
}
final public StringBuilder dumpStore(final boolean history) {
return dumpStore(this, true, true, true, history, false);
}
final public StringBuilder dumpStore(
final AbstractTripleStore resolveTerms, final boolean explicit,
final boolean inferred, final boolean axioms) {
return dumpStore(resolveTerms, explicit, inferred, axioms, false/* justifications */);
}
/**
* Dumps the store in a human readable format (not suitable for
* interchange).
*
* @param resolveTerms
* Used to resolve term identifiers to terms (you can use this to
* dump a {@link TempTripleStore} that is using the term
* dictionary of the main database).
* @param explicit
* Show statements marked as explicit.
* @param inferred
* Show statements marked inferred.
* @param axioms
* Show statements marked as axioms.
* @param justifications
* Dump the justifications index also.
*/
final public StringBuilder dumpStore(
final AbstractTripleStore resolveTerms, final boolean explicit,
final boolean inferred, final boolean axioms,
final boolean justifications) {
return dumpStore(resolveTerms, explicit, inferred, axioms, true,
justifications, true, getSPORelation().getPrimaryKeyOrder());
}
final public StringBuilder dumpStore(
final AbstractTripleStore resolveTerms, final boolean explicit,
final boolean inferred, final boolean axioms,
final boolean history, final boolean justifications) {
return dumpStore(resolveTerms, explicit, inferred, axioms, history,
justifications, true, getSPORelation().getPrimaryKeyOrder());
}
/**
* Dumps the store in a human readable format (not suitable for
* interchange).
*
* @param resolveTerms
* Used to resolve term identifiers to terms (you can use this to
* dump a {@link TempTripleStore} that is using the term
* dictionary of the main database).
* @param explicit
* Show statements marked as explicit.
* @param inferred
* Show statements marked inferred.
* @param axioms
* Show statements marked as axioms.
* @param justifications
* Dump the justifications index also.
* @param keyOrder
* The access path to use.
*/
public StringBuilder dumpStore(
final AbstractTripleStore resolveTerms, final boolean explicit,
final boolean inferred, final boolean axioms, final boolean history,
final boolean justifications, final boolean sids,
final IKeyOrder<ISPO> keyOrder) {
final StringBuilder sb = new StringBuilder();
final long nstmts = getAccessPath(keyOrder).rangeCount(true/* exact */);
long nexplicit = 0;
long ninferred = 0;
long naxioms = 0;
long nhistory = 0;
{
// Full SPO scan efficiently resolving SPOs to BigdataStatements.
final BigdataStatementIterator itr = resolveTerms
.asStatementIterator(getAccessPath(keyOrder)
.iterator());
int i = 0;
try {
while (itr.hasNext()) {
final BigdataStatement stmt = itr.next();
if (!sids && stmt.getSubject().getIV().isStatement()) {
continue;
}
switch (stmt.getStatementType()) {
case Explicit:
nexplicit++;
if (!explicit)
continue;
else
break;
case Inferred:
ninferred++;
if (!inferred)
continue;
else
break;
case Axiom:
naxioms++;
if (!axioms)
continue;
else
break;
case History:
nhistory++;
if (!history)
continue;
else
break;
default:
throw new AssertionError();
}
sb.append("#"
+ (i + 1)
+ "\t"
+ stmt
+ (" (" + stmt.s() + "," + stmt.p() + "," + stmt.o()
+ "," + stmt.c() + ")") + "\n");
i++;
}
} finally {
itr.close();
}
}
int njust = 0;
if (justifications && justify) {
final IIndex ndx = getSPORelation().getJustificationIndex();
final ITupleIterator<Justification> itrj = ndx.rangeIterator();
while (itrj.hasNext()) {
final Justification jst = (Justification)itrj.next().getObject();
sb.append("#" + (njust + 1) //+ "\t"
+ jst.toString(resolveTerms)+"\n");
njust++;
}
}
sb.append("dumpStore: #statements=" + nstmts + ", #explicit="
+ nexplicit + ", #inferred=" + ninferred + ", #axioms="
+ naxioms + ", #history=" + nhistory
+ (justifications ? ", #just=" + njust : ""));
return sb;
}
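/*
 * Usage sketch (illustrative only): dump just the explicit statements when
 * debugging, e.g. from a unit test.
 *
 * if (log.isDebugEnabled()) {
 *     // explicit=true, inferred=false, axioms=false.
 *     log.debug(dumpStore(true, false, false));
 * }
 */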
/**
* Dumps the access path, efficiently resolving term identifiers to terms.
*
* @param accessPath
*/
public StringBuilder dumpStatements(final IAccessPath<ISPO> accessPath) {
final StringBuilder sb = new StringBuilder();
final BigdataStatementIterator itr = asStatementIterator(accessPath
.iterator());
try {
while(itr.hasNext()) {
sb.append("\n"+itr.next());
}
return sb;
} finally {
itr.close();
}
}
/*
* IRawTripleStore
*/
/**
* Copies the statements from <i>this</i> store into the specified store
* using the <strong>same</strong> term identifiers (the lexicon is neither
* copied to nor asserted on the target).
* <p>
* Note: This method MUST NOT be used unless it is known in advance that the
* statements in <i>this</i> store use term identifiers that are consistent
* with (term for term identical to) those in the destination store. If
* statement identifiers are enabled, then they MUST be enabled for both
* stores (statement identifiers are assigned by, and stored in, the foward
* lexicon and replicated into the statement indices).
* <p>
* Note: The statements in <i>this</i> store are NOT removed.
*
* @param dst
* The persistent database (destination).
* @param filter
* An optional filter to be applied. Statements in <i>this</i>
* matching the filter will NOT be copied.
* @param copyJustifications
* When true, the justifications will be copied as well.
*
* @return The #of statements inserted into <i>dst</i> (the count only
* reports those statements that were not already in the main
* store).
*
* @todo method signature could be changed to accept the source access path
* for the read and then just write on the database
*/
public long copyStatements(//
final AbstractTripleStore dst,//
final IElementFilter<ISPO> filter,//
final boolean copyJustifications//
) {
return copyStatements(dst, filter, copyJustifications,
null/* changeLog */);
}
public long copyStatements(//
final AbstractTripleStore dst,//
final IElementFilter<ISPO> filter,//
final boolean copyJustifications,//
final IChangeLog changeLog
) {
if (dst == this)
throw new IllegalArgumentException();
// obtain a chunked iterator reading from any access path.
final IChunkedOrderedIterator<ISPO> itr = getAccessPath(
getSPORelation().getPrimaryKeyOrder(), filter).iterator();
try {
if (!copyJustifications) {
if (changeLog == null) {
// add statements to the target store.
return dst
.addStatements(dst, true/* copyOnly */, itr, null/* filter */);
} else {
return com.bigdata.rdf.changesets.StatementWriter.addStatements(
dst,
dst,
true/* copyOnly */,
null/* filter */,
itr,
changeLog);
}
} else {
/*
* Use a thread pool to write out the statement and the
* justifications concurrently. This dramatically reduces the
* latency when also writing justifications.
*/
final List<Callable<Long>> tasks = new ArrayList<Callable<Long>>(
2);
/*
* Note: we reject using the filter before stmts or
* justifications make it into the buffer so we do not need to
* apply the filter again here.
*/
// set as a side-effect.
final AtomicLong nwritten = new AtomicLong();
// task will write SPOs on the statement indices.
tasks.add(new StatementWriter(dst, dst, true/* copyOnly */,
itr, nwritten, changeLog));
// task will write justifications on the justifications index.
final AtomicLong nwrittenj = new AtomicLong();
if (justify) {
final IJustificationIterator jitr = new JustificationIterator(
getIndexManager(),
getSPORelation().getJustificationIndex(),
0/* capacity */);
tasks.add(new JustificationWriter(dst, jitr, nwrittenj));
}
final List<Future<Long>> futures;
final long elapsed_SPO;
final long elapsed_JST;
try {
futures = getIndexManager().getExecutorService().invokeAll(
tasks);
elapsed_SPO = futures.get(0).get();
if (justify) {
elapsed_JST = futures.get(1).get();
} else {
elapsed_JST = 0;
}
} catch (InterruptedException ex) {
throw new RuntimeException(ex);
} catch (ExecutionException ex) {
throw new RuntimeException(ex);
}
if (INFO)
log
.info("Copied "
+ nwritten
+ " statements in "
+ elapsed_SPO
+ "ms"
+ (justify ? (" and " + nwrittenj
+ " justifications in "
+ elapsed_JST + "ms") : ""));
return nwritten.get();
}
} finally {
itr.close();
}
}
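/*
 * Usage sketch (illustrative only; "dst" is a hypothetical target store
 * that MUST share a consistent lexicon with this store): copy everything,
 * without justifications and without a filter.
 *
 * final long ncopied = copyStatements(dst, null, false);
 */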
public IChunkedOrderedIterator<ISPO> bulkFilterStatements(
final ISPO[] stmts, final int numStmts, boolean present) {
if (numStmts == 0) {
return new EmptyChunkedIterator<ISPO>(getSPORelation()
.getPrimaryKeyOrder());
}
return bulkFilterStatements(new ChunkedArrayIterator<ISPO>(numStmts,
stmts, null/* keyOrder */), present);
}
public IChunkedOrderedIterator<ISPO> bulkFilterStatements(
final IChunkedOrderedIterator<ISPO> itr, final boolean present) {
return new ChunkedConvertingIterator<ISPO, ISPO>(itr,
new BulkFilterConverter(getSPORelation().getPrimaryIndex(),
present));
}
public IChunkedOrderedIterator<ISPO> bulkCompleteStatements(
final SPO[] stmts, final int numStmts) {
if (numStmts == 0) {
return new EmptyChunkedIterator<ISPO>(getSPORelation()
.getPrimaryKeyOrder());
}
return bulkCompleteStatements(new ChunkedArrayIterator<ISPO>(numStmts,
stmts, null/* keyOrder */));
}
public ISPO[] bulkCompleteStatements(final ISPO[] stmts) {
return new BulkCompleteConverter(getSPORelation().getPrimaryIndex())
.convert(stmts);
}
public IChunkedOrderedIterator<ISPO> bulkCompleteStatements(
final IChunkedOrderedIterator<ISPO> itr) {
return new ChunkedConvertingIterator(itr, new BulkCompleteConverter(
getSPORelation().getPrimaryIndex()));
}
public long addStatements(final ISPO[] stmts, final int numStmts) {
if (numStmts == 0)
return 0;
return addStatements(new ChunkedArrayIterator<ISPO>(numStmts, stmts,
null/* keyOrder */), null /* filter */);
}
public long addStatements(final ISPO[] stmts, final int numStmts,
final IElementFilter<ISPO> filter) {
if (numStmts == 0)
return 0;
return addStatements(new ChunkedArrayIterator<ISPO>(numStmts, stmts,
null/* keyOrder */), filter);
}
public long addStatements(final IChunkedOrderedIterator<ISPO> itr,
final IElementFilter<ISPO> filter) {
return addStatements(this/* statementStore */, false/* copyOnly */,
itr, filter);
}
/**
* Add statements to the <i>statementStore</i>.
* <p>
* Note: If {@link Options#STATEMENT_IDENTIFIERS} was specified, then
* statement identifiers are assigned using the lexicon associated with
* <i>this</i> database. This is done in a preprocessing stage for each
* "chunk" reported by the source <i>itr</i>. This step sets the statement
* identifier on the {@link SPO} so that it is present when we write on the
* statement indices.
*
* @param statementStore
* Either <i>this</i> database or the focusStore (the latter
* option is used only during truth maintenance).
* @param copyOnly
* When <code>true</code>, it is assumed that the {@link SPO}s
* are being copied from another store using a consistent lexicon
* (or onto a store that uses the same lexicon). The flag only
* has an effect when statement identifiers are enabled, since it
* is then presumed that {@link SPO#getStatementIdentifier()}
* will return a pre-assigned statement identifier and that we do
* NOT need to invoke
* {@link #addStatementIdentifiers(SPO[], int)}. This is only an
* optimization - the value <code>false</code> is always safe
* for this flag, but it will do some extra work in the case
* described here. See {@link StatementWriter}, which uses this
* flag and
* {@link #copyStatements(AbstractTripleStore, IElementFilter, boolean)}
* which always specifies <code>true</code> for this flag.
* @param itr
* The source from which the {@link SPO}s are read.
* @param filter
* An optional filter.
*
* @return The mutation count, which is the #of statements that were written
* on the indices. A statement that was previously an axiom or
* inferred and that is converted to an explicit statement by this
* method will be reported in this count as well as any statement
* that was not pre-existing in the database.
*/
public long addStatements(final AbstractTripleStore statementStore,
final boolean copyOnly, final IChunkedOrderedIterator<ISPO> itr,
final IElementFilter<ISPO> filter) {
if (statementStore == null)
throw new IllegalArgumentException();
if (itr == null)
throw new IllegalArgumentException();
try {
// final LexiconRelation lexiconRelation = getLexiconRelation();
final SPORelation spoRelation = statementStore.getSPORelation();
if (!itr.hasNext())
return 0;
long mutationCount = 0;
/*
* Note: We process the iterator a "chunk" at a time. If the
* iterator is backed by an SPO[] then it will all be processed in
* one "chunk".
*/
while (itr.hasNext()) {
final ISPO[] a = itr.nextChunk();
final int numStmts = a.length;
final long statementIdentifierTime;
if (statementIdentifiers && !copyOnly) {
final long begin = System.currentTimeMillis();
// /*
// * Note: the statement identifiers are always assigned by
// * the database. During truth maintenance, the
// * [statementStore] is NOT the database but the statement
// * identifiers are still assigned by the database. The
// * situation is exactly parallel to the manner in which term
// * identifiers are always assigned by the database. This is
// * done to ensure that they remain consistent between the
// * focusStore used by truth maintenance and the database.
// */
// lexiconRelation.addStatementIdentifiers(a, numStmts);
// /*
// * No more statement identifiers in the lexicon - they are
// * now inlined directly into the statement indices using
// * the SidIV class.
// *
// * Mark explicit statements as "sidable". The actual sid
// * will be produced on-demand to reduce heap pressure.
// */
// for (ISPO spo : a) {
// if (spo.isExplicit()) {
// spo.setStatementIdentifier(true);
// }
// }
statementIdentifierTime = System.currentTimeMillis()
- begin;
} else {
statementIdentifierTime = 0L;
}
// @todo raise the filter into the caller's iterator?
final long numWritten = spoRelation.insert(a, numStmts, filter);
mutationCount += numWritten;
if (numStmts > 1000) {
if(INFO)
log.info("Wrote "
+ numStmts
+ " statements (mutationCount="
+ numWritten
+ ")"
+ (statementStore != this ? "; truthMaintenance"
: "") //
+ (statementIdentifiers ? "; sid="
+ statementIdentifierTime + "ms" : "") //
);
}
} // nextChunk
return mutationCount;
} finally {
itr.close();
}
}
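/**
* Convenience variant which removes the caller's statements from this
* database, computing the closure over the statement identifiers (see
* {@link #removeStatements(IChunkedOrderedIterator, boolean)}).
*/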
public long removeStatements(final ISPO[] stmts, final int numStmts) {
return removeStatements(new ChunkedArrayIterator<ISPO>(numStmts, stmts,
null/* keyOrder */), true/*computeClosureForStatementIdentifiers*/);
}
public long removeStatements(final IChunkedOrderedIterator<ISPO> itr) {
return removeStatements(itr, true/*computeClosureForStatementIdentifiers*/);
}
/**
* This processes a chunk of {@link SPO}s at a time and then submits tasks
* to parallel threads to remove those statements from each of the statement
* indices. This continues until all statements visited by the iterator have
* been removed.
* <p>
* Note: If {@link #justify justifications} are being used to support truth
* maintenance, then all justifications for the removed statements are also
* removed.
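* <p>
* A minimal usage sketch (illustrative; <code>kb</code> and the subject
* {@link IV} <code>s</code> are assumed):
*
* <pre>
* // Remove all statements having subject [s], plus (when SIDs are
* // enabled) any statements about those statements.
* final long n = kb.removeStatements(
*         kb.getAccessPath(s, null, null).iterator(),
*         true); // computeClosureForStatementIdentifiers
* </pre>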
*
* @param itr
* An iterator visiting {@link SPO}s to be removed from the
* database.
*
* @param computeClosureForStatementIdentifiers
* When <code>false</code> the caller asserts that they have
* pre-computed the closure of the statements that assert
* metadata about statement identifiers to be deleted. When
* <code>true</code> this method will compute that closure on
* behalf of the caller with the effect that any statements made
* about statements to be removed are also removed. This option
* has no effect when {@link #statementIdentifiers} are not
* enabled. See {@link Options#STATEMENT_IDENTIFIERS}.
*
* @todo If you are using statement identifiers but you are NOT using truth
* maintenance then this method does NOT guarantee consistency when
* removing statements in the face of concurrent writers on the
* statement indices. The problem is that we collect the statement
* identifiers in one unisolated operation, then collect the
* statements that use those statement identifiers in two other
* operations, and finally we remove those statements. In order to be
* consistent you need to obtain an exclusive lock (which is difficult
* to do with distributed clients) or be encompassed by a transaction.
* (This is the same constraint that applies when truth maintenance is
* enabled since you have to serialize incremental TM operations
* anyway.)
*/
public long removeStatements(//
IChunkedOrderedIterator<ISPO> itr,
final boolean computeClosureForStatementIdentifiers//
) {
if (itr == null)
throw new IllegalArgumentException();
long mutationCount = 0;
if (statementIdentifiers && computeClosureForStatementIdentifiers) {
itr = computeClosureForStatementIdentifiers(itr);
}
try {
while (itr.hasNext()) {
final ISPO[] stmts = itr.nextChunk();
// The #of statements that will be removed.
final int numStmts = stmts.length;
mutationCount += getSPORelation().delete(stmts, numStmts);
}
} finally {
itr.close();
}
return mutationCount;
}
/**
* Return an iterator which will visit the closure of the statements visited
* by the source iterator plus any statements in the database made using a
* statement identifier found on any of the statements visited by the source
* iterator (only explicit statements have statement identifiers, and then
* only if {@link #statementIdentifiers} are enabled).
* <p>
* Note: This uses a {@link TempTripleStore} which is iteratively populated
* until a fix point is obtained. The {@link TempTripleStore} is released
* when the returned iterator is {@link IChunkedIterator#close() closed} or
* when it is finalized.
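* <p>
* Illustrative use, wrapping a source iterator before a delete (this is
* what {@link #removeStatements(IChunkedOrderedIterator, boolean)} does):
*
* <pre>
* itr = computeClosureForStatementIdentifiers(itr);
* </pre>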
*
* @param src
* The source iterator.
*
* @see BLZG-1268 (NPE during read-only namespace resolution caused by RDR
* Truth Maintenance and GRS concurrent modification)
*/
@SuppressWarnings("unused")
public IChunkedOrderedIterator<ISPO> computeClosureForStatementIdentifiers(
IChunkedOrderedIterator<ISPO> src) {
if (!statementIdentifiers) {
// There will be no statement identifiers unless they were enabled.
return src;
}
/*
* If RDR history is enabled we don't want to remove non-grounded
* sids.
*/
if (rdrHistoryClass != null) {
return src;
}
/*
* Bypass if requested.
*/
if (!computeClosureForSids) {
return src;
}
final Properties properties = getProperties();
// do not store terms
properties.setProperty(Options.LEXICON, "false");
// only store the SPO index
properties.setProperty(Options.ONE_ACCESS_PATH, "true");
// @todo test w/ SPO bloom filter enabled and see if this improves performance.
properties.setProperty(Options.BLOOM_FILTER, "false");
// no axioms.
properties.setProperty(
com.bigdata.rdf.store.AbstractTripleStore.Options.AXIOMS_CLASS,
NoAxioms.class.getName());
final TempTripleStore tmp = new TempTripleStore(getIndexManager()
.getTempStore(), properties, this);
/*
* buffer everything in a temp triple store.
*
* Note: copyOnly is false since we need to make sure that the sids are
* defined for explicit statements before we attempt to compute the
* fixed point for the statements about statements. This will have the
* side effect of writing on the lexicon for the database if the caller
* supplies an explicit SPO that does not exist yet in the database.
*/
this.addStatements(tmp, false/*copyOnly*/, src, null/*filter*/);
fixPointStatementIdentifiers(this, tmp);
/*
* Note: The returned iterator will automatically release the backing
* temporary store when it is closed or finalized.
*
* Note: SIDs are only used with triples so the SPO index will exist.
*/
return new DelegateChunkedIterator<ISPO>(tmp.getAccessPath(
SPOKeyOrder.SPO).iterator()) {
public void close() {
super.close();
tmp.close();
}
protected void finalize() throws Throwable {
super.finalize();
if(tmp.isOpen()) {
tmp.close();
}
}
};
}
/**
* Computes the fixed point of those statements in the database which make
* assertions about statement identifiers in the tmp store.
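* <p>
* Schematically, the loop runs until no new statements are found:
*
* <pre>
* do {
*     before = tempStore.getStatementCount();
*     // For each explicit statement in the temp store, copy in any
*     // database statement using that statement's SID in its s, p,
*     // or o position.
*     after = tempStore.getStatementCount();
* } while (before &lt; after);
* </pre>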
*
* @param db
* The database.
* @param tempStore
* The temporary store.
*/
static public void fixPointStatementIdentifiers(
final AbstractTripleStore db, final AbstractTripleStore tempStore) {
/*
* Fix point the temp triple store.
*
* Note: A [sids] collection is used to filter out statement identifiers
* for which we have already queried against the database and thereby
* avoid a re-scan for the use of that statement identifier in the
* database.
*
* FIXME A TempTripleStore does not support concurrent readers and
* writers because it is backed by a TemporaryStore and not a Journal
* (with its concurrency controls). This is handled in AccessPath by
* always using a fully buffered iterator(), so we do not really gain
* the ability to scale-up from the TempTripleStore (we could create the
* TempTripleStore on a Journal using Temporary buffer mode and get the
* concurrency controls).
*/
long statementCount0;
long statementCount1;
final HashSet<IV> sids = new HashSet<IV>();
// final LongOpenHashSet sids = new LongOpenHashSet();
int nrounds = 0;
do {
// increment the round counter.
nrounds++;
// note: count will be exact.
statementCount0 = tempStore.getStatementCount();
/*
* Visit the explicit statements since only they can have statement
* identifiers.
*
* Note: SIDs are only used with triples so the SPO index will
* exist.
*/
final IChunkedOrderedIterator<ISPO> itr = tempStore.getAccessPath(
SPOKeyOrder.SPO, ExplicitSPOFilter.INSTANCE).iterator();
try {
while(itr.hasNext()) {
final IV sid = itr.next().getStatementIdentifier();
if(sids.contains(sid)) continue;
// sid in the subject position.
tempStore.addStatements(tempStore, true/* copyOnly */, db
.getAccessPath(sid, null, null).iterator(),
null/* filter */);
/*
* sid in the predicate position.
*
* Note: this case is not allowed by RDF but a TMGraph model
* might use it.
*/
tempStore.addStatements(tempStore, true/* copyOnly */, db
.getAccessPath(null, sid, null).iterator(),
null/* filter */);
// sid in the object position.
tempStore.addStatements(tempStore, true/*copyOnly*/, db
.getAccessPath((IV) null, (IV) null, (IV) sid).iterator(),
null/* filter */);
// finished with this sid.
sids.add(sid);
}
} finally {
itr.close();
}
// note: count will be exact.
statementCount1 = tempStore.getStatementCount();
if (INFO)
log.info("Finished " + nrounds + " rounds: statementBefore="
+ statementCount0 + ", statementsAfter="
+ statementCount1);
} while (statementCount0 < statementCount1);
}
// Use getLexiconRelation().getSearchEngine().search(...)
// /**
// * <p>
// * Performs a full text search against literals returning an {@link IHit}
// * list visiting the term identifiers for literals containing tokens parsed
// * from the query. Those term identifiers may be used to join against the
// * statement indices in order to bring back appropriate results.
// * </p>
// * <p>
// * Note: If you want to discover a data typed value, then form the
// * appropriate data typed {@link Literal} and use
// * {@link IRawTripleStore#getTermId(Value)}. Likewise, that method is also
// * more appropriate when you want to lookup a specific {@link URI}.
// * </p>
// *
// * @param languageCode
// * The language code that should be used when tokenizing the
// * query (an empty string will be interpreted as the default
// * {@link Locale}).
// * @param text
// * The query (it will be parsed into tokens).
// *
// * @return An iterator that visits each term in the lexicon in which one or
// * more of the extracted tokens has been found. The value returned
// * by {@link IHit#getDocId()} is in fact the <i>termId</i> and you
// * can resolve it to the term using {@link #getTerm(long)}.
// *
// * @throws InterruptedException
// * if the search operation is interrupted.
// *
// * @todo Abstract the search api so that it queries the terms index directly
// * when a data typed literal or a URI is used (typed query).
// */
// @SuppressWarnings("unchecked")
// public Iterator<IHit> textSearch(final String languageCode,
// final String text) throws InterruptedException {
//
// return getSearchEngine().search(text, languageCode);
//
// }
/**
* Return an {@link IJoinNexusFactory} for this database view using the
* defaults: justifications iff {@link #isJustify()}, no backchaining, and
* the {@link DefaultEvaluationPlanFactory2}.
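* <p>
* For example, the pattern used by {@link #match(Literal[], URI[], URI)}
* is:
*
* <pre>
* final IJoinNexus joinNexus = newJoinNexusFactory(
*         RuleContextEnum.HighLevelQuery, ActionEnum.Query,
*         IJoinNexus.BINDINGS, null) // no filter
*         .newInstance(getIndexManager());
* </pre>
*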
* @param solutionFlags
* See {@link IJoinNexus#ELEMENT} and friends.
* @param filter
* Optional filter.
* @return The new {@link IJoinNexusFactory}.
*/
public IJoinNexusFactory newJoinNexusFactory(RuleContextEnum ruleContext,
ActionEnum action, int solutionFlags, IElementFilter filter) {
return newJoinNexusFactory(ruleContext, action, solutionFlags, filter,
isJustify(), false/* backchain */,
DefaultEvaluationPlanFactory2.INSTANCE);
}
/**
* Variant which lets the caller specify the justification, backchaining,
* and evaluation plan factory options.
* @param solutionFlags
* See {@link IJoinNexus#ELEMENT} and friends.
* @param filter
* Optional filter.
* @return The new {@link IJoinNexusFactory}.
*/
public IJoinNexusFactory newJoinNexusFactory(
final RuleContextEnum ruleContext, final ActionEnum action,
final int solutionFlags, final IElementFilter filter,
final boolean justify, final boolean backchain,
final IEvaluationPlanFactory planFactory) {
return newJoinNexusFactory(ruleContext, action, solutionFlags, filter,
justify, backchain, planFactory, null/*overrides*/);
}
/**
* Core variant: returns an {@link IJoinNexusFactory} configured for the
* given rule context and action against this database view.
* @param solutionFlags
* See {@link IJoinNexus#ELEMENT} and friends.
* @param filter
* Optional filter.
* @param overrides
* Optional overrides of the properties controlling the rule
* execution layer. When given, the property values will override
* those inherited from {@link AbstractResource}.
* @return The new {@link IJoinNexusFactory}.
*/
public IJoinNexusFactory newJoinNexusFactory(
final RuleContextEnum ruleContext, //
final ActionEnum action,//
final int solutionFlags,//
final IElementFilter filter,//
final boolean justify, //
final boolean backchain,//
final IEvaluationPlanFactory planFactory,//
final Properties overrides//
) {
if (ruleContext == null)
throw new IllegalArgumentException();
if (action == null)
throw new IllegalArgumentException();
if(action.isMutation()) {
assertWritable();
}
// use the timestamp for the database view.
final long writeTimestamp = getTimestamp();
/*
* Use the timestamp for the database view.
*
* Note: The choice here affects the behavior for MUTATION only (e.g.,
* computing or updating the closure of the db) and reflects the use of
* the UnisolatedReadWriteIndex to allow interleaved reads and writes on
* the unisolated indices when using a Journal or Temporary(Raw)Store.
* When running against an IBigdataFederation, the ConcurrencyManager
* will be interposed and unisolated writes will result in commits.
*
* Note: If we are only reading (Query) then we just use the timestamp
* of the view.
*
* --- LTS (closure using nested or pipeline joins)
*
* Closure can't use READ_COMMITTED for LTS unless we force commits
* after each rule executes. Therefore it uses UNISOLATED indices for
* both reads and writes. This works since (a) the entire JOIN task is
* executed while holding all required locks; and (b) the
* UnisolatedReadWriteIndex is used to avoid concurrency problems that
* would otherwise arise since there are readers running concurrent with
* the writer that flushes solutions to the relation.
*
* Using UNISOLATED JOINs for LTS means that parallel programs will be
* (mostly) serialized since they will be ordered by the requirement for
* resource locks on the various indices, including the head relation
* (SPORelation) on which the rules will write. However, as noted above,
* our only other choice is to enforce auto-commit semantics as we do
* for LDS/EDS/JDS.
*
* ---- Federations and pipeline joins.
*
* Federations (LDS/EDS/JDS) use auto-commit semantics. For EDS/JDS we
* do NOT use READ_COMMITTED for closure since that can lead to stale
* locator problems during overflow processing. Instead we advance the
* readTimestamp before each mutation rule is run. For simplicity, we do
* this for LDS as well as EDS/JDS, while it is in fact only required
* for EDS/JDS since LDS does not use locators.
*
* Federation JoinTasks require access to the dataService so that they
* can obtain the live journal. However, they use read-historical access
* and you can do that without declaring locks. More to the point, if
* they attempted to access the UNISOLATED index that would cause a
* deadlock with the tasks responsible for flushing the generated
* solutions onto the head relation.
*/
final long readTimestamp;
if (action.isMutation()) {
if (writeTimestamp != ITx.UNISOLATED) {
// mutation requires the UNISOLATED view.
throw new UnsupportedOperationException();
}
if (getIndexManager() instanceof IBigdataFederation<?>) {
/*
* Use historical reads.
*
* Note: The read timestamp will be automatically updated before
* each mutation step so that all mutation operations will see
* the last committed state of the database but will also avoid
* problems with stale locators.
*/
readTimestamp = TimestampUtility.asHistoricalRead(getIndexManager()
.getLastCommitTime());
} else {
/*
* LTS closure operations.
*
* Note: This means that we use UNISOLATED reads since mutation
* requires that the caller is using the UNISOLATED relation.
*/
readTimestamp = getTimestamp();
}
} else {
/*
* Query.
*
* Use the same timestamp as the relation.
*/
readTimestamp = getTimestamp();
}
/*
* true iff owl:sameAs is (a) supported by the axiom model; and (b)
* there is at least one owl:sameAs assertion in the database.
*/
final boolean isOwlSameAsUsed = getAxioms().isOwlSameAs()
&& !getAccessPath(null, getVocabulary().get(OWL.SAMEAS), null)
.isEmpty();
final IRuleTaskFactory defaultRuleTaskFactory = DefaultRuleTaskFactory.PIPELINE;
// = isNestedSubquery() //
// ? DefaultRuleTaskFactory.SUBQUERY//
// : DefaultRuleTaskFactory.PIPELINE//
// ;
// Note: returns a Properties wrapping the resource's properties.
final Properties tmp = getProperties();
if (overrides != null) {
/* FIXME overrides should apply to the properties above here as well!
* Layer in the overrides.
*
* Note: If the caller passes in a Properties object, then only the
* top level of that properties object will be copied in. This can
* be "fixed" by the caller using PropertyUtil.flatten(Properties).
*/
tmp.putAll(overrides);
}
return new RDFJoinNexusFactory(
action, //
writeTimestamp,//
readTimestamp, //
tmp,//
solutionFlags, //
filter, //
planFactory, //
defaultRuleTaskFactory,//
ruleContext,//
justify,//
backchain, //
isOwlSameAsUsed //
);
}
/**
* Specialized {@link IRule} execution using the full text index to identify
* possible completions of the given literals for which there exists a
* subject <code>s</code> such that:
*
* <pre>
* SELECT ?s, ?t, ?lit
* (?lit completionOf, lits)
* AND (?s ?p ?lit)
* AND (?s rdf:type ?t)
* AND (?t rdfs:subClassOf cls)
* WHERE
* p IN {preds}
* </pre>
*
* Note: The JOIN runs asynchronously.
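* <p>
* A usage sketch (illustrative; <code>kb</code> and the store's
* {@link BigdataValueFactory} <code>f</code> are assumed, and the class
* URI is hypothetical):
*
* <pre>
* final ICloseableIterator&lt;IBindingSet&gt; itr = kb.match(
*         new Literal[] { f.createLiteral("Mi") }, // prefix(es)
*         new URI[] { RDFS.LABEL }, // preds
*         f.createURI("http://example.org/Person")); // cls
* </pre>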
*
* @param lits
* One or more literals. The completions of these literals will
* be discovered using the {@link FullTextIndex}. (A completion
* is any literal having one of the given literals as a prefix
* and includes an exact match on the literal as a degenerate
* case.)
* @param preds
* One or more predicates that link the subjects of interest to
* the completions of the given literals. Typically this array
* will include <code>rdfs:label</code>.
* @param cls
* All subjects visited by the iterator will be instances of this
* class.
*
* @return An {@link ICloseableIterator} visiting {@link IBindingSet}s. Each
* {@link IBindingSet} will have bound {@link BigdataValue}s for
* <code>s</code>, <code>t</code>, <code>p</code>, and
* <code>lit</code> where those variables are defined per the
* pseudo-code JOIN above.
*
* @throws InterruptedException
* if the operation is interrupted.
*
* FIXME quads : Modify match() to allow an optional context
* argument. When present, the match would be restricted to the
* specified context.
*/
public ICloseableIterator<IBindingSet> match(final Literal[] lits,
final URI[] preds, final URI cls) {
if (lits == null || lits.length == 0)
throw new IllegalArgumentException();
if (preds == null || preds.length == 0)
throw new IllegalArgumentException();
if (cls == null)
throw new IllegalArgumentException();
/*
* Batch resolve BigdataValues with their associated term identifiers
* for the given predicates and cls.
*/
final BigdataValue[] terms = new BigdataValue[preds.length + 1/* cls */];
{
final BigdataValueFactory valueFactory = getValueFactory();
for (int i = 0; i < preds.length; i++) {
// pred[i] is in terms[i].
terms[i] = valueFactory.asValue(preds[i]);
}
// cls is in the last index position.
terms[preds.length] = valueFactory.asValue(cls);
// batch resolve (readOnly).
getLexiconRelation()
.addTerms(terms, terms.length, true/* readOnly */);
}
/*
* Translate the predicates into term identifiers.
*/
final IConstant<IV>[] _preds = new IConstant[preds.length];
{
int nknown = 0;
for (int i = 0; i < preds.length; i++) {
final IV iv = terms[i].getIV();
if (iv != null)
nknown++;
_preds[i] = new Constant<IV>(iv);
}
if (nknown == 0) {
log.warn("No known predicates: preds=" + Arrays.toString(preds));
return new EmptyChunkedIterator<IBindingSet>(null/* keyOrder */);
}
}
/*
* Translate the class constraint into a term identifier.
*/
final IV _cls = terms[preds.length].getIV();
if (_cls == null) {
log.warn("Unknown class: class=" + cls);
return new EmptyChunkedIterator<IBindingSet>(null/* keyOrder */);
}
/*
* Generate a program from the possible completions of the literals.
*/
final IProgram program = getMatchProgram(lits, _preds, _cls);
final int solutionFlags = IJoinNexus.BINDINGS;
final IJoinNexus joinNexus = newJoinNexusFactory(//
RuleContextEnum.HighLevelQuery,//
ActionEnum.Query,//
solutionFlags,//
null // filter
).newInstance(getIndexManager());
/*
* Resolve ISolutions to their binding sets and efficiently resolve
* term identifiers in those binding sets to BigdataValues.
*/
try {
return new BigdataSolutionResolverator(this, joinNexus
.runQuery(program)).start(getExecutorService());
} catch(Exception ex) {
// runQuery() throws Exception.
throw new RuntimeException(ex);
}
}
/**
* Generate a program from the possible completions of the literals.
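* <p>
* Schematically, for each completed literal the {@link MatchRule} is
* specialized with <code>lit</code> bound to that literal's {@link IV} and
* added as a parallel step:
*
* <pre>
* bindings := { lit -&gt; Constant(iv) }
* program.addStep(rule.specialize(bindings, null)); // no constraints
* </pre>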
*
* @param lits
* One or more literals. The completions of these literals will
* be discovered using the {@link FullTextIndex}. (A completion
* is any literal having one of the given literals as a prefix
* and includes an exact match on the literal as a degenerate
* case.)
* @param _preds
* One or more term identifiers for predicates that link the
* subjects of interest to the completions of the given literals.
* Typically this array will include the term identifier for
* <code>rdfs:label</code>.
* @param _cls
* All subjects visited by the iterator will be instances of the
* class assigned this term identifier.
*
* @return A generated program. When run as a query, the program will
* produce {@link ISolution}s corresponding to the head of the
* {@link MatchRule}.
*/
protected Program getMatchProgram(final Literal[] lits,
final IConstant<IV>[] _preds, final IV _cls) {
final Iterator<IV> ivIterator = getLexiconRelation().prefixScan(
lits);
// the term identifier for the completed literal.
final IVariable<IV> lit = Var.var("lit");
// instantiate the rule.
final Rule r = new MatchRule(getSPORelation().getNamespace(),
getVocabulary(), lit, _preds, new Constant<IV>(_cls));
// // bindings used to specialize the rule for each completed literal.
// final IBindingSet bindings = new ArrayBindingSet(r.getVariableCount());
final Program program = new Program("match", true/* parallel */);
// specialize and apply to each completed literal.
while (ivIterator.hasNext()) {
final IV iv = ivIterator.next();
final IBindingSet constants = new ListBindingSet();//new ArrayBindingSet(1);
constants.set(lit, new Constant<IV>(iv));
final IRule tmp = r.specialize(constants, null/* constraints */);
if (DEBUG)
log.debug(tmp.toString());
program.addStep(tmp);
}
return program;
}
}