package eu.fbk.knowledgestore.datastore;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.openrdf.model.URI;
import eu.fbk.knowledgestore.data.Record;
import eu.fbk.knowledgestore.data.Stream;
import eu.fbk.knowledgestore.data.XPath;
import eu.fbk.knowledgestore.runtime.DataCorruptedException;
import eu.fbk.knowledgestore.vocabulary.KS;
/**
* A {@code DataStore} transaction.
* <p>
* A {@code DataTransaction} is a unit of work over the contents of a {@link DataStore} that
* provides atomicity (i.e., changes are either completely stored or discarded), isolation (i.e.,
* other transactions do not see the modifications of this transaction) and durability (i.e.,
* changes are persisted across different program runs) guarantees.
* </p>
* <p>
* A <tt>DataTransaction</tt> supports the following features:
* </p>
* <ul>
* <li>Lookup of records by ID, via method {@link #lookup(URI, Set, Set)};</li>
* <li>Matching of records based on type and optional condition, consisting either in the
* retrieval of (selected properties of) matching records (method
* {@link #retrieve(URI, XPath, Set)}) or of their count (method {@link #count(URI, XPath)});</li>
* <li>Matching of record combinations (method {@link #match(Map, Map, Map)});</li>
* <li>Storing and deletion of single records (methods {@link #store(URI, Record)},
* {@link #delete(URI, URI)}).</li>
* </ul>
* <p>
* Note that the latter modification methods are not available for read-only transactions (an
* {@link IllegalStateException} is thrown in that case); moreover, they can return to the caller
* even if the operation has not yet completed (e.g., due to buffering), in which case it is
* however guaranteed that subsequent read operations will be able to see newly written data. For
* all methods accepting a type URI parameter, that parameter can only be one of the supported
* record types listed in {@link DataStore#SUPPORTED_TYPES}.
* </p>
* <p>
* Transactions are terminated via {@link #end(boolean)}, whose parameter specifies whether
* changes should be committed or not (this doesn't matter for read-only transactions). Method
* {@code end()} always has the effect of terminating the transaction: if it throws an exception a
* rollback must be assumed, even if a commit was requested. In case the JVM is abruptly shut down
* during a transaction, the effects of the transaction should be the same as if a rollback was
* performed. As a particular case of <tt>IOException</tt>, method {@code end()} may throw a
* {@link DataCorruptedException} in case neither a commit nor a rollback was possible and the
* {@code DataStore} is left in some unpredictable state with no possibility of automatic
* recovery.
* </p>
* <p>
* {@code DataTransaction} objects are not required to be thread safe. Access by at most one
* thread at a time is guaranteed externally. However, it must be allowed for operations to be
* issued while streams from previous operations are still open; if a stream is open and a write
* operation is performed that affects one of the objects still to be returned by the stream (or
* makes an object returnable/not returnable by the stream), then the stream is allowed either
* to return the previous state of the object or to return the new state.
* </p>
*/
public interface DataTransaction {

    /**
     * Returns a stream of records having the type and IDs specified.
     *
     * @param type
     *            the URI of the type of records to return
     * @param ids
     *            a set with the IDs of the records to return, not to be modified by the method
     * @param properties
     *            a set with the properties to return for matching records, not modified by the
     *            method; if null, all the available properties must be returned
     * @return a stream with the records matching the IDs and type specified, possibly empty and
     *         in no particular order
     * @throws IOException
     *             in case some IO error occurs
     * @throws IllegalArgumentException
     *             in case the type specified is not supported
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended
     */
    Stream<Record> lookup(URI type, Set<? extends URI> ids,
            @Nullable Set<? extends URI> properties) throws IOException,
            IllegalArgumentException, IllegalStateException;

    /**
     * Returns a stream of records having the type and matching the optional condition specified.
     *
     * @param type
     *            the URI of the type of records to return
     * @param condition
     *            an optional condition to be satisfied by matching records; if null, no condition
     *            must be checked
     * @param properties
     *            a set with the properties to return for matching records, not modified by the
     *            method; if null, all the available properties must be returned
     * @return a stream over the records matching the condition and type specified, possibly empty
     *         and in no particular order
     * @throws IOException
     *             in case some IO error occurs
     * @throws IllegalArgumentException
     *             in case the type specified is not supported
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended
     */
    Stream<Record> retrieve(URI type, @Nullable XPath condition,
            @Nullable Set<? extends URI> properties) throws IOException,
            IllegalArgumentException, IllegalStateException;

    /**
     * Counts the records having the type and matching the optional condition specified. This
     * method performs similarly to {@link #retrieve(URI, XPath, Set)}, but returns only the
     * number of matching instances instead of retrieving the corresponding {@code Record}
     * objects.
     *
     * @param type
     *            the URI of the type of records to count
     * @param condition
     *            an optional condition to be satisfied by matching records; if null, no condition
     *            must be checked
     * @return the number of records matching the optional condition and type specified
     * @throws IOException
     *             in case some IO error occurs
     * @throws IllegalArgumentException
     *             in case the type specified is not supported
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended
     */
    long count(URI type, @Nullable XPath condition) throws IOException,
            IllegalArgumentException, IllegalStateException;

    /**
     * Evaluates a {@code match} request with the parameters supplied. The operation:
     * <ol>
     * <li>Considers all the combinations {@code <resource, mention, entity, axiom>} such that
     * <ul>
     * <li>{@code mention} {@link KS#MENTION_OF} {@code resource};</li>
     * <li>{@code mention} {@link KS#REFERS_TO} {@code entity} (optional if no condition or
     * projection on entities);</li>
     * <li>{@code mention} {@link KS#EXPRESSES} {@code axiom} (optional if no condition or
     * projection on axioms).</li>
     * </ul>
     * </li>
     * <li>Filters the combinations so that optional {@code conditions} / {@code ids} selections
     * on resource, mention, entity and axiom components are satisfied.</li>
     * <li>Performs projection with duplicate removal of filtered combinations, keeping only the
     * components occurring in {@code properties.keySet()}, returning for each component the
     * subset of properties of {@code properties.get(component_type_URI)}.</li>
     * </ol>
     * In the maps supplied as parameters, components are identified by their type URI, that is
     * {@link KS#RESOURCE}, {@link KS#MENTION}, {@link KS#ENTITY} and {@link KS#AXIOM}.
     *
     * @param conditions
     *            a non-null map with optional component conditions, indexed by the component type
     *            URI; note the map may be possibly empty or contain conditions only for a subset
     *            of components
     * @param ids
     *            a non-null map with optional ID selections for different components, indexed by
     *            the component type URI; note the map may be possibly empty or contain selections
     *            only for a subset of components
     * @param properties
     *            a non-null, non-empty map with the properties to return for different
     *            components, indexed by the component type URI; if the set of property URIs
     *            mapped to a component is null or empty, then all the properties of the component
     *            should be returned; if a component is not referenced in the map, then it must
     *            not be returned
     * @return a {@code Stream} of combination records
     * @throws IOException
     *             in case some IO error occurs
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended
     */
    Stream<Record> match(Map<URI, XPath> conditions, Map<URI, Set<URI>> ids,
            Map<URI, Set<URI>> properties) throws IOException, IllegalStateException;

    /**
     * Stores a record in the {@code DataStore}. A record may or may not exist for the same ID; in
     * case it exists, it is replaced by the newly specified record. In case the method call
     * returns successfully, there is no guarantee that the write operation completed (e.g.,
     * because of internal buffering); however, it is guaranteed (e.g., via internal flushing)
     * that read operations called subsequently will see the result of the modification. In case
     * the method call fails with an {@code IOException}, there is no guarantee that the
     * {@code DataStore} is left in the same state it was at the time of calling.
     *
     * @param type
     *            the URI of the type of record to store, not null
     * @param record
     *            the record to store, not null
     * @throws IOException
     *             in case the operation failed, with no guarantee that the {@code DataStore} is
     *             left in the same state it was when the method was called; note that this
     *             exception may trigger a rollback on the caller side
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended, or if it is read-only
     */
    void store(URI type, Record record) throws IOException, IllegalStateException;

    /**
     * Deletes the record stored in the {@code DataStore} with the ID specified. A record may or
     * may not be stored for the specified ID; in case it exists, it is deleted by the operation.
     * In case the method call returns successfully, there is no guarantee that the write
     * operation completed (e.g., because of internal buffering); however, it is guaranteed (e.g.,
     * via internal flushing) that read operations called subsequently will see the result of the
     * modification. In case the method call fails with an {@code IOException}, there is no
     * guarantee that the {@code DataStore} is left in the same state it was at the time of
     * calling.
     *
     * @param type
     *            the URI of the type of record to delete, not null
     * @param id
     *            the ID of the record to delete, not null
     * @throws IOException
     *             in case the operation failed, with no guarantee that the {@code DataStore} is
     *             left in the same state it was when the method was called; note that this
     *             exception may trigger a rollback on the caller side
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended, or if it is read-only
     */
    void delete(URI type, URI id) throws IOException, IllegalStateException;

    /**
     * Ends the transaction, either committing or rolling back its changes (if any). This method
     * always tries to terminate the transaction: if commit is requested but fails, a rollback is
     * forced by the method and an {@code IOException} is thrown. If it is not possible either to
     * commit or rollback, then the {@code DataStore} is possibly left in an unknown state and a
     * {@code DataCorruptedException} is thrown to signal a data corruption situation that cannot
     * be automatically recovered.
     *
     * @param commit
     *            {@code true} in case changes made by the transaction should be committed
     * @throws IOException
     *             in case some IO error occurs or the commit request cannot be satisfied for any
     *             reason; it is however guaranteed that a forced rollback has been performed
     * @throws DataCorruptedException
     *             in case it was not possible either to commit or rollback, which implies the
     *             state of the {@code DataStore} is unknown and automatic recovery is not
     *             possible (hence, data is corrupted)
     * @throws IllegalStateException
     *             if the {@code DataTransaction} has been already ended
     */
    void end(boolean commit) throws DataCorruptedException, IOException, IllegalStateException;

}
// DESIGN NOTES
//
// XXX the 'union' merge criterion has a natural mapping in a HBase layout where the value is
// incorporated in the column name; when writing an attribute, this layout avoids the need to
// retrieve the previous values of an attribute in order to do the merge and compute the new
// values, which is more efficient in case a large number of values can be associated with the
// attribute; however, we do not expect this to be the case (apart from the 'isReferredBy'
// attribute, which is not stored anyway) -> ignoring this consideration, it seems OK to move
// all the logic related to merge criteria to the frontend
//
// XXX Coprocessors could be used in order to implement the merge and update primitives (the
// latter via a custom CoprocessorProtocol); still, they would need to implement: merge criteria,
// validation, update of related objects (e.g., to manipulate bidirectional relations). If we
// avoid coprocessors, then the KS server (= HBase client) would need to fetch the previous
// values for the object being modified and handle merge criteria, validation and enforcing of
// bidirectionality locally. This would require an additional communication between the KS server
// and the affected region server(s), whose cost depends on round-trip latency and bandwidth. We
// may ignore bandwidth (100+MBits/sec in a LAN) and use batching techniques (HBase batch calls)
// to distribute latency (~1ms) over multiple calls, making it almost irrelevant. By adopting
// this approach, the benefits of using coprocessors seem greatly outweighed by their far greater
// implementation costs, hence they are not adopted
//
// XXX AggregateClient can be used to implement count (in future, we may extract more elaborate
// statistics by introducing some kind of 'stats' primitive and a corresponding coprocessor)
//
// XXX an alternative way to delete records would be something like delete(condition), which would
// allow deleting a bunch of objects satisfying a condition without first retrieving them; still,
// it is unlikely the frontend may delete objects without looking at their data and fixing related
// objects, so a retrieval would still be needed in most cases; given also that delete
// performance is not as important as the performance of other operations, the decision is to
// stick with delete(object), which seems simpler to implement