// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.janusgraph.diskstorage.keycolumnvalue.scan;
import org.janusgraph.diskstorage.EntryList;
import org.janusgraph.diskstorage.StaticBuffer;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.keycolumnvalue.SliceQuery;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
/**
* A global computation over
*
* @author Matthias Broecheler (me@matthiasb.com)
*/
public interface ScanJob extends Cloneable {
/**
* Invoked before a block of computation (i.e. multiple process() calls) is handed to this particular ScanJob.
* Can be used to initialize the iteration. This method is called exactly once for each before a block of computation.
* This method is semantically aligned with {@link org.tinkerpop.gremlin.process.computer.VertexProgram#workerIterationStart()}
*
* This method may not be called if there is no data to be processed. Correspondingly, the end method won't be called either.
*
* No-op default implementation.
*
* @param jobConfiguration configuration for this particular job
* @param graphConfiguration configuration options for the entire graph against which this job is executed
* @param metrics {@link org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics} for this job
*/
public default void workerIterationStart(Configuration jobConfiguration,
Configuration graphConfiguration, ScanMetrics metrics) {}
/**
* Invoked after a block of computation (i.e. multiple process() calls) is handed to this particular ScanJob.
* Can be used to close any resources held by this job. This method is called exactly once for each after a block of computation.
* This method is semantically aligned with {@link org.tinkerpop.gremlin.process.computer.VertexProgram#workerIterationEnd()}
*
* This method may not be called if there is no data to be processed. Correspondingly, the start method won't be called either.
*
* No-op default implementation.
*
* @param metrics {@link org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics} for this job
*/
public default void workerIterationEnd(ScanMetrics metrics) {}
/**
* Run this {@code ScanJob}'s computation on the supplied row-key and entries.
* <p>
* This method will be called by a client of this interface if and only if both
* of the following criteria are satisfied:
* <ul>
* <li>
* The predicate returned by {@link #getKeyFilter()} must evaluate to true
* on the {@code key}.
* </li>
* <li>
* The {@code entries} parameter must contain an entry whose key is the first
* {@code SliceQuery} returned by {@link #getQueries()} and whose value
* is an {@code EntryList} with at least size one. In other words, the
* initial query in this job's query list must have matched at least once.
* </li>
* </ul>
* <p>
* Implementations may assume these two conditions are satisfied. Calling this
* method when either of these two conditions is false yields undefined behavior.
*
* <p>
* It is the caller's responsibility to construct an {@code entries} map that
* maps each {@code SliceQuery} to that query's matches.
* The caller is also responsible for truncating the {@code entries} values
* to honor {@link SliceQuery#getLimit()} when {@link SliceQuery#hasLimit()}
* is true. Passing in an {@code entries} value longer than the limit set in
* its respective key yields undefined behavior.
* <p>
* This method may be called by concurrent threads in a single process.
*
* @param key
* @param entries
* @param metrics
*/
public void process(StaticBuffer key, Map<SliceQuery,EntryList> entries, ScanMetrics metrics);
/**
* Returns one or more {@code SliceQuery} instances belonging to this {@code ScanJob}.
* <p>
* Before calling
* {@link #process(org.janusgraph.diskstorage.StaticBuffer, java.util.Map, ScanMetrics)},
* users of this interface must check that the key in question contains at least one
* entry matching the initial {@code SliceQuery} returned by this method. See the javadoc
* for the {@code process} method for more information.
* <p>
* If this method returns more than one query, then the initial query's lower bound must
* be all zero bits and the initial query's upper bound must be all one bits (per the
* preconditions in {@code StandardScannerExecutor}, the reference {@code ScanJob} executor).
*
* @return one or more queries
*/
public List<SliceQuery> getQueries();
/**
* A predicate that determines whether
* {@link #process(org.janusgraph.diskstorage.StaticBuffer, java.util.Map, ScanMetrics)}
* should be invoked for the given key. If the predicate returns true,
* then users of this interface should invoke {@code process} for the key and
* its associated entries. If the predicate returns false, then users of this
* interface need not invoke {@code process} for the key and its associated entries.
* <p>
* This is essentially an optimization that lets implementations of this interface
* signal to client code that a row can be safely skipped without affecting the
* execution of this {@code ScanJob}.
* <p>
* The returned predicate may be called by concurrent threads in a single process.
*
* @return a threadsafe predicate for edgestore keys
*/
public default Predicate<StaticBuffer> getKeyFilter() {
return b -> true; //No filter by default
}
/**
* Returns a clone of this ScanJob. The clone will not yet be initialized for computation but all of
* its internal state (if any) must match that of the original copy.
*
* @return A clone of this {@link org.janusgraph.diskstorage.keycolumnvalue.scan.ScanJob}
*/
public ScanJob clone();
}