/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.execution.steps;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.diqube.data.column.ColumnPage;
import org.diqube.data.column.ColumnShard;
import org.diqube.data.column.StandardColumnShard;
import org.diqube.data.dictionary.Dictionary;
import org.diqube.execution.consumers.AbstractThreadedColumnBuiltConsumer;
import org.diqube.execution.consumers.AbstractThreadedColumnVersionBuiltConsumer;
import org.diqube.execution.consumers.AbstractThreadedRowIdConsumer;
import org.diqube.execution.consumers.ColumnBuiltConsumer;
import org.diqube.execution.consumers.ColumnVersionBuiltConsumer;
import org.diqube.execution.consumers.DoneConsumer;
import org.diqube.execution.consumers.GenericConsumer;
import org.diqube.execution.consumers.OverwritingRowIdConsumer;
import org.diqube.execution.consumers.RowIdConsumer;
import org.diqube.execution.exception.ExecutablePlanBuildException;
import org.diqube.execution.exception.ExecutablePlanExecutionException;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.executionenv.VersionedExecutionEnvironment;
import org.diqube.executionenv.querystats.QueryableColumnShard;
import org.diqube.queries.QueryRegistry;
import org.diqube.queries.QueryUuid;
import org.diqube.queries.QueryUuid.QueryUuidThreadState;
import org.diqube.util.HashingBatchCollector;
import org.diqube.util.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Loads the inputRowIds of the rows that have a specific inequality relation to a specific value in a specific column.
*
* <p>
* This includes the inequality operators >, >=, <, <=.
*
* <p>
* This step can optionally be executed on a column that still needs to be constructed. In that case, a
* {@link ColumnBuiltConsumer} input needs to be specified which keeps this step up to date with the construction of
* that column. If no {@link ColumnBuiltConsumer} is specified, then simply the full column is searched in defaultEnv.
*
* <p>
* Additionally, this step can be wired to the output of another {@link RowIdConsumer} which will force this instance to
* only take those RowIds into account that are provided by the input {@link RowIdConsumer} - effectively building an AND
* concatenation. In contrast to a {@link RowIdAndStep} though, the two {@link RowIdInequalStep}s that are connected
* that way would be executed after each other, not parallel to each other. Therefore, usually a {@link RowIdAndStep} is
* used.
*
* <p>
* This step can be used in the non-default execution by wiring an input {@link ColumnVersionBuiltConsumer}. It will
* then not run once-off, but continuously will run completely again based on a new
* {@link VersionedExecutionEnvironment}. The output in that case will not be the default {@link RowIdConsumer}, but an
* {@link OverwritingRowIdConsumer}. If no {@link ColumnVersionBuiltConsumer} is wired as input, the result will be a
* {@link RowIdConsumer}.
*
* <p>
* Only {@link StandardColumnShard}s supported.
*
* <p>
* Input: 1 optional {@link ColumnBuiltConsumer}, 1 optional {@link ColumnVersionBuiltConsumer}, 1 optional
* {@link RowIdConsumer} <br>
* Output: {@link RowIdConsumer}s or {@link OverwritingRowIdConsumer}s (see above).
*
* @author Bastian Gloeckle
*/
public class RowIdInequalStep extends AbstractThreadedExecutablePlanStep {
/** Logger of this step. */
private static final Logger logger = LoggerFactory.getLogger(RowIdInequalStep.class);

/** Number of "column built" notifications received so far for the column(s) this step reads. */
private AtomicInteger columnsBuilt = new AtomicInteger(0);

/**
 * Input consumer that counts, in {@link #columnsBuilt}, each notification about {@link #colName} or
 * {@link #otherColName} being built. Notifications for any other column are ignored.
 */
private AbstractThreadedColumnBuiltConsumer colBuiltConsumer = new AbstractThreadedColumnBuiltConsumer(this) {
  @Override
  protected void allSourcesAreDone() {
    // nothing to do here
  }

  @Override
  protected void doColumnBuilt(String colName) {
    String otherCol = RowIdInequalStep.this.otherColName;
    boolean isColumnOfThisStep =
        RowIdInequalStep.this.colName.equals(colName) || (otherCol != null && otherCol.equals(colName));
    if (!isColumnOfThisStep) {
      return;
    }
    columnsBuilt.incrementAndGet();
  }
};
/**
 * The {@link VersionedExecutionEnvironment} with the highest version that was reported for one of our columns so far,
 * or <code>null</code> if none was reported yet. Access only while holding {@link #newestVersionedEnvironmentSync}.
 */
private VersionedExecutionEnvironment newestVersionedEnvironment = null;

/** Monitor guarding {@link #newestVersionedEnvironment}. */
private Object newestVersionedEnvironmentSync = new Object();

/**
 * Input consumer that remembers the newest {@link VersionedExecutionEnvironment} in which {@link #colName} or
 * {@link #otherColName} was (partially) built. Notifications for other columns are ignored.
 */
private AbstractThreadedColumnVersionBuiltConsumer columnVersionBuiltConsumer =
    new AbstractThreadedColumnVersionBuiltConsumer(this) {
      @Override
      protected void doColumnBuilt(VersionedExecutionEnvironment env, String colName, Set<Long> adjustedRowIds) {
        String otherCol = RowIdInequalStep.this.otherColName;
        boolean isColumnOfThisStep =
            RowIdInequalStep.this.colName.equals(colName) || (otherCol != null && otherCol.equals(colName));
        if (!isColumnOfThisStep) {
          return;
        }

        synchronized (newestVersionedEnvironmentSync) {
          VersionedExecutionEnvironment known = newestVersionedEnvironment;
          // only ever move forward to a higher version
          if (known == null || env.getVersion() > known.getVersion()) {
            newestVersionedEnvironment = env;
          }
        }
      }

      @Override
      protected void allSourcesAreDone() {
        // nothing to do here
      }
    };
/** Set to <code>true</code> as soon as all sources of {@link #rowIdConsumer} reported that they are done. */
private AtomicBoolean rowIdSourceIsDone = new AtomicBoolean(false);

/** Row IDs received through {@link #rowIdConsumer} that have not yet been picked up by {@link #execute()}. */
private ConcurrentLinkedDeque<Long> inputRowIds = new ConcurrentLinkedDeque<>();

/**
 * Optional input consumer: queues each incoming row ID in {@link #inputRowIds} and records in
 * {@link #rowIdSourceIsDone} when its sources are done.
 */
private AbstractThreadedRowIdConsumer rowIdConsumer = new AbstractThreadedRowIdConsumer(this) {
  @Override
  protected void doConsume(Long[] rowIds) {
    ConcurrentLinkedDeque<Long> queue = RowIdInequalStep.this.inputRowIds;
    for (int idx = 0; idx < rowIds.length; idx++) {
      long rowId = rowIds[idx];
      queue.add(rowId);
    }
  }

  @Override
  public void allSourcesAreDone() {
    RowIdInequalStep.this.rowIdSourceIsDone.set(true);
  }
};
/**
* The environment passed to the constructor. {@link #execute()} searches in this environment when no
* {@link VersionedExecutionEnvironment} has been reported yet or when all input columns are fully built.
*/
private ExecutionEnvironment defaultEnv;
/** Name of the column to search the values in (the left operand of the comparison). */
private String colName;
/**
* Only set if we're not supposed to compare the value of one col to a constant, but of one col to another col.
* <code>null</code> if {@link #value} is set.
*/
private String otherColName;
/**
* Only set if we should compare the values of one column to a constant value. <code>null</code> if
* {@link #otherColName} is set.
*/
private Object value;
/**
* The comparator of the requested inequality operation.
*/
private RowIdComparator comparator;
/**
* Row IDs that have been reported to the {@link #rowIdConsumer} as input in earlier runs of {@link #execute()}. This
* is only maintained if {@link #columnVersionBuiltConsumer} is wired (and we therefore provide
* {@link OverwritingRowIdConsumer} output); otherwise it stays empty.
*/
private NavigableSet<Long> cachedActiveRowIds = new TreeSet<>();
/**
 * Creates a step that compares the values of a column to a constant. The column is always the left operand of the
 * comparison, the constant the right operand.
 *
 * @param defaultEnv
 *          The environment to search the column in when no versioned environment is used.
 * @param colName
 *          Name of the column whose values are compared.
 * @param value
 *          The constant to compare to.
 * @param comparator
 *          Freshly created instance of an implementation of {@link RowIdComparator}: {@link GtRowIdComparator} for
 *          &gt;, {@link GtEqRowIdComparator} for &gt;=, {@link LtRowIdComparator} for &lt; and
 *          {@link LtEqRowIdComparator} for &lt;=.
 */
public RowIdInequalStep(int stepId, QueryRegistry queryRegistry, ExecutionEnvironment defaultEnv, String colName,
    Object value, RowIdComparator comparator) {
  super(stepId, queryRegistry);

  // where to search
  this.defaultEnv = defaultEnv;
  this.colName = colName;

  // what to compare to: a constant, never another column
  this.otherColName = null;
  this.value = value;

  // how to compare
  this.comparator = comparator;
}
/**
 * Creates a step that compares the values of one column to the values of another column, row by row. The column
 * named <code>colName</code> is the left operand, the one named <code>otherColName</code> the right operand.
 *
 * @param comparator
 *          Freshly created instance of an implementation of {@link RowIdComparator}: {@link GtRowIdComparator} for
 *          &gt;, {@link GtEqRowIdComparator} for &gt;=, {@link LtRowIdComparator} for &lt; and
 *          {@link LtEqRowIdComparator} for &lt;=.
 * @param otherColNameUsed
 *          Provide true always. The value is not read; the parameter only exists to distinguish this constructor
 *          from the overloaded one that takes a constant.
 */
public RowIdInequalStep(int stepId, QueryRegistry queryRegistry, ExecutionEnvironment env, String colName,
    String otherColName, RowIdComparator comparator, boolean otherColNameUsed) {
  super(stepId, queryRegistry);

  // where to search
  this.defaultEnv = env;
  this.colName = colName;

  // what to compare to: another column, never a constant
  this.value = null;
  this.otherColName = otherColName;

  // how to compare
  this.comparator = comparator;
}
/**
* Runs one pass of this step: chooses the environment to search in, collects the row IDs to restrict the search to
* (if a {@link RowIdConsumer} input is wired), executes the comparison and sends the matching row IDs to the output
* consumers. Returns without doing anything if the required column(s) are not available yet.
*/
@Override
public void execute() {
ExecutionEnvironment curEnv;
// snapshot of the newest versioned environment reported so far; null if there was none
synchronized (newestVersionedEnvironmentSync) {
curEnv = newestVersionedEnvironment;
}
// one "built" notification is expected per input column: 1 when comparing to a constant, 2 when comparing two cols
boolean allInputColumnsFullyBuilt = (otherColName == null) ? columnsBuilt.get() == 1 : columnsBuilt.get() == 2;
if ((colBuiltConsumer.getNumberOfTimesWired() > 0 && columnVersionBuiltConsumer.getNumberOfTimesWired() > 0
&& !allInputColumnsFullyBuilt && curEnv == null) || // both column consumers are wired, neither has updates
(colBuiltConsumer.getNumberOfTimesWired() > 0 && columnVersionBuiltConsumer.getNumberOfTimesWired() == 0
&& !allInputColumnsFullyBuilt)) // only the ColumnBuiltConsumer is wired and it has no updates
// we need to wait for a column to be built but it is not yet built.
return;
// a fully built column always wins over an intermediate version: search the default environment then.
if (curEnv == null || allInputColumnsFullyBuilt)
curEnv = defaultEnv;
else {
// using a VersionedExecutionEnvironment. Check if all needed cols are available already.
if (otherColName == null && curEnv.getColumnShard(colName) == null || //
(otherColName != null
&& (curEnv.getColumnShard(colName) == null || curEnv.getColumnShard(otherColName) == null)))
// at least one of the required columns is not yet available in curEnv
return;
}
// null means "no restriction": all rows of the column are searched.
NavigableSet<Long> activeRowIds = null;
if (rowIdConsumer.getNumberOfTimesWired() > 0) {
// start with the row IDs of earlier runs (only non-empty in versioned mode) and add everything queued since.
activeRowIds = new TreeSet<>(cachedActiveRowIds);
Long rowId;
while ((rowId = inputRowIds.poll()) != null)
activeRowIds.add(rowId);
if (activeRowIds.isEmpty()) {
if (rowIdSourceIsDone.get() && inputRowIds.isEmpty()) {
// the input is done and never delivered a row ID: there cannot be any result.
forEachOutputConsumerOfType(GenericConsumer.class, c -> c.sourceIsDone());
doneProcessing();
return;
}
}
// in versioned mode the step is re-run completely, so remember the row IDs for the next run.
if (columnVersionBuiltConsumer.getNumberOfTimesWired() > 0)
cachedActiveRowIds = activeRowIds;
}
if (curEnv.getColumnShard(colName) == null)
throw new ExecutablePlanExecutionException("Could not find column " + colName);
StandardColumnShard columnShard = curEnv.getPureStandardColumnShard(colName);
NavigableMap<Long, ColumnPage> pages = columnShard.getPages();
// a column without pages cannot contain matching rows, nothing to search then.
if (pages.size() > 0) {
if (value != null) {
// we're supposed to compare one column to constant values.
compareToConstant(curEnv, value, columnShard, activeRowIds, comparator);
} else {
// we're supposed to compare two cols to each other.
if (curEnv.getColumnShard(otherColName) == null)
throw new ExecutablePlanExecutionException("Could not find column " + otherColName);
if (!curEnv.getColumnType(colName).equals(curEnv.getColumnType(otherColName)))
throw new ExecutablePlanExecutionException("Cannot compare column " + colName + " to column " + otherColName
+ " as they have different data types.");
QueryableColumnShard otherColumnShard = curEnv.getColumnShard(otherColName);
if (((StandardColumnShard) otherColumnShard.getDelegate()).getPages().size() > 0)
executeOnOtherCol(curEnv, columnShard, otherColumnShard, activeRowIds, comparator);
}
}
// NOTE(review): this condition does not consult rowIdSourceIsDone - if only a RowIdConsumer input is wired, the step
// reports "done" after this pass even when the row ID source is still active. Confirm that no row IDs can arrive later.
if (columnVersionBuiltConsumer.getNumberOfTimesWired() == 0 || allInputColumnsFullyBuilt) {
forEachOutputConsumerOfType(GenericConsumer.class, c -> c.sourceIsDone());
doneProcessing();
}
}
/**
 * Compares the values of the given column to a constant and sends the IDs of all matching rows to the output
 * consumers. The column is the left operand of the comparison, the constant the right operand.
 *
 * @param curEnv
 *          The environment the column is searched in.
 * @param constantValue
 *          The value to compare to.
 * @param column
 *          The column whose values should be compared to the constant value.
 * @param activeRowIds
 *          Those rowIds that we should take into account for searching. May be <code>null</code> in which case
 *          we'll search all rows.
 * @param comparator
 *          The comparator implementing &gt;, &gt;=, &lt; or &lt;=.
 */
private void compareToConstant(ExecutionEnvironment curEnv, Object constantValue, StandardColumnShard column,
    NavigableSet<Long> activeRowIds, RowIdComparator comparator) {
  Pair<Stream<Long>, QueryUuidThreadState> matchingRowIds =
      rowIdStreamOfConstant(curEnv, column, constantValue, activeRowIds, comparator);
  sendRowIds(curEnv, matchingRowIds);
}
/**
 * Runs the terminal operation on the given stream and hands the resulting row IDs to the output consumers.
 *
 * <p>
 * If no {@link ColumnVersionBuiltConsumer} input is wired, the row IDs are delivered to the {@link RowIdConsumer}s in
 * batches of 100. Otherwise all row IDs are delivered in one single call to the {@link OverwritingRowIdConsumer}s,
 * because each call overwrites the result of the previous one.
 *
 * @param curEnv
 *          The environment the row IDs were computed on; passed on to {@link OverwritingRowIdConsumer}s.
 * @param rowIdStreamPair
 *          Pair of stream producing rowIds, and the {@link QueryUuidThreadState} that should be re-constructed as
 *          soon as the terminal operation on the stream was executed.
 */
private void sendRowIds(ExecutionEnvironment curEnv, Pair<Stream<Long>, QueryUuidThreadState> rowIdStreamPair) {
  QueryUuidThreadState threadState = rowIdStreamPair.getRight();
  Stream<Long> matchingRowIds = rowIdStreamPair.getLeft();
  AtomicLong reportedRows = new AtomicLong(0);

  if (columnVersionBuiltConsumer.getNumberOfTimesWired() != 0) {
    // OverwritingRowIdConsumer is wired - we cannot split the result, otherwise we "overwrite" our own ones!
    Long[] allRowIds = matchingRowIds.toArray(Long[]::new);
    QueryUuid.setCurrentThreadState(threadState);
    reportedRows.set(allRowIds.length);
    forEachOutputConsumerOfType(OverwritingRowIdConsumer.class, c -> c.consume(curEnv, allRowIds));
  } else {
    // RowIdConsumer is wired, we therefore split the resulting IDs in nice 100-piece packets.
    Consumer<Long[]> batchSender = new Consumer<Long[]>() {
      @Override
      public void accept(Long[] batch) {
        reportedRows.addAndGet(batch.length);
        // the batch may be delivered on a worker thread of the stream, which needs the query's thread state.
        QueryUuid.setCurrentThreadState(threadState);
        try {
          forEachOutputConsumerOfType(RowIdConsumer.class, c -> c.consume(batch));
        } finally {
          QueryUuid.clearCurrent();
        }
      }
    };
    // RowIds are unique, so using BatchCollector is ok.
    matchingRowIds.collect(new HashingBatchCollector<Long>( //
        100, // Batch size
        len -> new Long[len], // new result array
        batchSender));
    // the stream callbacks cleared the thread state, restore it for the calling thread.
    QueryUuid.setCurrentThreadState(threadState);
  }

  logger.trace("Reported {} matching rows on {}.", reportedRows.get(), curEnv);
}
/**
 * Builds a stream of the IDs of all rows whose value in the given column matches the comparison to the given constant.
 *
 * <p>
 * The left operand to the comparison is the column, the right one is the constant:
 *
 * Example: COL &gt;= constant
 *
 * <p>
 * The pages of the column are processed in parallel. No terminal operation is executed on the stream in this method.
 * Each stage of the stream sets the {@link QueryUuidThreadState} of the calling thread while it runs and clears it
 * afterwards.
 *
 * @param env
 *          The environment the column belongs to; used to find out if the column is a temporary one when registering
 *          page accesses.
 * @param column
 *          The column whose values are compared.
 * @param constantValue
 *          The constant to compare to.
 * @param activeRowIds
 *          The row IDs to restrict the search to, or <code>null</code> to search all rows.
 * @param comparator
 *          The comparator implementing &gt;, &gt;=, &lt; or &lt;=.
 * @return A Stream containing the RowIds that matched and the {@link QueryUuidThreadState} that should be
 *         re-constructed as soon as the terminal operation on the stream was executed.
 */
private Pair<Stream<Long>, QueryUuidThreadState> rowIdStreamOfConstant(ExecutionEnvironment env,
    StandardColumnShard column, Object constantValue, NavigableSet<Long> activeRowIds, RowIdComparator comparator) {
  Long referenceColumnValueId = comparator.findReferenceColumnValueId(column, constantValue);
  QueryUuidThreadState uuidState = QueryUuid.getCurrentThreadState();

  if (referenceColumnValueId == null)
    // no entry matches, return empty stream.
    return new Pair<>(Stream.<Long> empty(), uuidState);

  Stream<Long> matchingRowIds = column.getPages().values().stream().parallel() // stream all Pages in parallel
      .filter((ColumnPage page) -> { // filter out inactive Pages
        QueryUuid.setCurrentThreadState(uuidState);
        try {
          if (activeRowIds != null) {
            // If we're restricting the row IDs, we check if the page contains any row that we are interested in.
            // The page holds the rows [firstRowId, firstRowId + size), the upper bound is exclusive.
            long firstRowId = page.getFirstRowId();
            Long interestedRowId = activeRowIds.ceiling(firstRowId);
            if (interestedRowId == null || interestedRowId >= firstRowId + page.size())
              return false;
          }
          return comparator.pageContainsAnyRelevantValue(page, referenceColumnValueId);
        } finally {
          QueryUuid.clearCurrent();
        }
      }).map((ColumnPage page) -> { // find ColumnPageValue IDs that match the comparison
        QueryUuid.setCurrentThreadState(uuidState);
        try {
          queryRegistry.getOrCreateCurrentStatsManager().registerPageAccess(page,
              env.isTemporaryColumn(column.getName()));
          Set<Long> pageValueIds = comparator.findActivePageValueIds(page, referenceColumnValueId);
          return new Pair<ColumnPage, Set<Long>>(page, pageValueIds);
        } finally {
          QueryUuid.clearCurrent();
        }
      }).flatMap((Pair<ColumnPage, Set<Long>> pagePair) -> { // resolve RowIDs and map them flat into a single stream
        QueryUuid.setCurrentThreadState(uuidState);
        try {
          ColumnPage page = pagePair.getLeft();
          Set<Long> searchedPageValueIds = pagePair.getRight();
          List<Long> res = new ArrayList<>();

          if (activeRowIds != null) {
            // If we're restricted to a specific set of row IDs, we decompress only the corresponding values and
            // check those.
            SortedSet<Long> activeRowIdsInThisPage = activeRowIds.subSet( //
                page.getFirstRowId(), page.getFirstRowId() + page.size());
            List<Integer> valueIndices = activeRowIdsInThisPage.stream()
                .map(rowId -> (int) (rowId - page.getFirstRowId())).collect(Collectors.toList());
            List<Long> decompressedColumnPageIds = page.getValues().getMultiple(valueIndices);
            for (int i = 0; i < decompressedColumnPageIds.size(); i++) {
              Long decompressedColumnPageId = decompressedColumnPageIds.get(i);
              if (searchedPageValueIds.contains(decompressedColumnPageId))
                res.add(valueIndices.get(i) + page.getFirstRowId());
            }
          } else {
            // TODO #2 STAT use statistics to decide if we should decompress the whole array here.
            long[] decompressedValues = page.getValues().decompressedArray();
            for (int i = 0; i < decompressedValues.length; i++) {
              if (searchedPageValueIds.contains(decompressedValues[i]))
                res.add(i + page.getFirstRowId());
            }
          }
          return res.stream();
        } finally {
          QueryUuid.clearCurrent();
        }
      });

  return new Pair<>(matchingRowIds, uuidState);
}
/**
 * Traverses the pages of the left column and finds the IDs of those rows where the values of the two columns match
 * the comparison. The matching row IDs are sent to the output consumers.
 *
 * @param curEnv
 *          The environment both columns belong to.
 * @param leftColumn
 *          The left column of the equation.
 * @param rightColumn
 *          The right column of the equation, the {@link QueryableColumnShard#getDelegate()} must return a
 *          {@link StandardColumnShard}.
 * @param activeRowIds
 *          Set of active row IDs, used to filter the column pages. Can be <code>null</code>.
 * @param comparator
 *          The comparator implementing &gt;, &gt;=, &lt; or &lt;=.
 */
private void executeOnOtherCol(ExecutionEnvironment curEnv, StandardColumnShard leftColumn,
    QueryableColumnShard rightColumn, NavigableSet<Long> activeRowIds, RowIdComparator comparator) {
  NavigableMap<Long, Long> comparisonMap = comparator.calculateComparisonMap(leftColumn, rightColumn);
  // all column value IDs of the left column that can be part of a matching row, in ascending order.
  Long[] colValueIds1 = comparisonMap.keySet().stream().sorted().toArray(l -> new Long[l]);

  QueryUuidThreadState uuidState = QueryUuid.getCurrentThreadState();

  Stream<Long> resultRowIdStream = leftColumn.getPages().values().stream().parallel()
      // filter out pages that either do not match the rowID range or do not contain any interesting value
      .filter((ColumnPage leftColPage) -> {
        QueryUuid.setCurrentThreadState(uuidState);
        try {
          if (activeRowIds != null) {
            // If we're restricting the row IDs, we check if the page contains any row that we are interested in.
            // The page holds the rows [firstRowId, firstRowId + size), the upper bound is exclusive.
            long firstRowId = leftColPage.getFirstRowId();
            Long interestedRowId = activeRowIds.ceiling(firstRowId);
            if (interestedRowId == null || interestedRowId >= firstRowId + leftColPage.size())
              return false;
          }
          return leftColPage.getColumnPageDict().containsAnyValue(colValueIds1);
        } finally {
          QueryUuid.clearCurrent();
        }
      }).flatMap((ColumnPage leftColPage) -> {
        QueryUuid.setCurrentThreadState(uuidState);
        try {
          queryRegistry.getOrCreateCurrentStatsManager().registerPageAccess(leftColPage,
              curEnv.isTemporaryColumn(leftColumn.getName()));

          // resolve ColumnPage value IDs from column value IDs for left page for all column value IDs we're
          // interested in.
          NavigableMap<Long, Long> leftPageIdsToColumnIds = new TreeMap<>();
          Long[] leftPageValueIds = leftColPage.getColumnPageDict().findIdsOfValues(colValueIds1);
          for (int i = 0; i < leftPageValueIds.length; i++)
            leftPageIdsToColumnIds.put(leftPageValueIds[i], colValueIds1[i]);

          List<Long> res = new ArrayList<>();
          // decompress value arrays and traverse them
          // TODO #2 STAT decide if full value array should be decompressed when there are activeRowIds.
          long[] leftValues = leftColPage.getValues().decompressedArray();
          for (int i = 0; i < leftValues.length; i++) {
            long rowId = leftColPage.getFirstRowId() + i;
            if (activeRowIds != null && !activeRowIds.contains(rowId))
              continue;

            Long leftColumnValueId = leftPageIdsToColumnIds.get(leftValues[i]);
            // check if we're interested in that column value ID.
            if (leftColumnValueId == null)
              continue;

            // TODO #2 STAT decide if we should decompress the whole array for the right side, too.
            if (comparator.rowMatches(leftColumnValueId, rowId, rightColumn, comparisonMap))
              res.add(rowId);
          }
          return res.stream();
        } finally {
          QueryUuid.clearCurrent();
        }
      });

  sendRowIds(curEnv, new Pair<>(resultRowIdStream, uuidState));
}
/**
 * @return A new, modifiable list containing the three input consumers of this step: the column-built consumer, the
 *         row ID consumer and the column-version-built consumer (in that order).
 */
@Override
public List<GenericConsumer> inputConsumers() {
  List<GenericConsumer> consumers = new ArrayList<>();
  consumers.add(colBuiltConsumer);
  consumers.add(rowIdConsumer);
  consumers.add(columnVersionBuiltConsumer);
  return consumers;
}
/**
 * Accepts only {@link DoneConsumer}s, {@link RowIdConsumer}s and {@link OverwritingRowIdConsumer}s as output.
 *
 * @throws IllegalArgumentException
 *           if the given consumer is of none of these types.
 */
@Override
protected void validateOutputConsumer(GenericConsumer consumer) throws IllegalArgumentException {
  boolean accepted = consumer instanceof DoneConsumer //
      || consumer instanceof RowIdConsumer //
      || consumer instanceof OverwritingRowIdConsumer;

  if (!accepted) {
    throw new IllegalArgumentException("Only RowIdConsumers and OverwritingRowIdConsumer accepted!");
  }
}
/**
 * Checks that the kind of output consumer matches the wired inputs: exactly one of {@link RowIdConsumer} and
 * {@link OverwritingRowIdConsumer} must be wired as output, the overwriting one if (and only if) a
 * {@link ColumnVersionBuiltConsumer} input is wired.
 *
 * @throws ExecutablePlanBuildException
 *           if the wiring is invalid.
 */
@Override
protected void validateWiredStatus() throws ExecutablePlanBuildException {
  boolean outputContainsDefault = outputConsumers.stream().anyMatch(RowIdConsumer.class::isInstance);
  boolean outputContainsOverwriting = outputConsumers.stream().anyMatch(OverwritingRowIdConsumer.class::isInstance);

  if (outputContainsDefault && outputContainsOverwriting) {
    throw new ExecutablePlanBuildException(
        "Only either a RowIdConsumer or a OverwritingRowIdConsumer can be wired " + "as output!");
  }

  int versionInputsWired = columnVersionBuiltConsumer.getNumberOfTimesWired();
  boolean overwritingOutputMissing = versionInputsWired > 0 && !outputContainsOverwriting;
  boolean defaultOutputMissing = versionInputsWired == 0 && !outputContainsDefault;

  if (overwritingOutputMissing || defaultOutputMissing) {
    // TODO #112
    throw new ExecutablePlanBuildException("If ColumnVersionBuiltConsumer is wired, the overwriting output "
        + "consumer needs to be wired, if no ColumnVersionBuiltConsumer is wired then the RowIdConsumer output "
        + "needs to be wired.");
  }
}
/**
 * @return The name of the searched column, followed by either the constant or the name of the other column it is
 *         compared to.
 */
@Override
protected String getAdditionalToStringDetails() {
  String rightOperand = (value != null) ? ",value=" + value : ",otherColName=" + otherColName;
  return "colName=" + colName + rightOperand;
}
/**
* Implements one of the inequality comparisons supported by {@link RowIdInequalStep}.
*
* <p>
* The first three methods are used when a column is compared to a constant value, the last two when a column is
* compared to another column.
*/
public static interface RowIdComparator {
/**
* Finds a reference column value ID for a specific constant value.
*
* <p>
* The returned reference column value will later be used to call
* {@link #pageContainsAnyRelevantValue(ColumnPage, Long)} and {@link #findActivePageValueIds(ColumnPage, Long)}.
*
* <p>
* This method is called if a column is compared to a constant value.
*
* @return <code>null</code> in case the column does not contain /any/ element that matches the comparator.
*/
public <T> Long findReferenceColumnValueId(ColumnShard column, Object value);
/**
* Quickly validates if a page contains any interesting rows when comparing to the given reference column value ID.
* The latter was resolved before using {@link #findReferenceColumnValueId(ColumnShard, Object)}.
*
* <p>
* This method is called if a column is compared to a constant value.
*/
public boolean pageContainsAnyRelevantValue(ColumnPage page, Long referenceValueColumnValueId);
/**
* Finds the column page value IDs of all values in the given page that match the comparison of a column to a
* constant value, the latter being identified by its column value ID which has been returned by a call to
* {@link #findReferenceColumnValueId(ColumnShard, Object)} before.
*
* <p>
* This method is called if a column is compared to a constant value.
*/
public Set<Long> findActivePageValueIds(ColumnPage page, Long referenceValueColumnValueId);
/**
* Calculates a comparison map used for a comparison between two columns.
*
* <p>
* The returned map contains all interesting column value IDs of the leftCol as keys (= those column value IDs where
* there are matching column value IDs of the rightCol). The value is typically a column value ID of the right col,
* which though will be interpreted by {@link #rowMatches(long, long, QueryableColumnShard, Map)} differently based
* on the class implementing this interface.
*
* <p>
* This method is called if a column is compared to another column.
*
* <p>
* The two columns are expected to have the same column type.
*
* @param rightCol
* although a {@link QueryableColumnShard}, this needs to be a {@link StandardColumnShard} (=
* {@link QueryableColumnShard#getDelegate()} needs to return a {@link StandardColumnShard}!)
*/
public <T> NavigableMap<Long, Long> calculateComparisonMap(StandardColumnShard leftCol,
QueryableColumnShard rightCol);
/**
* Evaluates if a specific row where the leftCol matched a key of the comparison map actually is a row that matches
* the comparison and should therefore be returned by this step.
*
* <p>
* The comparison map used was created before using
* {@link #calculateComparisonMap(StandardColumnShard, QueryableColumnShard)}.
*
* <p>
* This method is called if a column is compared to another column.
*/
public boolean rowMatches(long leftColumnValueId, long rowId, QueryableColumnShard rightCol,
Map<Long, Long> comparisonMap);
}
/**
 * Implements a 'greater or equal' comparison.
 */
public static class GtEqRowIdComparator implements RowIdComparator {
  /**
   * Resolves the ID of the constant using the 'greater or equal' lookup of the column's dictionary.
   *
   * @return the decoded ID, or <code>null</code> if the lookup found nothing.
   * @throws ExecutablePlanExecutionException
   *           if the constant has a type the column cannot be compared with.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> Long findReferenceColumnValueId(ColumnShard column, Object value) {
    final Long encodedId;
    try {
      Dictionary<T> dictionary = (Dictionary<T>) column.getColumnShardDictionary();
      encodedId = dictionary.findGtEqIdOfValue((T) value);
    } catch (ClassCastException e) {
      throw new ExecutablePlanExecutionException(
          "Cannot compare column " + column.getName() + " with value of type " + value.getClass().getSimpleName());
    }

    if (encodedId == null)
      return null;
    // ignore positive/negative encoding
    if (encodedId < 0)
      return -(encodedId + 1);
    return encodedId;
  }

  @Override
  public boolean pageContainsAnyRelevantValue(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().containsAnyValueGtEq(referenceValueColumnValueId);
  }

  @Override
  public Set<Long> findActivePageValueIds(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().findIdsOfValuesGtEq(referenceValueColumnValueId);
  }

  @Override
  @SuppressWarnings("unchecked")
  public <T> NavigableMap<Long, Long> calculateComparisonMap(StandardColumnShard leftCol,
      QueryableColumnShard rightCol) {
    Dictionary<T> leftDict = (Dictionary<T>) leftCol.getColumnShardDictionary();
    Dictionary<T> rightDict = (Dictionary<T>) rightCol.getColumnShardDictionary();
    return leftDict.findGtEqIds(rightDict);
  }

  /**
   * A row matches if the right column has a value in that row (ID is not -1) and the ID of that value is not higher
   * than the decoded bound stored in the comparison map for the left column value ID.
   */
  @Override
  public boolean rowMatches(long leftColumnValueId, long rowId, QueryableColumnShard rightCol,
      Map<Long, Long> comparisonMap) {
    long rightId = rightCol.resolveColumnValueIdForRow(rowId);
    long encodedBound = comparisonMap.get(leftColumnValueId);
    // ignore positive/negative encoding of findGrEqIds
    long upperBound = (encodedBound < 0) ? -(encodedBound + 1) : encodedBound;

    if (rightId == -1)
      return false;
    return rightId <= upperBound;
  }
}
/**
 * Implements a strict 'greater' comparison.
 */
public static class GtRowIdComparator implements RowIdComparator {
  /**
   * Resolves the ID of the constant using the 'less than or equal' lookup of the column's dictionary. Searching for
   * values strictly greater than that ID later yields the right results.
   *
   * @return the decoded ID, or -1 if the lookup found nothing (in which case all values of the column match).
   * @throws ExecutablePlanExecutionException
   *           if the constant has a type the column cannot be compared with.
   */
  @SuppressWarnings("unchecked")
  @Override
  public <T> Long findReferenceColumnValueId(ColumnShard column, Object value) {
    final Long encodedId;
    try {
      Dictionary<T> dictionary = (Dictionary<T>) column.getColumnShardDictionary();
      encodedId = dictionary.findLtEqIdOfValue((T) value);
    } catch (ClassCastException e) {
      throw new ExecutablePlanExecutionException(
          "Cannot compare column " + column.getName() + " with value of type " + value.getClass().getSimpleName());
    }

    if (encodedId == null)
      // no value <= our searched value, therefore /all/ values are valid. As we compare with "gt" later, lets use
      // -1 here.
      return -1L;
    // ignore positive/negative encoding
    if (encodedId < 0)
      return -(encodedId + 1);
    return encodedId;
  }

  @Override
  public boolean pageContainsAnyRelevantValue(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().containsAnyValueGt(referenceValueColumnValueId);
  }

  @Override
  public Set<Long> findActivePageValueIds(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().findIdsOfValuesGt(referenceValueColumnValueId);
  }

  /**
   * Derives the map from a 'greater or equal' map: afterwards each value is the highest column value ID of the right
   * column that the left column value is strictly greater than; entries without such an ID are removed.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> NavigableMap<Long, Long> calculateComparisonMap(StandardColumnShard leftCol,
      QueryableColumnShard rightCol) {
    // start off with a 'greater or equal' map.
    Dictionary<T> leftDict = (Dictionary<T>) leftCol.getColumnShardDictionary();
    Dictionary<T> rightDict = (Dictionary<T>) rightCol.getColumnShardDictionary();
    NavigableMap<Long, Long> res = leftDict.findGtEqIds(rightDict);

    Iterator<Entry<Long, Long>> it = res.entrySet().iterator();
    while (it.hasNext()) {
      Entry<Long, Long> entry = it.next();
      long encoded = entry.getValue();
      if (encoded == 0) {
        // leftCol == everything colB[i] for i <= 0 -> we're not interested in ==, therefore remove entry.
        it.remove();
      } else if (encoded > 0) {
        // leftCol == colB[value]. As we want > relation, leftCol is > colB[value -1].
        entry.setValue(encoded - 1);
      } else {
        // leftCol is > colB[i] for i <= -(value + 1). See JavaDoc findGrEqIds.
        entry.setValue(-(encoded + 1));
      }
    }
    return res;
  }

  /**
   * A row matches if the right column has a value in that row (ID is not -1) and the ID of that value is not higher
   * than the bound stored in the comparison map for the left column value ID.
   */
  @Override
  public boolean rowMatches(long leftColumnValueId, long rowId, QueryableColumnShard rightCol,
      Map<Long, Long> comparisonMap) {
    long rightId = rightCol.resolveColumnValueIdForRow(rowId);
    if (rightId == -1)
      return false;
    return rightId <= comparisonMap.get(leftColumnValueId);
  }
}
/**
 * Implements a 'less than or equal' comparison.
 */
public static class LtEqRowIdComparator implements RowIdComparator {
  /**
   * Resolves the ID of the constant using the 'less than or equal' lookup of the column's dictionary.
   *
   * @return the decoded ID, or <code>null</code> if the lookup found nothing.
   * @throws ExecutablePlanExecutionException
   *           if the constant has a type the column cannot be compared with.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> Long findReferenceColumnValueId(ColumnShard column, Object value) {
    final Long encodedId;
    try {
      Dictionary<T> dictionary = (Dictionary<T>) column.getColumnShardDictionary();
      encodedId = dictionary.findLtEqIdOfValue((T) value);
    } catch (ClassCastException e) {
      throw new ExecutablePlanExecutionException(
          "Cannot compare column " + column.getName() + " with value of type " + value.getClass().getSimpleName());
    }

    if (encodedId == null)
      return null;
    // ignore positive/negative encoding.
    if (encodedId < 0)
      return -(encodedId + 1);
    return encodedId;
  }

  @Override
  public boolean pageContainsAnyRelevantValue(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().containsAnyValueLtEq(referenceValueColumnValueId);
  }

  @Override
  public Set<Long> findActivePageValueIds(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().findIdsOfValuesLtEq(referenceValueColumnValueId);
  }

  @Override
  @SuppressWarnings("unchecked")
  public <T> NavigableMap<Long, Long> calculateComparisonMap(StandardColumnShard leftCol,
      QueryableColumnShard rightCol) {
    Dictionary<T> leftDict = (Dictionary<T>) leftCol.getColumnShardDictionary();
    Dictionary<T> rightDict = (Dictionary<T>) rightCol.getColumnShardDictionary();
    return leftDict.findLtEqIds(rightDict);
  }

  /**
   * A row matches if the right column has a value in that row (ID is not -1) and the ID of that value is not lower
   * than the decoded bound stored in the comparison map for the left column value ID.
   */
  @Override
  public boolean rowMatches(long leftColumnValueId, long rowId, QueryableColumnShard rightCol,
      Map<Long, Long> comparisonMap) {
    long rightId = rightCol.resolveColumnValueIdForRow(rowId);
    long encodedBound = comparisonMap.get(leftColumnValueId);
    // ignore positive/negative encoding of findLtEqIds
    long lowerBound = (encodedBound < 0) ? -(encodedBound + 1) : encodedBound;

    if (rightId == -1)
      return false;
    return rightId >= lowerBound;
  }
}
/**
 * Implements a strict 'less than' comparison.
 */
public static class LtRowIdComparator implements RowIdComparator {
  @Override
  public boolean pageContainsAnyRelevantValue(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().containsAnyValueLt(referenceValueColumnValueId);
  }

  @Override
  public Set<Long> findActivePageValueIds(ColumnPage page, Long referenceValueColumnValueId) {
    return page.getColumnPageDict().findIdsOfValuesLt(referenceValueColumnValueId);
  }

  /**
   * Resolves the ID of the constant using the 'greater or equal' lookup of the column's dictionary. Searching for
   * values strictly less than that ID later yields the right results.
   *
   * @return the decoded ID; the max ID of the dictionary + 1 if the lookup found nothing (in which case all values
   *         of the column match); <code>null</code> if the dictionary is empty.
   * @throws ExecutablePlanExecutionException
   *           if the constant has a type the column cannot be compared with.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> Long findReferenceColumnValueId(ColumnShard column, Object value) {
    // find the ID of the next >= value compared to the requested one. When executing a strict < comparison later,
    // this will return the correct results.
    Long grEqId;
    try {
      grEqId = ((Dictionary<T>) column.getColumnShardDictionary()).findGtEqIdOfValue((T) value);
    } catch (ClassCastException e) {
      throw new ExecutablePlanExecutionException(
          "Cannot compare column " + column.getName() + " with value of type " + value.getClass().getSimpleName());
    }

    if (grEqId == null) {
      Long maxId = column.getColumnShardDictionary().getMaxId();
      if (maxId == null)
        // shard dict is empty!
        return null;
      // return maxId + 1, as we compare using Lt later. That will give the correct results.
      return maxId + 1;
    }

    // ignore positive/negative result encoding.
    if (grEqId < 0)
      grEqId = -(grEqId + 1);
    return grEqId;
  }

  /**
   * Derives the map from a 'less than or equal' map: afterwards each value is the lowest column value ID of the
   * right column that the left column value is strictly less than; entries without such an ID are removed.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> NavigableMap<Long, Long> calculateComparisonMap(StandardColumnShard leftCol,
      QueryableColumnShard rightCol) {
    // start off with a 'less than or equal' comparison map
    Dictionary<T> rightDict = (Dictionary<T>) rightCol.getColumnShardDictionary();
    NavigableMap<Long, Long> res = ((Dictionary<T>) leftCol.getColumnShardDictionary()).findLtEqIds(rightDict);

    // The map values are column value IDs of the right column, so the "last" one is the max ID of the right
    // dictionary (not the last row ID of the shard).
    Long rightMaxId = rightDict.getMaxId();

    for (Iterator<Entry<Long, Long>> it = res.entrySet().iterator(); it.hasNext();) {
      Entry<Long, Long> e = it.next();
      long encoded = e.getValue();
      if (encoded < 0)
        // leftCol is < colB[i] for i >= -(value + 1). See JavaDoc findLtEqIds.
        e.setValue(-(encoded + 1));
      else if (rightMaxId == null || encoded >= rightMaxId)
        // leftCol == colB[maxId] -> there is no greater value in colB and we're not interested in ==, therefore
        // remove entry.
        it.remove();
      else
        // leftCol == colB[value] (this includes value == 0). As we want < relation, leftCol is < colB[value + 1].
        e.setValue(encoded + 1);
    }
    return res;
  }

  /**
   * A row matches if the right column has a value in that row (ID is not -1) and the ID of that value is not lower
   * than the bound stored in the comparison map for the left column value ID.
   */
  @Override
  public boolean rowMatches(long leftColumnValueId, long rowId, QueryableColumnShard rightCol,
      Map<Long, Long> comparisonMap) {
    long rightColumnValueId = rightCol.resolveColumnValueIdForRow(rowId);
    return (rightColumnValueId != -1 && rightColumnValueId >= comparisonMap.get(leftColumnValueId));
  }
}
}