/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.query.h2.opt;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import javax.cache.CacheException;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteInterruptedException;
import org.apache.ignite.IgniteLogger;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.GridKernalContext;
import org.apache.ignite.internal.GridTopic;
import org.apache.ignite.internal.IgniteInterruptedCheckedException;
import org.apache.ignite.internal.managers.communication.GridIoPolicy;
import org.apache.ignite.internal.managers.communication.GridMessageListener;
import org.apache.ignite.internal.processors.cache.CacheObject;
import org.apache.ignite.internal.processors.cache.GridCacheContext;
import org.apache.ignite.internal.processors.cache.distributed.dht.GridReservable;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2IndexRangeRequest;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2IndexRangeResponse;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2RowMessage;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2RowRange;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2RowRangeBounds;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2ValueMessage;
import org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2ValueMessageFactory;
import org.apache.ignite.internal.util.*;
import org.apache.ignite.internal.util.lang.*;
import org.apache.ignite.internal.util.typedef.CIX2;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.internal.CU;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.apache.ignite.lang.IgniteBiPredicate;
import org.apache.ignite.lang.IgniteBiTuple;
import org.apache.ignite.logger.NullLogger;
import org.apache.ignite.plugin.extensions.communication.Message;
import org.apache.ignite.spi.indexing.IndexingQueryFilter;
import org.h2.engine.Session;
import org.h2.index.BaseIndex;
import org.h2.index.Cursor;
import org.h2.index.IndexCondition;
import org.h2.index.IndexLookupBatch;
import org.h2.index.ViewIndex;
import org.h2.message.DbException;
import org.h2.result.Row;
import org.h2.result.SearchRow;
import org.h2.table.IndexColumn;
import org.h2.table.TableFilter;
import org.h2.util.DoneFuture;
import org.h2.value.Value;
import org.h2.value.ValueNull;
import org.jetbrains.annotations.Nullable;
import static java.util.Collections.emptyIterator;
import static java.util.Collections.singletonList;
import static org.apache.ignite.internal.processors.query.h2.opt.DistributedJoinMode.LOCAL_ONLY;
import static org.apache.ignite.internal.processors.query.h2.opt.DistributedJoinMode.OFF;
import static org.apache.ignite.internal.processors.query.h2.opt.GridH2AbstractKeyValueRow.KEY_COL;
import static org.apache.ignite.internal.processors.query.h2.opt.GridH2AbstractKeyValueRow.VAL_COL;
import static org.apache.ignite.internal.processors.query.h2.opt.GridH2CollocationModel.buildCollocationModel;
import static org.apache.ignite.internal.processors.query.h2.opt.GridH2QueryType.MAP;
import static org.apache.ignite.internal.processors.query.h2.opt.GridH2QueryType.PREPARE;
import static org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2IndexRangeResponse.STATUS_ERROR;
import static org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2IndexRangeResponse.STATUS_NOT_FOUND;
import static org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2IndexRangeResponse.STATUS_OK;
import static org.apache.ignite.internal.processors.query.h2.twostep.msg.GridH2RowRangeBounds.rangeBounds;
import static org.h2.result.Row.MEMORY_CALCULATE;
/**
* Index base.
*/
public abstract class GridH2IndexBase extends BaseIndex {
/** Sentinel returned in place of an affinity key when the affinity key value is SQL {@code NULL}. */
private static final Object EXPLICIT_NULL = new Object();
/** Generator of {@link #idxId} values. */
private static final AtomicLong idxIdGen = new AtomicLong();
/** ID of this index instance; used as the key under which its snapshot is stored in the query context. */
protected final long idxId = idxIdGen.incrementAndGet();
/** Snapshot taken for the current thread, see {@link #takeSnapshot(Object, GridH2QueryContext)}. */
private final ThreadLocal<Object> snapshot = new ThreadLocal<>();
/** Topic of distributed join messages of this index, {@code null} if messaging is not initialized. */
private Object msgTopic;
/** Listener registered on {@link #msgTopic}, {@code null} if messaging is not initialized. */
private GridMessageListener msgLsnr;
/** Logger. */
private IgniteLogger log;
/** Handler handed to the indexing {@code send} call; it forwards the message to {@link #onMessage0(UUID, Object)}. */
private final CIX2<ClusterNode,Message> locNodeHnd = new CIX2<ClusterNode,Message>() {
@Override public void applyx(ClusterNode clusterNode, Message msg) throws IgniteCheckedException {
onMessage0(clusterNode.id(), msg);
}
};
/** Cache context taken from the table row descriptor; stays {@code null} when the descriptor provides none. */
protected GridCacheContext<?, ?> ctx;
/**
 * Prepares this index for distributed join messaging. When the table row descriptor has a cache
 * context, the context, logger and message topic are initialized and a message listener is
 * registered; otherwise messaging stays disabled and a no-op logger is used.
 *
 * @param tbl Table.
 */
protected final void initDistributedJoinMessaging(GridH2Table tbl) {
    final GridH2RowDescriptor rowDesc = tbl.rowDescriptor();

    if (rowDesc == null || rowDesc.context() == null) {
        // No cache context behind this table: nothing to listen for.
        msgTopic = null;
        msgLsnr = null;
        log = new NullLogger();

        return;
    }

    ctx = rowDesc.context();

    GridKernalContext kernalCtx = rowDesc.context().kernalContext();

    log = kernalCtx.log(getClass());

    msgTopic = new IgniteBiTuple<>(GridTopic.TOPIC_QUERY, tbl.identifier() + '.' + getName());

    msgLsnr = new GridMessageListener() {
        @Override public void onMessage(UUID nodeId, Object msg) {
            GridSpinBusyLock busyLock = rowDesc.indexing().busyLock();

            // Messages are dropped when the busy lock can not be entered.
            if (busyLock.enterBusy()) {
                try {
                    onMessage0(nodeId, msg);
                }
                finally {
                    busyLock.leaveBusy();
                }
            }
        }
    };

    kernalCtx.io().addMessageListener(msgTopic, msgLsnr);
}
/**
* {@inheritDoc}
* <p>
* Intentionally does nothing: resources of the index are released in {@link #destroy()}.
*/
@Override public final void close(Session ses) {
// No-op. Actual index destruction must happen in method destroy.
}
/**
 * Destroys the index and releases its resources: the distributed join message listener
 * is unregistered if one was installed.
 * This method is used instead of {@link #close(Session)} because that method
 * is used by H2 internally.
 */
public void destroy() {
    if (msgLsnr == null)
        return;

    kernalContext().io().removeMessageListener(msgTopic, msgLsnr);
}
/**
 * Resolves the index segment for the query running in the current thread.
 *
 * @return {@code 0} for a single-segment index, otherwise the segment of the current query context.
 * @throws IllegalStateException If the index has several segments and there is no query context.
 */
protected int threadLocalSegment() {
    if (segmentsCount() != 1) {
        GridH2QueryContext qctx = GridH2QueryContext.get();

        if (qctx != null)
            return qctx.segment();

        throw new IllegalStateException("GridH2QueryContext is not initialized.");
    }

    return 0;
}
/**
* Puts row into the index.
*
* @param row Row.
* @return Existing row or {@code null}.
*/
public abstract GridH2Row put(GridH2Row row);
/**
 * Puts row into the index by delegating to {@link #put(GridH2Row)}.
 *
 * @param row Row.
 * @return {@code True} if {@link #put(GridH2Row)} returned a non-null (existing) row.
 */
public boolean putx(GridH2Row row) {
    GridH2Row prev = put(row);

    return prev != null;
}
/**
* Removes row from the index.
*
* @param row Row.
* @return Removed row.
*/
public abstract GridH2Row remove(SearchRow row);
/**
* Removes row from the index without returning the removed row.
* This implementation delegates to {@link #remove(SearchRow)} and discards its result.
*
* @param row Row.
*/
public void removex(SearchRow row) {
remove(row);
}
/**
* Finds a single row matching the given row.
*
* @param row Search row.
* @return Search result.
*/
public abstract GridH2Row findOne(GridH2Row row);
/**
 * Binds a snapshot to the current thread: either the given one or, if none is given,
 * a freshly taken one. A snapshot implementing {@link GridReservable} must be
 * reserved successfully, otherwise nothing is bound.
 *
 * @param s Optional existing snapshot to use.
 * @param qctx Query context; when not {@code null} the snapshot is also registered in it under the index ID.
 * @return Snapshot, or {@code null} if there is no snapshot or its reservation failed.
 */
public final Object takeSnapshot(@Nullable Object s, GridH2QueryContext qctx) {
    assert snapshot.get() == null;

    Object snap = s != null ? s : doTakeSnapshot();

    if (snap == null)
        return null;

    if (snap instanceof GridReservable && !((GridReservable)snap).reserve())
        return null;

    snapshot.set(snap);

    if (qctx != null)
        qctx.putSnapshot(idxId, snap);

    return snap;
}
/**
 * Empties the view index cache of the session, if it holds anything.
 *
 * @param ses Session.
 */
private static void clearViewIndexCache(Session ses) {
    Map<Object,ViewIndex> cache = ses.getViewIndexCache(true);

    if (cache.isEmpty())
        return;

    cache.clear();
}
/**
 * Calculates the multiplier for the given table filter with respect to distributed joins.
 *
 * @param ses Session.
 * @param filters All joined table filters.
 * @param filter Current filter.
 * @return Multiplier.
 */
public final int getDistributedMultiplier(Session ses, TableFilter[] filters, int filter) {
    GridH2QueryContext qctx = GridH2QueryContext.get();

    // Distributed join optimizations are applied on PREPARE stage only. Join batching is checked
    // as well because several different optimization passes can happen on that stage.
    // Query expressions can not be distributed either.
    boolean optimize = qctx != null && qctx.type() == PREPARE && qctx.distributedJoinMode() != OFF &&
        ses.isJoinBatchEnabled() && !ses.isPreparingQueryExpression();

    if (!optimize)
        return GridH2CollocationModel.MULTIPLIER_COLLOCATED;

    // Normally a sub-query plan cost depends only on index condition masks and sort order,
    // but here it can also depend on the order of previous table filters, so the cache is dropped.
    clearViewIndexCache(ses);

    assert filters != null;

    GridH2CollocationModel mdl = buildCollocationModel(qctx, ses.getSubQueryInfo(), filters, filter, false);

    return mdl.calculateMultiplier();
}
/**
 * {@inheritDoc}
 * <p>
 * The table of this index is always cast to {@link GridH2Table}.
 */
@Override public GridH2Table getTable() {
    final GridH2Table tbl = (GridH2Table)super.getTable();

    return tbl;
}
/**
* Takes and returns actual snapshot or {@code null} if snapshots are not supported.
* Called by {@link #takeSnapshot(Object, GridH2QueryContext)} when no existing snapshot is passed in.
*
* @return Snapshot or {@code null}.
*/
@Nullable protected abstract IgniteTree doTakeSnapshot();
/**
 * Returns the snapshot bound to the current thread, cast (unchecked) to the type expected by the caller.
 *
 * @return Thread local snapshot.
 */
@SuppressWarnings("unchecked")
protected <T> T threadLocalSnapshot() {
    Object cur = snapshot.get();

    return (T)cur;
}
/**
 * Unbinds the snapshot from the current thread, then releases it if it is a
 * {@link GridReservable} and quietly closes it if it is an {@link AutoCloseable}.
 */
public void releaseSnapshot() {
    final Object cur = snapshot.get();

    assert cur != null;

    snapshot.remove();

    if (cur instanceof GridReservable) {
        GridReservable reservable = (GridReservable)cur;

        reservable.release();
    }

    if (cur instanceof AutoCloseable) {
        AutoCloseable closeable = (AutoCloseable)cur;

        U.closeQuiet(closeable);
    }
}
/**
 * Wraps the cursor into a filtering cursor that is given the current time,
 * the optional query filter and the space name of the table.
 *
 * @param cursor GridCursor over rows.
 * @param filter Optional filter.
 * @return Filtering cursor.
 */
protected GridCursor<GridH2Row> filter(GridCursor<GridH2Row> cursor, IndexingQueryFilter filter) {
    long now = U.currentTimeMillis();

    String spaceName = getTable().spaceName();

    return new FilteringCursor(cursor, now, filter, spaceName);
}
/**
 * Looks up the filter of the query context bound to the current thread.
 *
 * @return Filter for currently running query or {@code null} if none.
 */
protected static IndexingQueryFilter threadLocalFilter() {
    GridH2QueryContext qctx = GridH2QueryContext.get();

    if (qctx == null)
        return null;

    return qctx.filter();
}
/**
* {@inheritDoc}
* <p>
* This implementation always reports {@code 0}.
*/
@Override public long getDiskSpaceUsed() {
return 0;
}
/**
* {@inheritDoc}
* <p>
* Renaming is not supported: this implementation always throws the H2 "unsupported" exception.
*/
@Override public void checkRename() {
throw DbException.getUnsupportedException("rename");
}
/**
* {@inheritDoc}
* <p>
* Not supported: this implementation always throws the H2 "unsupported" exception.
* Rows are put with {@link #put(GridH2Row)} instead.
*/
@Override public void add(Session ses, Row row) {
throw DbException.getUnsupportedException("add");
}
/**
* {@inheritDoc}
* <p>
* Not supported: this implementation always throws the H2 "unsupported" exception.
* Rows are removed with {@link #remove(SearchRow)} instead.
*/
@Override public void remove(Session ses, Row row) {
throw DbException.getUnsupportedException("remove row");
}
/**
* {@inheritDoc}
* <p>
* Intentionally does nothing here.
*/
@Override public void remove(Session ses) {
// No-op: destroyed from owning table.
}
/**
* {@inheritDoc}
* <p>
* Not supported: this implementation always throws the H2 "unsupported" exception.
*/
@Override public void truncate(Session ses) {
throw DbException.getUnsupportedException("truncate");
}
/**
* {@inheritDoc}
* <p>
* This implementation always returns {@code false}.
*/
@Override public boolean needRebuild() {
return false;
}
/**
 * {@inheritDoc}
 * <p>
 * A batch is created only when there is a query context with distributed joins enabled and the table
 * is partitioned; otherwise {@code null} is returned. The batch is marked as unicast when the filter
 * has an equality condition on the affinity column or the row descriptor reports an equality
 * condition on the key.
 */
@Override public IndexLookupBatch createLookupBatch(TableFilter[] filters, int filter) {
    GridH2QueryContext qctx = GridH2QueryContext.get();

    boolean distributed = qctx != null && qctx.distributedJoinMode() != OFF && getTable().isPartitioned();

    if (!distributed)
        return null;

    IndexColumn affCol = getTable().getAffinityKeyColumn();
    GridH2RowDescriptor rowDesc = getTable().rowDescriptor();

    // -1 means that the table has no affinity key column.
    int affColId = -1;
    boolean ucast = false;

    if (affCol != null) {
        affColId = affCol.column.getColumnId();

        int[] masks = filters[filter].getMasks();

        if (masks != null) {
            boolean affColEquality = (masks[affColId] & IndexCondition.EQUALITY) != 0;

            ucast = affColEquality || rowDesc.checkKeyIndexCondition(masks, IndexCondition.EQUALITY);
        }
    }

    GridCacheContext<?, ?> cctx = getTable().rowDescriptor().context();

    return new DistributedLookupBatch(cctx, ucast, affColId);
}
/**
 * {@inheritDoc}
 * <p>
 * Overridden only to hand the session over to the owning table when the index is removed from it.
 */
@Override public void removeChildrenAndResources(Session session) {
    assert table instanceof GridH2Table;

    GridH2Table owner = (GridH2Table)table;

    owner.removeIndex(session, this);

    remove(session);

    database.removeMeta(session, getId());
}
/**
 * Sends the message to the given nodes over the index topic using the index pool policy.
 *
 * @param nodes Nodes.
 * @param msg Message.
 * @throws GridH2RetryException If the message was not sent.
 */
private void send(Collection<ClusterNode> nodes, Message msg) {
    boolean sent = getTable().rowDescriptor().indexing().send(
        msgTopic,
        -1,
        nodes,
        msg,
        null,
        locNodeHnd,
        GridIoPolicy.IDX_POOL,
        false);

    if (!sent)
        throw new GridH2RetryException("Failed to send message to nodes: " + nodes + ".");
}
/**
 * Dispatches an incoming index range request or response to its handler. Messages from nodes
 * unknown to discovery are ignored. Failures are logged; only {@link Error}s are propagated.
 *
 * @param nodeId Source node ID.
 * @param msg Message.
 */
private void onMessage0(UUID nodeId, Object msg) {
    ClusterNode sender = kernalContext().discovery().node(nodeId);

    if (sender != null) {
        try {
            if (msg instanceof GridH2IndexRangeRequest)
                onIndexRangeRequest(sender, (GridH2IndexRangeRequest)msg);
            else if (msg instanceof GridH2IndexRangeResponse)
                onIndexRangeResponse(sender, (GridH2IndexRangeResponse)msg);
        }
        catch (Throwable th) {
            U.error(log, "Failed to handle message[nodeId=" + nodeId + ", msg=" + msg + "]", th);

            if (th instanceof Error)
                throw (Error)th;
        }
    }
}
/**
 * Resolves the kernal context through the cache context of the table row descriptor.
 *
 * @return Kernal context.
 */
private GridKernalContext kernalContext() {
    GridH2RowDescriptor rowDesc = getTable().rowDescriptor();

    return rowDesc.context().kernalContext();
}
/**
 * Handles an index range request: collects up to a page of rows from the range source and
 * sends them back to the requesting node. A response is always sent; its status tells whether
 * the query context was found and whether processing succeeded.
 *
 * @param node Requesting node.
 * @param msg Request message.
 */
private void onIndexRangeRequest(final ClusterNode node, final GridH2IndexRangeRequest msg) {
    GridH2IndexRangeResponse resp = new GridH2IndexRangeResponse();

    // The response carries the same identifiers as the request.
    resp.originNodeId(msg.originNodeId());
    resp.queryId(msg.queryId());
    resp.originSegmentId(msg.originSegmentId());
    resp.segment(msg.segment());
    resp.batchLookupId(msg.batchLookupId());

    GridH2QueryContext qctx = GridH2QueryContext.get(kernalContext().localNodeId(), msg.originNodeId(),
        msg.queryId(), msg.originSegmentId(), MAP);

    if (qctx != null) {
        try {
            // Only the first request of a lookup carries the search bounds.
            final boolean firstReq = msg.bounds() != null;

            RangeSource src;

            if (firstReq) {
                IgniteTree snapshotTree = qctx.getSnapshot(idxId);

                assert !msg.bounds().isEmpty() : "empty bounds";

                src = new RangeSource(msg.bounds(), msg.segment(), snapshotTree, qctx.filter());
            }
            else {
                // Follow-up request: continue with the source saved earlier.
                src = qctx.getSource(node.id(), msg.segment(), msg.batchLookupId());

                assert src != null;
            }

            List<GridH2RowRange> ranges = new ArrayList<>();

            int rowsLeft = qctx.pageSize();

            assert rowsLeft > 0 : rowsLeft;

            GridH2RowRange range;

            while (rowsLeft > 0 && (range = src.next(rowsLeft)) != null) {
                ranges.add(range);

                if (range.rows() != null)
                    rowsLeft -= range.rows().size();
            }

            assert !ranges.isEmpty();

            boolean moreRows = src.hasMoreRows();

            if (moreRows && firstReq) {
                // Save source for future fetches.
                qctx.putSource(node.id(), msg.segment(), msg.batchLookupId(), src);
            }
            else if (!moreRows && !firstReq) {
                // Drop saved source.
                qctx.putSource(node.id(), msg.segment(), msg.batchLookupId(), null);
            }

            resp.ranges(ranges);
            resp.status(STATUS_OK);
        }
        catch (Throwable th) {
            U.error(log, "Failed to process request: " + msg, th);

            resp.error(th.getClass() + ": " + th.getMessage());
            resp.status(STATUS_ERROR);
        }
    }
    else
        resp.status(STATUS_NOT_FOUND);

    send(singletonList(node), resp);
}
/**
 * Handles an index range response by passing it to the range stream waiting for it.
 * The response is ignored when the query context or the streams of the batch lookup are gone.
 *
 * @param node Responded node.
 * @param msg Response message.
 */
private void onIndexRangeResponse(ClusterNode node, GridH2IndexRangeResponse msg) {
    UUID locNodeId = kernalContext().localNodeId();

    GridH2QueryContext qctx = GridH2QueryContext.get(locNodeId, msg.originNodeId(), msg.queryId(),
        msg.originSegmentId(), MAP);

    if (qctx == null)
        return;

    Map<SegmentKey, RangeStream> streams = qctx.getStreams(msg.batchLookupId());

    if (streams != null) {
        SegmentKey key = new SegmentKey(node, msg.segment());

        RangeStream stream = streams.get(key);

        assert stream != null;

        stream.onResponse(msg);
    }
}
/**
 * Compares two values using the compare mode of the database; two {@code null}s are equal,
 * a {@code null} is never equal to a non-null value.
 *
 * @param v1 First value.
 * @param v2 Second value.
 * @return {@code true} If they equal.
 */
private boolean equal(Value v1, Value v2) {
    if (v1 == v2)
        return true;

    if (v1 == null || v2 == null)
        return false;

    return v1.compareTypeSafe(v2, getDatabase().getCompareMode()) == 0;
}
/**
 * Builds an index range request without bounds, filled with the identifiers of the query
 * context and of the target segment.
 *
 * @param qctx Query context.
 * @param batchLookupId Batch lookup ID.
 * @param segmentId Segment ID.
 * @return Index range request.
 */
private static GridH2IndexRangeRequest createRequest(GridH2QueryContext qctx, int batchLookupId, int segmentId) {
    GridH2IndexRangeRequest rangeReq = new GridH2IndexRangeRequest();

    // Where the query comes from.
    rangeReq.originNodeId(qctx.originNodeId());
    rangeReq.queryId(qctx.queryId());
    rangeReq.originSegmentId(qctx.segment());

    // What is being looked up.
    rangeReq.segment(segmentId);
    rangeReq.batchLookupId(batchLookupId);

    return rangeReq;
}
/**
 * Collects the segments of all the nodes a broadcast lookup has to be sent to: every segment
 * of the local node for a local query, otherwise every segment of each node from the explicit
 * partitions map or, if there is none, of each affinity node.
 *
 * @param qctx Query context.
 * @param cctx Cache context.
 * @param isLocalQry Local query flag.
 * @return Segment keys for broadcasting, empty if a local query has nothing to do on this node.
 * @throws GridH2RetryException If a node can not be found or no nodes were collected.
 */
private List<SegmentKey> broadcastSegments(GridH2QueryContext qctx, GridCacheContext<?, ?> cctx, boolean isLocalQry) {
    Map<UUID, int[]> partMap = qctx.partitionsMap();

    List<ClusterNode> targets;

    if (!isLocalQry) {
        if (partMap != null) {
            targets = new ArrayList<>(partMap.size());

            GridKernalContext kernalCtx = kernalContext();

            for (UUID id : partMap.keySet()) {
                ClusterNode n = kernalCtx.discovery().node(id);

                if (n == null)
                    throw new GridH2RetryException("Failed to find node.");

                targets.add(n);
            }
        }
        else
            targets = new ArrayList<>(CU.affinityNodes(cctx, qctx.topologyVersion()));

        if (F.isEmpty(targets))
            throw new GridH2RetryException("Failed to collect affinity nodes.");
    }
    else {
        // Prevent remote index call for local queries.
        if (partMap != null && !partMap.containsKey(cctx.localNodeId()))
            return Collections.<SegmentKey>emptyList();

        targets = Collections.singletonList(cctx.localNode());
    }

    int segCnt = segmentsCount();

    List<SegmentKey> keys = new ArrayList<>(targets.size() * segCnt);

    for (ClusterNode n : targets) {
        for (int segId = 0; segId < segCnt; segId++)
            keys.add(new SegmentKey(n, segId));
    }

    return keys;
}
/**
 * Resolves the node and index segment that own the given affinity key.
 *
 * @param cctx Cache context.
 * @param qctx Query context.
 * @param affKeyObj Affinity key.
 * @param isLocalQry Local query flag.
 * @return Segment key for the affinity key, or {@code null} if the query is local and the key
 *      does not belong to the local node.
 * @throws GridH2RetryException If the query is not local and the owning node can not be found.
 */
private SegmentKey rangeSegment(GridCacheContext<?, ?> cctx, GridH2QueryContext qctx, Object affKeyObj, boolean isLocalQry) {
    assert affKeyObj != null && affKeyObj != EXPLICIT_NULL : affKeyObj;

    ClusterNode node;

    int partition = cctx.affinity().partition(affKeyObj);

    if (isLocalQry) {
        if (qctx.partitionsMap() != null) {
            // If we have explicit partitions map, we have to use it to calculate affinity node.
            UUID nodeId = qctx.nodeForPartition(partition, cctx);

            if (!cctx.localNodeId().equals(nodeId))
                return null; // Prevent remote index call for local queries.
        }

        // The primary check must be done on the affinity key itself: the partition number is not a
        // cache key, and passing it as one makes the result depend on how the affinity function
        // happens to map integers to partitions.
        if (!cctx.affinity().primaryByKey(cctx.localNode(), affKeyObj, qctx.topologyVersion()))
            return null;

        node = cctx.localNode();
    }
    else {
        if (qctx.partitionsMap() != null) {
            // If we have explicit partitions map, we have to use it to calculate affinity node.
            UUID nodeId = qctx.nodeForPartition(partition, cctx);

            node = cctx.discovery().node(nodeId);
        }
        else // Get primary node for current topology version.
            node = cctx.affinity().primaryByKey(affKeyObj, qctx.topologyVersion());

        if (node == null) // Node was not found, probably topology changed and we need to retry the whole query.
            throw new GridH2RetryException("Failed to find node.");
    }

    return new SegmentKey(node, segmentForPartition(partition));
}
/**
 * Key identifying one index segment on one node. Two keys are equal when their segment IDs
 * are the same and their nodes have the same node ID.
 */
protected class SegmentKey {
    /** Node the segment belongs to. */
    final ClusterNode node;

    /** Segment ID. */
    final int segmentId;

    /**
     * @param node Node, must not be {@code null}.
     * @param segmentId Segment ID.
     */
    SegmentKey(ClusterNode node, int segmentId) {
        assert node != null;

        this.node = node;
        this.segmentId = segmentId;
    }

    /** {@inheritDoc} */
    @Override public boolean equals(Object o) {
        if (this == o)
            return true;

        if (o == null || getClass() != o.getClass())
            return false;

        SegmentKey key = (SegmentKey)o;

        return segmentId == key.segmentId && node.id().equals(key.node.id());
    }

    /** {@inheritDoc} */
    @Override public int hashCode() {
        // Hash the node ID, not the node object: equals() compares node IDs, so equal keys must
        // produce equal hashes whatever hashCode() the ClusterNode implementation provides.
        return 31 * node.id().hashCode() + segmentId;
    }
}
/**
 * Converts all the column values of a row into a row message.
 *
 * @param row Row.
 * @return Row message, or {@code null} if the row is {@code null}.
 * @throws CacheException If a value can not be converted to a message.
 */
private GridH2RowMessage toRowMessage(Row row) {
    if (row == null)
        return null;

    int colCnt = row.getColumnCount();

    assert colCnt > 0 : colCnt;

    List<GridH2ValueMessage> valMsgs = new ArrayList<>(colCnt);

    try {
        for (int col = 0; col < colCnt; col++)
            valMsgs.add(GridH2ValueMessageFactory.toMessage(row.getValue(col)));
    }
    catch (IgniteCheckedException e) {
        throw new CacheException(e);
    }

    GridH2RowMessage rowMsg = new GridH2RowMessage();

    rowMsg.values(valMsgs);

    return rowMsg;
}
/**
 * Converts a row message into a search row. The i-th message value is placed at the position
 * of the i-th index column; columns without a message value stay unset.
 *
 * @param msg Row message.
 * @return Search row, or {@code null} if the message is {@code null}.
 * @throws CacheException If a value can not be restored from its message.
 */
private SearchRow toSearchRow(GridH2RowMessage msg) {
    if (msg == null)
        return null;

    GridKernalContext kernalCtx = kernalContext();

    Value[] rowVals = new Value[getTable().getColumns().length];

    assert rowVals.length > 0;

    List<GridH2ValueMessage> msgVals = msg.values();

    try {
        // The message may hold fewer values than the index has columns.
        for (int i = 0; i < indexColumns.length && i < msgVals.size(); i++) {
            int colId = indexColumns[i].column.getColumnId();

            rowVals[colId] = msgVals.get(i).value(kernalCtx);
        }
    }
    catch (IgniteCheckedException e) {
        throw new CacheException(e);
    }

    return database.createRow(rowVals, MEMORY_CALCULATE);
}
/**
 * Converts a search row into a row message holding the values of the index columns, in index
 * column order, up to (not including) the first column that has no value.
 *
 * @param row Search row.
 * @return Row message, or {@code null} if the row is {@code null}.
 * @throws CacheException If a value can not be converted to a message.
 */
private GridH2RowMessage toSearchRowMessage(SearchRow row) {
    if (row == null)
        return null;

    List<GridH2ValueMessage> valMsgs = new ArrayList<>(indexColumns.length);

    try {
        for (int i = 0; i < indexColumns.length; i++) {
            Value v = row.getValue(indexColumns[i].column.getColumnId());

            if (v == null)
                break;

            valMsgs.add(GridH2ValueMessageFactory.toMessage(v));
        }
    }
    catch (IgniteCheckedException e) {
        throw new CacheException(e);
    }

    GridH2RowMessage rowMsg = new GridH2RowMessage();

    rowMsg.values(valMsgs);

    return rowMsg;
}
/**
 * Moves the element at the given offset towards the end of the array, swapping it with its
 * right neighbour while it compares greater than that neighbour. If the elements after
 * {@code off} are already sorted, the range starting at {@code off} is sorted afterwards.
 * Nothing happens if {@code off} is the last index or beyond it.
 *
 * @param arr Array.
 * @param off Offset of the element to move.
 * @param cmp Comparator; may be a comparator of any supertype of the element type.
 * @param <Z> Element type.
 */
public static <Z> void bubbleUp(Z[] arr, int off, Comparator<? super Z> cmp) {
    // TODO Optimize: use binary search if the range in array is big.
    for (int i = off, last = arr.length - 1; i < last; i++) {
        if (cmp.compare(arr[i], arr[i + 1]) <= 0)
            break; // The element has reached its place.

        Z tmp = arr[i];

        arr[i] = arr[i + 1];
        arr[i + 1] = tmp;
    }
}
/**
 * Restores a row from a row message, value by value.
 *
 * @param msg Message.
 * @return Row, or {@code null} if the message is {@code null}.
 * @throws CacheException If a value can not be restored from its message.
 */
private Row toRow(GridH2RowMessage msg) {
    if (msg == null)
        return null;

    GridKernalContext kernalCtx = kernalContext();

    List<GridH2ValueMessage> valMsgs = msg.values();

    assert !F.isEmpty(valMsgs) : valMsgs;

    Value[] rowVals = new Value[valMsgs.size()];

    try {
        for (int i = 0; i < rowVals.length; i++)
            rowVals[i] = valMsgs.get(i).value(kernalCtx);
    }
    catch (IgniteCheckedException e) {
        throw new CacheException(e);
    }

    return database.createRow(rowVals, MEMORY_CALCULATE);
}
/**
* @return Index segments count; a value of {@code 1} means that the index is not segmented.
*/
protected abstract int segmentsCount();
/**
 * Maps a partition to an index segment: segment {@code 0} for a single-segment index,
 * otherwise the remainder of dividing the partition by the segments count.
 *
 * @param partition Partition idx.
 * @return Segment ID for given partition.
 */
protected int segmentForPartition(int partition) {
    if (segmentsCount() == 1)
        return 0;

    return partition % segmentsCount();
}
/**
 * Resolves the index segment of a row from the partition of its key column value.
 * Without a cache context the index is expected to have a single segment.
 *
 * @param row Table row.
 * @return Segment ID for given row.
 */
protected int segmentForRow(SearchRow row) {
    assert row != null;

    if (ctx == null) {
        assert segmentsCount() == 1;

        return 0;
    }

    final Value keyVal = row.getValue(KEY_COL);

    assert keyVal != null;

    final Object keyObj = keyVal.getObject();

    // The key column may hold either a ready cache object or a plain object to be converted.
    CacheObject key = keyObj instanceof CacheObject ? (CacheObject)keyObj : ctx.toCacheKeyObject(keyObj);

    return segmentForPartition(ctx.affinity().partition(key));
}
/**
 * Cursor over the rows of one range read from a single range stream.
 */
private static class UnicastCursor implements Cursor {
    /** Range ID. */
    final int rangeId;

    /** Stream the rows are read from. */
    RangeStream stream;

    /**
     * @param rangeId Range ID.
     * @param keys Remote index segment keys, exactly one is expected.
     * @param rangeStreams Range streams.
     */
    UnicastCursor(int rangeId, List<SegmentKey> keys, Map<SegmentKey, RangeStream> rangeStreams) {
        assert keys.size() == 1;

        this.rangeId = rangeId;

        SegmentKey key = F.first(keys);

        stream = rangeStreams.get(key);

        assert stream != null;
    }

    /** {@inheritDoc} */
    @Override public boolean next() {
        return stream.next(rangeId);
    }

    /** {@inheritDoc} */
    @Override public Row get() {
        return stream.get(rangeId);
    }

    /** {@inheritDoc} */
    @Override public SearchRow getSearchRow() {
        return get();
    }

    /** Backward iteration is not supported. */
    @Override public boolean previous() {
        throw new UnsupportedOperationException();
    }
}
/**
 * Cursor merging the rows of one range from several range streams. The streams are kept ordered
 * by their current row, so the current row of the cursor is always the current row of the
 * stream at the current offset.
 */
private class BroadcastCursor implements Cursor, Comparator<RangeStream> {
    /** Range ID. */
    final int rangeId;

    /** Streams being merged; an exhausted stream is replaced with {@code null}. */
    final RangeStream[] streams;

    /** Whether no row has been requested yet. */
    boolean first = true;

    /** Offset of the current stream; all the elements before it are {@code null}. */
    int off;

    /**
     * @param rangeId Range ID.
     * @param segmentKeys Remote nodes.
     * @param rangeStreams Range streams.
     */
    BroadcastCursor(int rangeId, Collection<SegmentKey> segmentKeys, Map<SegmentKey, RangeStream> rangeStreams) {
        this.rangeId = rangeId;

        streams = new RangeStream[segmentKeys.size()];

        int idx = 0;

        for (SegmentKey key : segmentKeys) {
            RangeStream s = rangeStreams.get(key);

            assert s != null;

            streams[idx] = s;

            idx++;
        }
    }

    /** {@inheritDoc} */
    @Override public int compare(RangeStream o1, RangeStream o2) {
        if (o1 == o2)
            return 0;

        // Nulls are at the beginning of array.
        if (o1 == null)
            return -1;

        if (o2 == null)
            return 1;

        Row row1 = o1.get(rangeId);
        Row row2 = o2.get(rangeId);

        return compareRows(row1, row2);
    }

    /**
     * Fetches the first row from every stream and orders the streams by these rows.
     *
     * @return {@code true} If we were able to find at least one row.
     */
    private boolean goFirst() {
        for (int i = 0; i < streams.length; i++) {
            if (streams[i].next(rangeId))
                continue;

            // The stream has no rows at all. After sorting the offset cuts off all the nulls
            // gathered at the beginning of the array.
            streams[i] = null;

            off++;
        }

        boolean found = off != streams.length;

        if (found)
            Arrays.sort(streams, this);

        return found;
    }

    /**
     * Advances the stream holding the current (minimal) row and restores the order of streams.
     *
     * @return {@code true} If we were able to find at least one row.
     */
    private boolean goNext() {
        assert off != streams.length;

        if (streams[off].next(rangeId)) {
            // The stream got a new row: move it to its place among the others.
            bubbleUp(streams, off, this);

            return true;
        }

        // The current min stream is exhausted: drop it and move on to the next one, if any.
        streams[off] = null;

        off++;

        return off != streams.length;
    }

    /** {@inheritDoc} */
    @Override public boolean next() {
        if (!first)
            return goNext();

        first = false;

        return goFirst();
    }

    /** {@inheritDoc} */
    @Override public Row get() {
        return streams[off].get(rangeId);
    }

    /** {@inheritDoc} */
    @Override public SearchRow getSearchRow() {
        return get();
    }

    /** Backward iteration is not supported. */
    @Override public boolean previous() {
        throw new UnsupportedOperationException();
    }
}
/**
* Index lookup batch for distributed joins. Search ranges are accumulated per target index segment
* by {@link #addSearchRows(SearchRow, SearchRow)}, the accumulated requests are sent by {@link #find()},
* and the state is dropped by {@link #reset(boolean)}.
*/
private class DistributedLookupBatch implements IndexLookupBatch {
/** Cache context. */
final GridCacheContext<?,?> cctx;
/** Unicast flag computed when the batch was created; reported by {@link #getPlanSQL()}. */
final boolean ucast;
/** Affinity column ID, {@code -1} if the table has no affinity key column. */
final int affColId;
/** Query context, {@code null} until the first search rows are added and after reset. */
GridH2QueryContext qctx;
/** ID of the current lookup phase taken from the query context, {@code 0} when there is none. */
int batchLookupId;
/** Range streams of the current lookup phase by target segment. */
Map<SegmentKey, RangeStream> rangeStreams = Collections.emptyMap();
/** Segments to broadcast to, calculated on first use. */
List<SegmentKey> broadcastSegments;
/** Cursor futures of the current lookup phase; the position in the list is the range ID. */
List<Future<Cursor>> res = Collections.emptyList();
/** Set when at least one request has accumulated a page size worth of ranges. */
boolean batchFull;
/** Set by {@link #find()}, tells the next {@link #addSearchRows(SearchRow, SearchRow)} to start a new lookup phase. */
boolean findCalled;
/**
* @param cctx Cache context.
* @param ucast Unicast or broadcast query.
* @param affColId Affinity column ID.
*/
DistributedLookupBatch(GridCacheContext<?, ?> cctx, boolean ucast, int affColId) {
this.cctx = cctx;
this.ucast = ucast;
this.affColId = affColId;
}
/**
* Tries to find a single affinity key covering the whole range: first from the affinity column
* values of both bounds, then from the affinity keys of their primary key values.
*
* @param firstRow First row.
* @param lastRow Last row.
* @return Affinity key, {@code EXPLICIT_NULL} if the key is SQL {@code NULL},
*      or {@code null} if a single affinity key can not be determined.
*/
private Object getAffinityKey(SearchRow firstRow, SearchRow lastRow) {
if (firstRow == null || lastRow == null)
return null;
Value affKeyFirst = firstRow.getValue(affColId);
Value affKeyLast = lastRow.getValue(affColId);
// Both bounds have the same affinity column value.
if (affKeyFirst != null && equal(affKeyFirst, affKeyLast))
return affKeyFirst == ValueNull.INSTANCE ? EXPLICIT_NULL : affKeyFirst.getObject();
// The affinity column is the key column itself, so the primary key can not tell anything more.
if (getTable().rowDescriptor().isKeyColumn(affColId))
return null;
// Try to extract affinity key from primary key.
Value pkFirst = firstRow.getValue(KEY_COL);
Value pkLast = lastRow.getValue(KEY_COL);
if (pkFirst == ValueNull.INSTANCE || pkLast == ValueNull.INSTANCE)
return EXPLICIT_NULL;
if (pkFirst == null || pkLast == null || !equal(pkFirst, pkLast))
return null;
Object pkAffKeyFirst = cctx.affinity().affinityKey(pkFirst.getObject());
Object pkAffKeyLast = cctx.affinity().affinityKey(pkLast.getObject());
if (pkAffKeyFirst == null || pkAffKeyLast == null)
throw new CacheException("Cache key without affinity key.");
if (pkAffKeyFirst.equals(pkAffKeyLast))
return pkAffKeyFirst;
return null;
}
/**
* {@inheritDoc}
* <p>
* Registers the range in the request of every target segment and adds a cursor future for it
* to the result list. Returns {@code false} when there is nothing to look up for the range:
* the affinity key is an explicit {@code NULL}, or the query is local and no local segment matches.
*/
@SuppressWarnings("ForLoopReplaceableByForEach")
@Override public boolean addSearchRows(SearchRow firstRow, SearchRow lastRow) {
if (qctx == null || findCalled) {
if (qctx == null) {
// It is the first call after query begin (may be after reuse),
// reinitialize query context and result.
qctx = GridH2QueryContext.get();
res = new ArrayList<>();
assert qctx != null;
assert !findCalled;
}
else {
// Cleanup after the previous lookup phase.
assert batchLookupId != 0;
findCalled = false;
qctx.putStreams(batchLookupId, null);
res.clear();
}
// Reinitialize for the next lookup phase.
batchLookupId = qctx.nextBatchLookupId();
rangeStreams = new HashMap<>();
}
Object affKey = affColId == -1 ? null : getAffinityKey(firstRow, lastRow);
boolean locQry = localQuery();
List<SegmentKey> segmentKeys;
if (affKey != null) {
// Affinity key is provided.
if (affKey == EXPLICIT_NULL) // Affinity key is explicit null, we will not find anything.
return false;
segmentKeys = F.asList(rangeSegment(cctx, qctx, affKey, locQry));
}
else {
// Affinity key is not provided or is not the same in upper and lower bounds, we have to broadcast.
if (broadcastSegments == null)
broadcastSegments = broadcastSegments(qctx, cctx, locQry);
segmentKeys = broadcastSegments;
}
if (locQry && segmentKeys.isEmpty())
return false; // Nothing to do
assert !F.isEmpty(segmentKeys) : segmentKeys;
// The range ID is the position of the range cursor in the result list.
final int rangeId = res.size();
// Create messages.
GridH2RowMessage first = toSearchRowMessage(firstRow);
GridH2RowMessage last = toSearchRowMessage(lastRow);
// Range containing upper and lower bounds.
GridH2RowRangeBounds rangeBounds = rangeBounds(rangeId, first, last);
// Add range to every message of every participating node.
for (int i = 0; i < segmentKeys.size(); i++) {
SegmentKey segmentKey = segmentKeys.get(i);
assert segmentKey != null;
RangeStream stream = rangeStreams.get(segmentKey);
List<GridH2RowRangeBounds> bounds;
if (stream == null) {
// First range for this segment in the current lookup phase: create its stream and request.
stream = new RangeStream(qctx, segmentKey.node);
stream.req = createRequest(qctx, batchLookupId, segmentKey.segmentId);
stream.req.bounds(bounds = new ArrayList<>());
rangeStreams.put(segmentKey, stream);
}
else
bounds = stream.req.bounds();
bounds.add(rangeBounds);
// If at least one node will have a full batch then we are ok.
if (bounds.size() >= qctx.pageSize())
batchFull = true;
}
// A single target segment needs no merging of streams.
Future<Cursor> fut = new DoneFuture<>(segmentKeys.size() == 1 ?
new UnicastCursor(rangeId, segmentKeys, rangeStreams) :
new BroadcastCursor(rangeId, segmentKeys, rangeStreams));
res.add(fut);
return true;
}
/** {@inheritDoc} */
@Override public boolean isBatchFull() {
return batchFull;
}
/**
* @return {@code True} if local query execution is enforced.
*/
private boolean localQuery() {
assert qctx != null : "Missing query context: " + this;
return qctx.distributedJoinMode() == LOCAL_ONLY;
}
/**
* Registers the accumulated range streams in the query context and starts each of them.
* Does nothing if no ranges were added.
*/
private void startStreams() {
if (rangeStreams.isEmpty()) {
assert res.isEmpty();
return;
}
qctx.putStreams(batchLookupId, rangeStreams);
// Start streaming.
for (RangeStream stream : rangeStreams.values())
stream.start();
}
/**
* {@inheritDoc}
* <p>
* Starts the streams of the current lookup phase and returns the cursor futures added so far.
*/
@Override public List<Future<Cursor>> find() {
batchFull = false;
findCalled = true;
startStreams();
return res;
}
/**
* {@inheritDoc}
* <p>
* Only the call after the query run does the cleanup; the call before the query is a no-op.
*/
@Override public void reset(boolean beforeQry) {
if (beforeQry || qctx == null) // Query context can be null if addSearchRows was never called.
return;
assert batchLookupId != 0;
// Do cleanup after the query run.
qctx.putStreams(batchLookupId, null);
qctx = null; // The same query can be reused multiple times for different query contexts.
batchLookupId = 0;
rangeStreams = Collections.emptyMap();
broadcastSegments = null;
batchFull = false;
findCalled = false;
res = Collections.emptyList();
}
/**
* {@inheritDoc}
* <p>
* Reports whether the batch was created as unicast or broadcast.
*/
@Override public String getPlanSQL() {
return ucast ? "unicast" : "broadcast";
}
}
/**
 * Per node range stream.
 * <p>
 * Sends index range requests to a single node, collects the responses in a queue and exposes
 * the received rows range by range through {@link #next(int)} and {@link #get(int)}.
 */
private class RangeStream {
    /** Query context this stream was created for. */
    final GridH2QueryContext qctx;

    /** Node the requests of this stream are sent to. */
    final ClusterNode node;

    /** Request to (re)send; set to {@code null} once no more ranges are expected. */
    GridH2IndexRangeRequest req;

    /** Number of ranges still expected from the node; a partially delivered range is still counted. */
    int remainingRanges;

    /** Responses handed over by {@link #onResponse(GridH2IndexRangeResponse)} and consumed by {@link #awaitForResponse()}. */
    final BlockingQueue<GridH2IndexRangeResponse> respQueue = new LinkedBlockingQueue<>();

    /** Ranges of the most recently consumed response which were not read yet. */
    Iterator<GridH2RowRange> ranges = emptyIterator();

    /** Cursor over the rows of the current range. */
    Cursor cursor = GridH2Cursor.EMPTY;

    /** ID of the range the current cursor belongs to, {@code -1} until the first range is read. */
    int cursorRangeId = -1;

    /**
     * @param qctx Query context.
     * @param node Node.
     */
    RangeStream(GridH2QueryContext qctx, ClusterNode node) {
        this.node = node;
        this.qctx = qctx;
    }

    /**
     * Start streaming: expects one range per requested bound and sends the initial request to the node.
     */
    private void start() {
        assert ctx != null;
        assert log != null: getName();

        remainingRanges = req.bounds().size();

        assert remainingRanges > 0;

        if (log.isDebugEnabled())
            log.debug("Starting stream: [node=" + node + ", req=" + req + "]");

        send(singletonList(node), req);
    }

    /**
     * Enqueues a received response so that {@link #awaitForResponse()} can pick it up.
     *
     * @param msg Response.
     */
    public void onResponse(GridH2IndexRangeResponse msg) {
        respQueue.add(msg);
    }

    /**
     * Waits for the next successful response from the node.
     * <p>
     * The queue is polled in 500 ms slices; around each poll the method checks whether the query
     * context was cleared, whether the local node is stopping and whether the remote node is still alive.
     * <ul>
     * <li>{@code STATUS_OK}: the counter of remaining ranges is updated (a trailing partial range stays
     * counted) and, if more ranges are expected, the next page is requested right away.</li>
     * <li>{@code STATUS_NOT_FOUND}: before the first successful response the request is resent after
     * a pause of {@code 20 * attempt} ms, for at most 30 seconds; afterwards it is a failure.</li>
     * <li>{@code STATUS_ERROR}: the remote error is rethrown as {@link CacheException}.</li>
     * </ul>
     *
     * @return Response with {@code STATUS_OK} status.
     * @throws GridH2RetryException If the query was cancelled, the local node is stopping, the thread was
     *      interrupted while waiting, the remote node failed, left, or did not provide data in time.
     * @throws CacheException If the remote node responded with an error.
     * @throws IllegalStateException If the response carries an unknown status.
     */
    private GridH2IndexRangeResponse awaitForResponse() {
        assert remainingRanges > 0;

        final long start = U.currentTimeMillis();

        for (int attempt = 0;; attempt++) {
            if (qctx.isCleared())
                throw new GridH2RetryException("Query is cancelled.");

            if (kernalContext().isStopping())
                throw new GridH2RetryException("Stopping node.");

            GridH2IndexRangeResponse res;

            try {
                res = respQueue.poll(500, TimeUnit.MILLISECONDS);
            }
            catch (InterruptedException e) {
                // Catching InterruptedException clears the interrupt status; restore it so that
                // the code up the stack can still observe that the thread was interrupted.
                Thread.currentThread().interrupt();

                throw new GridH2RetryException("Interrupted.");
            }

            if (res != null) {
                switch (res.status()) {
                    case STATUS_OK:
                        List<GridH2RowRange> ranges0 = res.ranges();

                        remainingRanges -= ranges0.size();

                        // The last range was cut by the page limit: the rest of it comes with the next page.
                        if (ranges0.get(ranges0.size() - 1).isPartial())
                            remainingRanges++;

                        if (remainingRanges > 0) {
                            // Follow-up pages are requested with a fresh request for the same batch and segment.
                            if (req.bounds() != null)
                                req = createRequest(qctx, req.batchLookupId(), req.segment());

                            // Prefetch next page.
                            send(singletonList(node), req);
                        }
                        else
                            req = null;

                        return res;

                    case STATUS_NOT_FOUND:
                        if (req == null || req.bounds() == null) // We have already received the first response.
                            throw new GridH2RetryException("Failure on remote node.");

                        if (U.currentTimeMillis() - start > 30_000)
                            throw new GridH2RetryException("Timeout.");

                        try {
                            U.sleep(20 * attempt);
                        }
                        catch (IgniteInterruptedCheckedException e) {
                            throw new IgniteInterruptedException(e.getMessage());
                        }

                        // Retry to send the request once more after some time.
                        send(singletonList(node), req);

                        break;

                    case STATUS_ERROR:
                        throw new CacheException(res.error());

                    default:
                        throw new IllegalStateException("Unexpected response status: " + res.status());
                }
            }

            if (!kernalContext().discovery().alive(node))
                throw new GridH2RetryException("Node left: " + node);
        }
    }

    /**
     * Moves to the next row of the requested range, fetching further ranges from the node when needed.
     *
     * @param rangeId Requested range ID.
     * @return {@code true} If next row for the requested range was found.
     */
    private boolean next(final int rangeId) {
        for (;;) {
            if (rangeId == cursorRangeId) {
                if (cursor.next())
                    return true;
            }
            else if (rangeId < cursorRangeId)
                return false; // The stream is already positioned on a later range.

            cursor = GridH2Cursor.EMPTY;

            while (!ranges.hasNext()) {
                if (remainingRanges == 0) {
                    ranges = emptyIterator();

                    return false;
                }

                ranges = awaitForResponse().ranges().iterator();
            }

            GridH2RowRange range = ranges.next();

            cursorRangeId = range.rangeId();

            if (!F.isEmpty(range.rows())) {
                final Iterator<GridH2RowMessage> it = range.rows().iterator();

                if (it.hasNext()) {
                    cursor = new GridH2Cursor(new Iterator<Row>() {
                        @Override public boolean hasNext() {
                            return it.hasNext();
                        }

                        @Override public Row next() {
                            // Lazily convert messages into real rows.
                            return toRow(it.next());
                        }

                        @Override public void remove() {
                            throw new UnsupportedOperationException();
                        }
                    });
                }
            }
        }
    }

    /**
     * @param rangeId Requested range ID, must be the ID of the range the cursor is positioned on.
     * @return Current row.
     */
    private Row get(int rangeId) {
        assert rangeId == cursorRangeId;

        return cursor.get();
    }
}
/**
 * Bounds iterator: walks over the requested range bounds and returns the matching rows page by page.
 */
private class RangeSource {
    /** Bounds which were not processed yet. */
    Iterator<GridH2RowRangeBounds> boundsIter;

    /** ID of the range being read, {@code -1} before the first bounds are taken. */
    int curRangeId = -1;

    /** Tree to read from; when {@code null} the tree is resolved by segment for every range. */
    final IgniteTree tree;

    /** Segment ID. */
    private final int segment;

    /** Filter. */
    final IndexingQueryFilter filter;

    /** Rows of the current range which were not returned yet. */
    Iterator<GridH2Row> iter = emptyIterator();

    /**
     * @param bounds Bounds.
     * @param segment Segment ID.
     * @param tree Snapshot.
     * @param filter Filter.
     */
    RangeSource(
        Iterable<GridH2RowRangeBounds> bounds,
        int segment,
        IgniteTree tree,
        IndexingQueryFilter filter
    ) {
        this.tree = tree;
        this.segment = segment;
        this.filter = filter;

        boundsIter = bounds.iterator();
    }

    /**
     * @return {@code true} If there are more rows in this source.
     */
    public boolean hasMoreRows() throws IgniteCheckedException {
        if (boundsIter.hasNext())
            return true;

        return iter.hasNext();
    }

    /**
     * Returns the next portion of rows. A range which does not fit into {@code maxRows} is marked
     * as partial and continued by the following call; bounds without any rows produce an empty range.
     *
     * @param maxRows Max allowed rows.
     * @return Range or {@code null} if all the bounds were processed.
     */
    public GridH2RowRange next(int maxRows) {
        assert maxRows > 0 : maxRows;

        // Move on through the bounds until a range with rows is opened.
        while (!iter.hasNext()) {
            iter = emptyIterator();

            if (!boundsIter.hasNext()) {
                boundsIter = emptyIterator();

                return null;
            }

            GridH2RowRangeBounds nextBounds = boundsIter.next();

            curRangeId = nextBounds.rangeId();

            SearchRow lower = toSearchRow(nextBounds.first());
            SearchRow upper = toSearchRow(nextBounds.last());

            IgniteTree readTree = tree != null ? tree : treeForRead(segment);

            iter = new CursorIteratorWrapper(doFind0(readTree, lower, true, upper, filter));

            if (!iter.hasNext()) {
                // We have to return empty range here.
                GridH2RowRange noRows = new GridH2RowRange();

                noRows.rangeId(curRangeId);

                return noRows;
            }
        }

        // Either a freshly opened range or the rest of a previously partially fetched one.
        return readPage(maxRows);
    }

    /**
     * Reads up to {@code maxRows} rows of the current range; the current iterator must have rows.
     *
     * @param maxRows Max allowed rows.
     * @return Range with the rows read, marked as partial if more rows of the range are left.
     */
    private GridH2RowRange readPage(int maxRows) {
        List<GridH2RowMessage> page = new ArrayList<>();

        GridH2RowRange range = new GridH2RowRange();

        range.rangeId(curRangeId);
        range.rows(page);

        do {
            page.add(toRowMessage(iter.next()));
        }
        while (page.size() < maxRows && iter.hasNext());

        if (iter.hasNext())
            range.setPartial();
        else
            iter = emptyIterator();

        return range;
    }
}
/**
 * Resolves the tree to read a segment from. This base implementation does not provide one and
 * always throws {@link UnsupportedOperationException}.
 *
 * @param segment Segment Id.
 * @return Snapshot for requested segment if there is one.
 * @throws UnsupportedOperationException Always in this base implementation.
 */
protected <K, V> IgniteTree<K, V> treeForRead(int segment) {
    throw new UnsupportedOperationException();
}
/**
 * Finds rows of the given tree within the given bounds. This base implementation does not support
 * the lookup and always throws {@link UnsupportedOperationException}.
 *
 * @param t Tree.
 * @param first Lower bound.
 * @param includeFirst Whether lower bound should be inclusive.
 * @param last Upper bound always inclusive.
 * @param filter Filter.
 * @return Iterator over rows in given range.
 * @throws UnsupportedOperationException Always in this base implementation.
 */
protected GridCursor<GridH2Row> doFind0(
    IgniteTree t,
    @Nullable SearchRow first,
    boolean includeFirst,
    @Nullable SearchRow last,
    IndexingQueryFilter filter) {
    throw new UnsupportedOperationException();
}
/**
 * Cursor which filters by expiration time and predicate.
 */
protected static class FilteringCursor implements GridCursor<GridH2Row> {
    /** Cursor providing the rows to filter. */
    private final GridCursor<GridH2Row> cursor;

    /** Key-value predicate, {@code null} if no query filter was given. */
    private final IgniteBiPredicate<Object, Object> fltr;

    /** Time for expired rows filtering. */
    private final long time;

    /** Is value required for filtering predicate? */
    private final boolean isValRequired;

    /** Row accepted by the last successful {@link #next()} call. */
    private GridH2Row next;

    /**
     * @param cursor GridCursor.
     * @param time Time for expired rows filtering.
     * @param qryFilter Filter.
     * @param spaceName Space name.
     */
    protected FilteringCursor(GridCursor<GridH2Row> cursor,
        long time,
        IndexingQueryFilter qryFilter,
        String spaceName) {
        this.cursor = cursor;
        this.time = time;

        boolean filtered = qryFilter != null;

        this.fltr = filtered ? qryFilter.forSpace(spaceName) : null;
        this.isValRequired = filtered && qryFilter.isValueRequired();
    }

    /**
     * Checks the row against the expiration time first and then against the predicate, if there is one.
     *
     * @param row Row.
     * @return If this row was accepted.
     */
    @SuppressWarnings("unchecked")
    protected boolean accept(GridH2Row row) {
        // Rows with zero expire time never expire.
        if (row.expireTime() != 0 && row.expireTime() <= time)
            return false;

        if (fltr != null) {
            Object key = row.getValue(KEY_COL).getObject();
            Object val = isValRequired ? row.getValue(VAL_COL).getObject() : null;

            assert key != null;
            assert !isValRequired || val != null;

            return fltr.apply(key, val);
        }

        return true;
    }

    /** {@inheritDoc} */
    @Override public boolean next() throws IgniteCheckedException {
        // Skip the rows of the underlying cursor until an accepted one shows up.
        for (next = null; cursor.next(); ) {
            GridH2Row candidate = cursor.get();

            if (accept(candidate)) {
                next = candidate;

                return true;
            }
        }

        return false;
    }

    /** {@inheritDoc} */
    @Override public GridH2Row get() throws IgniteCheckedException {
        if (next != null)
            return next;

        throw new NoSuchElementException();
    }
}
/**
 * Adapts a {@link GridCursor} to the {@link Iterator} interface by reading one row ahead.
 * Checked exceptions of the cursor are rethrown through {@code U.convertException}.
 */
private static final class CursorIteratorWrapper implements Iterator<GridH2Row> {
    /** Wrapped cursor. */
    private final GridCursor<GridH2Row> cursor;

    /** Next element, {@code null} when the cursor is exhausted. */
    private GridH2Row next;

    /**
     * Wraps the cursor and reads its first row ahead, if there is one.
     *
     * @param cursor Cursor.
     */
    private CursorIteratorWrapper(GridCursor<GridH2Row> cursor) {
        assert cursor != null;

        this.cursor = cursor;

        try {
            if (cursor.next())
                next = cursor.get();
        }
        catch (IgniteCheckedException e) {
            throw U.convertException(e);
        }
    }

    /** {@inheritDoc} */
    @Override public boolean hasNext() {
        return next != null;
    }

    /**
     * {@inheritDoc}
     *
     * @throws NoSuchElementException If there are no more rows.
     */
    @Override public GridH2Row next() {
        // Honor the Iterator contract and do not advance an already exhausted cursor once more.
        if (next == null)
            throw new NoSuchElementException();

        try {
            GridH2Row res = next;

            if (cursor.next())
                next = cursor.get();
            else
                next = null;

            return res;
        }
        catch (IgniteCheckedException e) {
            throw U.convertException(e);
        }
    }

    /** {@inheritDoc} */
    @Override public void remove() {
        throw new UnsupportedOperationException("operation is not supported");
    }
}
/** Empty cursor: {@code next()} always returns {@code false} and {@code get()} always returns {@code null}. */
protected static final GridCursor<GridH2Row> EMPTY_CURSOR = new GridCursor<GridH2Row>() {
    /** {@inheritDoc} */
    @Override public boolean next() {
        return false;
    }

    /** {@inheritDoc} */
    @Override public GridH2Row get() {
        return null;
    }
};
}