/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.query.h2.twostep;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.RandomAccess;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
import javax.cache.CacheException;
import org.apache.ignite.IgniteException;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.GridKernalContext;
import org.apache.ignite.internal.processors.query.h2.twostep.messages.GridQueryNextPageResponse;
import org.apache.ignite.internal.util.typedef.F;
import org.apache.ignite.internal.util.typedef.internal.U;
import org.h2.engine.Session;
import org.h2.index.BaseIndex;
import org.h2.index.Cursor;
import org.h2.index.IndexType;
import org.h2.message.DbException;
import org.h2.result.Row;
import org.h2.result.SearchRow;
import org.h2.table.Column;
import org.h2.table.IndexColumn;
import org.h2.value.Value;
import org.jetbrains.annotations.Nullable;
import static java.util.Objects.requireNonNull;
import static org.apache.ignite.IgniteSystemProperties.IGNITE_SQL_MERGE_TABLE_MAX_SIZE;
import static org.apache.ignite.IgniteSystemProperties.IGNITE_SQL_MERGE_TABLE_PREFETCH_SIZE;
import static org.apache.ignite.IgniteSystemProperties.getInteger;
/**
* Merge index.
*/
public abstract class GridMergeIndex extends BaseIndex {
/**
* Fetched rows limit: when the fetched list reaches this size its first block is evicted.
* Read from the {@code IGNITE_SQL_MERGE_TABLE_MAX_SIZE} system property (10_000 is passed as the fallback value).
*/
private static final int MAX_FETCH_SIZE = getInteger(IGNITE_SQL_MERGE_TABLE_MAX_SIZE, 10_000);
/**
* Block size of the fetched rows list.
* Read from the {@code IGNITE_SQL_MERGE_TABLE_PREFETCH_SIZE} system property (1024 is passed as the fallback value).
*/
private static final int PREFETCH_SIZE = getInteger(IGNITE_SQL_MERGE_TABLE_PREFETCH_SIZE, 1024);
/** Atomic updater for the {@code lastPages} field, used to install the map with a compare-and-set. */
private static final AtomicReferenceFieldUpdater<GridMergeIndex, ConcurrentMap> lastPagesUpdater =
AtomicReferenceFieldUpdater.newUpdater(GridMergeIndex.class, ConcurrentMap.class, "lastPages");
static {
if (!U.isPow2(PREFETCH_SIZE)) {
throw new IllegalArgumentException(IGNITE_SQL_MERGE_TABLE_PREFETCH_SIZE + " (" + PREFETCH_SIZE +
") must be positive and a power of 2.");
}
if (PREFETCH_SIZE >= MAX_FETCH_SIZE) {
throw new IllegalArgumentException(IGNITE_SQL_MERGE_TABLE_PREFETCH_SIZE + " (" + PREFETCH_SIZE +
") must be less than " + IGNITE_SQL_MERGE_TABLE_MAX_SIZE + " (" + MAX_FETCH_SIZE + ").");
}
}
/**
 * Comparator used to search for the lower bound. Delegates to {@code compareRows}, but never
 * returns {@code 0}: a row in the list equal to the search row is reported as greater.
 */
protected final Comparator<SearchRow> firstRowCmp = new Comparator<SearchRow>() {
    @Override public int compare(SearchRow rowInList, SearchRow searchRow) {
        final int order = compareRows(rowInList, searchRow);

        if (order != 0)
            return order;

        return 1; // Equal rows are treated as "greater", equality is never reported.
    }
};
/**
 * Comparator used to search for the upper bound. Delegates to {@code compareRows}, but never
 * returns {@code 0}: a row in the list equal to the search row is reported as less.
 */
protected final Comparator<SearchRow> lastRowCmp = new Comparator<SearchRow>() {
    @Override public int compare(SearchRow rowInList, SearchRow searchRow) {
        final int order = compareRows(rowInList, searchRow);

        if (order != 0)
            return order;

        return -1; // Equal rows are treated as "less", equality is never reported.
    }
};
/** Row source nodes. Stays {@code null} until {@code setSources} is called. */
private Set<UUID> sources;
/** Page size, set via {@code setPageSize}; used to compute the number of the last page. */
private int pageSize;
/**
* Fetched rows, kept in blocks of {@code PREFETCH_SIZE} rows.
* Will be r/w from query execution thread only, does not need to be threadsafe.
*/
private final BlockList<Row> fetched;
/** Last row of the most recently evicted block, {@code null} if nothing was evicted yet. */
private Row lastEvictedRow;
/** Counter of the rows fetched from the stream by {@code FetchingCursor}. */
private volatile int fetchedCnt;
/** Kernal context. */
private final GridKernalContext ctx;
/** Last page number for each source (node ID and segment). Created on demand, {@code null} until then. */
private volatile ConcurrentMap<SourceKey, Integer> lastPages;
/**
* Creates the index and initializes the underlying base index with the given table, name, columns and type.
*
* @param ctx Context.
* @param tbl Table.
* @param name Index name.
* @param type Type.
* @param cols Columns.
*/
public GridMergeIndex(GridKernalContext ctx,
GridMergeTable tbl,
String name,
IndexType type,
IndexColumn[] cols
) {
this(ctx);
// The index ID passed to the base index is always 0.
initBaseIndex(tbl, 0, name, cols, type);
}
/**
* Creates the index without initializing the base index: only stores the context
* and allocates an empty fetched rows list with blocks of {@code PREFETCH_SIZE} rows.
*
* @param ctx Context.
*/
protected GridMergeIndex(GridKernalContext ctx) {
this.ctx = ctx;
fetched = new BlockList<>(PREFETCH_SIZE);
}
/**
* Returns the internal set of source node IDs (not a copy).
*
* @return Return source nodes for this merge index, {@code null} if {@code setSources} was not called yet.
*/
public Set<UUID> sources() {
return sources;
}
/**
 * Fails the index if any source node has left: the first source node that discovery
 * does not report as alive is passed to {@link #fail(UUID, CacheException)} with a {@code null} error.
 */
private void checkSourceNodesAlive() {
    for (UUID srcId : sources()) {
        if (ctx.discovery().alive(srcId))
            continue;

        // Only the first dead node found is reported.
        fail(srcId, null);

        return;
    }
}
/**
* Checks whether the given node is among the source nodes. Dereferences the sources set directly,
* so the sources must have been set before the call.
*
* @param nodeId Node ID.
* @return {@code true} If this index needs data from the given source node.
*/
public boolean hasSource(UUID nodeId) {
return sources.contains(nodeId);
}
/**
 * {@inheritDoc}
 * <p>
 * Counts the rows by opening an unbounded cursor and iterating it to the end.
 */
@Override public long getRowCount(Session ses) {
    long rowCnt = 0;

    for (Cursor cursor = find(ses, null, null); cursor.next(); )
        rowCnt++;

    return rowCnt;
}
/**
* {@inheritDoc}
* <p>
* Always returns the constant {@code 10_000}; no rows are actually counted.
*/
@Override public long getRowCountApproximation() {
return 10_000;
}
/**
 * Sets the source nodes. Expected to be called once: the sources must not be set yet (asserted).
 *
 * @param nodes Nodes.
 * @param segmentsCnt Index segments per table. Not used by this method.
 * @throws IllegalStateException If the same node ID is met twice in {@code nodes}.
 */
public void setSources(Collection<ClusterNode> nodes, int segmentsCnt) {
    assert sources == null;

    final Set<UUID> nodeIds = new HashSet<>();

    // Publish the set before filling it, then populate it through the local reference.
    sources = nodeIds;

    for (ClusterNode node : nodes) {
        final boolean added = nodeIds.add(node.id());

        if (!added)
            throw new IllegalStateException();
    }
}
/**
* Stores the page size; it is later used to compute the last page number from the total rows count.
*
* @param pageSize Page size.
*/
public void setPageSize(int pageSize) {
this.pageSize = pageSize;
}
/**
 * Waits for the next page. The queue is polled with a 500 ms timeout; after every
 * unsuccessful poll the source nodes are checked for being alive.
 *
 * @param queue Queue to poll.
 * @return Next page.
 * @throws CacheException If the waiting thread was interrupted.
 */
private GridResultPage takeNextPage(Pollable<GridResultPage> queue) {
    while (true) {
        final GridResultPage polled;

        try {
            polled = queue.poll(500, TimeUnit.MILLISECONDS);
        }
        catch (InterruptedException e) {
            throw new CacheException("Query execution was interrupted.", e);
        }

        if (polled != null)
            return polled;

        // Nothing has arrived within the timeout: make sure we are not waiting for a dead node.
        checkSourceNodesAlive();
    }
}
/**
 * Returns the given iterator while it has rows; otherwise takes the next page from the queue,
 * requests the following page unless the taken one is marked as last, and returns the taken page rows.
 *
 * @param queue Queue to poll.
 * @param iter Current iterator.
 * @return The same or new iterator.
 */
protected final Iterator<Value[]> pollNextIterator(Pollable<GridResultPage> queue, Iterator<Value[]> iter) {
    if (iter.hasNext())
        return iter;

    final GridResultPage nextPage = takeNextPage(queue);

    if (!nextPage.isLast())
        nextPage.fetchNextPage(); // Failed will throw an exception here.

    final Iterator<Value[]> pageRows = nextPage.rows();

    // The received iterator must be empty in the dummy last page or on failure.
    assert pageRows.hasNext() || nextPage.isDummyLast() || nextPage.isFail();

    return pageRows;
}
/**
 * Fails the index for every source node with the given error.
 *
 * @param e Error.
 */
public void fail(final CacheException e) {
    for (UUID srcId : sources) {
        fail(srcId, e);
    }
}
/**
 * Adds a special failure page for the given node. The page reports {@code isFail() == true};
 * its {@code fetchNextPage()} throws the given error, or delegates to the superclass
 * implementation when the error is {@code null}.
 *
 * @param nodeId Node ID, if {@code null} the first source node is used.
 * @param e Exception.
 */
public void fail(UUID nodeId, final CacheException e) {
    final UUID failedNodeId = nodeId != null ? nodeId : F.first(sources);

    final GridResultPage failPage = new GridResultPage(null, failedNodeId, null) {
        @Override public boolean isFail() {
            return true;
        }

        @Override public void fetchNextPage() {
            if (e != null)
                throw e;

            super.fetchNextPage();
        }
    };

    addPage0(failPage);
}
/**
 * Registers the last page number for the source (node and segment) of the given response,
 * when the response is page 0 and carries a non-negative total rows count.
 *
 * @param nodeId Node ID.
 * @param res Response.
 * @throws IllegalStateException If the last page was already registered for this source.
 */
private void initLastPages(UUID nodeId, GridQueryNextPageResponse res) {
    final int allRows = res.allRows();

    // In the old protocol we send all rows number in the page 0, other pages have -1.
    // In the new protocol we do not know it and always have -1, except terminating page,
    // which has -2. Thus we have to init page counters only when we receive positive value
    // in the first page.
    if (allRows < 0 || res.page() != 0)
        return;

    ConcurrentMap<SourceKey, Integer> pages = lastPages;

    if (pages == null) {
        pages = new ConcurrentHashMap<>();

        // If the CAS fails, another thread has already installed the map: use that one.
        if (!lastPagesUpdater.compareAndSet(this, null, pages))
            pages = lastPages;
    }

    assert pageSize > 0: pageSize;

    final int lastPage;

    if (allRows == 0)
        lastPage = 0;
    else
        lastPage = (allRows - 1) / pageSize;

    assert lastPage >= 0: lastPage;

    final Integer prev = pages.put(new SourceKey(nodeId, res.segmentId()), lastPage);

    if (prev != null)
        throw new IllegalStateException();
}
/**
 * Marks the page as last when either its response has the terminating rows count ({@code -2}),
 * or its number matches the last page number registered for the page source.
 *
 * @param page Page.
 */
private void markLastPage(GridResultPage page) {
    final GridQueryNextPageResponse res = page.response();

    final boolean terminating = res.allRows() == -2; // -2 means the last page.

    if (!terminating) {
        final UUID srcId = page.source();

        initLastPages(srcId, res);

        final ConcurrentMap<SourceKey, Integer> pages = lastPages;

        if (pages == null)
            return; // It was not initialized --> wait for -2.

        final Integer expectedLast = pages.get(new SourceKey(srcId, res.segmentId()));

        if (expectedLast == null)
            return; // This node may use the new protocol --> wait for -2.

        if (expectedLast != res.page()) {
            assert expectedLast > res.page();

            return; // This is not the last page.
        }
    }

    page.setLast(true);
}
/**
* Adds a received page: first marks it as last if needed, then hands it over to {@code addPage0}.
*
* @param page Page.
*/
public final void addPage(GridResultPage page) {
markLastPage(page);
addPage0(page);
}
/**
 * Creates a page without a response for the same source as the given page and marks it as last.
 *
 * @param lastPage Real last page.
 * @return Created dummy page.
 */
protected final GridResultPage createDummyLastPage(GridResultPage lastPage) {
    assert !lastPage.isDummyLast(); // It must be a real last page.

    final UUID srcId = lastPage.source();

    final GridResultPage dummy = new GridResultPage(ctx, srcId, null);

    return dummy.setLast(true);
}
/**
* Implementation-specific handling of a new page. Receives both the regular pages
* (already marked as last when applicable) and the failure pages created by {@code fail}.
*
* @param page Page.
*/
protected abstract void addPage0(GridResultPage page);
/**
 * {@inheritDoc}
 * <p>
 * Checks the bounds against the last evicted row first, then searches either the completely
 * fetched rows list or the stream of incoming rows.
 */
@Override public final Cursor find(Session ses, SearchRow first, SearchRow last) {
    checkBounds(lastEvictedRow, first, last);

    return fetchedAll() ? findAllFetched(fetched, first, last) : findInStream(first, last);
}
/**
* Used by {@code find} to choose between searching the fetched list and searching the stream.
*
* @return {@code true} If we have fetched all the remote rows into a fetched list.
*/
public abstract boolean fetchedAll();
/**
 * Checks that the search can be served. This implementation ignores the bounds
 * and fails whenever any row has been evicted.
 *
 * @param lastEvictedRow Last evicted fetched row.
 * @param first Lower bound.
 * @param last Upper bound.
 * @throws IgniteException If {@code lastEvictedRow} is not {@code null}.
 */
protected void checkBounds(Row lastEvictedRow, SearchRow first, SearchRow last) {
    if (lastEvictedRow == null)
        return;

    throw new IgniteException("Fetched result set was too large.");
}
/**
* Searches the rows which are still being received. Called by {@code find}
* when not all the rows have been fetched yet.
*
* @param first Lower bound.
* @param last Upper bound.
* @return Cursor. Usually it must be {@link FetchingCursor} instance.
*/
protected abstract Cursor findInStream(@Nullable SearchRow first, @Nullable SearchRow last);
/**
* Searches the completely fetched rows. Called by {@code find} when {@code fetchedAll()} returns {@code true}.
*
* @param fetched Fetched rows.
* @param first Lower bound.
* @param last Upper bound.
* @return Cursor.
*/
protected abstract Cursor findAllFetched(List<Row> fetched, @Nullable SearchRow first, @Nullable SearchRow last);
/**
* {@inheritDoc}
* <p>
* Always throws: rename is reported as unsupported.
*/
@Override public void checkRename() {
throw DbException.getUnsupportedException("rename");
}
/**
* {@inheritDoc}
* <p>
* Does nothing in this implementation.
*/
@Override public void close(Session ses) {
// No-op.
}
/**
* {@inheritDoc}
* <p>
* Always throws: adding rows is reported as unsupported.
*/
@Override public void add(Session ses, Row row) {
throw DbException.getUnsupportedException("add");
}
/**
* {@inheritDoc}
* <p>
* Always throws: removing a row is reported as unsupported.
*/
@Override public void remove(Session ses, Row row) {
throw DbException.getUnsupportedException("remove row");
}
/**
* {@inheritDoc}
* <p>
* Always throws: removing the index is reported as unsupported.
*/
@Override public void remove(Session ses) {
throw DbException.getUnsupportedException("remove index");
}
/**
* {@inheritDoc}
* <p>
* Always throws: truncate is reported as unsupported.
*/
@Override public void truncate(Session ses) {
throw DbException.getUnsupportedException("truncate");
}
/**
* {@inheritDoc}
* <p>
* Always returns {@code false}; consistently, {@code findFirstOrLast} always throws.
*/
@Override public boolean canGetFirstOrLast() {
return false;
}
/**
* {@inheritDoc}
* <p>
* Always throws: the operation is reported as unsupported.
*/
@Override public Cursor findFirstOrLast(Session ses, boolean first) {
throw DbException.getUnsupportedException("findFirstOrLast");
}
/**
* {@inheritDoc}
* <p>
* Always returns {@code false}.
*/
@Override public boolean needRebuild() {
return false;
}
/**
* {@inheritDoc}
* <p>
* Always returns {@code 0}.
*/
@Override public long getDiskSpaceUsed() {
return 0;
}
/**
 * Finds the insertion point of the search row in a sorted non-empty list of rows.
 * The comparator must never return {@code 0}, so the search row is never "found"
 * and the binary search always yields an insertion point.
 *
 * @param rows Sorted rows list.
 * @param searchRow Search row.
 * @param cmp Comparator.
 * @param checkLast If we need to optimistically check the last row right away.
 * @return Insertion point for the search row.
 */
protected static int binarySearchRow(
    List<Row> rows,
    SearchRow searchRow,
    Comparator<SearchRow> cmp,
    boolean checkLast
) {
    assert !rows.isEmpty();

    // Optimistically compare with the last row as a first step.
    if (checkLast) {
        final int lastCmp = cmp.compare(last(rows), searchRow);

        assert lastCmp != 0; // Comparators must never return 0 here.

        if (lastCmp < 0)
            return rows.size(); // The search row is greater than the last row.
    }

    final int found = Collections.binarySearch(rows, searchRow, cmp);

    assert found < 0: found; // Comparator must never return 0.

    // For a missing key binary search returns (-(insertion point) - 1): decode it back.
    return -(found + 1);
}
/**
 * Handles eviction of a block from the fetched rows list. The evicted block
 * is expected to be full (asserted); its last row must not be {@code null}.
 *
 * @param evictedBlock Evicted block.
 */
private void onBlockEvict(List<Row> evictedBlock) {
    assert evictedBlock.size() == PREFETCH_SIZE;

    // Remember the last row (it will be max row) from the evicted block.
    final Row maxEvicted = last(evictedBlock);

    lastEvictedRow = requireNonNull(maxEvicted);
}
/**
 * Returns the last element of the list. The list must not be empty,
 * otherwise the underlying {@code get} is called with index {@code -1}.
 *
 * @param l List.
 * @return Last element.
 */
private static <Z> Z last(List<Z> l) {
    final int lastIdx = l.size() - 1;

    return l.get(lastIdx);
}
/**
 * Fetching cursor. Iterates over the already fetched rows first and then pulls the following
 * rows from the stream into the fetched list, respecting the optional lower and upper bounds.
 */
protected class FetchingCursor implements Cursor {
    /** Stream of all the rows from remote nodes. */
    Iterator<Row> stream;

    /** Rows being iterated: initially the whole fetched list, after the first fetch its last block. */
    List<Row> rows;

    /** Current position in {@link #rows}. {@code Integer.MAX_VALUE} marks an exhausted cursor. */
    int cur;

    /** Lower bound. Reset to {@code null} as soon as its position is found. */
    SearchRow first;

    /** Upper bound. */
    SearchRow last;

    /** Position in {@link #rows} where iteration must stop, {@code Integer.MAX_VALUE} while unknown. */
    int lastFound = Integer.MAX_VALUE;

    /**
     * @param first Lower bound.
     * @param last Upper bound.
     * @param stream Stream of all the rows from remote nodes.
     */
    public FetchingCursor(SearchRow first, SearchRow last, Iterator<Row> stream) {
        assert stream != null;

        // Initially we will use all the fetched rows, after we will switch to the last block.
        rows = fetched;

        this.stream = stream;
        this.first = first;
        this.last = last;

        if (haveBounds() && !rows.isEmpty())
            cur = findBounds();

        cur--; // Set current position before the first row.
    }

    /**
     * @return {@code true} If we have bounds.
     */
    private boolean haveBounds() {
        return first != null || last != null;
    }

    /**
     * Searches the current rows for the bounds. As a side effect clears the lower bound
     * when it is found and sets {@link #lastFound} when the upper bound lies within the rows.
     *
     * @return Lower bound.
     */
    private int findBounds() {
        assert !rows.isEmpty(): "rows";

        int firstFound = cur;

        // Find the lower bound.
        if (first != null) {
            firstFound = binarySearchRow(rows, first, firstRowCmp, true);

            assert firstFound >= cur && firstFound <= rows.size(): "firstFound";

            if (firstFound == rows.size())
                return firstFound; // The lower bound is greater than all the rows we have.

            first = null; // We have found the lower bound, do not need it anymore.
        }

        // Find the upper bound.
        if (last != null) {
            assert lastFound == Integer.MAX_VALUE: "lastFound";

            int lastFound0 = binarySearchRow(rows, last, lastRowCmp, true);

            // If the upper bound is too large we will ignore it.
            if (lastFound0 != rows.size())
                lastFound = lastFound0;
        }

        return firstFound;
    }

    /**
     * Fetch rows from the stream. Leaves {@link #cur} either at the next row to return
     * or at {@code Integer.MAX_VALUE} if the stream has no more rows.
     */
    private void fetchRows() {
        for (;;) {
            // Take the current last block and set the position after last.
            rows = fetched.lastBlock();
            cur = rows.size();

            // Fetch stream.
            while (stream.hasNext()) {
                fetched.add(requireNonNull(stream.next()));

                // Evict block if we've fetched too many rows.
                if (fetched.size() == MAX_FETCH_SIZE) {
                    onBlockEvict(fetched.evictFirstBlock());

                    assert fetched.size() < MAX_FETCH_SIZE;
                }

                // No bounds -> no need to do binary search, can return the fetched row right away.
                if (!haveBounds())
                    break;

                // When the last block changed, it means that we've filled the current last block.
                // We have fetched the needed number of rows for binary search.
                if (fetched.lastBlock() != rows) {
                    assert fetched.lastBlock().isEmpty(); // The last row must be added to the previous block.

                    break;
                }
            }

            if (cur == rows.size())
                cur = Integer.MAX_VALUE; // We were not able to fetch anything. Done.
            else {
                // Update fetched count.
                fetchedCnt += rows.size() - cur;

                if (haveBounds()) {
                    cur = findBounds();

                    if (cur == rows.size())
                        continue; // The lower bound is too large, continue fetching rows.
                }
            }

            return;
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Keeps returning {@code false} once the cursor is exhausted, no matter how many times it is called.
     */
    @Override public boolean next() {
        // fetchRows() parks the position at Integer.MAX_VALUE when the stream is drained. Without this
        // guard one more call would overflow ++cur to Integer.MIN_VALUE, which is less than any
        // lastFound, and the exhausted cursor would wrongly report that a row is available.
        if (cur == Integer.MAX_VALUE)
            return false;

        if (++cur == rows.size())
            fetchRows();

        return cur < lastFound;
    }

    /** {@inheritDoc} */
    @Override public Row get() {
        return rows.get(cur);
    }

    /** {@inheritDoc} */
    @Override public SearchRow getSearchRow() {
        return get();
    }

    /** {@inheritDoc} */
    @Override public boolean previous() {
        // Should never be called.
        throw DbException.getUnsupportedException("previous");
    }
}
/**
* State constants. NOTE(review): not referenced anywhere in this class; presumably used
* by other classes of the package - confirm before changing.
*/
enum State {
UNINITIALIZED, INITIALIZED, FINISHED
}
/**
 * Append-only list which stores its elements in blocks of a fixed power-of-2 size and
 * allows evicting the head block. A new block is opened as soon as the last one gets full,
 * so every block except the last one is always full and the last block may be empty.
 */
private static final class BlockList<Z> extends AbstractList<Z> implements RandomAccess {
    /** Blocks, there is always at least one. */
    private final List<List<Z>> blocks;

    /** Total number of elements in all the blocks. */
    private int size;

    /** Number of elements in a full block. */
    private final int maxBlockSize;

    /** Shift converting an element index into a block index. */
    private final int shift;

    /** Mask converting an element index into an offset inside a block. */
    private final int mask;

    /**
     * @param maxBlockSize Max block size.
     */
    private BlockList(int maxBlockSize) {
        assert U.isPow2(maxBlockSize);

        this.maxBlockSize = maxBlockSize;
        this.shift = Integer.numberOfTrailingZeros(maxBlockSize);
        this.mask = maxBlockSize - 1;

        this.blocks = new ArrayList<>();

        // Start with a single empty block.
        this.blocks.add(new ArrayList<Z>());
    }

    /** {@inheritDoc} */
    @Override public int size() {
        return size;
    }

    /** {@inheritDoc} */
    @Override public boolean add(Z z) {
        size++;

        final List<Z> tail = lastBlock();

        tail.add(z);

        // The tail block got full: open a new empty one for the following elements.
        if (tail.size() == maxBlockSize)
            blocks.add(new ArrayList<Z>());

        return true;
    }

    /** {@inheritDoc} */
    @Override public Z get(int idx) {
        final int blockIdx = idx >>> shift;
        final int offset = idx & mask;

        return blocks.get(blockIdx).get(offset);
    }

    /**
     * @return Last block.
     */
    private List<Z> lastBlock() {
        return blocks.get(blocks.size() - 1);
    }

    /**
     * Removes the head block and decreases the size by the number of its elements.
     *
     * @return Evicted block.
     */
    private List<Z> evictFirstBlock() {
        final List<Z> head = blocks.remove(0);

        size -= head.size();

        return head;
    }
}
/**
* Pollable: a minimal abstraction of a source which can be polled for the next element with a timeout.
*/
protected static interface Pollable<E> {
/**
* Polls for the next value, waiting up to the given timeout.
*
* @param timeout Timeout.
* @param unit Time unit.
* @return Polled value or {@code null} if none.
* @throws InterruptedException If interrupted.
*/
E poll(long timeout, TimeUnit unit) throws InterruptedException;
}
/**
 * Key identifying a single source of pages: a node ID together with a segment number.
 */
private static class SourceKey {
    /** Node ID, must not be {@code null} for {@code equals} and {@code hashCode} to work. */
    final UUID nodeId;

    /** Segment. */
    final int segment;

    /**
     * @param nodeId Node ID.
     * @param segment Segment.
     */
    SourceKey(UUID nodeId, int segment) {
        this.nodeId = nodeId;
        this.segment = segment;
    }

    /** {@inheritDoc} */
    @Override public boolean equals(Object o) {
        if (this == o)
            return true;

        if (o == null || getClass() != o.getClass())
            return false;

        final SourceKey that = (SourceKey)o;

        return segment == that.segment && nodeId.equals(that.nodeId);
    }

    /** {@inheritDoc} */
    @Override public int hashCode() {
        return 31 * nodeId.hashCode() + segment;
    }
}
}