/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.execution.steps;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.LongStream;
import org.diqube.execution.consumers.AbstractThreadedRowIdConsumer;
import org.diqube.execution.consumers.DoneConsumer;
import org.diqube.execution.consumers.GenericConsumer;
import org.diqube.execution.consumers.RowIdConsumer;
import org.diqube.execution.exception.ExecutablePlanBuildException;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.queries.QueryRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A logical NOT on a row ID step.
*
* <p>
* Executing this step is fairly expensive, as it first needs to collect all row IDs of the input {@link RowIdConsumer},
* then realize all possible row IDs and then remove those reported. This might take a bit of memory, but definitely
* slows down the execution as we have to wait first. The optimizer should try to minimize the number of NotSteps.
*
* <p>
* Input: 1 {@link RowIdConsumer}. <br>
* Output: {@link RowIdConsumer}.
*
* @author Bastian Gloeckle
*/
public class RowIdNotStep extends AbstractThreadedExecutablePlanStep {
  private static final Logger logger = LoggerFactory.getLogger(RowIdNotStep.class);

  /** Set to <code>true</code> by the input consumer as soon as it is told that all its sources are done. */
  private final AtomicBoolean sourceIsEmpty = new AtomicBoolean(false);

  /** All row IDs handed to the input consumer so far; these are the ones that will be left out of the result. */
  private final ConcurrentLinkedDeque<Long> rowIds = new ConcurrentLinkedDeque<>();

  /** The single input of this step: buffers incoming row IDs and records when the input is finished. */
  private final AbstractThreadedRowIdConsumer rowIdConsumer = new AbstractThreadedRowIdConsumer(this) {
    @Override
    public void allSourcesAreDone() {
      RowIdNotStep.this.sourceIsEmpty.set(true);
    }

    @Override
    protected void doConsume(Long[] incoming) {
      // Only buffer here; the negation itself happens in execute() once the input is complete.
      for (int i = 0; i < incoming.length; i++) {
        long current = incoming[i];
        RowIdNotStep.this.rowIds.add(current);
      }
    }
  };

  /** Environment queried for the first row ID and the number of rows of the shard. */
  private final ExecutionEnvironment defaultEnv;

  /**
   * @param stepId
   *          Passed on to the super constructor.
   * @param queryRegistry
   *          Passed on to the super constructor.
   * @param defaultEnv
   *          The environment used to find out which row IDs exist in the shard.
   */
  public RowIdNotStep(int stepId, QueryRegistry queryRegistry, ExecutionEnvironment defaultEnv) {
    super(stepId, queryRegistry);
    this.defaultEnv = defaultEnv;
  }

  /**
   * Verifies that the environment reports a number of rows.
   *
   * @throws ExecutablePlanBuildException
   *           if the environment reports <code>-1</code> as number of rows in the shard.
   */
  @Override
  public void initialize() {
    long rowsInShard = defaultEnv.getNumberOfRowsInShard();
    if (rowsInShard == -1L) {
      throw new ExecutablePlanBuildException("NOT step only supported if there's a TableShard.");
    }
  }

  /**
   * Does nothing as long as the input has not been marked as done. Once it has, reports every row ID in the range
   * <code>[firstRowIdInShard, firstRowIdInShard + numberOfRowsInShard)</code> that was <b>not</b> received from the
   * input to all output {@link RowIdConsumer}s, then informs all output consumers that this source is done and
   * marks this step as done processing.
   */
  @Override
  protected void execute() {
    if (!sourceIsEmpty.get()) {
      // Input not complete yet - we cannot negate before we know all reported row IDs.
      return;
    }

    long firstRowId = defaultEnv.getFirstRowIdInShard();
    long rowCount = defaultEnv.getNumberOfRowsInShard();

    // Copy into a hash set so the membership test for each candidate row ID is cheap.
    Set<Long> reported = new HashSet<>(rowIds);

    LongStream candidates = LongStream.range(firstRowId, firstRowId + rowCount);
    Long[] resultRowIds = candidates.filter(id -> !reported.contains(id)).boxed().toArray(Long[]::new);

    forEachOutputConsumerOfType(RowIdConsumer.class, out -> out.consume(resultRowIds));
    logger.trace("Reported {} matching rows", resultRowIds.length);

    forEachOutputConsumerOfType(GenericConsumer.class, out -> out.sourceIsDone());
    doneProcessing();
  }

  /**
   * Accepts {@link DoneConsumer}s and {@link RowIdConsumer}s only.
   *
   * @throws IllegalArgumentException
   *           if the given consumer is of neither type.
   */
  @Override
  protected void validateOutputConsumer(GenericConsumer consumer) throws IllegalArgumentException {
    boolean supported = consumer instanceof DoneConsumer || consumer instanceof RowIdConsumer;
    if (!supported) {
      throw new IllegalArgumentException("Only RowIdConsumer supported.");
    }
  }

  /**
   * @return A fixed-size list containing only the row ID input consumer of this step.
   */
  @Override
  protected List<GenericConsumer> inputConsumers() {
    return Arrays.<GenericConsumer> asList(rowIdConsumer);
  }

  /**
   * @return always <code>null</code>, this step contributes no additional details.
   */
  @Override
  protected String getAdditionalToStringDetails() {
    return null;
  }
}