/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.join;
import java.io.IOException;
import java.util.List;
import javax.inject.Named;
import org.apache.drill.exec.exception.ClassTransformationException;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.physical.impl.common.HashTable;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.RecordBatch;
import org.apache.drill.exec.record.RecordBatch.IterOutcome;
import org.apache.drill.exec.record.VectorContainer;
import org.apache.drill.exec.record.VectorWrapper;
import org.apache.calcite.rel.core.JoinRelType;
public abstract class HashJoinProbeTemplate implements HashJoinProbe {
// Probe side record batch
private RecordBatch probeBatch;
private BatchSchema probeSchema;
private VectorContainer buildBatch;
// Join type, INNER, LEFT, RIGHT or OUTER
private JoinRelType joinType;
private HashJoinBatch outgoingJoinBatch = null;
private static final int TARGET_RECORDS_PER_BATCH = 4000;
/* Helper class
* Maintains linked list of build side records with the same key
* Keeps information about which build records have a corresponding
* matching key in the probe side (for outer, right joins)
*/
private HashJoinHelper hjHelper = null;
// Underlying hashtable used by the hash join
private HashTable hashTable = null;
// Number of records to process on the probe side
private int recordsToProcess = 0;
// Number of records processed on the probe side
private int recordsProcessed = 0;
// Number of records in the output container
private int outputRecords;
// Indicate if we should drain the next record from the probe side
private boolean getNextRecord = true;
// Contains both batch idx and record idx of the matching record in the build side
private int currentCompositeIdx = -1;
// Current state the hash join algorithm is in
private ProbeState probeState = ProbeState.PROBE_PROJECT;
// For outer or right joins, this is a list of unmatched records that needs to be projected
private List<Integer> unmatchedBuildIndexes = null;
@Override
public void setupHashJoinProbe(FragmentContext context, VectorContainer buildBatch, RecordBatch probeBatch,
int probeRecordCount, HashJoinBatch outgoing, HashTable hashTable,
HashJoinHelper hjHelper, JoinRelType joinRelType) {
this.probeBatch = probeBatch;
this.probeSchema = probeBatch.getSchema();
this.buildBatch = buildBatch;
this.joinType = joinRelType;
this.recordsToProcess = probeRecordCount;
this.hashTable = hashTable;
this.hjHelper = hjHelper;
this.outgoingJoinBatch = outgoing;
doSetup(context, buildBatch, probeBatch, outgoing);
}
public void executeProjectRightPhase() {
while (outputRecords < TARGET_RECORDS_PER_BATCH && recordsProcessed < recordsToProcess) {
projectBuildRecord(unmatchedBuildIndexes.get(recordsProcessed), outputRecords);
recordsProcessed++;
outputRecords++;
}
}
public void executeProbePhase() throws SchemaChangeException {
while (outputRecords < TARGET_RECORDS_PER_BATCH && probeState != ProbeState.DONE && probeState != ProbeState.PROJECT_RIGHT) {
// Check if we have processed all records in this batch we need to invoke next
if (recordsProcessed == recordsToProcess) {
// Done processing all records in the previous batch, clean up!
for (VectorWrapper<?> wrapper : probeBatch) {
wrapper.getValueVector().clear();
}
IterOutcome leftUpstream = outgoingJoinBatch.next(HashJoinHelper.LEFT_INPUT, probeBatch);
switch (leftUpstream) {
case NONE:
case NOT_YET:
case STOP:
recordsProcessed = 0;
recordsToProcess = 0;
probeState = ProbeState.DONE;
// We are done with the probe phase. If its a RIGHT or a FULL join get the unmatched indexes from the build side
if (joinType == JoinRelType.RIGHT || joinType == JoinRelType.FULL) {
probeState = ProbeState.PROJECT_RIGHT;
}
continue;
case OK_NEW_SCHEMA:
if (probeBatch.getSchema().equals(probeSchema)) {
doSetup(outgoingJoinBatch.getContext(), buildBatch, probeBatch, outgoingJoinBatch);
hashTable.updateBatches();
} else {
throw SchemaChangeException.schemaChanged("Hash join does not support schema changes in probe side.",
probeSchema,
probeBatch.getSchema());
}
case OK:
recordsToProcess = probeBatch.getRecordCount();
recordsProcessed = 0;
// If we received an empty batch do nothing
if (recordsToProcess == 0) {
continue;
}
}
}
int probeIndex = -1;
// Check if we need to drain the next row in the probe side
if (getNextRecord) {
if (hashTable != null) {
probeIndex = hashTable.containsKey(recordsProcessed, true);
}
if (probeIndex != -1) {
/* The current probe record has a key that matches. Get the index
* of the first row in the build side that matches the current key
*/
currentCompositeIdx = hjHelper.getStartIndex(probeIndex);
/* Record in the build side at currentCompositeIdx has a matching record in the probe
* side. Set the bit corresponding to this index so if we are doing a FULL or RIGHT
* join we keep track of which records we need to project at the end
*/
hjHelper.setRecordMatched(currentCompositeIdx);
projectBuildRecord(currentCompositeIdx, outputRecords);
projectProbeRecord(recordsProcessed, outputRecords);
outputRecords++;
/* Projected single row from the build side with matching key but there
* may be more rows with the same key. Check if that's the case
*/
currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
if (currentCompositeIdx == -1) {
/* We only had one row in the build side that matched the current key
* from the probe side. Drain the next row in the probe side.
*/
recordsProcessed++;
} else {
/* There is more than one row with the same key on the build side
* don't drain more records from the probe side till we have projected
* all the rows with this key
*/
getNextRecord = false;
}
} else { // No matching key
// If we have a left outer join, project the keys
if (joinType == JoinRelType.LEFT || joinType == JoinRelType.FULL) {
projectProbeRecord(recordsProcessed, outputRecords);
outputRecords++;
}
recordsProcessed++;
}
} else {
hjHelper.setRecordMatched(currentCompositeIdx);
projectBuildRecord(currentCompositeIdx, outputRecords);
projectProbeRecord(recordsProcessed, outputRecords);
outputRecords++;
currentCompositeIdx = hjHelper.getNextIndex(currentCompositeIdx);
if (currentCompositeIdx == -1) {
// We don't have any more rows matching the current key on the build side, move on to the next probe row
getNextRecord = true;
recordsProcessed++;
}
}
}
}
public int probeAndProject() throws SchemaChangeException, ClassTransformationException, IOException {
outputRecords = 0;
if (probeState == ProbeState.PROBE_PROJECT) {
executeProbePhase();
}
if (probeState == ProbeState.PROJECT_RIGHT) {
// We are here because we have a RIGHT OUTER or a FULL join
if (unmatchedBuildIndexes == null) {
// Initialize list of build indexes that didn't match a record on the probe side
unmatchedBuildIndexes = hjHelper.getNextUnmatchedIndex();
recordsToProcess = unmatchedBuildIndexes.size();
recordsProcessed = 0;
}
// Project the list of unmatched records on the build side
executeProjectRightPhase();
}
return outputRecords;
}
public abstract void doSetup(@Named("context") FragmentContext context, @Named("buildBatch") VectorContainer buildBatch, @Named("probeBatch") RecordBatch probeBatch,
@Named("outgoing") RecordBatch outgoing);
public abstract void projectBuildRecord(@Named("buildIndex") int buildIndex, @Named("outIndex") int outIndex);
public abstract void projectProbeRecord(@Named("probeIndex") int probeIndex, @Named("outIndex") int outIndex);
}