/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.window;
import com.sun.codemodel.JExpr;
import com.sun.codemodel.JExpression;
import com.sun.codemodel.JInvocation;
import com.sun.codemodel.JVar;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.expression.FunctionCall;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.logical.data.NamedExpression;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.compile.sig.GeneratorMapping;
import org.apache.drill.exec.compile.sig.MappingSet;
import org.apache.drill.exec.exception.SchemaChangeException;
import org.apache.drill.exec.expr.ClassGenerator;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.expr.ValueVectorReadExpression;
import org.apache.drill.exec.expr.ValueVectorWriteExpression;
import org.apache.drill.exec.expr.fn.FunctionLookupContext;
import org.apache.drill.exec.physical.config.WindowPOP;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorContainer;
public abstract class WindowFunction {
// Class-wide SLF4J logger. Within this file it is handed to UserException's build(...) when NTILE rejects
// its argument.
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(WindowFunction.class);
/**
 * Kinds of window function that {@code WindowFunction.fromExpression(FunctionCall)} can instantiate.
 * <p>
 * A function call is resolved to one of these constants by upper-casing its name; a name that matches no
 * constant is treated as {@code AGGREGATE}.
 */
public enum Type {
// ROW_NUMBER, RANK and DENSE_RANK are implemented by Ranking with a required BIGINT output
ROW_NUMBER,
RANK,
DENSE_RANK,
// PERCENT_RANK and CUME_DIST are implemented by Ranking with a required FLOAT8 output
PERCENT_RANK,
CUME_DIST,
// implemented by Lead
LEAD,
// implemented by Lag
LAG,
// implemented by FirstValue
FIRST_VALUE,
// implemented by LastValue
LAST_VALUE,
// implemented by Ntile (a Ranking subclass)
NTILE,
// implemented by WindowAggregate; also the fallback for any function name not listed above
AGGREGATE
}
/** Kind of window function implemented by this instance; assigned once by the constructor. */
final Type type;

/**
 * @param type kind of window function the new instance represents
 */
WindowFunction(final Type type) {
  this.type = type;
}
/**
 * Creates the {@link WindowFunction} implementation that corresponds to a function call.
 * <p>
 * The call's name is upper-cased and looked up among the {@link Type} constants. A name that matches no
 * constant is treated as {@link Type#AGGREGATE}, i.e. it is handled by {@link WindowAggregate}.
 *
 * @param call function call to resolve
 * @return a new window function instance; {@code materialize} has not been called on it yet
 */
static WindowFunction fromExpression(final FunctionCall call) {
  // Upper-case with a fixed locale. With the JVM default locale the result depends on the environment:
  // under a Turkish locale "first_value" becomes "FİRST_VALUE", which matches no constant and would
  // silently be treated as an aggregate.
  final String name = call.getName().toUpperCase(java.util.Locale.ROOT);

  Type type;
  try {
    type = Type.valueOf(name);
  } catch (IllegalArgumentException ignored) {
    // not one of the dedicated window functions: handle it as a regular aggregate
    type = Type.AGGREGATE;
  }

  switch (type) {
    case AGGREGATE:
      return new WindowAggregate();
    case LEAD:
      return new Lead();
    case LAG:
      return new Lag();
    case FIRST_VALUE:
      return new FirstValue();
    case LAST_VALUE:
      return new LastValue();
    case NTILE:
      return new Ntile();
    default:
      // ROW_NUMBER, RANK, DENSE_RANK, PERCENT_RANK and CUME_DIST share one implementation
      return new Ranking(type);
  }
}
/**
 * Emits the code for this window function into the given class generator.
 * <p>
 * The implementations in this file install a {@code MappingSet} on {@code cg} and then add the expression(s)
 * or statement(s) that write to the output vector. They rely on state that {@code materialize} stores in the
 * instance, so {@code materialize} has to succeed first.
 *
 * @param cg class generator for the {@code WindowFramer} being generated
 */
abstract void generateCode(final ClassGenerator<WindowFramer> cg);
/**
 * @return true if this window function supports custom frames. In this file the aggregate, FIRST_VALUE and
 * LAST_VALUE implementations answer true; the ranking (including NTILE), LEAD and LAG implementations answer
 * false.
 */
abstract boolean supportsCustomFrames();
/**
 * Tells whether all batches of the current partition must be available before the first batch is processed.
 * <p>
 * This base implementation ignores {@code pop} and always answers true; every subclass declared in this file
 * overrides it.
 *
 * @param pop window group definition
 * @return true if this window function requires all batches of current partition to be available before processing
 * the first batch
 */
public boolean requiresFullPartition(final WindowPOP pop) {
return true;
}
/**
 * Tells whether the first batch of the current partition can be processed right away.
 * <p>
 * This base implementation only looks at {@code partitionEndReached}; the other arguments are ignored here and
 * are used by the overriding implementations in the subclasses.
 *
 * @param numBatchesAvailable number of batches available for current partition
 * @param pop window group definition
 * @param frameEndReached we found the last row of the first batch's frame
 * @param partitionEndReached all batches of current partition are available
 *
 * @return true if this window function can process the first batch immediately
 */
public boolean canDoWork(final int numBatchesAvailable, final WindowPOP pop, final boolean frameEndReached,
final boolean partitionEndReached) {
return partitionEndReached;
}
/**
 * Prepares this window function for code generation against the given batch.
 * <p>
 * The implementations in this file add an output vector, named after {@code ne}'s reference, to {@code batch}
 * (allocating it) and remember what {@code generateCode} later needs in order to write into that vector.
 *
 * @param ne expression of the window function together with the reference naming its output
 * @param batch container the expression is materialized against and the output vector is added to
 * @param registry function lookup context used when the expression is materialized
 * @return true once the function is ready for code generation; the implementations in this file return false
 * when materializing their input expression yields null
 * @throws SchemaChangeException declared for implementations; none of the visible code throws it directly
 */
abstract boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
throws SchemaChangeException;
/**
 * Window function backed by an aggregate expression. {@link WindowFunction#fromExpression(FunctionCall)}
 * picks this implementation for every function name that has no dedicated {@link Type}.
 */
static class WindowAggregate extends WindowFunction {

  /** Writes the materialized aggregate into the output vector; assigned by {@code materialize}. */
  private ValueVectorWriteExpression writeAggregationToOutput;

  WindowAggregate() {
    super(Type.AGGREGATE);
  }

  /**
   * Materializes the aggregate expression, adds the matching output vector to {@code batch} and prepares the
   * expression that writes the aggregate into it.
   *
   * @return false if the expression materializes to null, true otherwise
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final LogicalExpression aggregateExpr =
        ExpressionTreeMaterializer.materializeAndCheckErrors(ne.getExpr(), batch, registry);
    if (aggregateExpr == null) {
      return false;
    }

    // register and allocate the vector that receives the aggregated values
    final String outputName = ne.getRef().getAsNamePart().getName();
    final MaterializedField outputField = MaterializedField.create(outputName, aggregateExpr.getMajorType());
    batch.addOrGet(outputField).allocateNew();

    final TypedFieldId outputId = batch.getValueVectorId(ne.getRef());
    writeAggregationToOutput = new ValueVectorWriteExpression(outputId, aggregateExpr, true);
    return true;
  }

  /**
   * Installs a mapping set built from the setupEvaluatePeer/evaluatePeer and
   * setupPartition/outputRow/resetValues/cleanup mappings, then adds the write expression prepared by
   * {@code materialize}.
   */
  @Override
  void generateCode(ClassGenerator<WindowFramer> cg) {
    final GeneratorMapping evalInside =
        GeneratorMapping.create("setupEvaluatePeer", "evaluatePeer", null, null);
    final GeneratorMapping evalOutside =
        GeneratorMapping.create("setupPartition", "outputRow", "resetValues", "cleanup");

    cg.setMappingSet(new MappingSet("index", "outIndex", evalInside, evalOutside, evalInside));
    cg.addExpr(writeAggregationToOutput);
  }

  /**
   * @return true when the window has no ordering or when the end of its frame is unbounded
   */
  @Override
  public boolean requiresFullPartition(final WindowPOP pop) {
    if (pop.getOrderings().isEmpty()) {
      return true;
    }
    return pop.getEnd().isUnbounded();
  }

  /**
   * @return true once the whole partition is available, or, when the full partition is not required, as soon
   * as the end of the frame has been reached
   */
  @Override
  public boolean canDoWork(int numBatchesAvailable, WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
    if (partitionEndReached) {
      return true;
    }
    if (requiresFullPartition(pop)) {
      return false;
    }
    return frameEndReached;
  }

  @Override
  boolean supportsCustomFrames() {
    return true;
  }
}
static class Ranking extends WindowFunction {
protected TypedFieldId fieldId;
Ranking(final Type type) {
super(type);
}
private TypeProtos.MajorType getMajorType() {
if (type == Type.CUME_DIST || type == Type.PERCENT_RANK) {
return Types.required(TypeProtos.MinorType.FLOAT8);
}
return Types.required(TypeProtos.MinorType.BIGINT);
}
private String getName() {
return type.name().toLowerCase();
}
@Override
void generateCode(ClassGenerator<WindowFramer> cg) {
final GeneratorMapping mapping = GeneratorMapping.create("setupPartition", "outputRow", "resetValues", "cleanup");
final MappingSet mappingSet = new MappingSet(null, "outIndex", mapping, mapping);
cg.setMappingSet(mappingSet);
final JVar vv = cg.declareVectorValueSetupAndMember(cg.getMappingSet().getOutgoing(), fieldId);
final JExpression outIndex = cg.getMappingSet().getValueWriteIndex();
JInvocation setMethod = vv.invoke("getMutator").invoke("setSafe").arg(outIndex).arg(JExpr.direct("partition." + getName()));
cg.getEvalBlock().add(setMethod);
}
@Override
boolean materialize(final NamedExpression ne, final VectorContainer batch, FunctionLookupContext registry)
throws SchemaChangeException {
final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), getMajorType());
batch.addOrGet(outputField).allocateNew();
fieldId = batch.getValueVectorId(ne.getRef());
return true;
}
@Override
public boolean requiresFullPartition(final WindowPOP pop) {
// CUME_DIST, PERCENT_RANK and NTILE require the length of current partition before processing it's first batch
return type == Type.CUME_DIST || type == Type.PERCENT_RANK || type == Type.NTILE;
}
@Override
public boolean canDoWork(int numBatchesAvailable, final WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
assert numBatchesAvailable > 0 : "canDoWork() should not be called when numBatchesAvailable == 0";
if (type == Type.ROW_NUMBER) {
// row_number doesn't need to wait for anything
return true;
}
if (type == Type.RANK) {
// rank only works if we know how many rows we have in the current frame
// we could avoid this, but it requires more refactoring
return frameEndReached;
}
// for CUME_DIST, PERCENT_RANK and NTILE we need the full partition
return partitionEndReached;
}
@Override
boolean supportsCustomFrames() {
return false;
}
}
/**
 * NTILE window function. The number of tiles is read from the function's first argument, which must be a
 * positive integer literal, and is embedded in the generated {@code partition.ntile(n)} call.
 */
static class Ntile extends Ranking {

  /** Number of tiles requested by the query; assigned by {@code materialize}. */
  private int numTiles;

  public Ntile() {
    super(Type.NTILE);
  }

  /**
   * Extracts the number of tiles from the NTILE argument.
   *
   * @param numTilesExpr first argument of the NTILE call
   * @return the strictly positive number of tiles
   * @throws UserException (function error) if the argument is not an integer expression or is not positive
   */
  private int numTilesFromExpression(LogicalExpression numTilesExpr) {
    if ((numTilesExpr instanceof ValueExpressions.IntExpression)) {
      int nt = ((ValueExpressions.IntExpression) numTilesExpr).getInt();
      if (nt > 0) {
        return nt;
      }
    }

    throw UserException.functionError().message("NTILE only accepts positive integer argument").build(logger);
  }

  /**
   * Validates the NTILE argument, then adds the output vector (typed like the argument) to {@code batch} and
   * remembers its id.
   *
   * @return always true; an invalid argument is reported through a {@code UserException}
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final FunctionCall call = (FunctionCall) ne.getExpr();
    final LogicalExpression argument = call.args.get(0);

    // validate first, so that an invalid argument is rejected before a vector is added to the batch and
    // allocated
    numTiles = numTilesFromExpression(argument);

    final MaterializedField outputField = MaterializedField.create(ne.getRef().getAsNamePart().getName(), argument.getMajorType());
    batch.addOrGet(outputField).allocateNew();
    fieldId = batch.getValueVectorId(ne.getRef());
    return true;
  }

  /**
   * Generates, in the outputRow mapping, a {@code setSafe(outIndex, partition.ntile(numTiles))} call on the
   * output vector's mutator.
   */
  @Override
  void generateCode(ClassGenerator<WindowFramer> cg) {
    final GeneratorMapping mapping = GeneratorMapping.create("setupPartition", "outputRow", "resetValues", "cleanup");
    final MappingSet mappingSet = new MappingSet(null, "outIndex", mapping, mapping);
    cg.setMappingSet(mappingSet);

    final JVar vv = cg.declareVectorValueSetupAndMember(cg.getMappingSet().getOutgoing(), fieldId);
    final JExpression outIndex = cg.getMappingSet().getValueWriteIndex();
    final JInvocation setMethod = vv.invoke("getMutator").invoke("setSafe").arg(outIndex)
        .arg(JExpr.direct("partition.ntile(" + numTiles + ")"));

    cg.getEvalBlock().add(setMethod);
  }
}
/**
 * LEAD window function: the generated copyNext code writes the value of the input expression, read at
 * {@code inIndex}, into the output vector at {@code outIndex}.
 */
static class Lead extends WindowFunction {

  /** Writes the materialized input expression into the output vector; assigned by {@code materialize}. */
  private LogicalExpression writeInputToLead;

  public Lead() {
    super(Type.LEAD);
  }

  /** Installs the setupCopyNext/copyNext mapping and adds the write expression prepared by {@code materialize}. */
  @Override
  void generateCode(ClassGenerator<WindowFramer> cg) {
    final GeneratorMapping copyNext = GeneratorMapping.create("setupCopyNext", "copyNext", null, null);
    final MappingSet mappingSet = new MappingSet("inIndex", "outIndex", copyNext, copyNext);

    cg.setMappingSet(mappingSet);
    cg.addExpr(writeInputToLead);
  }

  /**
   * Materializes the first argument of the call, adds a nullable output vector to {@code batch} and prepares
   * the expression that copies the input into it.
   *
   * @return false if the argument materializes to null, true otherwise
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final FunctionCall leadCall = (FunctionCall) ne.getExpr();
    final LogicalExpression source =
        ExpressionTreeMaterializer.materializeAndCheckErrors(leadCall.args.get(0), batch, registry);
    if (source == null) {
      return false;
    }

    // The output vector has to accept nulls: a row with no following row in its partition gets a null value.
    // NOTE(review): the optional type is rebuilt from the minor type alone; confirm that no other attribute
    // of the input's major type needs to be carried over.
    final TypeProtos.MajorType sourceType = source.getMajorType();
    final TypeProtos.MajorType outputType = sourceType.getMode() == TypeProtos.DataMode.REQUIRED
        ? Types.optional(sourceType.getMinorType())
        : sourceType;

    // register and allocate the output vector
    final String outputName = ne.getRef().getAsNamePart().getName();
    final MaterializedField outputField = MaterializedField.create(outputName, outputType);
    batch.addOrGet(outputField).allocateNew();

    final TypedFieldId outputId = batch.getValueVectorId(ne.getRef());
    writeInputToLead = new ValueVectorWriteExpression(outputId, source, true);
    return true;
  }

  @Override
  public boolean requiresFullPartition(final WindowPOP pop) {
    return false;
  }

  /**
   * @return true when the whole partition is available or when more than one batch is available
   */
  @Override
  public boolean canDoWork(int numBatchesAvailable, final WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
    if (partitionEndReached) {
      return true;
    }
    return numBatchesAvailable > 1;
  }

  @Override
  boolean supportsCustomFrames() {
    return false;
  }
}
/**
 * LAG window function. Two pieces of code are generated: copyFromInternal, which copies a value of the lag
 * vector into the lag vector, and copyPrev, which copies the input expression into the lag vector.
 */
static class Lag extends WindowFunction {

  /** Copies lag[inIndex] into lag[outIndex]; assigned by {@code materialize}. */
  private LogicalExpression writeLagToLag;

  /** Copies the input expression at inIndex into lag[outIndex]; assigned by {@code materialize}. */
  private LogicalExpression writeInputToLag;

  Lag() {
    super(Type.LAG);
  }

  /**
   * Materializes the first argument of the call, adds a nullable output vector to {@code batch} and prepares
   * both write expressions.
   *
   * @return false if the argument materializes to null, true otherwise
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final FunctionCall lagCall = (FunctionCall) ne.getExpr();
    final LogicalExpression source =
        ExpressionTreeMaterializer.materializeAndCheckErrors(lagCall.args.get(0), batch, registry);
    if (source == null) {
      return false;
    }

    // the output vector has to accept nulls: the first row of each partition receives a null value
    final TypeProtos.MajorType sourceType = source.getMajorType();
    final TypeProtos.MajorType outputType = sourceType.getMode() == TypeProtos.DataMode.REQUIRED
        ? Types.optional(sourceType.getMinorType())
        : sourceType;

    // register and allocate the lag output vector
    final String outputName = ne.getRef().getAsNamePart().getName();
    final MaterializedField outputField = MaterializedField.create(outputName, outputType);
    batch.addOrGet(outputField).allocateNew();

    final TypedFieldId outputId = batch.getValueVectorId(ne.getRef());
    writeInputToLag = new ValueVectorWriteExpression(outputId, source, true);
    writeLagToLag = new ValueVectorWriteExpression(outputId, new ValueVectorReadExpression(outputId), true);
    return true;
  }

  /** Generates the copyFromInternal code first, then the copyPrev code. */
  @Override
  void generateCode(ClassGenerator<WindowFramer> cg) {
    // lag copyFromInternal: lag vector -> lag vector
    final GeneratorMapping copyFromInternal =
        GeneratorMapping.create("setupCopyFromInternal", "copyFromInternal", null, null);
    cg.setMappingSet(new MappingSet("inIndex", "outIndex", copyFromInternal, copyFromInternal));
    cg.addExpr(writeLagToLag);

    // lag copyPrev: input expression -> lag vector
    final GeneratorMapping copyPrev = GeneratorMapping.create("setupCopyPrev", "copyPrev", null, null);
    cg.setMappingSet(new MappingSet("inIndex", "outIndex", copyPrev, copyPrev));
    cg.addExpr(writeInputToLag);
  }

  @Override
  public boolean requiresFullPartition(final WindowPOP pop) {
    return false;
  }

  /**
   * @return always true; with assertions enabled, a non-positive {@code numBatchesAvailable} is rejected
   */
  @Override
  public boolean canDoWork(int numBatchesAvailable, final WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
    assert numBatchesAvailable > 0 : "canDoWork() should not be called when numBatchesAvailable == 0";
    return true;
  }

  @Override
  boolean supportsCustomFrames() {
    return false;
  }
}
/**
 * LAST_VALUE window function: the generated code copies the value of the input expression into the
 * last_value output vector.
 */
static class LastValue extends WindowFunction {

  /** Writes incoming.source[inIndex] to outgoing.last_value[outIndex]; assigned by {@code materialize}. */
  private LogicalExpression writeSourceToLastValue;

  LastValue() {
    super(Type.LAST_VALUE);
  }

  /**
   * Materializes the first argument of the call, adds an output vector of the same type to {@code batch} and
   * prepares the expression that copies the input into it.
   *
   * @return false if the argument materializes to null, true otherwise
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final FunctionCall lastValueCall = (FunctionCall) ne.getExpr();
    final LogicalExpression source =
        ExpressionTreeMaterializer.materializeAndCheckErrors(lastValueCall.args.get(0), batch, registry);
    if (source == null) {
      return false;
    }

    final String outputName = ne.getRef().getAsNamePart().getName();
    final MaterializedField outputField = MaterializedField.create(outputName, source.getMajorType());
    batch.addOrGet(outputField).allocateNew();

    final TypedFieldId outputId = batch.getValueVectorId(ne.getRef());
    writeSourceToLastValue = new ValueVectorWriteExpression(outputId, source, true);
    return true;
  }

  @Override
  void generateCode(ClassGenerator<WindowFramer> cg) {
    // in DefaultFrameTemplate we call setupReadLastValue:
    //   setupReadLastValue(current, container)
    // and writeLastValue:
    //   writeLastValue(frameLastRow, row)
    //
    // this will generate the following, pseudo, code:
    //   write current.source_last_value[frameLastRow] to container.last_value[row]
    final GeneratorMapping lastValueMapping =
        GeneratorMapping.create("setupReadLastValue", "writeLastValue", "resetValues", "cleanup");

    cg.setMappingSet(new MappingSet("index", "outIndex", lastValueMapping, lastValueMapping));
    cg.addExpr(writeSourceToLastValue);
  }

  /**
   * @return true when the window has no ordering or when the end of its frame is unbounded
   */
  @Override
  public boolean requiresFullPartition(final WindowPOP pop) {
    if (pop.getOrderings().isEmpty()) {
      return true;
    }
    return pop.getEnd().isUnbounded();
  }

  /**
   * @return true once the whole partition is available, or, when the full partition is not required, as soon
   * as the end of the frame has been reached
   */
  @Override
  public boolean canDoWork(int numBatchesAvailable, WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
    if (partitionEndReached) {
      return true;
    }
    if (requiresFullPartition(pop)) {
      return false;
    }
    return frameEndReached;
  }

  @Override
  boolean supportsCustomFrames() {
    return true;
  }
}
/**
 * FIRST_VALUE window function. Two pieces of code are generated: saveFirstValue, which stores the input
 * value into row 0 of the first_value vector, and outputRow, which copies row 0 of the first_value vector
 * into the row being output.
 */
static class FirstValue extends WindowFunction {

  /** Writes incoming.source[inIndex] to outgoing.first_value[outIndex]; assigned by {@code materialize}. */
  private LogicalExpression writeInputToFirstValue;

  /** Writes incoming.first_value[inIndex] to outgoing.first_value[outIndex]; assigned by {@code materialize}. */
  private LogicalExpression writeFirstValueToFirstValue;

  FirstValue() {
    super(Type.FIRST_VALUE);
  }

  /**
   * Materializes the first argument of the call, adds an output vector of the same type to {@code batch} and
   * prepares both write expressions.
   *
   * @return false if the argument materializes to null, true otherwise
   */
  @Override
  boolean materialize(final NamedExpression ne, final VectorContainer batch, final FunctionLookupContext registry)
      throws SchemaChangeException {
    final FunctionCall firstValueCall = (FunctionCall) ne.getExpr();
    final LogicalExpression source =
        ExpressionTreeMaterializer.materializeAndCheckErrors(firstValueCall.args.get(0), batch, registry);
    if (source == null) {
      return false;
    }

    final String outputName = ne.getRef().getAsNamePart().getName();
    final MaterializedField outputField = MaterializedField.create(outputName, source.getMajorType());
    batch.addOrGet(outputField).allocateNew();

    final TypedFieldId outputId = batch.getValueVectorId(ne.getRef());

    // first_value vector -> first_value vector
    writeFirstValueToFirstValue =
        new ValueVectorWriteExpression(outputId, new ValueVectorReadExpression(outputId), true);
    // input expression -> first_value vector
    writeInputToFirstValue = new ValueVectorWriteExpression(outputId, source, true);
    return true;
  }

  @Override
  void generateCode(final ClassGenerator<WindowFramer> cg) {
    // Step 1: saveFirstValue
    //
    // in DefaultFrameTemplate we call setupSaveFirstValue:
    //   setupSaveFirstValue(current, internal)
    // and saveFirstValue:
    //   saveFirstValue(currentRow, 0)
    //
    // this will generate the following, pseudo, code:
    //   write current.source[currentRow] to internal.first_value[0]
    //
    // so it basically copies the first value of current partition into the first row of internal.first_value
    // this is especially useful when handling multiple batches for the same partition where we need to keep
    // the first value of the partition somewhere after we release the first batch
    final GeneratorMapping saveMapping =
        GeneratorMapping.create("setupSaveFirstValue", "saveFirstValue", null, null);
    cg.setMappingSet(new MappingSet("index", "0", saveMapping, saveMapping));
    cg.addExpr(writeInputToFirstValue);

    // Step 2: outputRow
    //
    // in DefaultFrameTemplate we call setupWriteFirstValue:
    //   setupWriteFirstValue(internal, container)
    // and outputRow:
    //   outputRow(outIndex)
    //
    // this will generate the following, pseudo, code:
    //   write internal.first_value[0] to container.first_value[outIndex]
    //
    // so it basically copies the value stored in internal.first_value's first row into all rows of
    // container.first_value
    final GeneratorMapping writeMapping =
        GeneratorMapping.create("setupWriteFirstValue", "outputRow", "resetValues", "cleanup");
    cg.setMappingSet(new MappingSet("0", "outIndex", writeMapping, writeMapping));
    cg.addExpr(writeFirstValueToFirstValue);
  }

  @Override
  public boolean requiresFullPartition(final WindowPOP pop) {
    return false;
  }

  /**
   * @return always true; with assertions enabled, a non-positive {@code numBatchesAvailable} is rejected
   */
  @Override
  public boolean canDoWork(int numBatchesAvailable, WindowPOP pop, boolean frameEndReached, boolean partitionEndReached) {
    assert numBatchesAvailable > 0 : "canDoWork() should not be called when numBatchesAvailable == 0";
    return true;
  }

  @Override
  boolean supportsCustomFrames() {
    return true;
  }
}
}