/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.operator.cube;

import com.linkedin.cubert.memory.LongArrayList;
import java.io.IOException;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;

import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.DataType;
import com.linkedin.cubert.operator.PreconditionException;
import com.linkedin.cubert.utils.JsonUtils;

/**
 * Builtin default implementation of a {@link CubeAggregator} that aggregates an input
 * column using a specified {@link ValueAggregator}.
 * <p>
 * Implementation notes: the per-cuboid aggregates are kept as long values in a
 * {@link LongArrayList}. {@link #allocate(int)} only records the requested size; the
 * capacity is actually requested from the list in {@link #clear()}.
 *
 * @see DefaultDupleCubeAggregator
 * @author Maneesh Varshney
 *
 */
public class DefaultCubeAggregator implements CubeAggregator {
    /**
     * Placeholder value used when the input column cannot be determined (e.g. COUNT()
     * does not specify an input column name).
     */
    protected static final Object dummyObject = new Object();

    /** Stores the aggregated results for the value cuboids. */
    protected final LongArrayList valueTable;

    /** Allocation size for the cuboid store, as recorded by {@link #allocate(int)}. */
    private int size;

    /** The value of the aggregate column in the most recently processed input tuple. */
    protected Object currentValue;

    /** The index of the column to aggregate in the input tuple, or -1 if none was given. */
    protected int valueIndex;

    /** The data type of the column to aggregate (not assigned within this class). */
    protected DataType valueType;

    /** The index of the output aggregate field in the output tuple. */
    protected int outIndex;

    /** The {@link ValueAggregator} used for aggregating values. */
    protected ValueAggregator aggregator;

    /**
     * Creates an aggregator whose value table uses the aggregator's initial value as its
     * default value.
     *
     * @param aggregator the value aggregator applied to every input value
     */
    public DefaultCubeAggregator(ValueAggregator aggregator) {
        this.aggregator = aggregator;
        this.valueTable = new LongArrayList();
        this.valueTable.setDefaultValue(aggregator.initialValue());
    }

    /**
     * Resolves the input and output column indexes from the JSON configuration.
     * <p>
     * The first entry of the "input" array (when present and not JSON null) names the
     * column to aggregate; otherwise the value index is set to -1. The "output" text
     * names the column looked up in the output schema.
     *
     * @param block the input block, whose properties supply the input schema
     * @param outputSchema the schema of the output tuples
     * @param json the configuration node for this aggregator
     * @throws IOException declared by the {@link CubeAggregator} contract
     */
    @Override
    public void setup(Block block, BlockSchema outputSchema, JsonNode json) throws IOException {
        final BlockSchema sourceSchema = block.getProperties().getSchema();

        // the input column is optional: it may be missing entirely or be an explicit null
        final boolean inputSpecified = json.has("input") && !json.get("input").isNull();
        final String inputColumn = inputSpecified ? JsonUtils.asArray(json, "input")[0] : null;

        // -1 marks "no input column"; see processTuple
        valueIndex = (inputColumn == null) ? -1 : sourceSchema.getIndex(inputColumn);

        // locate the output column within the output tuple
        outIndex = outputSchema.getIndex(JsonUtils.getText(json, "output"));
    }

    /**
     * Records the requested allocation size. Allocation is delayed: nothing is requested
     * from the value table until {@link #clear()} is called.
     *
     * @param size the allocation size for the cuboid store
     */
    @Override
    public void allocate(int size) {
        this.size = size;
    }

    /**
     * Resets the value table and then asks it to ensure capacity for the size recorded by
     * {@link #allocate(int)}.
     */
    @Override
    public void clear() {
        valueTable.reset();
        valueTable.ensureCapacity(size);
    }

    /**
     * Captures the value to aggregate from the given tuple.
     *
     * @param tuple the input tuple
     * @throws ExecException if the field cannot be read from the tuple
     */
    @Override
    public void processTuple(Tuple tuple) throws ExecException {
        if (valueIndex == -1) {
            // no input column is known, so fall back to the shared placeholder
            currentValue = dummyObject;
        } else {
            currentValue = tuple.get(valueIndex);
        }
    }

    /**
     * Folds the current value into the aggregate stored at the given index. Does nothing
     * when the current value is null.
     *
     * @param index the position in the value table to update
     */
    @Override
    public void aggregate(int index) {
        if (currentValue != null) {
            valueTable.ensureCapacity(index);
            valueTable.updateLong(
                    index, aggregator.aggregate(valueTable.getLong(index), currentValue));
        }
    }

    /**
     * Writes the aggregate stored at the given index, converted by the value aggregator,
     * into the output field of the output tuple.
     *
     * @param outputTuple the tuple to write into
     * @param index the position in the value table to read
     * @throws ExecException if the field cannot be set on the tuple
     */
    @Override
    public void outputTuple(Tuple outputTuple, int index) throws ExecException {
        final Object aggregated = aggregator.output(valueTable.getLong(index));
        outputTuple.set(outIndex, aggregated);
    }

    /**
     * Builds the schema of the single output column as "&lt;type&gt; &lt;name&gt;", where
     * the type comes from the value aggregator and the name from the "output" text of the
     * JSON configuration.
     *
     * @param inputSchema the input schema (not consulted by this implementation)
     * @param json the configuration node for this aggregator
     * @return the schema describing the output column
     * @throws PreconditionException declared by the {@link CubeAggregator} contract
     */
    @Override
    public BlockSchema outputSchema(BlockSchema inputSchema, JsonNode json)
            throws PreconditionException {
        final Object columnType = aggregator.outputType();
        final String columnName = JsonUtils.getText(json, "output");
        return new BlockSchema(String.format("%s %s", columnType, columnName));
    }
}