/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.operator;
import java.io.IOException;
import java.util.Arrays;
import java.util.Map;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;
import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.utils.CommonUtils;
import com.linkedin.cubert.utils.JsonUtils;
/**
 * Tuple operator that works on a grouped and sorted data set to extract the TOP-N
 * tuples of every group.
 * <p>
 * The input block is split into pivot blocks on the {@code groupBy} columns using a
 * {@link PivotBlockOperator}; from each pivot block at most {@code topN} tuples are
 * emitted, in the order the pivot block yields them.
 * <p>
 * JSON properties read by this operator:
 * <ul>
 * <li>{@code input} - name of the input block</li>
 * <li>{@code groupBy} - columns on which the input is pivoted</li>
 * <li>{@code orderBy} - columns used (only in {@link #getPostCondition}) to validate
 * the sort order of the input</li>
 * <li>{@code topN} - optional; number of tuples to keep per group (defaults to 1)</li>
 * </ul>
 *
 * @author Mani Parkhe
 */
public class TopNOperator implements TupleOperator
{
    private Block inputBlock = null;
    private PivotBlockOperator pivotBlockOp = null;

    /** Pivot block currently being drained; null when a new one must be fetched. */
    private Block currentBlock = null;

    /** Number of tuples pulled from {@link #currentBlock} so far. */
    private int emittedInGroup = 0;

    /** Maximum number of tuples emitted per pivot block. */
    private int topN = 1;

    /**
     * Configures the operator from its JSON description.
     *
     * @param input map from block name to block; the entry named by the JSON
     *            {@code input} property is used
     * @param json operator configuration ({@code input}, {@code groupBy}, optional
     *            {@code topN})
     * @param props block properties (not used by this operator)
     */
    @Override
    public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
            InterruptedException
    {
        inputBlock = input.get(JsonUtils.getText(json, "input"));

        if (json.has("topN"))
        {
            topN = json.get("topN").getIntValue();
        }

        pivotBlockOp = new PivotBlockOperator();
        String[] groupByColumns = JsonUtils.asArray(JsonUtils.get(json, "groupBy"));
        // NOTE(review): meaning of the third argument (false) is defined by
        // PivotBlockOperator.setInput, which is not visible here -- confirm.
        pivotBlockOp.setInput(inputBlock, groupByColumns, false);

        resetState();
    }

    /**
     * Reset the state of the operator between pivot blocks.
     */
    private void resetState()
    {
        emittedInGroup = 0;
        currentBlock = null;
    }

    /**
     * Returns the next tuple that belongs to the top-N of its group.
     * <p>
     * Implemented as a loop rather than by recursion so that skipping over many
     * groups in a row (e.g. when {@code topN <= 0}) cannot overflow the call stack.
     *
     * @return the next tuple, or null once the pivot operator has no more blocks
     */
    @Override
    public Tuple next() throws IOException,
            InterruptedException
    {
        while (true)
        {
            if (currentBlock == null && !nextBlock())
            {
                return null;
            }

            Tuple tuple = currentBlock.next();
            if (tuple != null && ++emittedInGroup <= topN)
            {
                return tuple;
            }

            // This pivot block is exhausted, or its top-N quota has been used up:
            // drop it and move on to the next pivot block.
            resetState();
        }
    }

    /**
     * Fetches the next pivot block into {@link #currentBlock}.
     *
     * @return true if a pivot block was obtained, false if the pivot operator
     *         returned null
     */
    private boolean nextBlock() throws IOException,
            InterruptedException
    {
        currentBlock = pivotBlockOp.next();
        return currentBlock != null;
    }

    /**
     * Validates that the input block is partitioned and sorted suitably for a top-N
     * query and that all referenced columns exist.
     *
     * @param preConditions post conditions of the upstream blocks, keyed by block name
     * @param json operator configuration ({@code input}, {@code groupBy},
     *            {@code orderBy})
     * @return the post condition of the input block, unchanged
     * @throws PreconditionException with type {@code INVALID_PARTITION_KEYS},
     *             {@code COLUMN_NOT_PRESENT} or {@code INVALID_SORT_KEYS} when the
     *             corresponding check fails
     */
    @Override
    public PostCondition getPostCondition(Map<String, PostCondition> preConditions,
                                          JsonNode json) throws PreconditionException
    {
        String inputBlockName = JsonUtils.getText(json, "input");
        PostCondition inputBlockPostCondition = preConditions.get(inputBlockName);
        BlockSchema inputSchema = inputBlockPostCondition.getSchema();

        // Check: partition order.
        // Logic -- partition keys should match the group-by keys or should prefix the
        // group-by keys, since the sorted order check (below) will ensure that all
        // group+order columns will be seen together for the top-N query.
        String[] groupByKeys = JsonUtils.asArray(JsonUtils.get(json, "groupBy"));
        String[] partitionKeys = inputBlockPostCondition.getPartitionKeys();

        // Null checks come first so that a missing key array is reported as a
        // PreconditionException instead of surfacing as a NullPointerException.
        boolean invalidPartitioning =
                groupByKeys == null || groupByKeys.length == 0
                        || partitionKeys == null || partitionKeys.length == 0
                        || !CommonUtils.isPrefix(groupByKeys, partitionKeys);
        if (invalidPartitioning)
        {
            throw new PreconditionException(PreconditionExceptionType.INVALID_PARTITION_KEYS,
                                            String.format("Found=%s, Expected=%s",
                                                          describeKeys(partitionKeys),
                                                          describeKeys(groupByKeys)));
        }

        // Check: all groupBy columns exist in the input block's schema
        requireColumns(inputSchema, inputBlockName, groupByKeys);

        // Check: sorting order -- validated against the group-by columns followed by
        // the order-by columns.
        String[] orderByKeys = JsonUtils.asArray(JsonUtils.get(json, "orderBy"));
        String[] expectedSortOrder = CommonUtils.concat(groupByKeys, orderByKeys);
        String[] sortKeys = inputBlockPostCondition.getSortKeys();

        // A null sort-key array is treated as an invalid sort order; how
        // CommonUtils.isPrefix handles null is not visible here, so guard explicitly.
        if (sortKeys == null || !CommonUtils.isPrefix(sortKeys, expectedSortOrder))
        {
            throw new PreconditionException(PreconditionExceptionType.INVALID_SORT_KEYS,
                                            String.format("Found=%s, Expected=%s",
                                                          describeKeys(sortKeys),
                                                          describeKeys(expectedSortOrder)));
        }

        // Check: all orderBy columns exist in the input block's schema
        requireColumns(inputSchema, inputBlockName, orderByKeys);

        return inputBlockPostCondition;
    }

    /**
     * Ensures every named column is present in the schema.
     *
     * @throws PreconditionException of type {@code COLUMN_NOT_PRESENT} for the first
     *             column that {@code schema.hasIndex} does not report as present
     */
    private static void requireColumns(BlockSchema schema, String blockName, String[] columns)
            throws PreconditionException
    {
        for (String colName : columns)
        {
            if (!schema.hasIndex(colName))
            {
                String msg =
                        String.format("Input block '%s' is missing expected column '%s'",
                                      blockName,
                                      colName);
                throw new PreconditionException(PreconditionExceptionType.COLUMN_NOT_PRESENT,
                                                msg);
            }
        }
    }

    /**
     * Renders a key array for error messages; a null array is shown as "[null]".
     */
    private static String describeKeys(String[] keys)
    {
        return keys == null ? "[null]" : Arrays.toString(keys);
    }
}