/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.operator;

import java.io.IOException;
import java.util.Map;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.codehaus.jackson.JsonNode;
import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.Index;
import com.linkedin.cubert.block.IndexEntry;
import com.linkedin.cubert.block.TupleComparator;
import com.linkedin.cubert.utils.FileCache;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.TupleUtils;

/**
* Validate operator
*
* Used for debugging to validate the correctness of a block generation output
*
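 * Illustrative configuration snippet (only the keys read in {@code setInput} are
 * shown; "myIndex", "memberId" and "timestamp" are placeholder values):
 *
 * <pre>
 * {
 *   "index": "myIndex",
 *   "partitionKeys": ["memberId"],
 *   "pivotKeys": ["memberId", "timestamp"]
 * }
 * </pre>
 *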
* @author Suvodeep Pyne
*/
public class ValidateOperator implements TupleOperator
{
private Block block;
private Index index;
private String[] partitionKeys, sortKeys;
private TupleComparator comparator, partitionKeyComparator;
private Tuple prev, start, end;
private boolean first = true;
private int hashPartitionId = -1;
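
    /**
     * Reads the partition keys, the optional pivot (sort) keys and the index name
     * from the operator's JSON configuration, loads the cached index, and looks up
     * the start and end index entries for this block.
     */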
@Override
public void setInput(Map<String, Block> input, JsonNode json, BlockProperties props) throws IOException,
InterruptedException
{
block = input.values().iterator().next();
BlockSchema inputSchema = block.getProperties().getSchema();
partitionKeys = JsonUtils.asArray(json.get("partitionKeys"));
final JsonNode pivotKeys = json.get("pivotKeys");
sortKeys = pivotKeys == null ? null : JsonUtils.asArray(pivotKeys);
prev = TupleFactory.getInstance().newTuple(inputSchema.getNumColumns());
try
{
String indexName = JsonUtils.getText(json, "index");
index = FileCache.getCachedIndex(indexName);
}
catch (ClassNotFoundException e)
{
throw new RuntimeException(e);
}
if (index == null)
{
throw new RuntimeException("Cannot load index for ["
+ JsonUtils.getText(json, "indexName") + "]");
}
if (getBlockId() == -1)
{
throw new RuntimeException("Block ID is -1. Block Creation pending / Data loaded from AVRO");
}
IndexEntry startEntry = index.getEntry(getBlockId());
IndexEntry endEntry = index.getNextEntry(getBlockId());
final BlockSchema partitionKeysSchema = inputSchema.getSubset(partitionKeys);
partitionKeyComparator = new TupleComparator(partitionKeysSchema, partitionKeys);
        comparator = new TupleComparator(getSchema(), sortKeys == null ? partitionKeys : sortKeys);
start = startEntry.getKey();
end = endEntry == null ? null : endEntry.getKey();
}
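
    /**
     * Pulls the next tuple from the input block and validates that it hashes to the
     * same reducer ID as the rest of the block, that its index entry matches this
     * block's ID, that tuples arrive in sorted order, and that its partition key
     * lies within the block's start/end boundaries. Throws a RuntimeException on
     * the first violation; returns null when the input block is exhausted.
     */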
@Override
    public Tuple next() throws IOException, InterruptedException
{
Tuple next = block.next();
/* Exhausted the pipeline. Return */
if (next == null)
return null;
Tuple partitionSubset = TupleUtils.extractTuple(next, getSchema(), partitionKeys);
        /**
         * The reducer ID is computed as hashcode(key) % number of partitions. Validate
         * that all tuples in the block map to the same reducer ID.
         */
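        // Illustrative sketch of that mapping (the actual computation lives in
        // Index.getReducerId): reducerId = hashcode(partitionKey) % numReducers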
if (!first && hashPartitionId != index.getReducerId(partitionSubset))
{
throw new RuntimeException("VALIDATE Failed: hashPartitionId doesn't match for Tuple: "
+ next);
}
/**
* Validate that the block ID obtained from the block matches the one retrieved
* from the Index
*/
if (index.getBlockId(partitionSubset) != getBlockId())
{
throw new RuntimeException("VALIDATE Failed: Index entry for Tuple" + next
+ " is inconsistent with Block: " + getBlockId());
}
/**
         * Validate that the tuples are actually sorted according to the tuple comparator.
*/
if (!first && comparator.compare(prev, next) > 0)
{
throw new RuntimeException("VALIDATE Failed: Tuples sorted incorrectly. blockId: "
+ getBlockId());
}
        /**
         * Validate that all keys in the block lie within the start and end boundaries.
         * Note that the last block in the reducer does not know its end boundary key,
         * in which case end is null.
         */
if (partitionKeyComparator.compare(start, partitionSubset) > 0
|| (end != null && partitionKeyComparator.compare(end, partitionSubset) < 0))
{
throw new RuntimeException("VALIDATE Failed: Tuple" + end
+ " not within bounds");
}
if (first)
{
/* Update the reducer ID once. */
hashPartitionId = index.getReducerId(partitionSubset);
first = false;
}
TupleUtils.copy(next, prev);
return next;
}
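
    /** Returns the block ID recorded in the input block's properties. */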
public long getBlockId()
{
return block.getProperties().getBlockId();
}
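
    /** Returns the partition key recorded in the input block's properties. */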
public Tuple getPartitionKey()
{
return block.getProperties().getPartitionKey();
}
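
    /** Returns the schema of the input block. */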
public BlockSchema getSchema()
{
return block.getProperties().getSchema();
}
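
    /**
     * Validation does not change the data, so the post-condition is simply the
     * first input's pre-condition, passed through unchanged.
     */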
@Override
public PostCondition getPostCondition(Map<String, PostCondition> preConditions,
JsonNode json) throws PreconditionException
{
return preConditions.values().iterator().next();
}
}