/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.operator;
import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.PriorityQueue;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;
import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockProperties;
import com.linkedin.cubert.block.TupleComparator;
import com.linkedin.cubert.utils.CommonUtils;
import com.linkedin.cubert.utils.JsonUtils;
/**
 * TupleOperator class that combines two or more cubes, while preserving the pivot key
 * ordering.
 * <p>
 * The operator performs a k-way merge of its input blocks: every call to
 * {@link #next()} returns the smallest pending tuple (as ordered by the
 * <code>pivotBy</code> columns) among all inputs. Two strategies are used:
 * <ul>
 * <li>exactly two inputs: a "fast path" that compares the heads of the two blocks
 * directly;</li>
 * <li>any other number of inputs: a {@link PriorityQueue} holding the current head of
 * every non-exhausted block.</li>
 * </ul>
 * In both strategies the block that produced the returned tuple is advanced lazily, at
 * the start of the following {@link #next()} call, rather than immediately.
 * NOTE(review): presumably this is because a block may reuse its tuple object between
 * calls to <code>Block.next()</code> -- confirm against the Block implementations.
 *
 * @author Maneesh Varshney
 *
 */
public class CombineOperator implements TupleOperator
{
    /** Number of inputs served by the array based fast path. */
    private static final int FAST_PATH_INPUTS = 2;

    /**
     * Pairs a block with the tuple most recently read from it (the "head" of the
     * block). A <code>null</code> tuple means the block returned no more data.
     */
    private static final class PQEntry
    {
        Tuple tuple;
        Block block;

        /** Reads the next tuple from the block and stores it as the new head. */
        public void next() throws IOException,
                InterruptedException
        {
            this.tuple = this.block.next();
        }
    }

    /** Orders entries by applying the tuple comparator to their head tuples. */
    private static final class PQEntryComparator implements Comparator<PQEntry>
    {
        private final TupleComparator comparator;

        PQEntryComparator(TupleComparator comparator)
        {
            this.comparator = comparator;
        }

        @Override
        public int compare(PQEntry o1, PQEntry o2)
        {
            return comparator.compare(o1.tuple, o2.tuple);
        }
    }

    /** Heads of all non-exhausted blocks; used only when the fast path is off. */
    private PriorityQueue<PQEntry> pqueue;

    /** Entry whose tuple was returned by the previous call to {@link #next()}. */
    private PQEntry lastEntry;

    /** True when exactly two inputs are merged via {@link #fastPathNext()}. */
    private boolean fastPath = false;

    /** The two entries merged by the fast path; null when the fast path is off. */
    private PQEntry[] entryList;

    private PQEntryComparator pqcomparator;

    /**
     * Prepares the merge: builds the comparator from the <code>pivotBy</code> columns
     * of the operator json and reads the first tuple of every input block.
     *
     * @param input
     *            the blocks to combine
     * @param root
     *            operator json; its <code>pivotBy</code> array names the sort columns
     * @param props
     *            block properties supplying the schema used by the comparator
     */
    @Override
    public void setInput(Map<String, Block> input, JsonNode root, BlockProperties props) throws IOException,
            InterruptedException
    {
        String[] pivotColumns = JsonUtils.asArray(root, "pivotBy");
        TupleComparator comparator = new TupleComparator(props.getSchema(), pivotColumns);
        pqcomparator = new PQEntryComparator(comparator);

        // Start from a clean slate so that a repeated setInput() call never observes
        // state (in particular the fastPath flag) left over from a previous input.
        lastEntry = null;
        fastPath = false;
        entryList = null;
        pqueue = null;

        // The fast path indexes entryList[0] and entryList[1] unconditionally, so it
        // is only valid for exactly two inputs. Fewer (or more) inputs are handled by
        // the priority queue below.
        if (input.size() == FAST_PATH_INPUTS)
        {
            fastPath = true;
            entryList = new PQEntry[FAST_PATH_INPUTS];

            int i = 0;
            for (Block block : input.values())
            {
                PQEntry entry = new PQEntry();
                entry.block = block;
                entry.next();
                entryList[i++] = entry;
            }
            return;
        }

        // PriorityQueue rejects an initial capacity smaller than 1.
        pqueue = new PriorityQueue<PQEntry>(Math.max(1, input.size()), pqcomparator);
        for (Block block : input.values())
        {
            PQEntry entry = new PQEntry();
            entry.block = block;
            entry.next();

            // blocks that are empty from the start never enter the queue
            if (entry.tuple != null)
            {
                pqueue.add(entry);
            }
        }
    }

    /**
     * Returns the next tuple in pivot key order across all inputs.
     *
     * @return the smallest pending tuple, or <code>null</code> when every input is
     *         exhausted
     */
    @Override
    public Tuple next() throws IOException,
            InterruptedException
    {
        if (fastPath)
        {
            return fastPathNext();
        }

        // The block that supplied the previously returned tuple is advanced only now;
        // it re-enters the queue if it still has data.
        if (lastEntry != null)
        {
            lastEntry.next();
            if (lastEntry.tuple != null)
            {
                pqueue.add(lastEntry);
            }
            lastEntry = null;
        }

        // poll() returns null on an empty queue, i.e. when all inputs are exhausted
        PQEntry entry = pqueue.poll();
        if (entry == null)
        {
            return null;
        }

        lastEntry = entry;
        return entry.tuple;
    }

    /**
     * fast path implementation for <code> next() </code> method: compares the heads of
     * the two inputs directly and returns the smaller one. On a tie the first input
     * (in the iteration order of the input map) wins.
     *
     * @return the smaller head tuple, or <code>null</code> when both inputs are
     *         exhausted
     */
    private Tuple fastPathNext() throws IOException,
            InterruptedException
    {
        // Advance the block consumed by the previous call. An entry whose tuple is
        // already null belongs to an exhausted block and is not read again.
        if (lastEntry != null && lastEntry.tuple != null)
        {
            lastEntry.next();
        }

        PQEntry first = entryList[0];
        PQEntry second = entryList[1];

        if (first.tuple == null)
        {
            // may itself be exhausted, in which case null is returned
            lastEntry = second;
        }
        else if (second.tuple == null)
        {
            lastEntry = first;
        }
        else
        {
            lastEntry = pqcomparator.compare(first, second) <= 0 ? first : second;
        }

        return lastEntry.tuple;
    }

    /**
     * Validates that every input block is sorted compatibly with the
     * <code>pivotBy</code> keys (as decided by <code>CommonUtils.isPrefix</code>) and
     * returns the post condition of the first input, since combining does not alter
     * the condition of the data.
     *
     * @param preConditions
     *            post conditions of the input blocks, keyed by block name
     * @param json
     *            operator json; its <code>pivotBy</code> array names the sort columns
     * @return the post condition of the first input block
     * @throws PreconditionException
     *             with type INVALID_SORT_KEYS if an input fails the sort key check
     */
    @Override
    public PostCondition getPostCondition(Map<String, PostCondition> preConditions,
                                          JsonNode json) throws PreconditionException
    {
        String[] pivotKeys = JsonUtils.asArray(json, "pivotBy");

        for (Map.Entry<String, PostCondition> input : preConditions.entrySet())
        {
            String[] sortKeys = input.getValue().getSortKeys();
            if (!CommonUtils.isPrefix(sortKeys, pivotKeys))
            {
                throw new PreconditionException(PreconditionExceptionType.INVALID_SORT_KEYS,
                                                "Block " + input.getKey());
            }
        }

        // the post condition is same as the input condition
        return preConditions.values().iterator().next();
    }
}