/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.block;
import java.io.IOException;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import com.linkedin.cubert.utils.JsonUtils;
/**
* Writes a block in the SHUFFLE format.
*
* This divides each input tuple into a key tuple and a value tuple. The key tuple
* consists of pivoted columns, while the value tuple consists of remaining columns.
*
* @author Maneesh Varshney
*
*/
public class ShuffleBlockWriter implements BlockWriter
{
    /** Writer configuration handed to {@code configure}; read later by {@code write}. */
    private JsonNode json;

    /**
     * Remembers the writer configuration for use when a block is written.
     *
     * @param json configuration node; {@code write} reads its "pivotKeys" and "schema"
     *             entries
     */
    @Override
    public void configure(JsonNode json) throws JsonParseException,
            JsonMappingException,
            IOException
    {
        this.json = json;
    }

    /**
     * Drains the block and emits every tuple as a (key, value) pair on the context.
     *
     * The output schema is taken from the "schema" entry of the configuration. Columns
     * named in "pivotKeys" go to the key tuple and the remaining columns go to the value
     * tuple. Each output column is looked up by name in the schema of the input block.
     *
     * A single key tuple and a single value tuple are allocated up front and overwritten
     * for every input tuple. NOTE(review): this presumably relies on
     * {@code context.write} consuming the tuples before it returns -- confirm.
     *
     * @param block   the block whose tuples are written; read until {@code next()}
     *                returns null
     * @param context the sink that receives each key/value pair
     * @throws IOException          propagated from tuple access or from the context
     * @throws InterruptedException propagated from the context
     */
    @Override
    public void write(Block block, CommonContext context) throws IOException,
            InterruptedException
    {
        final String[] pivots = JsonUtils.asArray(json.get("pivotKeys"));
        final BlockSchema sourceSchema = block.getProperties().getSchema();
        final BlockSchema targetSchema = new BlockSchema(json.get("schema"));

        final BlockSchema keyPart = targetSchema.getSubset(pivots);
        final BlockSchema valuePart = targetSchema.getComplementSubset(pivots);

        // positions (within an input tuple) of the columns that form the key and value
        final int[] keyPositions = new int[keyPart.getNumColumns()];
        final int[] valuePositions = new int[valuePart.getNumColumns()];
        resolvePositions(sourceSchema, keyPart, keyPositions);
        resolvePositions(sourceSchema, valuePart, valuePositions);

        final TupleFactory tupleFactory = TupleFactory.getInstance();
        final Tuple outKey = tupleFactory.newTuple(keyPositions.length);
        final Tuple outValue = tupleFactory.newTuple(valuePositions.length);

        for (Tuple row = block.next(); row != null; row = block.next())
        {
            project(row, keyPositions, outKey);
            project(row, valuePositions, outValue);
            context.write(outKey, outValue);
        }
    }

    /**
     * Fills {@code positions[i]} with the index, in {@code source}, of the column that
     * has the same name as the i-th column of {@code subset}.
     */
    private static void resolvePositions(BlockSchema source,
                                         BlockSchema subset,
                                         int[] positions)
    {
        for (int col = 0; col < positions.length; col++)
        {
            positions[col] = source.getIndex(subset.getName(col));
        }
    }

    /**
     * Copies the fields of {@code from} found at {@code positions} into consecutive
     * slots of {@code into}, starting at slot 0.
     */
    private static void project(Tuple from, int[] positions, Tuple into) throws IOException
    {
        for (int slot = 0; slot < positions.length; slot++)
        {
            into.set(slot, from.get(positions[slot]));
        }
    }
}