/* (c) 2014 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package com.linkedin.cubert.analyzer.physical;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;
import com.linkedin.cubert.block.DataType;
import com.linkedin.cubert.functions.builtin.FunctionFactory;
import com.linkedin.cubert.io.rubix.RubixConstants;
import com.linkedin.cubert.operator.BlockOperator;
import com.linkedin.cubert.operator.OperatorFactory;
import com.linkedin.cubert.operator.OperatorType;
import com.linkedin.cubert.operator.PivotBlockOperator;
import com.linkedin.cubert.operator.PostCondition;
import com.linkedin.cubert.operator.PreconditionException;
import com.linkedin.cubert.operator.PreconditionExceptionType;
import com.linkedin.cubert.operator.TupleOperator;
import com.linkedin.cubert.plan.physical.CubertCombiner;
import com.linkedin.cubert.utils.CommonUtils;
import com.linkedin.cubert.utils.JsonUtils;
import com.linkedin.cubert.utils.SchemaUtils;
import com.linkedin.cubert.utils.print;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ObjectNode;
import static com.linkedin.cubert.utils.JsonUtils.asArray;
import static com.linkedin.cubert.utils.JsonUtils.getText;
/**
* Semantically analyzes the Cubert physical plan.
*
* @author Maneesh Varshney
*
*/
public class SemanticAnalyzer extends PhysicalPlanVisitor implements PlanRewriter
{
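/**
 * A node in the operator graph built during analysis. Each Node wraps an
 * operator's JSON definition and its computed PostCondition (schema, partition
 * keys and sort keys), and is linked to its child and parent operators as well
 * as to the next node in the per-job linked list of visited operators.
 */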
public static final class Node
{
JsonNode json;
PostCondition condition;
Node child;
ArrayList<Node> parents = new ArrayList<Node>();
Node next;
public Node(JsonNode json, PostCondition condition)
{
this.json = json;
this.condition = condition;
}
public void setChild(Node child)
{
this.child = child;
}
public void addParent(Node parent)
{
this.parents.add(parent);
}
@Override
public String toString()
{
String str = json.toString();
if (child != null)
{
str += "\n " + child.toString();
}
return str;
}
public JsonNode getOperatorJson()
{
return this.json;
}
public PostCondition getPostCondition()
{
return this.condition;
}
public Node getNext()
{
return next;
}
}
private Node linkedListHead;
private final Map<String, PostCondition> datasetConditions =
new HashMap<String, PostCondition>();
private Node lastShuffleNode;
private boolean hasErrors = false;
private String[] blockIndexJoinKeys;
private boolean revisit = false;
private final Map<String, Node> operatorMap = new HashMap<String, Node>();
private boolean debugMode = false;
// List of the final operator node from each mapper.
// The number of entries in this list equals the number of multi-mappers.
List<Node> mapOutputNodes = new ArrayList<Node>();
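/**
 * Returns the head of the linked list of operator nodes built for the most
 * recently analyzed operator chain.
 */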
public Node getNodeInformation()
{
return linkedListHead;
}
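/**
 * Walks the physical plan with this visitor, annotating each operator with its
 * schema and validating pre- and post-conditions. Returns the annotated plan,
 * or null if any semantic errors were found.
 */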
@Override
public JsonNode rewrite(JsonNode plan,
Set<String> namesUsed,
boolean debugMode,
boolean revisit) throws IOException
{
this.revisit = revisit;
this.debugMode = debugMode;
new PhysicalPlanWalker(plan, this).walk();
return hasErrors ? null : plan;
}
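/**
 * Records the PostCondition (schema and, if present, partition and sort keys)
 * of each program-level input dataset in datasetConditions.
 */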
@Override
public void enterProgram(JsonNode json)
{
if (json.has("input") && !json.get("input").isNull())
{
JsonNode programInput = json.get("input");
Iterator<String> it = programInput.getFieldNames();
while (it.hasNext())
{
String input = it.next();
JsonNode inputJson = programInput.get(input);
BlockSchema schema = new BlockSchema(inputJson.get("schema"));
PostCondition condition = null;
if (inputJson.has("partitionKeys") && inputJson.has("sortKeys"))
condition =
new PostCondition(schema,
JsonUtils.asArray(inputJson.get("partitionKeys")),
JsonUtils.asArray(inputJson.get("sortKeys")));
else
condition = new PostCondition(schema, null, null);
datasetConditions.put(input, condition);
}
}
}
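/**
 * Validates the reducer configuration of a job (a shuffle and a reduce phase
 * must be present if and only if reducers are requested; GENERATE_DICTIONARY
 * jobs are exempt) and resets the per-job analysis state.
 */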
@Override
public void enterJob(JsonNode json)
{
print.f("Analyzing job [%s]...", getText(json, "name"));
if (!json.has("jobType")
|| !getText(json, "jobType").equals("GENERATE_DICTIONARY"))
{
int numReducers = json.get("reducers").getIntValue();
if (numReducers < 0)
{
error(null,
"Invalid negative number of reducers in job "
+ getText(json, "name"));
}
else if (numReducers == 0)
{
if (json.has("shuffle") && !json.get("shuffle").isNull())
error(null, "Job [" + getText(json, "name")
+ "] is configured with 0 reducers, but has shuffle operator");
if (json.has("reduce") && !json.get("reduce").isNull())
error(null, "Job [" + getText(json, "name")
+ "] is configured with 0 reducers, but has reduce operators");
}
else
{
if (!json.has("shuffle") || json.get("shuffle").isNull())
error(null,
"Job ["
+ getText(json, "name")
+ "] is configured with reducers, but does not define shuffle operator");
if (!json.has("reduce") || json.get("reduce").isNull())
error(null,
"Job ["
+ getText(json, "name")
+ "] is configured with reducers, but does not define reduce operators");
}
}
lastShuffleNode = null;
linkedListHead = null;
blockIndexJoinKeys = null;
operatorMap.clear();
mapOutputNodes.clear();
}
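/**
 * Resolves the PostCondition of a map input from datasetConditions, stores its
 * schema in the JSON, and starts a new operator chain. If a chain from a
 * previous multi-mapper already exists, its head is saved in mapOutputNodes
 * for validation in visitShuffle().
 */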
@Override
public void visitInput(JsonNode json)
{
JsonNode pathJson = json.get("path");
String path;
if (pathJson.isArray())
path = JsonUtils.encodePath(pathJson.get(0));
else
path = JsonUtils.encodePath(pathJson);
// determine the postcondition of input
PostCondition inputCondition = datasetConditions.get(path);
if (inputCondition == null)
{
error(null, pathJson, "Cannot determine schema at path " + path);
}
// put the schema in json
((ObjectNode) json).put("schema", inputCondition.getSchema().toJson());
Node node = new Node(json, inputCondition);
// If the linkedListHead is not null, it implies there was a previous multimapper
// In which case, store the Node in a list for now.
// We will validate the postconditions in visitShuffle()
if (linkedListHead != null)
{
mapOutputNodes.add(linkedListHead);
}
linkedListHead = node;
operatorMap.clear();
operatorMap.put(getText(json, "name"), node);
}
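/**
 * In debug mode, prints the preconditions of the input blocks and the computed
 * post-condition for the operator defined at the given script line.
 */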
private void printConditions(String line,
Map<String, PostCondition> preConditions,
PostCondition postCondition)
{
if (!debugMode)
return;
print.f("---------------------------------------------");
print.f("%s\n", line);
for (String block : preConditions.keySet())
{
PostCondition preCondition = preConditions.get(block);
print.f("Precondition for %s", block);
print.f("\tSchema: %s", preCondition.getSchema());
print.f("\tPartition Keys: %s",
Arrays.toString(preCondition.getPartitionKeys()));
print.f("\tSort Keys: %s", Arrays.toString(preCondition.getSortKeys()));
if (preCondition.getPivotKeys() != null)
print.f("\tPivot Keys: %s",
Arrays.toString(preCondition.getPivotKeys()));
}
print.f("\nPost Condition");
if (postCondition == null)
{
print.f("\tERROR");
}
else
{
print.f("\tSchema: %s", postCondition.getSchema());
print.f("\tPartition Keys: %s",
Arrays.toString(postCondition.getPartitionKeys()));
print.f("\tSort Keys: %s", Arrays.toString(postCondition.getSortKeys()));
if (postCondition.getPivotKeys() != null)
print.f("\tPivot Keys: %s",
Arrays.toString(postCondition.getPivotKeys()));
}
}
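/**
 * Analyzes a single map- or reduce-side operator: collects the preconditions
 * of its input blocks, computes its PostCondition, stores the resulting schema
 * in the JSON, and links the operator into the node graph. BLOCK_INDEX_JOIN,
 * TEE and LOAD_BLOCK receive additional handling (join keys, dataset
 * conditions, partition/sort keys). Any failure is reported via error().
 */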
@Override
public void visitOperator(JsonNode json, boolean isMapper)
{
Map<String, PostCondition> preConditions = null;
PostCondition postCondition = null;
try
{
String[] inputNames = new String[] {};
if (json.has("input"))
inputNames = JsonUtils.asArray(json.get("input"));
preConditions = getPreconditions(inputNames, json);
if (preConditions == null)
return;
postCondition = getPostCondition(json, preConditions);
if (json.get("line") != null)
printConditions(json.has("line") ? getText(json, "line") : null,
preConditions,
postCondition);
if (postCondition == null)
return;
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
// create a node for this operator
Node node = new Node(json, postCondition);
for (String inputName : inputNames)
{
Node parent = operatorMap.get(inputName);
parent.setChild(node);
node.addParent(parent);
}
operatorMap.put(getText(json, "output"), node);
node.next = linkedListHead;
linkedListHead = node;
// special cases for individual operators
OperatorType type = OperatorType.valueOf(getText(json, "operator"));
switch (type)
{
case BLOCK_INDEX_JOIN:
this.blockIndexJoinKeys = asArray(json, "partitionKeys");
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
break;
case TEE:
this.datasetConditions.put(getText(json, "path"), postCondition);
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
break;
case LOAD_BLOCK:
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
if (postCondition.getPartitionKeys() != null)
((ObjectNode) json).put("partitionKeys",
JsonUtils.createArrayNode(postCondition.getPartitionKeys()));
if (postCondition.getSortKeys() != null)
((ObjectNode) json).put("sortKeys",
JsonUtils.createArrayNode(postCondition.getSortKeys()));
break;
default:
break;
}
}
catch (IllegalArgumentException e)
{
// error(json, "operator %s is not supported.", getText(json, "operator"));
printConditions(json.get("line").getTextValue(), preConditions, postCondition);
error(e, json, e.getMessage());
}
catch (PreconditionException e)
{
printConditions(json.get("line").getTextValue(), preConditions, postCondition);
error(e, json, e.toString());
}
catch (ArrayIndexOutOfBoundsException e)
{
error(e, json, e.toString());
}
}
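/**
 * Builds the "union" schema for a reduce-side join of the two mapper outputs:
 * the join keys (typed from the left schema), followed by the remaining left
 * columns, the remaining right columns, and a trailing ___tag INT column.
 * Validates that the join keys have identical types on both sides and that
 * non-key column names do not collide.
 */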
private BlockSchema createReduceSideJoinSchema(Node leftNode, Node rightNode, JsonNode shuffleJson)
{
// Build a "union" schema for the join
BlockSchema leftSchema = leftNode.condition.getSchema();
BlockSchema rightSchema = rightNode.condition.getSchema();
String[] joinKeys = asArray(leftNode.json, "joinKeys");
// validate that the join keys have the same type in both mappers
for (String joinKey: joinKeys)
{
ColumnType leftType = leftSchema.getColumnType(leftSchema.getIndex(joinKey));
ColumnType rightType = rightSchema.getColumnType(rightSchema.getIndex(joinKey));
if (!leftType.equals(rightType))
{
error(shuffleJson, "Datatype of join key " + joinKey + " is not same. Left: "
+ leftType + " Right: " + rightType);
}
}
// validate that column names are different in left and right tables
Set<String> joinKeySet = new HashSet<String>();
Set<String> leftColumnSet = new HashSet<String>();
for (String str: joinKeys)
joinKeySet.add(str);
for (String str: leftSchema.getColumnNames())
leftColumnSet.add(str);
for (String str: rightSchema.getColumnNames())
if (leftColumnSet.contains(str) && !joinKeySet.contains(str))
error(shuffleJson, "The names of columns (other than join keys) must be different. Found: " + str);
// the new schema will have #leftColumns + #rightColumns - #joinKeys + 1 (tag)
ColumnType[] joinTypes = new ColumnType[leftSchema.getNumColumns() + rightSchema.getNumColumns() - joinKeys.length + 1];
Set<String> joinKeysSet = new HashSet<String>();
// fill the schema with the join keys
int idx = 0;
for (String joinKey: joinKeys)
{
joinTypes[idx++] = leftSchema.getColumnType(leftSchema.getIndex(joinKey));
joinKeysSet.add(joinKey);
}
// fill the remaining columns from the left schema
for (int i = 0; i < leftSchema.getNumColumns(); i++)
{
String colName = leftSchema.getName(i);
if (joinKeysSet.contains(colName))
continue;
joinTypes[idx++] = leftSchema.getColumnType(i);
}
// fill the remaining columns from the right schema
for (int i = 0; i < rightSchema.getNumColumns(); i++)
{
String colName = rightSchema.getName(i);
if (joinKeysSet.contains(colName))
continue;
joinTypes[idx++] = rightSchema.getColumnType(i);
}
// finally, fill in the tag column (this is the LAST column)
joinTypes[idx++] = new ColumnType("___tag", DataType.INT);
BlockSchema joinSchema = new BlockSchema(joinTypes);
return joinSchema;
}
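/**
 * Analyzes the shuffle operator. For multi-mapper jobs it validates that all
 * mappers shuffle the same block name with compatible schemas, building a
 * combined schema for reduce-side joins and widening numeric column types
 * otherwise. It then expands the pivot keys for DISTINCT shuffles, checks that
 * partition and pivot keys exist in the schema, verifies combiner aggregates,
 * and starts a new operator chain with the shuffle output as the input block.
 */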
@Override
public void visitShuffle(JsonNode json)
{
// Add the final node of the current mapper's chain to the list
mapOutputNodes.add(linkedListHead);
// if there is more than one mapper (multi-mapper situation)
if (mapOutputNodes.size() > 1)
{
// special case: REDUCE-SIDE join
if (json.has("join") && json.get("join").getBooleanValue())
{
// Build a "union" schema for the join
Node leftNode = mapOutputNodes.get(0);
Node rightNode = mapOutputNodes.get(1);
BlockSchema joinSchema = createReduceSideJoinSchema(leftNode, rightNode, json);
// Assign this schema to both mappers
leftNode.condition = new PostCondition(joinSchema,
leftNode.condition.getPartitionKeys(),
leftNode.condition.getSortKeys());
rightNode.condition = new PostCondition(joinSchema,
rightNode.condition.getPartitionKeys(),
rightNode.condition.getSortKeys());
// set this joined schema in json for both operators
((ObjectNode) leftNode.json).put("schema", joinSchema.toJson());
((ObjectNode) rightNode.json).put("schema", joinSchema.toJson());
}
// validate that we are shuffling on the same block name from each multi-mapper.
// Also validate that the schemas are identical (with one exception: reduce-side join)
String outputName = getText(mapOutputNodes.get(0).json, "output");
BlockSchema firstSchema = mapOutputNodes.get(0).getPostCondition().getSchema();
BlockSchema widerSchema = firstSchema;
for (int i = 1; i < mapOutputNodes.size(); i++)
{
String name = getText(mapOutputNodes.get(i).json, "output");
BlockSchema schema = mapOutputNodes.get(i).getPostCondition().getSchema();
if (!name.equals(outputName))
error(json, "Multimappers must shuffle on same block names. Found: " + name + " and " + outputName);
if (!firstSchema.equalsIgnoreNumeric(schema))
error(json, "Multimappers must have same output schema. Found:\n\t"
+ firstSchema + " and\n\t" + schema);
widerSchema = SchemaUtils.getWiderSchema(widerSchema, schema);
}
// update the condition for each multi-mapper with the wider schema
for (int i = 0; i < mapOutputNodes.size(); i++)
{
PostCondition condition = mapOutputNodes.get(i).condition;
mapOutputNodes.get(i).condition = new PostCondition(widerSchema,
condition.getPartitionKeys(),
condition.getSortKeys());
}
}
String[] inputNames = asArray(json.get("name"));
Map<String, PostCondition> preConditions = getPreconditions(inputNames, json);
if (preConditions == null)
return;
PostCondition preCondition = preConditions.values().iterator().next();
if (json.has("distinctShuffle") && json.get("distinctShuffle").getBooleanValue())
{
String[] columns = preCondition.getSchema().getColumnNames();
JsonNode columnsJson = JsonUtils.createArrayNode(columns);
((ObjectNode) json).put("partitionKeys", columnsJson);
((ObjectNode) json).put("pivotKeys", columnsJson);
}
String[] partitionKeys = asArray(json, "partitionKeys");
String[] pivotKeys = asArray(json, "pivotKeys");
// make sure partition and pivot keys are in the schema
if (partitionKeys != null)
{
checkShuffleKeyErrors(json, preCondition.getSchema(), partitionKeys);
}
if (pivotKeys != null)
{
checkShuffleKeyErrors(json, preCondition.getSchema(), pivotKeys);
}
// if there is a distinct operator in the reduce phase, add the remaining columns to the pivot keys
if (json.has("distinct") && json.get("distinct").getBooleanValue())
{
BlockSchema remaining =
preCondition.getSchema().getComplementSubset(pivotKeys);
String[] completePivotKeys =
CommonUtils.concat(pivotKeys, remaining.getColumnNames());
((ObjectNode) json).put("pivotKeys",
JsonUtils.createArrayNode(completePivotKeys));
pivotKeys = asArray(json, "pivotKeys");
}
if (json.has("aggregates"))
{
try
{
CubertCombiner.checkPostCondition(preConditions, json);
}
catch (PreconditionException e)
{
error(json, e.getMessage());
}
}
PostCondition postCondition =
new PostCondition(preCondition.getSchema(), partitionKeys, pivotKeys);
// put the schema in the json
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
// create a node for this operator
Node shuffleNode = new Node(json, postCondition);
for (String inputName : inputNames)
{
Node parent = operatorMap.get(inputName);
parent.setChild(shuffleNode);
shuffleNode.addParent(parent);
}
// validate that the output of every map-side operator is consumed
for (Node node : mapOutputNodes)
{
node.setChild(shuffleNode);
while (node != null)
{
if (node.child == null)
error(node.json, "The output of this command is not used");
node = node.next;
}
}
// start a new chain with shuffle as the input block
operatorMap.clear();
linkedListHead = shuffleNode;
operatorMap.put(getText(json, "name"), shuffleNode);
}
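/**
 * Verifies that each shuffle key is present in the schema and is not specified
 * more than once.
 */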
private void checkShuffleKeyErrors(JsonNode json, BlockSchema schema, String[] keys)
{
final Map<String, Integer> schemaIndexMap = schema.getIndexMap();
HashSet<String> cols = new HashSet<String>();
for (String key : keys)
{
if (!schemaIndexMap.containsKey(key))
{
error(json, "Column " + key + " not found in schema:\n\t" + schema);
}
if (cols.contains(key))
{
error(json, "Duplicate key specified: " + key);
}
cols.add(key);
}
}
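/**
 * For jobs that write RUBIX output, attaches a "metadata" node (schema,
 * partition keys, sort keys, rubix file version, block format and blockgen id)
 * to the job JSON. If a metadata node is already present and this is not a
 * revisit pass, a RuntimeException is thrown.
 */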
@Override
public void exitJob(JsonNode json)
{
JsonNode outputJson = json.get("output");
boolean isRubixStorage =
outputJson.has("type")
&& getText(outputJson, "type").equalsIgnoreCase("RUBIX");
// if the output type is rubix, add a metadata node to the json
if (isRubixStorage)
{
// the metadata has the following fields
JsonNode metadataNode =
JsonUtils.createObjectNode("schema",
outputJson.get("schema"),
"partitionKeys",
outputJson.get("partitionKeys"),
"sortKeys",
outputJson.get("sortKeys"),
"version",
RubixConstants.RUBIX_FILE_VERSION_NUMBER,
"BlockFormat",
"DefaultRubixFormat",
"BlockgenId",
getText(outputJson, "blockgenId"));
if (!json.has("metadata") || revisit)
((ObjectNode) json).put("metadata", metadataNode);
else
throw new RuntimeException(String.format("The metadata node is already added for the output of job %s, which is not expected.",
json.get("name").toString()));
}
}
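/**
 * Analyzes the job output: copies the schema (and, for RUBIX storage, the
 * partition and sort keys) of the output's input block into the JSON, checks
 * that every operator's output is consumed, and records the output path's
 * PostCondition for downstream jobs.
 */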
@Override
public void visitOutput(JsonNode json)
{
String[] inputNames = asArray(json.get("name"));
Map<String, PostCondition> preConditions = getPreconditions(inputNames, json);
if (preConditions == null)
return;
PostCondition preCondition = preConditions.values().iterator().next();
PostCondition postCondition = preCondition;
boolean isRubixStorage =
json.has("type") && getText(json, "type").equalsIgnoreCase("RUBIX");
// Only rubix storage allows partition and sort keys
if (!isRubixStorage)
{
postCondition = new PostCondition(postCondition.getSchema(), null, null);
}
// put the schema in the json
((ObjectNode) json).put("schema", postCondition.getSchema().toJson());
if (postCondition.getPartitionKeys() != null)
((ObjectNode) json).put("partitionKeys",
JsonUtils.createArrayNode(postCondition.getPartitionKeys()));
if (postCondition.getSortKeys() != null)
((ObjectNode) json).put("sortKeys",
JsonUtils.createArrayNode(postCondition.getSortKeys()));
Node node = new Node(json, postCondition);
for (String inputName : inputNames)
{
Node parent = operatorMap.get(inputName);
parent.setChild(node);
node.addParent(parent);
}
// validate that the output of every operator is consumed
node = linkedListHead;
while (node != null)
{
if (node.child == null)
error(node.json, "The output of this command is not used");
node = node.next;
}
datasetConditions.put(getText(json, "path"), postCondition);
}
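/**
 * Reports a semantic error against the given operator JSON, marks the plan as
 * having errors, prints the offending script line (if available), and aborts
 * the rewrite by throwing a PlanRewriteException.
 */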
private void error(JsonNode json, String format, Object... args)
{
error(null, json, format, args);
}
private void error(Exception e, JsonNode json, String format, Object... args)
{
if (debugMode && e != null)
e.printStackTrace();
hasErrors = true;
System.err.println(String.format("ERROR: " + format, args));
if (json != null)
{
System.err.print("At:\t");
if (json.has("line"))
System.err.println(json.get("line").getTextValue());
else
System.err.println(json.toString());
}
if (e != null)
throw new PlanRewriteException(e);
else
throw new PlanRewriteException();
}
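/**
 * Collects the PostConditions of the named input blocks as the preconditions
 * of the current operator. Reports an error if an input block is unknown or if
 * its producing operator already has a consumer (with exceptions for
 * LOAD_BLOCK, in-memory PIVOT_BLOCK and USER_DEFINED_BLOCK_OPERATOR). Returns
 * null if any precondition could not be resolved.
 */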
private Map<String, PostCondition> getPreconditions(String[] inputNames, JsonNode json)
{
Map<String, PostCondition> preConditions = new HashMap<String, PostCondition>();
// verify that all inputs are present, and put them in the preConditions map
for (String inputName : inputNames)
{
Node parent = operatorMap.get(inputName);
if (parent == null)
{
error(json, "input block %s not found", inputName);
continue;
}
if (parent.child != null)
{
// Exception 1: it is okay for the parent to have multiple children,
// IF all other children are LOAD_BLOCK (which use the "MATCHING"
// keyword to reference the parent)
boolean isException = false;
if (json.has("operator")
&& getText(json, "operator").equals("LOAD_BLOCK"))
isException = true;
if (parent.child.json.has("operator")
&& getText(parent.child.json, "operator").equals("LOAD_BLOCK"))
isException = true;
// Exception 2: it is okay for the parent to have multiple children,
// IF the parent is an in-memory PIVOT_BLOCK operator
if (parent.json.has("operator")
&& getText(parent.json, "operator").equals("PIVOT_BLOCK")
&& parent.json.has("inMemory")
&& parent.json.get("inMemory").getBooleanValue())
isException = true;
// Exception 3 (for now): it is okay for the parent to have multiple children,
// if the other children are USER_DEFINED_BLOCK_OPERATOR
if ((json.has("operator") && getText(json, "operator").equals("USER_DEFINED_BLOCK_OPERATOR"))
|| (parent.child.json.has("operator") && getText(parent.child.json,
"operator").equals("USER_DEFINED_BLOCK_OPERATOR")))
isException = true;
if (!isException)
{
error(json,
"parent operator [%s] has a child assigned already.\n\tChild: %s",
parent.json.get("line"),
parent.child.json.get("line"));
continue;
}
}
preConditions.put(inputName, parent.condition);
}
// bail out, if we haven't accumulated all preconditions
if (preConditions.size() != inputNames.length)
return null;
return preConditions;
}
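/**
 * Computes the PostCondition of an operator from its preconditions. Tuple
 * operators delegate to their own getPostCondition implementation; block
 * operators (CREATE_BLOCK, LOAD_BLOCK, LOAD_CACHED_FILE, PIVOT_BLOCK,
 * COLLATE_VECTOR_BLOCK, USER_DEFINED_BLOCK_OPERATOR) are handled explicitly
 * here.
 */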
private PostCondition getPostCondition(JsonNode json,
Map<String, PostCondition> preConditions) throws PreconditionException
{
OperatorType type = OperatorType.valueOf(getText(json, "operator"));
if (type.isTupleOperator())
{
// TupleOperator operatorObject = OperatorFactory.getTupleOperator(type);
TupleOperator operatorObject =
(type == OperatorType.USER_DEFINED_TUPLE_OPERATOR)
? (TupleOperator) FunctionFactory.createFunctionObject(getText(json,
"class"),
json.get("constructorArgs"))
: OperatorFactory.getTupleOperator(type);
return operatorObject.getPostCondition(preConditions, json);
}
else
{
switch (type)
{
case CREATE_BLOCK:
{
String[] partitionKeys = JsonUtils.asArray(json, "partitionKeys");
PostCondition condition = preConditions.values().iterator().next();
BlockSchema schema = condition.getSchema();
String[] sortKeys = condition.getSortKeys();
if (json.has("pivotKeys"))
sortKeys = JsonUtils.asArray(json, "pivotKeys");
if (!CommonUtils.isPrefix(partitionKeys, condition.getPartitionKeys())
&& !CommonUtils.isPrefix(condition.getPartitionKeys(),
partitionKeys))
throw new PreconditionException(PreconditionExceptionType.INVALID_PARTITION_KEYS);
boolean isIndexed = getText(json, "blockgenType").equals("BY_INDEX");
if (isIndexed)
{
partitionKeys = blockIndexJoinKeys;
// remove BLOCK_ID from the schema
ColumnType[] ctypes = new ColumnType[schema.getNumColumns() - 1];
int idx = 0;
for (int i = 0; i < schema.getNumColumns(); i++)
if (!schema.getColumnType(i).getName().equals("BLOCK_ID"))
ctypes[idx++] = schema.getColumnType(i);
schema = new BlockSchema(ctypes);
// remove BLOCK_ID from sort keys
if (sortKeys.length == 0)
System.out.println("Empty sortKeys for opNode " + json.toString());
String[] tmp = new String[sortKeys.length - 1];
idx = 0;
for (String key : sortKeys)
if (!key.equals("BLOCK_ID"))
tmp[idx++] = key;
sortKeys = tmp;
}
return new PostCondition(schema, partitionKeys, sortKeys);
}
case LOAD_BLOCK:
{
if (json.get("path") == null)
System.out.println("null path name for " + json);
String path = JsonUtils.encodePath(json.get("path"));
PostCondition postCondition = this.datasetConditions.get(path);
if (postCondition == null)
error(json, "Cannot determine schema of " + path);
return postCondition;
}
case LOAD_CACHED_FILE:
{
String path = JsonUtils.encodePath(json.get("path"));
try
{
path = new URI(path).getPath();
}
catch (URISyntaxException e)
{
// if the path is not a valid URI, keep the raw path and continue
e.printStackTrace();
}
PostCondition postCondition = this.datasetConditions.get(path);
if (postCondition == null)
error(json, "Cannot determine schema of " + path);
return new PostCondition(postCondition.getSchema(),
new String[] {},
new String[] {});
}
case PIVOT_BLOCK:
{
BlockOperator operator = new PivotBlockOperator();
return operator.getPostCondition(preConditions, json);
}
case COLLATE_VECTOR_BLOCK:
{
PostCondition preCondition = preConditions.values().iterator().next();
String metaRelationName =
new String(JsonUtils.getText(json, "metaRelationName"));
BlockSchema outSchema = new BlockSchema(preCondition.getSchema());
PostCondition preConditionMetaRelation =
preConditions.get(metaRelationName);
String identifierColumns[] = JsonUtils.asArray(json, "identifierColumn");
// This will allow a subsequent GROUP BY on (identifierColumns,
// lookupColumns)
if (!CommonUtils.isPrefix(preConditionMetaRelation.getSortKeys(),
identifierColumns))
{
System.out.println("PreconditionMetaRelation sortKeys "
+ Arrays.toString(preConditionMetaRelation.getSortKeys()));
throw new PreconditionException(PreconditionExceptionType.INVALID_SORT_KEYS);
}
String[] combineColumns = JsonUtils.asArray(json, "combineColumns");
String[] lookupColumns = JsonUtils.asArray(json, "lookupColumn");
if (!CommonUtils.isPrefix(preCondition.getSortKeys(),
CommonUtils.concat(lookupColumns,
combineColumns)))
{
System.out.println("PreconditionInputBlock sortKeys "
+ Arrays.toString(preCondition.getSortKeys()));
throw new PreconditionException(PreconditionExceptionType.INVALID_SORT_KEYS);
}
int identifierColumnIndex =
preConditionMetaRelation.getSchema()
.getIndex(getText(json,
"identifierColumn"));
ColumnType ct =
preConditionMetaRelation.getSchema()
.getColumnType(identifierColumnIndex);
// rename identifierColumn for output schema.
identifierColumns[0] =
String.format("%s___%s",
getText(json, "metaRelationName"),
getText(json, "identifierColumn"));
ct.setName(identifierColumns[0]);
BlockSchema identifierColumnSchema =
new BlockSchema(new ColumnType[] { ct });
/*
* String newColumn = String.format("STRING %s___%s",
* CommonUtils.stripQuotes(getText(json, "metaRelationName")),
* CommonUtils.stripQuotes(getText(json, "identifierColumn")));
* BlockSchema newColumnSchema = new BlockSchema(newColumn);
*/
// Only the metaRelation identifier column is renamed. Columns from the
// "data/metrics" block are transmitted with their old names.
outSchema = outSchema.append(identifierColumnSchema);
return new PostCondition(outSchema,
preCondition.getPartitionKeys(),
(combineColumns),
identifierColumns);
}
case USER_DEFINED_BLOCK_OPERATOR:
{
BlockOperator operator =
(BlockOperator) FunctionFactory.createFunctionObject(getText(json,
"class"),
json.get("constructorArgs"));
// OperatorFactory.getUserDefinedBlockOperator(getText(json, "class"),
// json.get("constructorArgs"));
return operator.getPostCondition(preConditions, json);
}
default:
throw new IllegalArgumentException("Operator " + type
+ " is not supported.");
}
}
}
}