/* (c) 2014 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.functions;

import static com.linkedin.cubert.utils.JsonUtils.getText;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ArrayNode;

import com.linkedin.cubert.block.Block;
import com.linkedin.cubert.block.BlockSchema;
import com.linkedin.cubert.block.ColumnType;
import com.linkedin.cubert.block.DataType;
import com.linkedin.cubert.functions.builtin.FunctionFactory;
import com.linkedin.cubert.operator.PreconditionException;
import com.linkedin.cubert.operator.PreconditionExceptionType;
import com.linkedin.cubert.utils.JsonUtils;

/**
 * Parses, constructs and executes the function tree.
 * <p>
 * This class can maintain multiple function trees (added via the
 * {@code addFunctionTree} method). That method parses the JSON specification and builds
 * a runtime representation of the function tree. It may throw a
 * {@code PreconditionException} if any inconsistency is found in the JSON.
 * <p>
 * The output type for each function tree can be retrieved using the {@code getType}
 * method.
 * <p>
 * The function tree can be evaluated using the {@code evalTree} method. Before calling
 * this method, the input tuple must be assigned to the tree using the
 * {@code attachTuple} method.
 *
 * @author Maneesh Varshney
 *
 */
public class FunctionTree
{
    /** Input block; {@code null} when the tree was created from a schema only. */
    private final Block block;

    /** Schema against which INPUT_PROJECTION selectors are resolved. */
    private final BlockSchema inputSchema;

    /**
     * One slot per input column. A slot is non-null only if some added tree contains an
     * INPUT_PROJECTION on that column; the same instance is shared by all such trees.
     */
    private final InputProjection[] inputProjections;

    /** Root nodes of the added trees, in the order they were added. */
    private final List<FunctionTreeNode> functionTrees =
            new ArrayList<FunctionTreeNode>();

    /**
     * Output data type of each tree root. Always has the same length as
     * {@link #functionTrees}.
     */
    private DataType[] outputTypes = new DataType[0];

    /**
     * Create a new object using the input Block. This constructor is called when
     * creating the FunctionTree at runtime.
     *
     * @param block
     *            the input block; its schema is taken from
     *            {@code block.getProperties().getSchema()}, and the block is handed to
     *            every function obtained from the {@code FunctionFactory}
     */
    public FunctionTree(Block block)
    {
        this.block = block;
        this.inputSchema = block.getProperties().getSchema();
        this.inputProjections = new InputProjection[inputSchema.getNumColumns()];
    }

    /**
     * Create a new object using the schema of the input block. This constructor is
     * called when creating the FunctionTree at compile time (since the 'Block' object is
     * not available at compile time).
     *
     * @param schema
     *            schema of the input block
     */
    public FunctionTree(BlockSchema schema)
    {
        this.block = null;
        this.inputSchema = schema;
        this.inputProjections = new InputProjection[inputSchema.getNumColumns()];
    }

    /**
     * Adds a new function tree. The new tree gets the next free index (the number of
     * trees added before it).
     *
     * @param json
     *            JSON representation of the function tree.
     * @throws PreconditionException
     *             if the JSON refers to a column that is not present in its input
     */
    public void addFunctionTree(JsonNode json) throws PreconditionException
    {
        FunctionTreeNode root = createTreeNode(json);

        // Resolve the output type BEFORE registering the tree, so that a failure here
        // cannot leave functionTrees and outputTypes with different lengths.
        DataType rootType = root.getType().getType();

        // Append a single slot instead of recomputing the type of every existing tree.
        int count = outputTypes.length;
        DataType[] grown = new DataType[count + 1];
        System.arraycopy(outputTypes, 0, grown, 0, count);
        grown[count] = rootType;

        functionTrees.add(root);
        outputTypes = grown;
    }

    /**
     * Get the type of the root function for the specified function tree.
     *
     * @param treeIndex
     *            index of the tree, in the order the trees were added
     * @return the column type of the root node of that tree
     */
    public ColumnType getType(int treeIndex)
    {
        return functionTrees.get(treeIndex).getType();
    }

    /**
     * Attaches an input tuple to the tree before calling the {@code evalTree} method.
     * Only the fields whose column is used by an INPUT_PROJECTION of some added tree are
     * read from the tuple.
     *
     * @param tuple
     *            the input tuple; fields are read by input column index
     * @throws ExecException
     *             if a field cannot be read from the tuple
     */
    public void attachTuple(Tuple tuple) throws ExecException
    {
        for (int i = 0; i < inputProjections.length; i++)
        {
            InputProjection projection = inputProjections[i];
            if (projection != null)
            {
                projection.setValue(tuple.get(i));
            }
        }
    }

    /**
     * Evaluates the specified function tree.
     *
     * @param treeIndex
     *            the index of the tree to evaluate.
     * @return the value produced by the root of the tree, or {@code null} if the root
     *         evaluates to {@code null}. When the output type of the tree is INT, LONG,
     *         FLOAT or DOUBLE the value is converted to exactly that numeric type.
     * @throws IOException
     *             if evaluating the tree fails
     */
    public Object evalTree(int treeIndex) throws IOException
    {
        FunctionTreeNode root = functionTrees.get(treeIndex);
        Object val = root.eval();

        if (val == null)
        {
            return null;
        }

        // if val is numeric, the actual value may be of narrower type.
        // upcast it to the proper wider type
        switch (outputTypes[treeIndex])
        {
        case INT:
            return ((Number) val).intValue();
        case LONG:
            return ((Number) val).longValue();
        case FLOAT:
            return ((Number) val).floatValue();
        case DOUBLE:
            return ((Number) val).doubleValue();
        default:
            return val;
        }
    }

    /**
     * Recursive method to parse and construct the function tree.
     * <p>
     * The {@code "function"} field of the JSON selects the kind of node:
     * INPUT_PROJECTION, PROJECTION, MAP_PROJECTION and CONSTANT are handled here; any
     * other name is looked up in the {@code FunctionFactory} and every element of
     * {@code "arguments"} becomes a child node.
     *
     * @param json
     *            JSON object with a {@code "function"} name and an {@code "arguments"}
     *            array
     * @return the node built for {@code json}, with its children already constructed
     * @throws PreconditionException
     *             if a projection selects a column that is not present
     */
    private FunctionTreeNode createTreeNode(JsonNode json) throws PreconditionException
    {
        String function = getText(json, "function");
        ArrayNode args = (ArrayNode) json.get("arguments");

        if (function.equals("INPUT_PROJECTION"))
        {
            Object selector = JsonUtils.decodeConstant(args.get(0), null);
            int index = getSelectorIndex(inputSchema, selector);

            // All trees projecting the same input column share one InputProjection, so
            // attachTuple has to set each column value only once.
            if (inputProjections[index] == null)
            {
                inputProjections[index] = new InputProjection(index);
            }

            Function func = inputProjections[index];
            return new FunctionTreeNode(func, null, func.outputSchema(inputSchema));
        }

        if (function.equals("PROJECTION"))
        {
            Object selector = JsonUtils.decodeConstant(args.get(1), null);
            FunctionTreeNode parent = createTreeNode(args.get(0));

            // The selector is resolved against the nested schema of the parent node.
            BlockSchema parentSchema = parent.getType().getColumnSchema();
            int index = getSelectorIndex(parentSchema, selector);

            Function func = new Projection(index);
            return new FunctionTreeNode(func,
                                        new LazyTuple(parent),
                                        func.outputSchema(parentSchema));
        }

        if (function.equals("MAP_PROJECTION"))
        {
            String key = args.get(1).getTextValue();
            FunctionTreeNode mapNode = createTreeNode(args.get(0));

            Function func = new MapProjection(key);
            return new FunctionTreeNode(func,
                                        new LazyTuple(mapNode),
                                        func.outputSchema(mapNode.getType()
                                                                 .getColumnSchema()));
        }

        if (function.equals("CONSTANT"))
        {
            // The second argument, when present, names the type of the constant.
            String type = (args.size() > 1) ? args.get(1).getTextValue() : null;
            Object constant = JsonUtils.decodeConstant(args.get(0), type);

            Function func = new Constant(constant);
            return new FunctionTreeNode(func, null, func.outputSchema(null));
        }

        // Any other name: obtain the function from the factory.
        Function func = FunctionFactory.get(function, json.get("constructorArgs"));
        if (block != null)
        {
            func.setBlock(block);
        }

        int numArgs = args.size();
        FunctionTreeNode[] children = new FunctionTreeNode[numArgs];
        ColumnType[] columnTypes = new ColumnType[numArgs];

        for (int i = 0; i < numArgs; i++)
        {
            children[i] = createTreeNode(args.get(i));
            columnTypes[i] = children[i].getType();
        }

        // The function sees its arguments as a schema made of the child types.
        return new FunctionTreeNode(func,
                                    new LazyTuple(children),
                                    func.outputSchema(new BlockSchema(columnTypes)));
    }

    /**
     * Resolves a column selector to a column index within a schema.
     *
     * @param schema
     *            the schema the selector refers to
     * @param selector
     *            either an {@code Integer} (positional index) or a {@code String}
     *            (column name); any other type fails the cast to {@code String} with a
     *            {@code ClassCastException}
     * @return the index of the selected column
     * @throws PreconditionException
     *             with type {@code COLUMN_NOT_PRESENT} if the index is out of range or
     *             the schema has no column with the given name
     */
    public static int getSelectorIndex(BlockSchema schema, Object selector) throws PreconditionException
    {
        if (selector instanceof Integer)
        {
            int index = (Integer) selector;
            if (index < 0 || index >= schema.getNumColumns())
            {
                throw new PreconditionException(PreconditionExceptionType.COLUMN_NOT_PRESENT,
                                                "Column at index " + index
                                                        + " is not present in input: "
                                                        + schema);
            }
            return index;
        }

        String colName = (String) selector;
        if (!schema.hasIndex(colName))
        {
            throw new PreconditionException(PreconditionExceptionType.COLUMN_NOT_PRESENT,
                                            "Column " + colName
                                                    + " is not present in input: "
                                                    + schema);
        }
        return schema.getIndex(colName);
    }
}