/* (c) 2014 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use * this file except in compliance with the License. You may obtain a copy of the * License at http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. */ package com.linkedin.cubert.functions; import java.io.IOException; import java.util.List; import org.apache.pig.data.Tuple; import com.linkedin.cubert.block.Block; import com.linkedin.cubert.block.BlockSchema; import com.linkedin.cubert.block.ColumnType; import com.linkedin.cubert.operator.PreconditionException; /** * Abstract class for defining user-defined functions for the GENERATE operator. * <p> * The UDF class is responsible for: * <ul> * <li>generating the output column type ({@code getCacheFile} method).</li> * * <li>evaluating the input tuple and generating the output ({@code eval} method)</li> * * <li>[Optionally] listing the files that must be stored in the distributed cache ( * {@code getCacheFiles} method)</li> * </ul> * <p> * The UDF has access to the current block using the protected {@code setBlock} method. * <p> * Note: the {@code outputSchema} and {@code getCacheFiles} methods will be called at the * compile time, as well as the run time. * <p> * At run time, the {@code outputSchema} method is called before calling the {@code eval} * method (so it okay to initialize object variables in the outputSchema method). * * @author Maneesh Varshney * */ public abstract class Function { /** * Optionally override this method to perform setup operations. * * @param block */ protected void setBlock(Block block) throws PreconditionException { } /** * Evaluate the input tuple to generate output object. * * @param tuple * the input tuple * @return evaluated output * @throws IOException */ public abstract Object eval(Tuple tuple) throws IOException; /** * Generate the ColumnType of the output. * * @param inputSchema * Schema of the input tuple * @return Output object ColumnType * @throws PreconditionException */ public abstract ColumnType outputSchema(BlockSchema inputSchema) throws PreconditionException; /** * Returns a list of files that must be stored in distributed cache. * * @return a list of file names */ public List<String> getCacheFiles() { return null; } }