package edu.washington.escience.myria.expression.evaluate; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; import org.codehaus.commons.compiler.CompileException; import org.codehaus.commons.compiler.CompilerFactoryFactory; import org.codehaus.commons.compiler.IScriptEvaluator; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import edu.washington.escience.myria.DbException; import edu.washington.escience.myria.MyriaConstants; import edu.washington.escience.myria.Schema; import edu.washington.escience.myria.Type; import edu.washington.escience.myria.column.Column; import edu.washington.escience.myria.column.ConstantValueColumn; import edu.washington.escience.myria.column.builder.ColumnBuilder; import edu.washington.escience.myria.column.builder.ColumnFactory; import edu.washington.escience.myria.column.builder.WritableColumn; import edu.washington.escience.myria.expression.Expression; import edu.washington.escience.myria.expression.ExpressionOperator; import edu.washington.escience.myria.expression.VariableExpression; import edu.washington.escience.myria.operator.Apply; import edu.washington.escience.myria.storage.MutableTupleBuffer; import edu.washington.escience.myria.storage.ReadableColumn; import edu.washington.escience.myria.storage.ReadableTable; import edu.washington.escience.myria.storage.TupleBatch; import edu.washington.escience.myria.storage.TupleBuffer; import edu.washington.escience.myria.storage.TupleUtils; /** * An Expression evaluator for generic expressions. Used in {@link Apply}. */ public class GenericEvaluator extends Evaluator { /** logger for this class. */ private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(GenericEvaluator.class); /** Expression evaluator. */ private ExpressionEvalInterface evaluator; /** The script. */ private String script; /** * Default constructor. * * @param expression the expression for the evaluator * @param parameters parameters that are passed to the expression * @throws DbException if the compilation failed */ public GenericEvaluator(final Expression expression, final ExpressionOperatorParameter parameters) throws DbException { super(expression, parameters); this.script = getExpression().getJavaExpressionWithAppend(getParameters()); compile(); } /** * @param expression * @param script * @param parameters * @throws DbException if the compilation failed */ public GenericEvaluator( final Expression expression, final String script, final ExpressionOperatorParameter parameters) throws DbException { super(expression, parameters); this.script = script; compile(); } /** * Compiles the {@link #javaExpression}. * * @throws DbException if the compilation failed */ @Override public void compile() throws DbException { IScriptEvaluator se; try { se = CompilerFactoryFactory.getDefaultCompilerFactory().newScriptEvaluator(); } catch (Exception e) { LOGGER.error("Could not create expression evaluator", e); throw new DbException("Could not create expression evaluator", e); } se.setDefaultImports(MyriaConstants.DEFAULT_JANINO_IMPORTS); try { if (script.contains("append")) { evaluator = (ExpressionEvalAppendInterface) se.createFastEvaluator( script, ExpressionEvalAppendInterface.class, new String[] { Expression.INPUT, Expression.INPUTROW, Expression.STATE, Expression.STATEROW, Expression.STATECOLOFFSET, Expression.RESULT, Expression.COUNT }); } else { evaluator = (ExpressionEvalReplaceInterface) se.createFastEvaluator( script, ExpressionEvalReplaceInterface.class, new String[] { Expression.INPUT, Expression.INPUTROW, Expression.STATE, Expression.STATEROW, Expression.STATECOLOFFSET }); } } catch (CompileException e) { LOGGER.error("Error when compiling expression {}: {}", script, e); throw new DbException("Error when compiling expression: " + script, e); } } /** * Evaluates the {@link #getJavaExpressionWithAppend()} using the {@link #evaluator}. Prefer to use * {@link #evaluateColumn(TupleBatch)} since it can evaluate an entire TupleBatch at a time for better locality. * * @param input a tuple batch * @param inputRow index of the row that should be used for input data * @param state additional state that affects the computation * @param stateRow index of the row that should be used for state * @param stateColOffset the column offset of the state * @throws DbException in case of error. */ public void updateState( @Nonnull final ReadableTable input, final int inputRow, @Nonnull final MutableTupleBuffer state, final int stateRow, final int stateColOffset) throws DbException { ((ExpressionEvalReplaceInterface) evaluator) .evaluate(input, inputRow, state, stateRow, stateColOffset); } /** * Evaluates the {@link #getJavaExpressionWithAppend()} using the {@link #evaluator}. Prefer to use * {@link #evaluateColumn(TupleBatch)} since it can evaluate an entire TupleBatch at a time for better locality. * * @param input a tuple batch * @param inputRow index of the row that should be used for input data * @param state additional state that affects the computation * @param stateRow index of the row that should be used for state * @param result the table storing the result * @param count column storing number of results (null for single-valued expressions) * @throws DbException in case of error. */ public void eval( @Nullable final ReadableTable input, final int inputRow, @Nullable final ReadableTable state, final int stateRow, @Nonnull final WritableColumn result, @Nullable final WritableColumn count) throws DbException { Preconditions.checkArgument( evaluator != null, "Call compile first or copy the data if it is the same in the input."); Preconditions.checkArgument( getExpression().isMultiValued() != (count == null), "count must be null for a single-valued expression and non-null for a multivalued expression."); try { ((ExpressionEvalAppendInterface) evaluator) .evaluate(input, inputRow, state, stateRow, 0, result, count); } catch (Exception e) { LOGGER.error(script, e); throw e; } } /** * Holder class for results and result counts from {@link #evaluateColumn}. */ public static class EvaluatorResult { private final ReadableColumn results; private final ImmutableList<Column<?>> resultColumns; private final ReadableColumn resultCounts; protected EvaluatorResult( @Nonnull final TupleBuffer results, @Nonnull final Column<?> resultCounts) { ImmutableList.Builder<Column<?>> resultColumnsBuilder = ImmutableList.builder(); for (final TupleBatch tb : results.finalResult()) { resultColumnsBuilder.add(tb.getDataColumns().get(0)); } this.resultColumns = resultColumnsBuilder.build(); this.results = results.asColumn(0); this.resultCounts = resultCounts; } protected EvaluatorResult( @Nonnull final Column<?> results, @Nonnull final Column<?> resultCounts) { this.resultColumns = ImmutableList.of(results); this.results = results; this.resultCounts = resultCounts; } /** * @return a {@link ReadableColumn} containing results from {@link #evaluateColumn} */ public ReadableColumn getResults() { return results; } /** * @return a {@link List<Column>} containing results from {@link #evaluateColumn} */ public List<Column<?>> getResultColumns() { return resultColumns; } /** * @return a {@link Column<Integer>} containing result counts from {@link #evaluateColumn} */ public ReadableColumn getResultCounts() { return resultCounts; } } /** * Evaluate an expression over an entire TupleBatch and return the column(s) of results, along with a column of result * counts from each tuple. This method cannot take state into consideration. * * @param tb the tuples to be input to this expression * @param outputSchema the schema that results from this evaluator belongs to, used to determine the tuple batch size * @return an {@link EvaluatorResult} containing the results and result counts of evaluating this expression on the * entire TupleBatch * @throws DbException */ public EvaluatorResult evalTupleBatch(final TupleBatch tb, final Schema outputSchema) throws DbException { final Column<?> constCounts = new ConstantValueColumn(1, Type.INT_TYPE, tb.numTuples()); int batchSize = TupleUtils.getBatchSize(outputSchema); // Critical optimization: return a zero-copy reference to a column referenced by a pure `VariableExpression`. if (isCopyFromInput() && batchSize >= tb.numTuples()) { ExpressionOperator op = getExpression().getRootExpressionOperator(); return new EvaluatorResult( tb.getDataColumns().get(((VariableExpression) op).getColumnIdx()), constCounts); } /* For multivalued expressions, we may get more than batchSize results, so we need to pass in a `TupleBuffer` rather * than a `ColumnBuilder` to `eval()`, and return a `List<Column>` rather than a `Column` of results. */ final TupleBuffer resultsBuffer = new TupleBuffer( Schema.ofFields(getExpression().getOutputName(), getOutputType()), batchSize); final WritableColumn resultsWriter = resultsBuffer.asWritableColumn(0); // For single-valued expressions, the Java expression will never attempt to write to `countsWriter`. WritableColumn countsWriter = null; if (getExpression().isMultiValued()) { countsWriter = ColumnFactory.allocateColumn(Type.INT_TYPE); } for (int rowIdx = 0; rowIdx < tb.numTuples(); ++rowIdx) { /* Hack, tb is either Expression.INPUT or Expression.STATE */ eval(tb, rowIdx, tb, rowIdx, resultsWriter, countsWriter); } final Column<?> resultCounts; if (getExpression().isMultiValued()) { resultCounts = ((ColumnBuilder<?>) countsWriter).build(); } else { resultCounts = constCounts; } return new EvaluatorResult(resultsBuffer, resultCounts); } /** * @return the script */ public String getScript() { return script; } }