package edu.washington.escience.myria.operator; import java.io.Serializable; import java.util.Arrays; import java.util.Map; import java.util.Objects; import java.util.Set; import javax.annotation.Nonnull; import javax.annotation.Nullable; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import edu.washington.escience.myria.DbException; import edu.washington.escience.myria.MyriaConstants; import edu.washington.escience.myria.MyriaConstants.ProfilingMode; import edu.washington.escience.myria.Schema; import edu.washington.escience.myria.functions.PythonFunctionRegistrar; import edu.washington.escience.myria.parallel.LocalFragment; import edu.washington.escience.myria.parallel.LocalFragmentResourceManager; import edu.washington.escience.myria.parallel.LocalSubQuery; import edu.washington.escience.myria.parallel.SubQueryId; import edu.washington.escience.myria.parallel.WorkerSubQuery; import edu.washington.escience.myria.profiling.ProfilingLogger; import edu.washington.escience.myria.storage.TupleBatch; /** * Abstract class for implementing operators. * * @author slxu * * Currently, the operator api design requires that each single operator instance should be executed within a * single thread. * * No multi-thread synchronization is considered. * */ public abstract class Operator implements Serializable { /** * logger for this class. */ private static final org.slf4j.Logger LOGGER = org.slf4j.LoggerFactory.getLogger(Operator.class); /** Required for Java serialization. */ private static final long serialVersionUID = 1L; /** * the name of the operator from json queries (or set from hand-constructed query plans). */ private String opName = ""; /** * The unique operator id. */ private Integer opId; /** * A bit denoting whether the operator is open (initialized). */ private boolean open = false; /** * The {@link Schema} of the tuples produced by this {@link Operator}. */ private Schema schema; /** * End of stream (EOS). Initialized to true. */ private volatile boolean eos = true; /** * End of iteration (EOI). */ private boolean eoi = false; /** * Environmental variables during execution. */ private ImmutableMap<String, Object> execEnvVars; /** * @return return environmental variables */ public ImmutableMap<String, Object> getExecEnvVars() { return execEnvVars; } /** * Logger for profiling. */ private ProfilingLogger profilingLogger; /** * Cache for profiling mode. */ private Set<ProfilingMode> profilingMode; /** * @return the profilingLogger */ public ProfilingLogger getProfilingLogger() { Preconditions.checkNotNull(profilingLogger); return profilingLogger; } /** * * @return PythonFunctionRegistrar for operator. * @throws DbException in case of error. */ public PythonFunctionRegistrar getPythonFunctionRegistrar() throws DbException { PythonFunctionRegistrar pyFuncRegistrar = null; if (execEnvVars == null || !execEnvVars.containsKey(MyriaConstants.EXEC_ENV_VAR_TEST_MODE)) { if (getLocalSubQuery() instanceof WorkerSubQuery) { pyFuncRegistrar = ((WorkerSubQuery) getLocalSubQuery()).getWorker().getPythonFunctionRegistrar(); } } return pyFuncRegistrar; } /** * @return return subquery id. */ public SubQueryId getSubQueryId() { return ((LocalFragmentResourceManager) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_FRAGMENT_RESOURCE_MANAGER)) .getFragment() .getLocalSubQuery() .getSubQueryId(); } /** * @return the executing {@link LocalSubQuery} that this {@link Operator} is part of. */ public final LocalSubQuery getLocalSubQuery() { LocalFragment qstt = getFragment(); if (qstt == null) { return null; } else { return qstt.getLocalSubQuery(); } } /** * @return the executing {@link LocalFragment} that this {@link Operator} is part of. */ public LocalFragment getFragment() { if (execEnvVars == null) { return null; } else { return ((LocalFragmentResourceManager) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_FRAGMENT_RESOURCE_MANAGER)) .getFragment(); } } /** * fragment id of this operator. */ private Integer fragmentId; /** * @return fragment Id. */ public int getFragmentId() { Objects.requireNonNull(fragmentId, "fragmentId"); return fragmentId; } /** * @param fragmentId fragment Id. */ public void setFragmentId(final int fragmentId) { this.fragmentId = fragmentId; } /** * @return the profiling modes. */ @Nonnull protected Set<ProfilingMode> getProfilingMode() { // make sure hard coded test will pass if (execEnvVars == null) { return ImmutableSet.of(); } if (profilingMode == null) { LocalFragmentResourceManager lfrm = (LocalFragmentResourceManager) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_FRAGMENT_RESOURCE_MANAGER); if (lfrm == null) { return ImmutableSet.of(); } LocalFragment fragment = lfrm.getFragment(); if (fragment == null) { return ImmutableSet.of(); } profilingMode = fragment.getLocalSubQuery().getProfilingMode(); } return profilingMode; } /** * Closes this iterator. * * @throws DbException if any errors occur */ public final void close() throws DbException { // Ensures that a future call to next() or nextReady() will fail // outputBuffer = null; if (LOGGER.isDebugEnabled()) { LOGGER.debug( "Operator {} closed, #output TBs: {}, # output tuples: {}", this, numOutputTBs, numOutputTuples); } open = false; eos = true; eoi = false; Exception errors = null; try { cleanup(); } catch (DbException | RuntimeException e) { errors = e; } catch (Throwable e) { errors = new DbException(e); } final Operator[] children = getChildren(); if (children != null) { for (final Operator child : children) { if (child != null) { try { child.close(); } catch (Throwable e) { if (errors != null) { errors.addSuppressed(e); } else { if (e instanceof DbException || e instanceof RuntimeException) { errors = (Exception) e; } else { errors = new DbException(e); } } } } } } if (errors != null) { if (errors instanceof RuntimeException) { throw (RuntimeException) errors; } else { throw (DbException) errors; } } } /** * Check if EOS is set. * * This method is non-blocking. * * @return if the Operator is at EOS (End of Stream) * */ public final boolean eos() { return eos; } /** * @return if the operator received an EOI (End of Iteration) */ public final boolean eoi() { return eoi; } /** * @return return the children Operators of this operator. If there is only one child, return an array of only one * element. For join operators, the order of the children is not important. But they should be consistent * among multiple calls. */ public abstract Operator[] getChildren(); /** * process EOS and EOI logic. */ protected void checkEOSAndEOI() { // this is the implementation for ordinary operators, e.g. join, project. // some operators have their own logics, e.g. LeafOperator, IDBController. // so they should override this function Operator[] children = getChildren(); childrenEOI = getChildrenEOI(); boolean hasEOI = false; if (children.length > 0) { boolean allEOS = true; int count = 0; for (int i = 0; i < children.length; ++i) { if (children[i].eos()) { childrenEOI[i] = true; } else { allEOS = false; if (children[i].eoi()) { hasEOI = true; childrenEOI[i] = true; children[i].setEOI(false); } } if (childrenEOI[i]) { count++; } } if (allEOS) { setEOS(); } if (count == children.length && hasEOI) { // only emit EOI if it actually received at least one EOI eoi = true; Arrays.fill(childrenEOI, false); } } } /** * Check if currently there's any TupleBatch available for pull. * * This method is non-blocking. * * If the thread is interrupted during the processing of nextReady, the interrupt status will be kept. * * @throws DbException if any problem * * @return if currently there's output for pulling. * */ public final TupleBatch nextReady() throws DbException { if (!open) { throw new DbException("Operator not yet open"); } if (eos() || eoi()) { return null; } if (Thread.interrupted()) { Thread.currentThread().interrupt(); return null; } long startTime = -1; if (getProfilingMode().contains(ProfilingMode.QUERY)) { startTime = profilingLogger.getTime(this); } TupleBatch result = null; try { do { result = fetchNextReady(); // XXX while or not while? For a single thread operator, while sounds more efficient // generally } while (result != null && result.numTuples() <= 0); } catch (RuntimeException | DbException e) { throw e; } catch (Exception e) { throw new DbException(e); } if (getProfilingMode().contains(ProfilingMode.QUERY)) { int numberOfTupleReturned = -1; if (result != null) { numberOfTupleReturned = result.numTuples(); } profilingLogger.recordEvent(this, numberOfTupleReturned, startTime); } if (result == null) { checkEOSAndEOI(); } else { numOutputTBs++; numOutputTuples += result.numTuples(); } return result; } /** * A simple statistic. The number of output tuples generated by this Operator. */ private long numOutputTuples; /** * @return the number of output tuples. */ public final long getNumOutputTuples() { return numOutputTuples; } /** * A simple statistic. The number of output TBs generated by this Operator. */ private long numOutputTBs; /** * open the operator and do initializations. * * @param execEnvVars the environment variables of the execution unit. * * @throws DbException if any error occurs */ public final void open(final Map<String, Object> execEnvVars) throws DbException { // open the children first if (open) { // XXX Do some error handling to multi-open? throw new DbException("Operator (opName=" + getOpName() + ") already open."); } if (execEnvVars == null) { this.execEnvVars = null; } else { this.execEnvVars = ImmutableMap.copyOf(execEnvVars); } final Operator[] children = getChildren(); if (children != null) { for (final Operator child : children) { if (child != null) { child.open(execEnvVars); } } } eos = false; eoi = false; numOutputTBs = 0; numOutputTuples = 0; // do my initialization try { init(this.execEnvVars); } catch (DbException | RuntimeException e) { throw e; } catch (Exception e) { throw new DbException(e); } open = true; if (getProfilingMode().size() > 0) { if (getLocalSubQuery() instanceof WorkerSubQuery) { profilingLogger = ((WorkerSubQuery) getLocalSubQuery()).getWorker().getProfilingLogger(); } } } /** * Mark the end of an iteration. * * @param eoi the new value of eoi. */ public final void setEOI(final boolean eoi) { this.eoi = eoi; } /** * @return true if this operator is open. */ public final boolean isOpen() { return open; } /** * Do the initialization of this operator. * * @param execEnvVars execution environment variables * @throws Exception if any error occurs */ protected void init(final ImmutableMap<String, Object> execEnvVars) throws Exception {}; /** * Do the clean up, release resources. * * @throws Exception if any error occurs */ protected void cleanup() throws Exception {}; /** * Generate next output TupleBatch if possible. Return null immediately if currently no output can be generated. * * Do not block the execution thread in this method, including sleep, wait on locks, etc. * * @throws Exception if any error occurs * * @return next ready output TupleBatch. null if either EOS or no output TupleBatch can be generated currently. */ protected abstract TupleBatch fetchNextReady() throws Exception; /** * Explicitly set EOS for this operator. * * Operators should not be able to unset an already set EOS except reopen it. */ protected final void setEOS() { if (eos()) { return; } eos = true; } /** * Attempt to produce the {@link Schema} of the tuples generated by this operator. This function must handle cases * like <code>null</code> children or arguments, and return <code>null</code> if there is not enough information to * produce the schema. * * @return the {@link Schema} of the tuples generated by this operator, or <code>null</code> if the operator does not * yet have enough information to generate the schema. */ protected abstract Schema generateSchema(); /** * @return return the Schema of the output tuples of this operator. * */ public final Schema getSchema() { if (schema == null) { schema = generateSchema(); } return schema; } /** * This method is blocking. * * @param children the Operators which are to be set as the children(child) of this operator. */ public abstract void setChildren(Operator[] children); /** * Store if the children have meet EOI. */ private boolean[] childrenEOI = null; /** * @return children EOI status. */ protected final boolean[] getChildrenEOI() { if (childrenEOI == null) { // getChildren() == null indicates a leaf operator, which has its own checkEOSAndEOI() childrenEOI = new boolean[getChildren().length]; } return childrenEOI; } /** * For use in leaf operators. */ protected static final Operator[] NO_CHILDREN = new Operator[] {}; /** * set op name. * * @param name op name */ public void setOpName(final String name) { opName = name; } /** * @param opId the opId to set */ public void setOpId(final int opId) { this.opId = opId; } /** * get op name. * * @return op name */ public String getOpName() { return opName; } /** * @return The id of the node (worker or master) that is running this operator. */ protected int getNodeID() { return (Integer) execEnvVars.get(MyriaConstants.EXEC_ENV_VAR_NODE_ID); } /** * Get the unique operator id. * * @return the op id */ @Nullable public Integer getOpId() { return opId; } }