/*
* Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
*
* Project and contact information: http://www.cascading.org/
*
* This file is part of the Cascading project.
*
* Cascading is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Cascading is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Cascading. If not, see <http://www.gnu.org/licenses/>.
*/
package cascading.operation;
import cascading.flow.FlowProcess;
import cascading.tuple.Fields;
/**
* Interface Operation is the base interface for all functions applied to {@link cascading.tuple.Tuple} streams.
* <p/>
* Specifically {@link Function}, {@link Filter}, {@link Aggregator}, {@link Buffer}, and {@link Assertion}.
* <p/>
* Use {@link BaseOperation} for a convenient way to create new Operation types.
*
* @see cascading.operation.BaseOperation
* @see Function
* @see Filter
* @see Aggregator
* @see Buffer
* @see Assertion
*/
public interface Operation<C>
{
/**
* Field ANY denotes that a given Operation will take any number of argument values.
* <p/>
* Its value is {@link Integer#MAX_VALUE}. Presumably intended to be returned from {@link #getNumArgs()};
* confirm against implementations.
*/
int ANY = Integer.MAX_VALUE;
/**
* The prepare method is called immediately before the current Operation instance is put into play processing Tuples.
* This method should initialize any resources that can be shut down or released in the
* {@link #cleanup(cascading.flow.FlowProcess, OperationCall)} method.
* <p/>
* This method may be called more than once during the life of this instance. But it will never be called multiple times
* without a cleanup invocation immediately before subsequent invocations.
* <p/>
* If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
* cluster side, not client side.
*
* @param flowProcess   the {@link FlowProcess} instance passed in by the caller
* @param operationCall the {@link OperationCall} instance passed in by the caller, parameterized on the same
*                      type {@code C} as this Operation
*/
void prepare( FlowProcess flowProcess, OperationCall<C> operationCall );
/**
* The cleanup method is called immediately after the current Operation instance is taken out of play processing Tuples.
* This method should shut down any resources created or initialized during the
* {@link #prepare(cascading.flow.FlowProcess, OperationCall)} method.
* <p/>
* This method may be called more than once during the life of this instance. But it will never be called multiple times
* without a prepare invocation before.
* <p/>
* If the Flow this Operation instance belongs to will execute on a remote cluster, this method will be called
* cluster side, not client side.
*
* @param flowProcess   the {@link FlowProcess} instance passed in by the caller
* @param operationCall the {@link OperationCall} instance passed in by the caller, parameterized on the same
*                      type {@code C} as this Operation
*/
void cleanup( FlowProcess flowProcess, OperationCall<C> operationCall );
/**
* Returns the fields created by this Operation instance. If this instance is a {@link Filter}, it should always
* return {@link Fields#ALL}.
*
* @return the {@link Fields} instance declaring the fields this Operation creates
*/
Fields getFieldDeclaration();
/**
* The minimum number of arguments this Operation expects from the calling {@link cascading.pipe.Each} or
* {@link cascading.pipe.Every} Operator.
* <p/>
* Operations should be willing to receive more arguments than expected, but should ignore them if they are unused,
* instead of failing.
*
* @return the minimum number of arguments expected, as an int
*/
int getNumArgs();
/**
* Returns {@code true} if this Operation instance can safely execute on the same 'record' multiple
* times, {@code false} otherwise.
* <p/>
* That is, this Operation is safe if it has no side-effects, or if it does, they are idempotent.
* <p/>
* If seeing the same 'record' more than once can cause errors (internally or externally),
* this method must return {@code false}.
*
* @return {@code true} if processing the same 'record' more than once is safe, {@code false} otherwise
*/
boolean isSafe();
}