/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.api;
import org.apache.metamodel.query.Query;
/**
* Interface for components that produce data streams as an output of their
* work.
*
* <p>
* Each output data stream is represented by an {@link OutputDataStream},
* which describes the metadata and structure of that stream.
*
* <p>
* For each output data stream that is relevant (consumed by one or more
* components) the
* {@link #initializeOutputDataStream(OutputDataStream, Query, OutputRowCollector)}
* method is invoked while this component is being prepared for execution;
* see that method for the exact point in the component lifecycle.
*/
public interface HasOutputDataStreams {
/**
* Gets the {@link OutputDataStream}s that this component can produce.
*
* @return an array of the {@link OutputDataStream}s that this component
* can produce
*/
OutputDataStream[] getOutputDataStreams();
/**
* Initializes a single consumed {@link OutputDataStream}.
*
* <p>
* This method is invoked once for each {@link OutputDataStream} that is
* consumed. It is invoked after validation time (see {@link Validate}) and
* before initialization time (see {@link Initialize}) of the component. The
* {@link OutputRowCollector} that is passed in makes it possible for this
* component to post records into the output data stream.
*
* <p>
* If a particular {@link OutputDataStream} is NOT consumed by any following
* components then this method will not be called for that stream.
*
* @param outputDataStream
* the consumed {@link OutputDataStream} that is being
* initialized.
* @param query
* the query posted towards the {@link OutputDataStream}. In most
* cases this will be a plain "SELECT * FROM table" query, but if
* {@link OutputDataStream#getPerformanceCharacteristics()}
* indicates that query optimization is possible, then the query
* may be adapted.
* @param outputRowCollector
* an {@link OutputRowCollector} which the component should use
* to post records into the output stream.
*/
void initializeOutputDataStream(OutputDataStream outputDataStream, Query query,
OutputRowCollector outputRowCollector);
}