/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job.runner;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.datacleaner.api.Concurrent;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputRowCollector;
import org.datacleaner.api.Transformer;
import org.datacleaner.data.TransformedInputRow;
import org.datacleaner.descriptors.ProvidedPropertyDescriptor;
import org.datacleaner.job.FilterOutcomes;
import org.datacleaner.job.TransformerJob;
import org.datacleaner.job.concurrent.ThreadLocalOutputRowCollector;
import org.datacleaner.job.concurrent.ThreadLocalOutputRowCollector.Listener;
/**
 * {@link RowProcessingConsumer} implementation for {@link Transformer}s.
 *
 * For every consumed row the transformer is invoked. Values returned from
 * {@link Transformer#transform(InputRow)} are added to a
 * {@link TransformedInputRow} and handed to the next step of the
 * {@link RowProcessingChain}. Values that the transformer emits through its
 * {@link OutputRowCollector} properties are received by a per-invocation
 * listener and handed to the chain as rows of their own.
 */
final class TransformerConsumer extends AbstractRowProcessingConsumer implements RowProcessingConsumer {

    private final Transformer _transformer;
    private final TransformerJob _transformerJob;
    private final InputColumn<?>[] _inputColumns;
    private final boolean _concurrent;
    private final Set<ProvidedPropertyDescriptor> _outputRowCollectorProperties;
    private RowIdGenerator _idGenerator;

    /**
     * Creates a consumer for a single transformer.
     *
     * @param transformer
     *            the transformer instance that will be invoked for each row
     * @param transformerJob
     *            the job definition of the transformer; its descriptor is
     *            used to resolve the {@link Concurrent} annotation and the
     *            provided {@link OutputRowCollector} properties
     * @param inputColumns
     *            the columns reported by {@link #getRequiredInput()}
     * @param publisher
     *            the publisher passed on to the superclass
     */
    public TransformerConsumer(final Transformer transformer, final TransformerJob transformerJob,
            final InputColumn<?>[] inputColumns, final RowProcessingPublisher publisher) {
        super(publisher, transformerJob, transformerJob);
        _transformer = transformer;
        _transformerJob = transformerJob;
        _inputColumns = inputColumns;
        _concurrent = determineConcurrent();
        _outputRowCollectorProperties =
                _transformerJob.getDescriptor().getProvidedPropertiesByType(OutputRowCollector.class);
    }

    /**
     * Resolves whether the transformer may be invoked concurrently, based on
     * the {@link Concurrent} annotation found via the job's descriptor.
     *
     * @return the annotation's value, or true when the annotation is absent
     */
    private boolean determineConcurrent() {
        final Concurrent concurrent = _transformerJob.getDescriptor().getAnnotation(Concurrent.class);
        if (concurrent == null) {
            // transformers are by default concurrent
            return true;
        }
        return concurrent.value();
    }

    /**
     * Sets the row id generator to use, when creating new transformed records.
     *
     * @param idGenerator
     *            the generator that supplies virtual row ids; when none has
     *            been set, a fallback id is computed from the source row's id
     */
    public void setRowIdGenerator(final RowIdGenerator idGenerator) {
        _idGenerator = idGenerator;
    }

    @Override
    public boolean isConcurrent() {
        return _concurrent;
    }

    @Override
    public InputColumn<?>[] getRequiredInput() {
        return _inputColumns;
    }

    @Override
    public Transformer getComponent() {
        return _transformer;
    }

    @Override
    public InputColumn<?>[] getOutputColumns() {
        return _transformerJob.getOutput();
    }

    /**
     * Invokes the transformer for a row. If the transformer returns a
     * non-null array of values, these are added to a
     * {@link TransformedInputRow} based on the incoming row, which is then
     * passed to the chain. A null return value means that nothing is passed
     * on from this method (rows may still have been passed on by the output
     * row collector listener).
     */
    @Override
    public void consumeInternal(final InputRow row, final int distinctCount, final FilterOutcomes outcomes,
            final RowProcessingChain chain) {
        final InputColumn<?>[] outputColumns = getOutputColumns();
        try {
            // Registration happens inside the try block on purpose: it can
            // throw after listeners have already been installed on some
            // collectors, and those must still be removed in the finally
            // block.
            registerListener(_transformer, row, outcomes, chain, outputColumns);

            final Object[] values = _transformer.transform(row);
            if (values == null) {
                return;
            }
            final TransformedInputRow resultRow = TransformedInputRow.of(row);
            addValuesToRow(resultRow, outputColumns, values);
            chain.processNext(resultRow, distinctCount, outcomes);
        } finally {
            unregisterListener(_transformer);
        }
    }

    /**
     * Removes the listener from every {@link ThreadLocalOutputRowCollector}
     * held in the transformer's output row collector properties. Collectors
     * of any other type are ignored.
     */
    private void unregisterListener(final Transformer transformer) {
        for (final ProvidedPropertyDescriptor descriptor : _outputRowCollectorProperties) {
            final OutputRowCollector outputRowCollector = (OutputRowCollector) descriptor.getValue(transformer);
            if (outputRowCollector instanceof ThreadLocalOutputRowCollector) {
                ((ThreadLocalOutputRowCollector) outputRowCollector).removeListener();
            }
        }
    }

    /**
     * Installs a fresh listener on each output row collector of the
     * transformer. The listener turns every emitted value array into a row
     * and passes it to the chain with a distinct count of 1 and a clone of
     * the filter outcomes. Does nothing when the transformer has no output
     * row collector properties.
     *
     * @throws UnsupportedOperationException
     *             if a collector is not a {@link ThreadLocalOutputRowCollector}
     */
    private void registerListener(final Transformer transformer, final InputRow row, final FilterOutcomes outcomes,
            final RowProcessingChain chain, final InputColumn<?>[] outputColumns) {
        if (_outputRowCollectorProperties.isEmpty()) {
            return;
        }

        final Listener listener = new Listener() {
            // counts the value arrays received by this listener instance
            private final AtomicInteger recordNumber = new AtomicInteger(0);

            @Override
            public void onValues(final Object[] values) {
                final int recordNo = recordNumber.incrementAndGet();
                final boolean isFirst = recordNo == 1;
                final TransformedInputRow resultRow;
                if (isFirst) {
                    // retain the first record's id
                    resultRow = TransformedInputRow.of(row);
                } else {
                    // subsequent records get a virtual row id of their own
                    resultRow = new TransformedInputRow(row, getNextVirtualRowId(row, recordNo));
                }
                addValuesToRow(resultRow, outputColumns, values);
                final FilterOutcomes clonedOutcomeSink = outcomes.clone();
                chain.processNext(resultRow, 1, clonedOutcomeSink);
            }
        };

        for (final ProvidedPropertyDescriptor descriptor : _outputRowCollectorProperties) {
            final OutputRowCollector outputRowCollector = (OutputRowCollector) descriptor.getValue(transformer);
            if (outputRowCollector instanceof ThreadLocalOutputRowCollector) {
                ((ThreadLocalOutputRowCollector) outputRowCollector).setListener(listener);
            } else {
                throw new UnsupportedOperationException("Unsupported output row collector type: " + outputRowCollector);
            }
        }
    }

    /**
     * Produces the id for an additional (non-first) record emitted for a row.
     *
     * @param row
     *            the source row; only used when no id generator is set
     * @param recordNo
     *            the 1-based number of the emitted record; only used when no
     *            id generator is set
     * @return the next id of the configured generator, or a fallback id
     *         derived from the row id and record number
     */
    private long getNextVirtualRowId(final InputRow row, final int recordNo) {
        if (_idGenerator == null) {
            // this can more or less never happen, except in test cases or in
            // cases where the consumers are programmatically being used outside
            // of an AnalysisRunner. There's a risk then here that we get the
            // same row ID twice, but that's life :-P
            final long offset = Long.MAX_VALUE;
            // the long literal guarantees 64-bit multiplication regardless of
            // the declared return type of getId()
            final long hiLoIntervalOffset = row.getId() * 10000L;
            return offset - hiLoIntervalOffset + recordNo;
        }
        return _idGenerator.nextVirtualRowId();
    }

    /**
     * Adds one value per output column to the result row. The arrays are
     * expected to be of equal length (checked by an assertion); when
     * assertions are disabled and fewer values than columns are given, the
     * remaining columns get a null value, and surplus values are ignored.
     */
    private void addValuesToRow(final TransformedInputRow resultRow, final InputColumn<?>[] outputColumns,
            final Object[] values) {
        assert outputColumns.length == values.length;

        // add output values to row.
        for (int i = 0; i < outputColumns.length; i++) {
            final Object value = i < values.length ? values[i] : null;
            resultRow.addValue(outputColumns[i], value);
        }
    }

    @Override
    public TransformerJob getComponentJob() {
        return _transformerJob;
    }

    @Override
    public String toString() {
        return "TransformerConsumer[" + _transformer + "]";
    }
}