/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.components.fuse;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import javax.inject.Named;
import org.datacleaner.api.Alias;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.Initialize;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.api.Validate;
import org.datacleaner.components.categories.CompositionCategory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Transformer that emits one output value per configured {@link CoalesceUnit}.
 * For every unit, the values of the unit's input columns are read from the
 * incoming row and handed to a {@link CoalesceFunction}; whatever that function
 * returns becomes the value of the corresponding output column.
 */
@Named("Fuse / Coalesce fields")
@Alias("Coalesce multiple fields")
@Description("Lets you combine multiple fields into one, selecting the first value that is non-null.\n\n"
        + "Use it to fuse data streams coming from different filter requirements. You can define new fields whose "
        + "values represent whatever is available from one of the input streams.\n\n"
        + "Or use it to identify the most accurate or most recent observation, "
        + "if multiple entries have been recorded in separate columns.")
@Categorized(CompositionCategory.class)
public class CoalesceMultipleFieldsTransformer implements Transformer {

    /** Name of the configured property that holds the coalesce units. */
    public static final String PROPERTY_UNITS = "Units";

    private static final Logger logger = LoggerFactory.getLogger(CoalesceMultipleFieldsTransformer.class);

    /** All input columns made available to the units. */
    @Configured
    InputColumn<?>[] _input;

    /** The units as configured; each one yields exactly one output column. */
    @Configured(value = PROPERTY_UNITS)
    CoalesceUnit[] _units;

    /** Flag forwarded to the {@link CoalesceFunction} created in {@link #init()}. */
    @Configured
    @Description("Consider empty strings (\"\") as null also?")
    boolean considerEmptyStringAsNull = true;

    /** Function applied to the values of each unit; created in {@link #init()}. */
    private CoalesceFunction _coalesceFunction;

    /** Result of applying {@code updateInputColumns(_input)} to each configured unit; set in {@link #init()}. */
    private CoalesceUnit[] _initializedUnits;

    /**
     * Creates an unconfigured transformer; the configured fields are expected
     * to be populated afterwards.
     */
    public CoalesceMultipleFieldsTransformer() {
    }

    /**
     * Creates a transformer with the given units. Only the units are stored;
     * the input columns are left untouched by this constructor.
     *
     * @param units
     *            the coalesce units to use
     */
    public CoalesceMultipleFieldsTransformer(final CoalesceUnit... units) {
        _units = units;
    }

    /**
     * Prepares the transformer for {@link #transform(InputRow)}: builds the
     * coalesce function and derives one initialized unit per configured unit.
     */
    @Initialize
    public void init() {
        _coalesceFunction = new CoalesceFunction(considerEmptyStringAsNull);

        final CoalesceUnit[] resolvedUnits = new CoalesceUnit[_units.length];
        _initializedUnits = resolvedUnits;

        int slot = 0;
        for (final CoalesceUnit configuredUnit : _units) {
            resolvedUnits[slot] = configuredUnit.updateInputColumns(_input);
            slot++;
        }
    }

    /**
     * Dry-runs the per-unit column update that {@link #init()} performs, so
     * that a unit which cannot be updated against the current input columns
     * surfaces its failure at validation time. The results are discarded.
     */
    @Validate
    public void validate() {
        for (int i = 0; i < _units.length; i++) {
            _units[i].updateInputColumns(_input);
        }
    }

    /**
     * Configures the transformer from the given units alone: the input columns
     * become the concatenation, in unit order, of every unit's input columns,
     * and the units themselves are stored as the configured units.
     *
     * @param units
     *            the coalesce units to configure the transformer with
     */
    public void configureUsingCoalesceUnits(final CoalesceUnit... units) {
        final List<InputColumn<?>> collected = new ArrayList<>();
        for (final CoalesceUnit unit : units) {
            for (final InputColumn<?> column : unit.getInputColumns()) {
                collected.add(column);
            }
        }

        _input = collected.toArray(new InputColumn<?>[collected.size()]);
        _units = units;
    }

    /**
     * Describes the output: one column per configured unit, typed with the
     * output data type reported by that unit after its columns were updated.
     *
     * @return the output column definition
     */
    @Override
    public OutputColumns getOutputColumns() {
        final OutputColumns columns = new OutputColumns(_units.length, Object.class);

        int position = 0;
        for (final CoalesceUnit configuredUnit : _units) {
            // init() may not have run yet, so the unit is re-derived here
            // rather than read from _initializedUnits.
            final CoalesceUnit resolvedUnit = configuredUnit.updateInputColumns(_input);
            columns.setColumnType(position, resolvedUnit.getOutputDataType());
            position++;
        }

        return columns;
    }

    /**
     * Computes the coalesced value of every initialized unit for a single row.
     *
     * @param inputRow
     *            the row to read the input values from
     * @return an array holding one coalesced value per unit, in unit order
     */
    @Override
    public Object[] transform(final InputRow inputRow) {
        final int unitCount = _initializedUnits.length;
        final Object[] coalesced = new Object[unitCount];

        int index = 0;
        for (final CoalesceUnit unit : _initializedUnits) {
            final List<Object> candidates = inputRow.getValues(unit.getInputColumns());
            coalesced[index] = _coalesceFunction.coalesce(candidates);
            index++;
        }

        // Guarded so the array is only rendered to text when debug logging is on.
        if (logger.isDebugEnabled()) {
            logger.debug("Coalesced values for row {}: {}", inputRow.getId(), Arrays.toString(coalesced));
        }

        return coalesced;
    }
}