/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job.builder;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.MutableColumn;
import org.apache.metamodel.schema.MutableTable;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.EqualsBuilder;
import org.apache.metamodel.util.HasNameMapper;
import org.datacleaner.api.Analyzer;
import org.datacleaner.api.Component;
import org.datacleaner.api.HasDistributionAdvice;
import org.datacleaner.api.HasOutputDataStreams;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.MappedProperty;
import org.datacleaner.api.OutputDataStream;
import org.datacleaner.api.Renderable;
import org.datacleaner.configuration.DataCleanerConfiguration;
import org.datacleaner.configuration.InjectionManager;
import org.datacleaner.connection.OutputDataStreamDatastore;
import org.datacleaner.data.MetaModelInputColumn;
import org.datacleaner.data.TransformedInputColumn;
import org.datacleaner.descriptors.AnalyzerDescriptor;
import org.datacleaner.descriptors.ComponentDescriptor;
import org.datacleaner.descriptors.ConfiguredPropertyDescriptor;
import org.datacleaner.descriptors.RemoteDescriptorProvider;
import org.datacleaner.descriptors.RemoteTransformerDescriptor;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.ComponentConfiguration;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.ComponentValidationException;
import org.datacleaner.job.FilterOutcome;
import org.datacleaner.job.HasComponentRequirement;
import org.datacleaner.job.HasFilterOutcomes;
import org.datacleaner.job.ImmutableComponentConfiguration;
import org.datacleaner.job.OutputDataStreamJob;
import org.datacleaner.job.SimpleComponentRequirement;
import org.datacleaner.lifecycle.LifeCycleHelper;
import org.datacleaner.util.CollectionUtils2;
import org.datacleaner.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Abstract {@link ComponentBuilder} for components of an {@link AnalysisJob}.
*
* @param <D>
* the component descriptor type (for instance
* {@link AnalyzerDescriptor})
* @param <E>
* the actual component type (for instance {@link Analyzer})
* @param <B>
* the concrete {@link ComponentBuilder} (for instance
* {@link AnalyzerComponentBuilder})
*/
@SuppressWarnings("unchecked")
public abstract class AbstractComponentBuilder<D extends ComponentDescriptor<E>, E extends Component, B extends ComponentBuilder>
implements ComponentBuilder, Renderable {
// Shared SLF4J logger for this class.
private static final Logger logger = LoggerFactory.getLogger(AbstractComponentBuilder.class);
// Listeners notified by onRemoved(); managed through add/removeRemovalListener.
private final List<ComponentRemovalListener<ComponentBuilder>> _removalListeners;
// Output data streams as last seen by getOutputDataStreams().
private final List<OutputDataStream> _outputDataStreams = new ArrayList<>();
// Job builders created on demand by getOutputDataStreamJobBuilder(OutputDataStream), keyed by stream.
private final Map<OutputDataStream, AnalysisJobBuilder> _outputDataStreamJobs = new HashMap<>();
// Descriptor of the component being built; never null (checked in the constructor).
private final D _descriptor;
// Component instance created from the descriptor; configured property values are read from and written to it.
private final E _configurableBean;
// Metadata key/value pairs, kept in insertion order (LinkedHashMap).
private final Map<String, String> _metadataProperties;
// Owning job builder; replaceable through setAnalysisJobBuilder.
private AnalysisJobBuilder _analysisJobBuilder;
// Current requirement of the component; set through setComponentRequirement (null is accepted there).
private ComponentRequirement _componentRequirement;
// Name of the component, see getName()/setName().
private String _name;
/**
 * Creates a builder for the component described by {@code descriptor}.
 *
 * @param analysisJobBuilder
 *            the job builder that owns this component builder, not null
 * @param descriptor
 *            the descriptor used to instantiate the component, not null
 * @param builderClass
 *            the class that the concrete builder must be an instance of,
 *            not null
 * @throws IllegalArgumentException
 *             if an argument is null or if the runtime class of this
 *             builder does not correspond to {@code builderClass}
 */
public AbstractComponentBuilder(final AnalysisJobBuilder analysisJobBuilder, final D descriptor,
        final Class<?> builderClass) {
    if (analysisJobBuilder == null) {
        throw new IllegalArgumentException("analysisJobBuilder cannot be null");
    }
    if (descriptor == null) {
        throw new IllegalArgumentException("descriptor cannot be null");
    }
    if (builderClass == null) {
        throw new IllegalArgumentException("builderClass cannot be null");
    }
    if (!ReflectionUtils.is(getClass(), builderClass)) {
        throw new IllegalArgumentException("Builder class does not correspond to actual class of builder");
    }

    _removalListeners = new ArrayList<>(1);
    _metadataProperties = new LinkedHashMap<>();
    _analysisJobBuilder = analysisJobBuilder;
    _descriptor = descriptor;
    _configurableBean = descriptor.newInstance();

    // needs both the descriptor and the metadata map to be in place
    initMetadataProperties();
}
/**
 * Builds the lookup key used to match elements of a property value before
 * and after a change.
 *
 * Virtual input columns are keyed by name; every other object is keyed by
 * its hash code. A {@code null} element (array values may contain nulls)
 * gets the fixed key {@code "null"} instead of causing a
 * {@link NullPointerException}.
 *
 * @param object
 *            the element to create a key for, may be null
 * @return the key, never null
 */
private static String getKey(final Object object) {
    if (object == null) {
        return "null";
    }
    if (object instanceof InputColumn<?>) {
        final InputColumn<?> inputColumn = (InputColumn<?>) object;
        if (inputColumn.isVirtualColumn()) {
            return inputColumn.getName();
        }
    }
    return String.valueOf(object.hashCode());
}
/**
 * Copies the elements of a list into a newly created array with the given
 * component type.
 *
 * @param clazz
 *            the component type of the array to create
 * @param baseList
 *            the elements to copy, in order
 * @return an array of the same size as {@code baseList}
 */
private static <E> E[] getArray(final Class<E> clazz, final List<?> baseList) {
    final E[] array = (E[]) Array.newInstance(clazz, baseList.size());
    int index = 0;
    for (final Object element : baseList) {
        Array.set(array, index, element);
        index++;
    }
    return array;
}
/**
 * Adds the built-in metadata properties. For remote transformers the name
 * of the providing server is stored under the key "source"; for all other
 * descriptors nothing is added.
 */
private void initMetadataProperties() {
    if (!(_descriptor instanceof RemoteTransformerDescriptor)) {
        return;
    }
    final RemoteTransformerDescriptor<?> remoteDescriptor = (RemoteTransformerDescriptor<?>) _descriptor;
    final RemoteDescriptorProvider provider = remoteDescriptor.getRemoteDescriptorProvider();
    _metadataProperties.put("source", provider.getServerData().getServerName());
}
/**
 * Gets the metadata properties of this component builder.
 *
 * @return the internal (mutable) map of metadata properties, never null
 */
@Override
public final Map<String, String> getMetadataProperties() {
    return this._metadataProperties;
}
/**
 * Replaces all metadata properties. The built-in properties (see
 * {@code initMetadataProperties()}) are re-created first and may be
 * overridden by entries of the given map.
 *
 * @param metadataProperties
 *            the new properties, or null to keep only the built-in ones
 */
@Override
public void setMetadataProperties(final Map<String, String> metadataProperties) {
    // Take a snapshot before clearing: the argument may be the very map
    // handed out by getMetadataProperties(), in which case clearing the
    // internal map would also empty the input and lose every entry.
    Map<String, String> snapshot = null;
    if (metadataProperties != null) {
        snapshot = new LinkedHashMap<String, String>(metadataProperties);
    }
    _metadataProperties.clear();
    initMetadataProperties();
    if (snapshot != null) {
        _metadataProperties.putAll(snapshot);
    }
}
/**
 * Looks up a single metadata property.
 *
 * @param key
 *            the property key
 * @return the stored value, or null if the key has no mapping
 */
@Override
public final String getMetadataProperty(final String key) {
    final String value = _metadataProperties.get(key);
    return value;
}
/**
 * Stores a metadata property, replacing any previous value for the key.
 *
 * @param key
 *            the property key
 * @param value
 *            the property value
 */
@Override
public final void setMetadataProperty(final String key, final String value) {
    this._metadataProperties.put(key, value);
}
/**
 * Removes a metadata property, if present.
 *
 * @param key
 *            the key of the property to remove
 */
@Override
public final void removeMetadataProperty(final String key) {
    this._metadataProperties.remove(key);
}
/**
 * @return the job builder this component builder currently belongs to
 */
@Override
public final AnalysisJobBuilder getAnalysisJobBuilder() {
    return this._analysisJobBuilder;
}
/**
 * Moves this component builder to another job builder.
 *
 * @param analysisJobBuilder
 *            the new owning job builder (not validated here)
 */
@Override
public void setAnalysisJobBuilder(final AnalysisJobBuilder analysisJobBuilder) {
    this._analysisJobBuilder = analysisJobBuilder;
}
/**
 * @return the descriptor of the component being built, never null
 */
@Override
public final D getDescriptor() {
    return this._descriptor;
}
/**
 * @return the component instance that was created from the descriptor
 *         when this builder was constructed
 */
@Override
public final E getComponentInstance() {
    return this._configurableBean;
}
/**
 * Gets the component instance held by this builder.
 *
 * @return the same instance as {@link #getComponentInstance()}
 * @deprecated use {@link #getComponentInstance()} instead.
 */
@Deprecated
public final E getConfigurableBean() {
    return _configurableBean;
}
/**
 * Applies a set of property values and fires a single
 * {@link #onConfigurationChanged()} notification if at least one of them
 * differed from the current value.
 *
 * @param configuredProperties
 *            the property values to apply, keyed by property descriptor
 */
@Override
public void setConfiguredProperties(final Map<ConfiguredPropertyDescriptor, Object> configuredProperties) {
    boolean anyChange = false;
    for (final Map.Entry<ConfiguredPropertyDescriptor, Object> property : configuredProperties.entrySet()) {
        if (setConfiguredPropertyIfChanged(property.getKey(), property.getValue())) {
            anyChange = true;
        }
    }
    if (!anyChange) {
        return;
    }
    onConfigurationChanged();
}
/**
 * Checks that every configured property is set and that the component
 * passes validation.
 *
 * @param throwException
 *            whether to throw an exception rather than returning false
 *            when the component is not correctly configured
 * @return true if the component is configured; false otherwise (only when
 *         {@code throwException} is false)
 * @throws ComponentValidationException
 *             declared for validation failures when {@code throwException}
 *             is true; any {@link RuntimeException} from validation is
 *             rethrown as-is
 * @throws UnconfiguredConfiguredPropertyException
 *             if a required property is unset and {@code throwException}
 *             is true
 */
@Override
public final boolean isConfigured(final boolean throwException)
        throws ComponentValidationException, UnconfiguredConfiguredPropertyException {
    for (final ConfiguredPropertyDescriptor property : _descriptor.getConfiguredProperties()) {
        if (isConfigured(property, throwException)) {
            continue;
        }
        if (throwException) {
            throw new UnconfiguredConfiguredPropertyException(this, property);
        }
        return false;
    }

    try {
        final LifeCycleHelper validator = new LifeCycleHelper(_analysisJobBuilder.getConfiguration(), null, false);
        validator.validate(getDescriptor(), getComponentInstance());
        return true;
    } catch (final RuntimeException e) {
        if (throwException) {
            throw e;
        }
        return false;
    }
}
/**
 * @return the name given to this component, or null if none was set
 */
@Override
public String getName() {
    return this._name;
}
/**
 * Sets the name of this component.
 *
 * @param name
 *            the new name
 */
@Override
public void setName(final String name) {
    this._name = name;
}
/**
 * Non-throwing variant of {@link #isConfigured(boolean)}.
 *
 * @return true if the component is fully configured and valid
 */
@Override
public boolean isConfigured() {
    final boolean throwException = false;
    return isConfigured(throwException);
}
/**
 * Determines whether the component can be distributed. The descriptor
 * must allow it; if the (configured) component additionally implements
 * {@link HasDistributionAdvice}, the component itself has the final say.
 *
 * @return true if the component is distributable
 */
@Override
public boolean isDistributable() {
    if (!getDescriptor().isDistributable()) {
        return false;
    }
    final Component instance = getComponentInstanceForQuestioning();
    if (!(instance instanceof HasDistributionAdvice)) {
        // also covers an unconfigured component (instance == null)
        return true;
    }
    return ((HasDistributionAdvice) instance).isDistributable();
}
/**
 * Checks whether a single property is sufficiently configured. Only
 * required properties are checked; an empty array counts as "not set".
 *
 * @param configuredProperty
 *            the property to check
 * @param throwException
 *            whether to throw instead of returning false
 * @return true if the property is optional or has a value
 * @throws UnconfiguredConfiguredPropertyException
 *             if the property is required but unset and
 *             {@code throwException} is true
 */
@Override
public boolean isConfigured(final ConfiguredPropertyDescriptor configuredProperty, final boolean throwException)
        throws UnconfiguredConfiguredPropertyException {
    if (configuredProperty.isRequired()) {
        final Map<ConfiguredPropertyDescriptor, Object> configuredProperties = getConfiguredProperties();
        Object value = configuredProperties.get(configuredProperty);
        if (configuredProperty.isArray() && value != null) {
            if (Array.getLength(value) == 0) {
                // an empty array is treated like a missing value
                value = null;
            }
        }
        if (value == null) {
            if (throwException) {
                throw new UnconfiguredConfiguredPropertyException(this, configuredProperty);
            } else {
                // parameterized message: no string building unless debug is enabled
                logger.debug("Configured property is not set: {}", configuredProperty);
                return false;
            }
        }
    }
    return true;
}
/**
 * Sets a property identified by its name.
 *
 * @param configuredName
 *            the name of the property as known by the descriptor
 * @param value
 *            the new value
 * @return this builder
 * @throws IllegalArgumentException
 *             if the descriptor has no property with that name
 */
@Override
public B setConfiguredProperty(final String configuredName, final Object value) {
    final ConfiguredPropertyDescriptor property = _descriptor.getConfiguredProperty(configuredName);
    if (property != null) {
        return setConfiguredProperty(property, value);
    }
    throw new IllegalArgumentException("No such configured property: " + configuredName);
}
/**
 * Sets a property value. If the value actually changes, transformer
 * listeners are registered for input column properties and
 * {@link #onConfigurationChanged()} is invoked.
 *
 * @param configuredProperty
 *            the property to set
 * @param value
 *            the new value
 * @return this builder
 */
@Override
public B setConfiguredProperty(final ConfiguredPropertyDescriptor configuredProperty, final Object value) {
    if (setConfiguredPropertyIfChanged(configuredProperty, value)) {
        final boolean inputProperty = configuredProperty.isInputColumn();
        if (inputProperty) {
            registerListenerIfLinkedToTransformer(configuredProperty, value);
        }
        onConfigurationChanged();
    }
    return (B) this;
}
/**
 * Registers a change listener on every transformer of the job that
 * provides one of the transformed input columns contained in
 * {@code value}.
 *
 * @param configuredProperty
 *            the input column property that was set
 * @param value
 *            the value of the property (single column or array)
 */
protected void registerListenerIfLinkedToTransformer(final ConfiguredPropertyDescriptor configuredProperty,
        final Object value) {
    for (final TransformedInputColumn<?> column : getTransformedInputColumns(value)) {
        for (final TransformerComponentBuilder<?> transformer : getAnalysisJobBuilder()
                .getTransformerComponentBuilders()) {
            if (isProvidingColumn(column, transformer)) {
                transformer.addChangeListener(new ComponentBuilderTransformerChangeListener(this, configuredProperty));
            }
        }
    }
}
/**
 * Tells whether a transformer has an output column that equals the given
 * transformed input column.
 *
 * @param transformedInputColumn
 *            the column to look for
 * @param transformer
 *            the transformer whose output columns are inspected
 * @return true if the transformer provides the column
 */
protected boolean isProvidingColumn(final TransformedInputColumn<?> transformedInputColumn,
        final TransformerComponentBuilder<?> transformer) {
    boolean provided = false;
    for (final Object candidate : transformer.getOutputColumns()) {
        if (candidate.equals(transformedInputColumn)) {
            provided = true;
            break;
        }
    }
    return provided;
}
/**
 * Extracts all {@link TransformedInputColumn}s from a property value.
 *
 * @param value
 *            null, a single object or an array of objects
 * @return the transformed input columns found, possibly empty
 */
private List<TransformedInputColumn<?>> getTransformedInputColumns(final Object value) {
    final List<TransformedInputColumn<?>> found = new ArrayList<>();
    if (value == null) {
        return found;
    }
    if (!value.getClass().isArray()) {
        if (ReflectionUtils.is(value.getClass(), TransformedInputColumn.class)) {
            found.add((TransformedInputColumn<?>) value);
        }
        return found;
    }
    for (int index = 0; index < Array.getLength(value); index++) {
        final Object element = Array.get(value, index);
        if (element == null) {
            continue;
        }
        if (ReflectionUtils.is(element.getClass(), TransformedInputColumn.class)) {
            found.add((TransformedInputColumn<?>) element);
        }
    }
    return found;
}
/**
 * Applies a property value to the component, but only if it differs from
 * the current one.
 *
 * This method is meant for internal use: it never calls
 * {@link #onConfigurationChanged()}, so that callers applying several
 * values can fire a single notification afterwards.
 *
 * @param configuredProperty
 *            the property to set, not null
 * @param value
 *            the new value; may be null, a single value or an array
 * @return true if the value was changed or false if it was not
 * @throws IllegalArgumentException
 *             if {@code configuredProperty} is null or the value (or one
 *             of its array elements) is not of the property's base type
 */
protected boolean setConfiguredPropertyIfChanged(final ConfiguredPropertyDescriptor configuredProperty,
        final Object value) {
    if (configuredProperty == null) {
        throw new IllegalArgumentException("configuredProperty cannot be null");
    }

    final Object previousValue = configuredProperty.getValue(_configurableBean);
    if (EqualsBuilder.equals(previousValue, value)) {
        // nothing to do
        return false;
    }

    if (value != null) {
        final boolean typeMatches;
        if (configuredProperty.isArray() && value.getClass().isArray()) {
            // check element by element; null elements are tolerated but
            // reported
            boolean elementsMatch = true;
            final int size = Array.getLength(value);
            for (int index = 0; index < size; index++) {
                final Object element = Array.get(value, index);
                if (element == null) {
                    logger.warn("Element no. {} in array (size {}) is null! Value passed to {}",
                            new Object[] { index, size, configuredProperty });
                    continue;
                }
                if (!ReflectionUtils.is(element.getClass(), configuredProperty.getBaseType())) {
                    elementsMatch = false;
                }
            }
            typeMatches = elementsMatch;
        } else {
            typeMatches = ReflectionUtils.is(value.getClass(), configuredProperty.getBaseType());
        }

        if (!typeMatches) {
            throw new IllegalArgumentException(
                    "Invalid value type: " + value.getClass().getName() + ", expected: " + configuredProperty
                            .getBaseType().getName());
        }
    }

    // dependent (mapped) properties must be adjusted before the new value
    // replaces the old one
    synchronizeDependentProperties(configuredProperty, value, previousValue);
    configuredProperty.setValue(_configurableBean, value);
    return true;
}
/**
 * Updates every property that is declared (via {@link MappedProperty}) to
 * be mapped to the given property. Nothing happens when the property had
 * no previous value.
 *
 * @param property
 *            the property that is about to change
 * @param newValue
 *            the value that is about to be set
 * @param currentValue
 *            the value before the change
 */
private void synchronizeDependentProperties(final ConfiguredPropertyDescriptor property, final Object newValue,
        final Object currentValue) {
    if (currentValue == null) {
        return;
    }
    for (final ConfiguredPropertyDescriptor candidate : getDescriptor()
            .getConfiguredPropertiesByAnnotation(MappedProperty.class)) {
        final String mappedTo = candidate.getAnnotation(MappedProperty.class).value();
        if (property.getName().equals(mappedTo)) {
            doSynchronizeProperties(newValue, currentValue, candidate);
        }
    }
}
/**
 * Re-aligns a dependent (mapped) property with the new value of the
 * property it is mapped to: entries that refer to elements no longer
 * present are dropped, and the remaining entries are reordered to follow
 * the new value.
 *
 * @param newValue
 *            the value about to be set on the mapped-to property; may be
 *            null, a single value or an array
 * @param currentValue
 *            the previous value of the mapped-to property, not null
 * @param property
 *            the dependent property to update
 */
private void doSynchronizeProperties(final Object newValue, final Object currentValue,
        final ConfiguredPropertyDescriptor property) {
    final Object dependentValue = property.getValue(_configurableBean);
    if (dependentValue == null) {
        return;
    }
    if (newValue == null) {
        // Nothing is left to map to, for single values as well as for
        // arrays (the array case used to fail with a NullPointerException).
        property.setValue(_configurableBean, null);
        return;
    }
    if (!currentValue.getClass().isArray()) {
        // a single value was replaced by another one: keep the dependent
        return;
    }

    // Remember which dependent element belonged to which original element.
    // Only positions present in both arrays can be mapped; a shorter
    // dependent array must not cause an ArrayIndexOutOfBoundsException.
    final int mappedLength = Math.min(Array.getLength(currentValue), Array.getLength(dependentValue));
    final Map<String, Object> originalMappings = new HashMap<>();
    for (int i = 0; i < mappedLength; i++) {
        originalMappings.put(getKey(Array.get(currentValue, i)), Array.get(dependentValue, i));
    }

    // Array properties also accept a single (non-array) value; treat it as
    // a one-element array.
    final Object newValues = newValue.getClass().isArray() ? newValue : new Object[] { newValue };
    final int newLength = Array.getLength(newValues);
    final List<Object> synchronizedDependents = new ArrayList<>(newLength);
    for (int i = 0; i < newLength; i++) {
        // elements without a previous mapping get a null dependent
        synchronizedDependents.add(originalMappings.get(getKey(Array.get(newValues, i))));
    }
    property.setValue(_configurableBean, getArray(property.getBaseType(), synchronizedDependents));
}
/**
 * Collects the current values of all configured properties that have a
 * non-null value.
 *
 * @return an unmodifiable snapshot map of property descriptor to value
 */
@Override
public Map<ConfiguredPropertyDescriptor, Object> getConfiguredProperties() {
    final Map<ConfiguredPropertyDescriptor, Object> values = new HashMap<>();
    for (final ConfiguredPropertyDescriptor property : getDescriptor().getConfiguredProperties()) {
        final Object propertyValue = getConfiguredProperty(property);
        if (propertyValue == null) {
            continue;
        }
        values.put(property, propertyValue);
    }
    return Collections.unmodifiableMap(values);
}
/**
 * Applies the values of a {@link ComponentConfiguration} to all
 * properties of the descriptor and fires a single
 * {@link #onConfigurationChanged()} notification if anything changed.
 *
 * @param configuration
 *            the configuration to read the property values from
 */
@Override
public void setConfiguredProperties(final ComponentConfiguration configuration) {
    boolean anyChange = false;
    for (final ConfiguredPropertyDescriptor property : getDescriptor().getConfiguredProperties()) {
        final Object newValue = configuration.getProperty(property);
        anyChange |= setConfiguredPropertyIfChanged(property, newValue);
    }
    if (anyChange) {
        onConfigurationChanged();
    }
}
/**
 * Callback invoked after the requirement of the component has changed.
 * The default implementation does nothing; sub-classes may override it
 * to react to the change.
 */
public void onRequirementChanged() {
    // intentionally empty
}
/**
 * Callback invoked after the configuration of the component has changed.
 * The default implementation does nothing; sub-classes may override it
 * to react to the change.
 */
public void onConfigurationChanged() {
    // intentionally empty
}
/**
 * Reads the current value of a property from the component instance.
 *
 * @param propertyDescriptor
 *            the property to read
 * @return the current value, possibly null
 */
@Override
public Object getConfiguredProperty(final ConfiguredPropertyDescriptor propertyDescriptor) {
    // use the non-deprecated accessor; getConfigurableBean() merely
    // delegates to it
    return propertyDescriptor.getValue(getComponentInstance());
}
/**
 * Removes/clears all input columns: array properties are set to an empty
 * array, single-valued properties to null.
 */
@Override
public void clearInputColumns() {
    for (final ConfiguredPropertyDescriptor inputProperty : getDescriptor().getConfiguredPropertiesForInput()) {
        final Object cleared = inputProperty.isArray() ? new InputColumn[0] : null;
        setConfiguredProperty(inputProperty, cleared);
    }
}
/**
 * Adds an input column to the default input property of the component.
 *
 * @param inputColumn
 *            the column to add
 * @return this builder
 * @throws IllegalArgumentException
 *             if the input column data type family doesn't match the
 *             types accepted by this transformer.
 */
@Override
public B addInputColumn(final InputColumn<?> inputColumn) throws IllegalArgumentException {
    return addInputColumn(inputColumn, getDefaultConfiguredPropertyForInput());
}
/**
 * Finds the one input property that columns are added to when no property
 * is specified. Required input properties are considered first; optional
 * ones only if there are no required ones.
 *
 * @return the single candidate property
 * @throws UnsupportedOperationException
 *             if there is not exactly one candidate
 */
@Override
public ConfiguredPropertyDescriptor getDefaultConfiguredPropertyForInput() throws UnsupportedOperationException {
    final D descriptor = getDescriptor();
    Collection<ConfiguredPropertyDescriptor> candidates = descriptor.getConfiguredPropertiesForInput(false);
    if (candidates.isEmpty()) {
        // no required input columns - fall back to the optional ones
        candidates = descriptor.getConfiguredPropertiesForInput(true);
    }
    if (candidates.size() != 1) {
        throw new UnsupportedOperationException(
                "There are " + candidates.size() + " named input columns in \"" + descriptor.getDisplayName()
                        + "\", please specify which one to configure");
    }
    return candidates.iterator().next();
}
/**
 * Adds an input column to a specific input property. This is the main
 * "addInputColumn" method that the other similar methods delegate to.
 *
 * @param inputColumn
 *            the column to add
 * @param propertyDescriptor
 *            the input column property to add it to
 * @return this builder
 * @throws IllegalArgumentException
 *             if the property is null or not an input column property, or
 *             if the data type of the column is not accepted by it
 */
@Override
public B addInputColumn(final InputColumn<?> inputColumn, final ConfiguredPropertyDescriptor propertyDescriptor)
        throws IllegalArgumentException {
    if (propertyDescriptor == null || !propertyDescriptor.isInputColumn()) {
        throw new IllegalArgumentException("Property is not of InputColumn type: " + propertyDescriptor);
    }

    final Class<?> expectedDataType = propertyDescriptor.getTypeArgument(0);
    final boolean typeConstrained = expectedDataType != null && expectedDataType != Object.class;
    if (typeConstrained) {
        // the column's data type must fit the property's type parameter
        final Class<?> actualDataType = inputColumn.getDataType();
        if (!ReflectionUtils.is(actualDataType, expectedDataType, false)) {
            throw new IllegalArgumentException(
                    "Unsupported InputColumn type: " + actualDataType + ", expected: " + expectedDataType);
        }
    }

    final Object existing = getConfiguredProperty(propertyDescriptor);
    final Object updated;
    if (existing != null) {
        updated = CollectionUtils2.array(InputColumn.class, existing, inputColumn);
    } else if (propertyDescriptor.isArray()) {
        updated = new InputColumn[] { inputColumn };
    } else {
        updated = inputColumn;
    }
    setConfiguredProperty(propertyDescriptor, updated);
    return (B) this;
}
/**
 * Adds several input columns to a specific input property. This is the
 * main "addInputColumns" method that the other similar methods delegate
 * to.
 *
 * @param inputColumns
 *            the columns to add; null is treated like an empty collection
 * @param propertyDescriptor
 *            the input column property to add them to
 * @return this builder
 * @throws IllegalArgumentException
 *             if the property is null or not an input column property, if
 *             the data type of a column is not accepted by it, or if more
 *             than one column is given for an unset single-valued property
 */
@Override
public B addInputColumns(final Collection<? extends InputColumn<?>> inputColumns,
        final ConfiguredPropertyDescriptor propertyDescriptor) {
    if (propertyDescriptor == null || !propertyDescriptor.isInputColumn()) {
        throw new IllegalArgumentException("Property is not of InputColumn type: " + propertyDescriptor);
    }

    // Normalize null up front. The previous null check was only reached
    // after the collection had already been dereferenced, so a null
    // argument always ended in a NullPointerException.
    final Collection<? extends InputColumn<?>> columns;
    if (inputColumns == null) {
        columns = Collections.emptyList();
    } else {
        columns = inputColumns;
    }

    final Class<?> expectedDataType = propertyDescriptor.getTypeArgument(0);
    if (expectedDataType != null && expectedDataType != Object.class) {
        // check input column type parameter compatibility
        for (final InputColumn<?> inputColumn : columns) {
            final Class<?> actualDataType = inputColumn.getDataType();
            if (!ReflectionUtils.is(actualDataType, expectedDataType, false)) {
                throw new IllegalArgumentException(
                        "Unsupported InputColumn type: " + actualDataType + ", expected: " + expectedDataType);
            }
        }
    }

    Object newInputColumns = getConfiguredProperty(propertyDescriptor);
    if (newInputColumns == null) {
        if (propertyDescriptor.isArray()) {
            newInputColumns = columns.toArray(new InputColumn[columns.size()]);
        } else {
            if (columns.isEmpty()) {
                newInputColumns = null;
            } else if (columns.size() > 1) {
                throw new IllegalArgumentException(
                        "Property type is a single InputColumn, but a collection of more than one element was given");
            } else {
                newInputColumns = columns.iterator().next();
            }
        }
    } else {
        // append to the value that is already configured
        final InputColumn<?>[] asArray = columns.toArray(new InputColumn[columns.size()]);
        newInputColumns = CollectionUtils2.array(InputColumn.class, newInputColumns, asArray);
    }
    setConfiguredProperty(propertyDescriptor, newInputColumns);
    return (B) this;
}
/**
 * Adds several input columns to the default input property.
 *
 * @param inputColumns
 *            the columns to add
 * @return this builder
 */
@Override
public B addInputColumns(final Collection<? extends InputColumn<?>> inputColumns) {
    final ConfiguredPropertyDescriptor defaultProperty = getDefaultConfiguredPropertyForInput();
    addInputColumns(inputColumns, defaultProperty);
    return (B) this;
}
/**
 * Varargs variant of {@link #addInputColumns(Collection)}.
 *
 * @param inputColumns
 *            the columns to add to the default input property
 * @return this builder
 */
@Override
public B addInputColumns(final InputColumn<?>... inputColumns) {
    final List<InputColumn<?>> columnList = Arrays.asList(inputColumns);
    addInputColumns(columnList, getDefaultConfiguredPropertyForInput());
    return (B) this;
}
/**
 * Removes an input column from the only input property of the component.
 *
 * @param inputColumn
 *            the column to remove
 * @return this builder
 * @throws UnsupportedOperationException
 *             if the component does not have exactly one input property
 */
@Override
public B removeInputColumn(final InputColumn<?> inputColumn) {
    final Set<ConfiguredPropertyDescriptor> inputProperties = getDescriptor().getConfiguredPropertiesForInput();
    if (inputProperties.size() != 1) {
        throw new UnsupportedOperationException("There are " + inputProperties.size()
                + " named input columns, please specify which one to configure");
    }
    return removeInputColumn(inputColumn, inputProperties.iterator().next());
}
/**
 * Removes an input column from a specific input property. Does nothing
 * if the property currently has no value.
 *
 * @param inputColumn
 *            the column to remove
 * @param propertyDescriptor
 *            the property to remove it from
 * @return this builder
 */
@Override
public B removeInputColumn(final InputColumn<?> inputColumn,
        final ConfiguredPropertyDescriptor propertyDescriptor) {
    final Object existing = getConfiguredProperty(propertyDescriptor);
    if (existing == null) {
        return (B) this;
    }

    Object remaining = existing;
    if (existing == inputColumn) {
        // single value holding exactly this column instance
        remaining = null;
    } else if (existing.getClass().isArray()) {
        remaining = CollectionUtils.arrayRemove(existing, inputColumn);
        if (!propertyDescriptor.isArray() && Array.getLength(remaining) == 0) {
            remaining = null;
        }
    }

    setConfiguredProperty(propertyDescriptor, remaining);
    // the value is additionally written directly to the component instance
    propertyDescriptor.setValue(getComponentInstance(), remaining);
    return (B) this;
}
/**
 * Makes this component require a named outcome of a filter.
 *
 * @param filterComponentBuilder
 *            the filter that produces the outcome
 * @param category
 *            the name of the outcome category
 * @throws IllegalArgumentException
 *             if the filter is this builder itself or has no outcome with
 *             the given category name
 */
public void setRequirement(final FilterComponentBuilder<?, ?> filterComponentBuilder, final String category) {
    if (filterComponentBuilder == this) {
        throw new IllegalArgumentException("Requirement source and sink cannot be the same");
    }
    final FilterOutcome requiredOutcome = filterComponentBuilder.getFilterOutcome(category);
    if (requiredOutcome != null) {
        setRequirement(requiredOutcome);
        return;
    }
    throw new IllegalArgumentException("No such category found in available outcomes: " + category);
}
/**
 * Makes this component require an outcome of a filter, identified by its
 * category enum value.
 *
 * @param filterComponentBuilder
 *            the filter that produces the outcome
 * @param category
 *            the outcome category
 * @throws IllegalArgumentException
 *             if the filter is this builder itself or the category is not
 *             among the filter's outcome categories
 */
public void setRequirement(final FilterComponentBuilder<?, ?> filterComponentBuilder, final Enum<?> category) {
    if (filterComponentBuilder == this) {
        throw new IllegalArgumentException("Requirement source and sink cannot be the same");
    }
    final EnumSet<?> availableCategories = filterComponentBuilder.getDescriptor().getOutcomeCategories();
    final boolean known = availableCategories.contains(category);
    if (!known) {
        throw new IllegalArgumentException("No such category found in available outcomes: " + category);
    }
    final FilterOutcome requiredOutcome = filterComponentBuilder.getFilterOutcome(category);
    setRequirement(requiredOutcome);
}
/**
 * Makes this component require the given filter outcome, or clears the
 * requirement when {@code outcome} is null.
 *
 * @param outcome
 *            the outcome to require, or null for no requirement
 * @throws IllegalArgumentException
 *             if requiring the outcome would create a cyclic dependency
 */
public void setRequirement(final FilterOutcome outcome) throws IllegalArgumentException {
    if (!validateRequirementCandidate(outcome)) {
        throw new IllegalArgumentException("Cyclic dependency detected when setting requirement: " + outcome);
    }
    // A non-null argument is always a FilterOutcome, so no further type
    // check (and no "unsupported outcome type" branch) is needed.
    if (outcome == null) {
        setComponentRequirement(null);
    } else {
        setComponentRequirement(new SimpleComponentRequirement(outcome));
    }
}
/**
 * Checks whether outcomes of the given source could be used as a
 * requirement of this component. Only the first outcome of the source is
 * validated.
 *
 * @param outcomeSource
 *            the candidate source, may be null
 * @return true if the source is null, has no outcomes, or its first
 *         outcome is a valid requirement candidate
 */
public boolean validateRequirementSource(final HasFilterOutcomes outcomeSource) {
    if (outcomeSource == null) {
        return true;
    }
    final Collection<FilterOutcome> candidates = outcomeSource.getFilterOutcomes();
    final boolean nothingToCheck = candidates == null || candidates.isEmpty();
    return nothingToCheck || validateRequirementCandidate(candidates.iterator().next());
}
/**
 * Checks whether a requirement could be set on this component. Only
 * {@link SimpleComponentRequirement}s are inspected (through their
 * outcome); every other requirement is accepted.
 *
 * @param requirement
 *            the candidate requirement
 * @return false if the requirement's outcome is not a valid candidate
 */
public boolean validateRequirementCandidate(final ComponentRequirement requirement) {
    if (!(requirement instanceof SimpleComponentRequirement)) {
        return true;
    }
    final FilterOutcome requiredOutcome = ((SimpleComponentRequirement) requirement).getOutcome();
    return validateRequirementCandidate(requiredOutcome);
}
/**
 * Checks whether an outcome could be required by this component without
 * creating a cycle, i.e. that neither the source of the outcome nor any
 * source it (transitively) depends on is this builder.
 *
 * @param requirement
 *            the candidate outcome, may be null
 * @return true if the outcome is null or does not lead back to this
 *         builder
 */
public boolean validateRequirementCandidate(final FilterOutcome requirement) {
    if (requirement == null) {
        return true;
    }
    final HasFilterOutcomes outcomeSource = requirement.getSource();
    if (outcomeSource == this) {
        return false;
    }
    if (!(outcomeSource instanceof HasComponentRequirement)) {
        return true;
    }
    final ComponentRequirement sourceRequirement =
            ((HasComponentRequirement) outcomeSource).getComponentRequirement();
    if (sourceRequirement == null) {
        return true;
    }
    // follow the dependencies of the source recursively
    for (final FilterOutcome dependency : sourceRequirement.getProcessingDependencies()) {
        if (!validateRequirementCandidate(dependency)) {
            return false;
        }
    }
    return true;
}
/**
 * Collects the input columns of all input properties of the component.
 * Null elements inside array values are skipped (and logged).
 *
 * @return an unmodifiable list of the configured input columns
 */
public List<InputColumn<?>> getInputColumns() {
    final List<InputColumn<?>> columns = new LinkedList<>();
    for (final ConfiguredPropertyDescriptor inputProperty : getDescriptor().getConfiguredPropertiesForInput()) {
        final Object value = getConfiguredProperty(inputProperty);
        if (value == null) {
            continue;
        }
        if (!value.getClass().isArray()) {
            columns.add((InputColumn<?>) value);
            continue;
        }
        final int size = Array.getLength(value);
        for (int index = 0; index < size; index++) {
            final InputColumn<?> element = (InputColumn<?>) Array.get(value, index);
            if (element != null) {
                columns.add(element);
            } else {
                logger.warn("Element no. {} in array (size {}) is null! Value read from {}",
                        new Object[] { index, size, inputProperty });
            }
        }
    }
    return Collections.unmodifiableList(columns);
}
/**
 * @return the requirement currently set on this component, possibly null
 */
@Override
public ComponentRequirement getComponentRequirement() {
    return this._componentRequirement;
}
/**
 * Sets the requirement of this component and invokes
 * {@link #onRequirementChanged()} if it differs from the current one.
 *
 * @param requirement
 *            the new requirement, may be null
 */
@Override
public void setComponentRequirement(final ComponentRequirement requirement) {
    if (EqualsBuilder.equals(_componentRequirement, requirement)) {
        // unchanged - no notification
        return;
    }
    _componentRequirement = requirement;
    onRequirementChanged();
}
/**
 * @return the columns of {@link #getInputColumns()} as an array
 */
@Override
public InputColumn<?>[] getInput() {
    final List<InputColumn<?>> columns = getInputColumns();
    final InputColumn<?>[] target = new InputColumn[columns.size()];
    return columns.toArray(target);
}
/**
 * Notification method invoked when this {@link ComponentBuilder} is
 * removed. Runs the subclass hook first and then notifies the registered
 * removal listeners.
 */
protected final void onRemoved() {
    onRemovedInternal();
    // Iterate over a snapshot so that a listener may unregister itself (or
    // register another listener) from within its callback without causing
    // a ConcurrentModificationException or a skipped listener.
    final List<ComponentRemovalListener<ComponentBuilder>> listeners = new ArrayList<>(_removalListeners);
    for (final ComponentRemovalListener<ComponentBuilder> removalListener : listeners) {
        removalListener.onRemove(this);
    }
}
/**
 * Subclass hook that {@link #onRemoved()} invokes before the registered
 * removal listeners are notified.
 */
protected abstract void onRemovedInternal();
/**
 * Registers a listener that is notified when this builder is removed.
 *
 * @param componentRemovalListener
 *            the listener to add
 */
@Override
public void addRemovalListener(final ComponentRemovalListener<ComponentBuilder> componentRemovalListener) {
    this._removalListeners.add(componentRemovalListener);
}
/**
 * Unregisters a removal listener.
 *
 * @param componentRemovalListener
 *            the listener to remove
 * @return true if the listener was registered
 */
@Override
public boolean removeRemovalListener(final ComponentRemovalListener<ComponentBuilder> componentRemovalListener) {
    final boolean wasRegistered = _removalListeners.remove(componentRemovalListener);
    return wasRegistered;
}
/**
 * Prepares the component instance so that it can be asked questions that
 * depend on its configuration: configured and provided properties are
 * assigned and the component is validated (but not initialized).
 *
 * @return the prepared component, or null if the component is not
 *         configured or fails validation
 */
protected Component getComponentInstanceForQuestioning() {
    if (!isConfigured()) {
        // an unconfigured component cannot be questioned
        return null;
    }

    final D componentDescriptor = getDescriptor();
    final Component instance = getComponentInstance();

    final DataCleanerConfiguration dcConfiguration = getAnalysisJobBuilder().getConfiguration();
    final InjectionManager injectionManager =
            dcConfiguration.getEnvironment().getInjectionManagerFactory().getInjectionManager(dcConfiguration);
    final LifeCycleHelper helper = new LifeCycleHelper(injectionManager, false);

    // make the instance look like a really configured component
    final ComponentConfiguration componentConfiguration =
            new ImmutableComponentConfiguration(getConfiguredPropertiesForQuestioning());
    helper.assignConfiguredProperties(componentDescriptor, instance, componentConfiguration);
    helper.assignProvidedProperties(componentDescriptor, instance);

    try {
        // validation only - the component is deliberately not initialized
        helper.validate(componentDescriptor, instance);
    } catch (final RuntimeException e) {
        return null;
    }
    return instance;
}
/**
 * Supplies the property values that are assigned to the component by
 * {@link #getComponentInstanceForQuestioning()}. By default these are the
 * currently configured properties.
 *
 * @return the property values to assign
 */
protected Map<ConfiguredPropertyDescriptor, Object> getConfiguredPropertiesForQuestioning() {
    final Map<ConfiguredPropertyDescriptor, Object> properties = getConfiguredProperties();
    return properties;
}
/**
 * Gets the job builder of the output data stream with the given name.
 *
 * @param outputDataStreamName
 *            the name of the output data stream
 * @return the job builder for that stream
 * @throws IllegalArgumentException
 *             if no output data stream with that name is found
 */
@Override
public AnalysisJobBuilder getOutputDataStreamJobBuilder(final String outputDataStreamName) {
    final OutputDataStream stream = getOutputDataStream(outputDataStreamName);
    if (stream != null) {
        return getOutputDataStreamJobBuilder(stream);
    }
    throw new IllegalArgumentException("No such OutputDataStream: " + outputDataStreamName);
}
/**
 * Gets the job builder that consumes the given output data stream,
 * creating and caching it on first access. For an existing job builder
 * whose stream is not consumed, the source columns are replaced when
 * their names no longer match the column names of the stream's table.
 *
 * @param outputDataStream
 *            the output data stream
 * @return the job builder for the stream
 */
@Override
public AnalysisJobBuilder getOutputDataStreamJobBuilder(final OutputDataStream outputDataStream) {
    AnalysisJobBuilder analysisJobBuilder = _outputDataStreamJobs.get(outputDataStream);
    if (analysisJobBuilder == null) {
        assert _outputDataStreams.contains(outputDataStream);
        final Table table = outputDataStream.getTable();
        analysisJobBuilder = new AnalysisJobBuilder(_analysisJobBuilder.getConfiguration(), _analysisJobBuilder);
        analysisJobBuilder.setDatastore(new OutputDataStreamDatastore(outputDataStream));
        analysisJobBuilder.addSourceColumns(table.getColumns());
        _outputDataStreamJobs.put(outputDataStream, analysisJobBuilder);
    } else {
        // Work on a snapshot: getSourceColumns() may hand out a live view
        // of the job builder's column list (not visible here - confirm),
        // in which case removing columns while iterating it by index would
        // skip every other column.
        final List<MetaModelInputColumn> sourceColumns = new ArrayList<>(analysisJobBuilder.getSourceColumns());
        final String[] sourceColumnsNames = new String[sourceColumns.size()];
        for (int i = 0; i < sourceColumns.size(); i++) {
            sourceColumnsNames[i] = sourceColumns.get(i).getName();
        }
        // If one of the components has changed its output column names,
        // the change is not visible in the source columns of the job
        // builder that represents the output stream. Therefore, we check
        // if there are any changes in the names of the columns. See issue
        // #1616 (github).
        final Table table = outputDataStream.getTable();
        final String[] outputStreamColumnNames = table.getColumnNames();
        if (!Arrays.equals(sourceColumnsNames, outputStreamColumnNames)) {
            // avoid triggering listeners when the output stream is consumed
            if (!isOutputDataStreamConsumed(outputDataStream)) {
                for (final MetaModelInputColumn sourceColumn : sourceColumns) {
                    analysisJobBuilder.removeSourceColumn(sourceColumn);
                }
                // add the new source columns
                final Column[] columns = table.getColumns();
                analysisJobBuilder.addSourceColumns(columns);
            }
        }
    }
    return analysisJobBuilder;
}
/**
 * Finds the output data stream that is backed by the given table.
 *
 * @param dataStreamTable
 *            the table to look for, may be null
 * @return the first matching stream, or null if the table is null or no
 *         stream matches
 */
@Override
public OutputDataStream getOutputDataStream(final Table dataStreamTable) {
    if (dataStreamTable == null) {
        return null;
    }
    return getOutputDataStreams().stream()
            .filter(stream -> dataStreamTable.equals(stream.getTable()))
            .findFirst()
            .orElse(null);
}
/**
 * Finds the output data stream that carries the given name.
 *
 * @param name
 *            the stream name to look for, may be null
 * @return the first stream whose name equals the given name, or null if
 *         the name is null or no stream matches
 */
@Override
public OutputDataStream getOutputDataStream(final String name) {
    if (name == null) {
        return null;
    }
    final List<OutputDataStream> candidates = getOutputDataStreams();
    final int candidateCount = candidates.size();
    for (int index = 0; index < candidateCount; index++) {
        final OutputDataStream candidate = candidates.get(index);
        final boolean sameName = name.equals(candidate.getName());
        if (sameName) {
            return candidate;
        }
    }
    return null;
}
/**
 * Gets the output data streams of this component, refreshing the cached
 * streams against what the component instance currently reports.
 *
 * @return a new list holding the cached streams; an empty list if there is
 *         no component instance to question or if it does not implement
 *         {@link HasOutputDataStreams}
 */
@Override
public List<OutputDataStream> getOutputDataStreams() {
    final Component questioned = getComponentInstanceForQuestioning();
    if (!(questioned instanceof HasOutputDataStreams)) {
        // covers both "not configured yet" (null) and components that
        // cannot have output data streams
        return Collections.emptyList();
    }

    final OutputDataStream[] reported = ((HasOutputDataStreams) questioned).getOutputDataStreams();
    final List<OutputDataStream> reportedList = Arrays.asList(reported);
    if (_outputDataStreams.equals(reportedList)) {
        // nothing changed since the last call
        return new ArrayList<>(_outputDataStreams);
    }

    final List<String> reportedNames = CollectionUtils.map(reportedList, new HasNameMapper());
    final List<String> cachedNames = CollectionUtils.map(_outputDataStreams, new HasNameMapper());
    if (!reportedNames.equals(cachedNames)) {
        // the set of streams itself changed: start over
        _outputDataStreams.clear();
        _outputDataStreamJobs.clear();
        _outputDataStreams.addAll(reportedList);
        return new ArrayList<>(_outputDataStreams);
    }

    // same stream names: update the cached streams one by one rather than
    // replacing everything
    for (int index = 0; index < reported.length; index++) {
        final OutputDataStream cachedStream = _outputDataStreams.get(index);
        final Table cachedTable = cachedStream.getTable();
        final OutputDataStream reportedStream = reported[index];

        if (!(cachedTable instanceof MutableTable)) {
            // table cannot be updated in place, so swap the stream
            _outputDataStreams.set(index, reportedStream);
            continue;
        }

        // only a consumed stream has a job builder that needs to be told
        // about column changes
        final AnalysisJobBuilder consumer =
                isOutputDataStreamConsumed(cachedStream) ? getOutputDataStreamJobBuilder(cachedStream) : null;
        updateStream((MutableTable) cachedTable, consumer, reportedStream);
    }
    return new ArrayList<>(_outputDataStreams);
}
/**
 * Brings the columns of an existing (mutable) stream table in line with
 * the table of a new stream, reusing existing column objects that match by
 * name.
 *
 * @param existingTable
 *            the table to update in place
 * @param jobBuilder
 *            job builder to notify about removed and added source columns,
 *            or null if nobody needs to be notified
 * @param newStream
 *            the stream whose table defines the wanted set of columns
 */
private void updateStream(final MutableTable existingTable, final AnalysisJobBuilder jobBuilder,
        final OutputDataStream newStream) {
    final Column[] wantedColumns = newStream.getTable().getColumns();
    final List<Column> resultingColumns = new ArrayList<>();
    final List<Column> freshColumns = new ArrayList<>();

    for (int position = 0; position < wantedColumns.length; position++) {
        final Column wanted = wantedColumns[position];
        final Column matched = existingTable.getColumnByName(wanted.getName());
        final boolean unmatched = matched == null;

        final MutableColumn target = (MutableColumn) (unmatched ? wanted : matched);
        if (unmatched) {
            freshColumns.add(wanted);
        } else {
            // take it out of the table so that later iterations cannot
            // match against it again
            existingTable.removeColumn(matched);
        }

        // align the column with the new definition
        target.setTable(existingTable);
        target.setColumnNumber(position);
        target.setType(wanted.getType());
        resultingColumns.add(target);
    }

    if (jobBuilder != null) {
        // whatever is still in the table was not matched above, i.e. it
        // has been removed
        for (final Column leftover : existingTable.getColumns()) {
            jobBuilder.removeSourceColumn(leftover);
        }
        // and these are the columns that were not there before
        for (final Column fresh : freshColumns) {
            jobBuilder.addSourceColumn(fresh);
        }
    }

    // finally install the new set of columns on the table
    existingTable.setColumns(resultingColumns);
}
/**
 * Determines whether the given output data stream is consumed, meaning
 * that a job builder is registered for it and that job builder holds at
 * least one component.
 *
 * @param outputDataStream
 *            the stream to check
 * @return true if a job builder with one or more components exists for the
 *         stream, false otherwise
 */
@Override
public boolean isOutputDataStreamConsumed(final OutputDataStream outputDataStream) {
    final AnalysisJobBuilder consumer = _outputDataStreamJobs.get(outputDataStream);
    return consumer != null && consumer.getComponentCount() > 0;
}
/**
 * Builds the jobs for all output data streams that are currently
 * consumed.
 *
 * @return one {@link OutputDataStreamJob} per consumed stream, in stream
 *         order; an empty array if there are none
 */
@Override
public OutputDataStreamJob[] getOutputDataStreamJobs() {
    final List<OutputDataStreamJob> jobs = new ArrayList<>();
    final List<OutputDataStream> streams = getOutputDataStreams();
    if (streams != null) {
        for (final OutputDataStream stream : streams) {
            if (!isOutputDataStreamConsumed(stream)) {
                // streams without consumers do not produce a job
                continue;
            }
            final AnalysisJobBuilder consumer = getOutputDataStreamJobBuilder(stream);
            jobs.add(new LazyOutputDataStreamJob(stream, consumer));
        }
    }
    return jobs.toArray(new OutputDataStreamJob[jobs.size()]);
}
}