/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.job.builder;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import org.datacleaner.api.Component;
import org.datacleaner.api.HideOutputColumns;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.configuration.RemoteServerState;
import org.datacleaner.data.MutableInputColumn;
import org.datacleaner.data.TransformedInputColumn;
import org.datacleaner.descriptors.ComponentDescriptor;
import org.datacleaner.descriptors.RemoteTransformerDescriptor;
import org.datacleaner.descriptors.TransformerDescriptor;
import org.datacleaner.job.AnalysisJobImmutabilizer;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.HasComponentRequirement;
import org.datacleaner.job.IdGenerator;
import org.datacleaner.job.ImmutableComponentConfiguration;
import org.datacleaner.job.ImmutableTransformerJob;
import org.datacleaner.job.InputColumnSinkJob;
import org.datacleaner.job.InputColumnSourceJob;
import org.datacleaner.job.OutputDataStreamJob;
import org.datacleaner.job.TransformerJob;
import org.datacleaner.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link ComponentBuilder} for {@link Transformer}s.
 *
 * <p>
 * Besides the configuration handled by the superclass, this builder keeps a
 * list of output columns that is re-synchronized with the transformer's
 * {@link OutputColumns} every time {@link #getOutputColumns()} is called, and
 * it notifies {@link TransformerChangeListener}s about output, configuration,
 * requirement and removal events.
 *
 * @param <T>
 *            the type of {@link Transformer} being built
 */
public final class TransformerComponentBuilder<T extends Transformer>
        extends AbstractComponentBuilder<TransformerDescriptor<T>, T, TransformerComponentBuilder<T>>
        implements InputColumnSourceJob, InputColumnSinkJob, HasComponentRequirement {

    private static final Logger logger = LoggerFactory.getLogger(TransformerComponentBuilder.class);

    /** Identifier of this builder ("trans-" + generated id); prefix of every output column id. */
    private final String _id;

    /** The current output columns, kept in sync by {@link #getOutputColumns()}. */
    private final List<MutableInputColumn<?>> _outputColumns = new ArrayList<>();

    /**
     * For each output column (same index as {@link #_outputColumns}) the last
     * name that was assigned automatically. A column whose current name still
     * equals this value is considered not manually renamed.
     */
    private final List<String> _automaticOutputColumnNames = new ArrayList<>();

    private final IdGenerator _idGenerator;

    /** Listeners registered on this builder only (as opposed to the job-wide ones). */
    private final List<TransformerChangeListener> _localChangeListeners;

    /**
     * Creates a builder for a transformer described by {@code descriptor}.
     *
     * @param analysisJobBuilder
     *            the job builder this component belongs to
     * @param descriptor
     *            descriptor of the transformer being built
     * @param idGenerator
     *            generator used for the builder id and for the ids of the
     *            output columns
     */
    public TransformerComponentBuilder(final AnalysisJobBuilder analysisJobBuilder,
            final TransformerDescriptor<T> descriptor, final IdGenerator idGenerator) {
        super(analysisJobBuilder, descriptor, TransformerComponentBuilder.class);
        _id = "trans-" + idGenerator.nextId();
        _idGenerator = idGenerator;
        _localChangeListeners = new ArrayList<>(0);
    }

    /**
     * Gets the output columns of this transformation with its current
     * configuration.
     *
     * <p>
     * As a side effect the internal column list is grown or shrunk to match
     * the transformer's {@link OutputColumns}, names and data types are
     * refreshed, and {@link #onOutputChanged()} is fired if the number of
     * columns or any data type changed.
     *
     * @return an unmodifiable view of the transformer's output columns, or an
     *         empty list if the component instance is not available, the
     *         remote server is in error state, or the transformer fails to
     *         report its output columns
     */
    public List<MutableInputColumn<?>> getOutputColumns() {
        final Component component = getComponentInstanceForQuestioning();
        if (component == null) {
            // as long as the transformer is not configured, just return an
            // empty list
            return Collections.emptyList();
        }

        final Transformer transformer = (Transformer) component;

        final ComponentDescriptor<?> componentDescriptor = getDescriptor();
        if (componentDescriptor instanceof RemoteTransformerDescriptor) {
            final RemoteServerState state =
                    ((RemoteTransformerDescriptor<?>) componentDescriptor).getRemoteDescriptorProvider()
                            .getServerState();
            if (state.getActualState() == RemoteServerState.State.ERROR) {
                logger.warn("Output columns for transformer '{}' can not be retrieved because the remote server "
                        + "is down. ", transformer);
                return Collections.emptyList();
            }
        }

        final OutputColumns outputColumns;
        try {
            outputColumns = transformer.getOutputColumns();
        } catch (final Exception e) {
            // the trailing Throwable is logged as the exception, not as a
            // message argument
            logger.error("getOutputColumns() threw unexpected exception on transformer: {}", transformer, e);
            return Collections.emptyList();
        }
        if (outputColumns == null) {
            logger.error("getOutputColumns() returned null on transformer: {}", transformer);
            return Collections.emptyList();
        }

        boolean changed = false;

        // adjust the amount of output columns
        final int expectedCols = outputColumns.getColumnCount();
        final int existingCols = _outputColumns.size();
        if (expectedCols != existingCols) {
            changed = true;
            final int colDiff = expectedCols - existingCols;
            if (colDiff > 0) {
                // the annotation does not change between columns, so look it
                // up once instead of once per added column
                final boolean hideNewColumns = isOutputHiddenByAnnotation();
                for (int i = 0; i < colDiff; i++) {
                    final int nextIndex = _outputColumns.size();
                    final String name = getColumnName(outputColumns, nextIndex);
                    final String id = _id + "-" + _idGenerator.nextId();
                    final TransformedInputColumn<Object> column = new TransformedInputColumn<>(name, id);
                    if (hideNewColumns) {
                        column.setHidden(true);
                    }
                    _outputColumns.add(column);
                    _automaticOutputColumnNames.add(name);
                }
            } else {
                // colDiff is negative here: drop the surplus columns
                for (int i = colDiff; i < 0; i++) {
                    // remove from the tail
                    _outputColumns.remove(_outputColumns.size() - 1);
                    _automaticOutputColumnNames.remove(_automaticOutputColumnNames.size() - 1);
                }
            }

            // reset the names when the number of output columns change and the
            // initial name has changed
            for (int i = 0; i < expectedCols; i++) {
                final MutableInputColumn<?> column = _outputColumns.get(i);
                final String previousProposedName = column.getInitialName();
                final String newProposedName = outputColumns.getColumnName(i);
                if (newProposedName != null && !newProposedName.equals(previousProposedName)) {
                    column.setName(newProposedName);
                }
            }
        }

        // automatically update names and types of columns if they have not been
        // manually set
        for (int i = 0; i < expectedCols; i++) {
            final String proposedName = getColumnName(outputColumns, i);
            final Class<?> dataType = outputColumns.getColumnType(i);

            final TransformedInputColumn<?> col = (TransformedInputColumn<?>) _outputColumns.get(i);
            col.setInitialName(proposedName);
            if (dataType != col.getDataType()) {
                col.setDataType(dataType);
                changed = true;
            }

            // a column is treated as "not manually renamed" when it has no
            // name or still carries the last automatically assigned one
            final String automaticName = _automaticOutputColumnNames.get(i);
            final String columnName = col.getName();
            if (StringUtils.isNullOrEmpty(columnName) || automaticName.equals(columnName)) {
                if (proposedName != null) {
                    col.setName(proposedName);
                    _automaticOutputColumnNames.set(i, proposedName);
                }
            }
        }

        if (changed) {
            // notify listeners
            onOutputChanged();
        }

        return Collections.unmodifiableList(_outputColumns);
    }

    /**
     * Determines whether newly created output columns should be marked hidden.
     *
     * <p>
     * If the transformer has the annotation {@link HideOutputColumns}, the
     * output columns are hidden in other components as input columns. If the
     * user wants to use them, he can select them in the transformer.
     *
     * @return true if the descriptor carries a {@link HideOutputColumns}
     *         annotation whose {@code isHidden()} is true
     */
    private boolean isOutputHiddenByAnnotation() {
        final HideOutputColumns hideOutputColumnsAnnotation = getDescriptor().getAnnotation(HideOutputColumns.class);
        return hideOutputColumnsAnnotation != null && hideOutputColumnsAnnotation.isHidden();
    }

    /**
     * Resolves the proposed name of the output column at {@code index}.
     *
     * @param outputColumns
     *            the output columns reported by the transformer
     * @param index
     *            zero-based column index
     * @return the name given by {@code outputColumns}, or, if that is null,
     *         the descriptor's display name followed by the one-based index in
     *         parentheses
     */
    private String getColumnName(final OutputColumns outputColumns, final int index) {
        String name = outputColumns.getColumnName(index);
        if (name == null) {
            name = getDescriptor().getDisplayName() + " (" + (index + 1) + ")";
        }
        return name;
    }

    /**
     * Notifies all global and local listeners that the output columns of this
     * transformer have changed.
     */
    public void onOutputChanged() {
        // notify listeners
        // NOTE(review): listeners receive the internal list itself, not the
        // unmodifiable view returned by getOutputColumns() - confirm that no
        // listener mutates it.
        final List<TransformerChangeListener> listeners = getAllListeners();
        for (final TransformerChangeListener listener : listeners) {
            listener.onOutputChanged(this, _outputColumns);
        }
    }

    /**
     * Builds a validated {@link TransformerJob} using a new
     * {@link AnalysisJobImmutabilizer}.
     *
     * @return the immutable transformer job
     * @throws IllegalStateException
     *             if this builder is not correctly configured
     */
    public TransformerJob toTransformerJob() throws IllegalStateException {
        return toTransformerJob(true);
    }

    /**
     * Builds a validated {@link TransformerJob} using the given immutabilizer.
     *
     * @param immutabilizer
     *            used to load the component requirement and output data
     *            stream jobs
     * @return the immutable transformer job
     * @throws IllegalStateException
     *             if this builder is not correctly configured
     */
    public TransformerJob toTransformerJob(final AnalysisJobImmutabilizer immutabilizer)
            throws IllegalStateException {
        return toTransformerJob(true, immutabilizer);
    }

    /**
     * Builds a {@link TransformerJob} using a new
     * {@link AnalysisJobImmutabilizer}.
     *
     * @param validate
     *            whether to check the configuration before building
     * @return the immutable transformer job
     */
    public TransformerJob toTransformerJob(final boolean validate) {
        return toTransformerJob(validate, new AnalysisJobImmutabilizer());
    }

    /**
     * Builds a {@link TransformerJob} from the current state of this builder.
     *
     * @param validate
     *            whether to check the configuration before building
     * @param immutabilizer
     *            used to load the component requirement and output data
     *            stream jobs
     * @return the immutable transformer job
     * @throws IllegalStateException
     *             if {@code validate} is true and {@code isConfigured(true)}
     *             returns false
     */
    public TransformerJob toTransformerJob(final boolean validate, final AnalysisJobImmutabilizer immutabilizer) {
        if (validate && !isConfigured(true)) {
            throw new IllegalStateException("Transformer job is not correctly configured");
        }

        final ComponentRequirement componentRequirement = immutabilizer.load(getComponentRequirement());
        final OutputDataStreamJob[] outputDataStreamJobs = immutabilizer.load(getOutputDataStreamJobs(), validate);

        return new ImmutableTransformerJob(getName(), getDescriptor(),
                new ImmutableComponentConfiguration(getConfiguredProperties()), getOutputColumns(),
                componentRequirement, getMetadataProperties(), outputDataStreamJobs);
    }

    @Override
    public String toString() {
        return "TransformerComponentBuilder[transformer=" + getDescriptor().getDisplayName() + ",inputColumns="
                + getInputColumns() + "]";
    }

    /**
     * Builds a temporary list of all listeners, both global and local. Since
     * the result is a fresh copy, callers can iterate it while listeners are
     * added to or removed from this builder.
     *
     * @return a list of global and local listeners, global ones first
     */
    private List<TransformerChangeListener> getAllListeners() {
        @SuppressWarnings("deprecation") final List<TransformerChangeListener> globalChangeListeners =
                getAnalysisJobBuilder().getTransformerChangeListeners();

        final List<TransformerChangeListener> list =
                new ArrayList<>(globalChangeListeners.size() + _localChangeListeners.size());
        list.addAll(globalChangeListeners);
        list.addAll(_localChangeListeners);
        return list;
    }

    /**
     * Gets an output column by name.
     *
     * @see #getOutputColumns()
     *
     * @param name
     *            name of the output column
     * @return the first output column whose name equals {@code name}, or null
     *         if {@code name} is null/empty or no such column exists
     */
    public MutableInputColumn<?> getOutputColumnByName(final String name) {
        if (StringUtils.isNullOrEmpty(name)) {
            return null;
        }

        final List<MutableInputColumn<?>> outputColumns = getOutputColumns();
        for (final MutableInputColumn<?> inputColumn : outputColumns) {
            if (name.equals(inputColumn.getName())) {
                return inputColumn;
            }
        }
        return null;
    }

    /**
     * Refreshes the output columns (when configured) and then notifies all
     * listeners about the configuration change.
     */
    @Override
    public void onConfigurationChanged() {
        super.onConfigurationChanged();

        // trigger getOutputColumns which will notify consumers in the case of
        // output changes
        if (isConfigured()) {
            getOutputColumns();
        }

        final List<TransformerChangeListener> listeners = getAllListeners();
        for (final TransformerChangeListener listener : listeners) {
            listener.onConfigurationChanged(this);
        }
    }

    /**
     * Notifies all listeners that the requirement of this transformer changed.
     */
    @Override
    public void onRequirementChanged() {
        super.onRequirementChanged();

        final List<TransformerChangeListener> listeners = getAllListeners();
        for (final TransformerChangeListener listener : listeners) {
            listener.onRequirementChanged(this);
        }
    }

    @Override
    public InputColumn<?>[] getInput() {
        return getInputColumns().toArray(new InputColumn<?>[0]);
    }

    @Override
    public MutableInputColumn<?>[] getOutput() {
        return getOutputColumns().toArray(new MutableInputColumn<?>[0]);
    }

    /**
     * Notification method invoked when transformer is removed. Each listener
     * is first told that the output is now empty and then that the
     * transformer was removed.
     */
    @Override
    protected void onRemovedInternal() {
        final List<TransformerChangeListener> listeners = getAllListeners();
        for (final TransformerChangeListener listener : listeners) {
            // every listener gets its own fresh, empty list
            listener.onOutputChanged(this, new LinkedList<>());
            listener.onRemove(this);
        }
    }

    /**
     * Adds a change listener to this component. Adding a listener that is
     * already registered has no effect.
     *
     * @param listener
     *            a new change listener
     */
    public void addChangeListener(final TransformerChangeListener listener) {
        if (!_localChangeListeners.contains(listener)) {
            _localChangeListeners.add(listener);
        }
    }

    /**
     * Removes a change listener from this component
     *
     * @param listener
     *            the change listener to be removed
     * @return whether or not the listener was found and removed.
     */
    public boolean removeChangeListener(final TransformerChangeListener listener) {
        return _localChangeListeners.remove(listener);
    }
}