/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.util;

import java.util.Collection;
import java.util.LinkedList;
import java.util.List;

import org.apache.metamodel.schema.Table;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.OutputDataStream;
import org.datacleaner.components.maxrows.MaxRowsFilter;
import org.datacleaner.job.AnalysisJob;
import org.datacleaner.job.ComponentRequirement;
import org.datacleaner.job.CompoundComponentRequirement;
import org.datacleaner.job.HasFilterOutcomes;
import org.datacleaner.job.SimpleComponentRequirement;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.datacleaner.job.builder.AnalyzerComponentBuilder;
import org.datacleaner.job.builder.ComponentBuilder;
import org.datacleaner.job.builder.FilterComponentBuilder;
import org.datacleaner.job.builder.TransformerComponentBuilder;

/**
 * Static helpers for preparing an {@link AnalysisJobBuilder} for a preview
 * run: limiting the number of rows, copying the job, stripping components
 * that are not needed, and locating the job builder that carries the preview
 * marker.
 */
public class PreviewUtils {

    /**
     * Metadata property key compared against the identifier passed to
     * {@link #findAnalysisJobBuilder(AnalysisJobBuilder, String)}.
     */
    public static final String METADATA_PROPERTY_MARKER = "org.datacleaner.preview.targetcomponent";

    /**
     * Restricts the rows consumed by the given components by making them
     * depend on the {@link MaxRowsFilter.Category#VALID} outcome of a
     * {@link MaxRowsFilter}, one filter per source table of the job.
     *
     * <p>
     * The row budget is divided evenly over the source tables and rounded up.
     * For every source table a filter named
     * {@code <PreviewUtils class name>-<table name>-MaxRows} is looked up; it
     * is only created and configured when no filter with that name exists
     * yet, so an already present filter keeps its existing settings.
     * </p>
     *
     * @param jobBuilder
     *            the job builder that owns the source tables and receives the
     *            filters
     * @param componentBuilders
     *            the components that should be limited
     * @param previewRows
     *            the total number of rows to preview across all source tables
     */
    public static void limitJobRows(final AnalysisJobBuilder jobBuilder,
            final Collection<? extends ComponentBuilder> componentBuilders, final int previewRows) {
        final SourceColumnFinder sourceColumnFinder = new SourceColumnFinder();
        sourceColumnFinder.addSources(jobBuilder);

        final List<Table> sourceTables = jobBuilder.getSourceTables();
        final int maxRows = (int) Math.ceil(((double) previewRows) / sourceTables.size());

        for (final Table table : sourceTables) {
            final String filterName = PreviewUtils.class.getName() + "-" + table.getName() + "-MaxRows";
            final FilterComponentBuilder<?, ?> maxRowFilter =
                    jobBuilder.getFilterComponentBuilderByName(filterName).orElseGet(() -> {
                        final FilterComponentBuilder<MaxRowsFilter, MaxRowsFilter.Category> filter =
                                jobBuilder.addFilter(MaxRowsFilter.class);
                        filter.setName(filterName);
                        final MaxRowsFilter instance = filter.getComponentInstance();
                        instance.setMaxRows(maxRows);
                        instance.setApplyOrdering(false);
                        instance.setOrderColumn(jobBuilder.getSourceColumnsOfTable(table).get(0));
                        return filter;
                    });

            for (final ComponentBuilder componentBuilder : componentBuilders) {
                // the filter must never be made to depend on its own outcome
                if (componentBuilder != maxRowFilter) {
                    requireMaxRowsOutcome(componentBuilder, maxRowFilter, table, sourceColumnFinder);
                }
            }
        }
    }

    /**
     * Adds a requirement on the max rows filter's VALID outcome to a single
     * component, when that component is fed by the given table or is a
     * multi-stream component.
     */
    private static void requireMaxRowsOutcome(final ComponentBuilder componentBuilder,
            final FilterComponentBuilder<?, ?> maxRowFilter, final Table table,
            final SourceColumnFinder sourceColumnFinder) {
        final InputColumn<?>[] input = componentBuilder.getInput();
        if (input.length == 0) {
            return;
        }

        // only the first input column is used to decide which table feeds the component
        final boolean applies = componentBuilder.getDescriptor().isMultiStreamComponent()
                || sourceColumnFinder.findOriginatingTable(input[0]) == table;
        if (!applies) {
            return;
        }

        final ComponentRequirement existingRequirement = componentBuilder.getComponentRequirement();
        if (existingRequirement == null) {
            componentBuilder.setComponentRequirement(
                    new SimpleComponentRequirement(maxRowFilter.getFilterOutcome(MaxRowsFilter.Category.VALID)));
        } else if (componentBuilder.getDescriptor().isMultiStreamComponent()) {
            // an existing requirement is only extended for multi-stream
            // components; for other components it is left untouched
            componentBuilder.setComponentRequirement(new CompoundComponentRequirement(existingRequirement,
                    maxRowFilter.getFilterOutcome(MaxRowsFilter.Category.VALID)));
        }
    }

    /**
     * Creates a new {@link AnalysisJobBuilder} from the root job of the given
     * builder.
     *
     * @param original
     *            the job builder to copy; its root job builder is the one
     *            that gets converted
     * @return a new job builder created from the original's configuration and
     *         the {@link AnalysisJob} built from its root job builder
     */
    public static AnalysisJobBuilder copy(final AnalysisJobBuilder original) {
        final AnalysisJob analysisJob = original.getRootJobBuilder().withoutListeners().toAnalysisJob(false);
        return new AnalysisJobBuilder(original.getConfiguration(), analysisJob);
    }

    /**
     * Removes components that are not needed to preview the given
     * transformer, from the given job builder and from each of its ancestor
     * job builders.
     *
     * <p>
     * A component is considered important when it is the previewed
     * transformer itself or when one of its consumed output data streams
     * leads to a relevant job builder. Consumed output data stream job
     * builders that are not relevant are emptied. Components that are not
     * important are removed when they are analyzers or when they are not
     * configured.
     * </p>
     *
     * @param ajb
     *            the job builder that contains the previewed transformer
     * @param tjb
     *            the transformer being previewed; never removed by this method
     */
    public static void sanitizeIrrelevantComponents(final AnalysisJobBuilder ajb,
            final TransformerComponentBuilder<?> tjb) {
        final List<AnalysisJobBuilder> relevantAnalysisJobBuilders = createRelevantAnalysisJobBuildersList(ajb);

        for (final AnalysisJobBuilder relevantAnalysisJobBuilder : relevantAnalysisJobBuilders) {
            final Collection<ComponentBuilder> componentBuilders = relevantAnalysisJobBuilder.getComponentBuilders();
            for (final ComponentBuilder componentBuilder : componentBuilders) {

                // flag to indicate if this component is directly involved in
                // populating data for the previewed component
                boolean importantComponent = componentBuilder == tjb;

                final List<OutputDataStream> streams = componentBuilder.getOutputDataStreams();
                for (final OutputDataStream stream : streams) {
                    if (!componentBuilder.isOutputDataStreamConsumed(stream)) {
                        continue;
                    }
                    final AnalysisJobBuilder childJobBuilder = componentBuilder.getOutputDataStreamJobBuilder(stream);
                    if (relevantAnalysisJobBuilders.contains(childJobBuilder)) {
                        importantComponent = true;
                    } else {
                        // remove irrelevant output data stream job builder
                        childJobBuilder.removeAllComponents();
                    }
                }

                if (importantComponent) {
                    continue;
                }

                if (componentBuilder instanceof AnalyzerComponentBuilder) {
                    // remove analyzers because they are generally more
                    // heavy-weight and they produce no dependencies for other
                    // components
                    relevantAnalysisJobBuilder.removeComponent(componentBuilder);
                }

                // remove the components that are not configured.
                if (!componentBuilder.isConfigured(false)) {
                    relevantAnalysisJobBuilder.removeComponent(componentBuilder);
                }
            }
        }
    }

    /**
     * Creates a list with _just_ the relevant {@link AnalysisJobBuilder}s to
     * include in the preview job: the given job builder followed by each of
     * its ancestors, up to and including the root job builder.
     *
     * @param ajb
     *            the job builder to start from
     * @return the given job builder and all of its parent job builders,
     *         ordered from the given builder towards the root
     */
    private static List<AnalysisJobBuilder> createRelevantAnalysisJobBuildersList(AnalysisJobBuilder ajb) {
        final List<AnalysisJobBuilder> relevantAnalysisJobBuilders = new LinkedList<>();
        relevantAnalysisJobBuilders.add(ajb);
        while (!ajb.isRootJobBuilder()) {
            ajb = ajb.getParentJobBuilder();
            // every ancestor is relevant too; without this the walk up to
            // the root would have no effect on the returned list
            relevantAnalysisJobBuilders.add(ajb);
        }
        return relevantAnalysisJobBuilders;
    }

    /**
     * Searches the given job builder and, depth-first, the job builders of
     * its consumed output data streams for the one whose metadata property
     * {@link #METADATA_PROPERTY_MARKER} equals the given identifier.
     *
     * @param analysisJobBuilder
     *            the job builder to start searching from
     * @param jobBuilderIdentifier
     *            the marker value to look for; must not be {@code null}
     * @return the first matching job builder, or {@code null} if none matches
     */
    public static AnalysisJobBuilder findAnalysisJobBuilder(final AnalysisJobBuilder analysisJobBuilder,
            final String jobBuilderIdentifier) {
        final Object marker = analysisJobBuilder.getAnalysisJobMetadata().getProperties().get(METADATA_PROPERTY_MARKER);
        if (jobBuilderIdentifier.equals(marker)) {
            return analysisJobBuilder;
        }

        final List<AnalysisJobBuilder> childJobBuilders = analysisJobBuilder.getConsumedOutputDataStreamsJobBuilders();
        for (final AnalysisJobBuilder childJobBuilder : childJobBuilders) {
            final AnalysisJobBuilder result = findAnalysisJobBuilder(childJobBuilder, jobBuilderIdentifier);
            if (result != null) {
                return result;
            }
        }

        return null;
    }

    /**
     * Tells whether any of the source jobs found for the given component
     * implements {@link HasFilterOutcomes}.
     *
     * @param scf
     *            the finder used to resolve the component's source jobs
     * @param acb
     *            the component to inspect
     * @return {@code true} if at least one source job is a
     *         {@link HasFilterOutcomes}, {@code false} otherwise
     */
    public static boolean hasFilterPresent(final SourceColumnFinder scf, final ComponentBuilder acb) {
        return scf.findAllSourceJobs(acb).stream().anyMatch(o -> o instanceof HasFilterOutcomes);
    }
}