/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.job;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.datacleaner.api.InputColumn;
import org.datacleaner.descriptors.ComponentDescriptor;
import org.datacleaner.descriptors.ConfiguredPropertyDescriptor;
import org.datacleaner.util.CollectionUtils2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Helper class that wraps a collection of {@link AnalyzerJob}s and provides
* richer functionality to traverse and search jobs for components and columns
* etc.
*/
public class AnalyzerJobHelper {

    private static final Logger logger = LoggerFactory.getLogger(AnalyzerJobHelper.class);

    // Held by reference (not copied); getAnalyzerJobs() hands back this same instance.
    private final Collection<AnalyzerJob> _jobs;

    /**
     * Creates a helper around the given analyzer jobs.
     *
     * @param jobs
     *            the analyzer jobs to search in. The collection is stored as-is
     *            (no defensive copy is made).
     */
    public AnalyzerJobHelper(final Collection<AnalyzerJob> jobs) {
        _jobs = jobs;
    }

    /**
     * Creates a helper around all analyzer jobs reachable from the given
     * {@link AnalysisJob}: every job in {@code analysisJob.flattened()}
     * contributes the elements of its {@code getAnalyzerJobs()} collection.
     *
     * @param analysisJob
     *            the analysis job whose analyzer jobs should be collected
     */
    public AnalyzerJobHelper(final AnalysisJob analysisJob) {
        this(analysisJob.flattened().flatMap(job -> job.getAnalyzerJobs().stream())
                .collect(Collectors.toList()));
    }

    /**
     * Gets the identifying input column of a {@link ComponentJob}, if there is
     * such a column. With an identifying input column, an externalizable
     * reference to the {@link ComponentJob} can be built, based on the
     * descriptor name, component name and the identifying column.
     *
     * A column is only considered identifying when the descriptor reports
     * exactly one input property and that property's configured value is
     * either a single {@link InputColumn} or an array holding exactly one
     * {@link InputColumn}.
     *
     * @param componentJob
     *            the component job to inspect
     * @return the identifying input column, or null if the component has no
     *         single, unambiguous input column
     */
    public static InputColumn<?> getIdentifyingInputColumn(final ComponentJob componentJob) {
        final ComponentDescriptor<?> descriptor = componentJob.getDescriptor();
        final Set<ConfiguredPropertyDescriptor> inputProperties = descriptor.getConfiguredPropertiesForInput(false);
        if (inputProperties.size() != 1) {
            // zero or several input properties: no single column can identify the component
            return null;
        }

        final ConfiguredPropertyDescriptor inputProperty = inputProperties.iterator().next();
        final Object input = componentJob.getConfiguration().getProperty(inputProperty);

        if (input instanceof InputColumn) {
            return (InputColumn<?>) input;
        }
        if (input instanceof InputColumn[]) {
            final InputColumn<?>[] inputColumns = (InputColumn[]) input;
            // an array only identifies the component when it holds exactly one column
            return inputColumns.length == 1 ? inputColumns[0] : null;
        }
        return null;
    }

    /**
     * Gets the analyzer jobs wrapped by this helper.
     *
     * @return the same collection instance that this helper was constructed
     *         with (not a copy)
     */
    public Collection<AnalyzerJob> getAnalyzerJobs() {
        return _jobs;
    }

    /**
     * Gets the "best candidate" to be the same (or a copy of) the analyzer job
     * provided in parameter.
     *
     * If the wrapped collection contains the given job, that job is returned
     * directly. Otherwise the search is delegated to
     * {@link #getAnalyzerJob(String, String, String)} using the job's
     * descriptor display name, its name and the name of its identifying input
     * column (if any).
     *
     * @param analyzerJob
     *            the analyzer job to find a match for
     * @return the matching analyzer job, or null if no candidate was found
     */
    public AnalyzerJob getAnalyzerJob(final AnalyzerJob analyzerJob) {
        if (_jobs.contains(analyzerJob)) {
            return analyzerJob;
        }

        final InputColumn<?> inputColumn = getIdentifyingInputColumn(analyzerJob);
        final String analyzerInputName = (inputColumn == null) ? null : inputColumn.getName();

        return getAnalyzerJob(analyzerJob.getDescriptor().getDisplayName(), analyzerJob.getName(), analyzerInputName);
    }

    /**
     * Gets the "best candidate" analyzer job based on search criteria offered
     * in parameters.
     *
     * Candidates are narrowed step by step through
     * {@code CollectionUtils2.refineCandidates}: first by descriptor display
     * name, then (when given) by analyzer name, then (when given) by the name
     * of the identifying input column. If more than one candidate remains, a
     * warning is logged and the first one is returned.
     *
     * @param descriptorName
     *            the display name of the analyzer's descriptor; must not be
     *            null, since it is dereferenced when matching candidates
     * @param analyzerName
     *            the name of the analyzer job, or null to skip filtering by
     *            name
     * @param analyzerInputName
     *            the name of the analyzer's identifying input column, or null
     *            to skip filtering by input column
     * @return the first remaining candidate, or null if no candidates remain
     */
    public AnalyzerJob getAnalyzerJob(final String descriptorName, final String analyzerName,
            final String analyzerInputName) {
        List<AnalyzerJob> candidates = new ArrayList<>(_jobs);

        // filter analyzers of the corresponding type
        candidates = CollectionUtils2.refineCandidates(candidates, o -> {
            final String actualDescriptorName = o.getDescriptor().getDisplayName();
            return descriptorName.equals(actualDescriptorName);
        });

        if (analyzerName != null) {
            // filter analyzers with a particular name
            candidates = CollectionUtils2.refineCandidates(candidates, o -> {
                final String actualAnalyzerName = o.getName();
                return analyzerName.equals(actualAnalyzerName);
            });
        }

        if (analyzerInputName != null) {
            // filter analyzers with a particular input
            candidates = CollectionUtils2.refineCandidates(candidates, o -> {
                final InputColumn<?> inputColumn = getIdentifyingInputColumn(o);
                if (inputColumn == null) {
                    return false;
                }
                return analyzerInputName.equals(inputColumn.getName());
            });
        }

        if (candidates.isEmpty()) {
            logger.error("No more AnalyzerJob candidates to choose from");
            return null;
        }
        if (candidates.size() > 1) {
            // supply the count for the "{}" placeholder; it was previously logged as a literal "{}"
            logger.warn("Multiple ({}) AnalyzerJob candidates to choose from, picking first", candidates.size());
        }
        return candidates.get(0);
    }
}