// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dq.analysis;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.eclipse.core.runtime.IProgressMonitor;
import org.eclipse.core.runtime.Platform;
import org.eclipse.emf.common.util.EList;
import org.talend.commons.exception.BusinessException;
import org.talend.commons.utils.platform.PluginChecker;
import org.talend.core.ITDQRepositoryService;
import org.talend.core.model.metadata.builder.connection.MetadataColumn;
import org.talend.cwm.db.connection.DatabaseSQLExecutor;
import org.talend.cwm.db.connection.DelimitedFileSQLExecutor;
import org.talend.cwm.db.connection.ISQLExecutor;
import org.talend.cwm.db.connection.SQLExecutor;
import org.talend.cwm.helper.TaggedValueHelper;
import org.talend.cwm.management.i18n.Messages;
import org.talend.cwm.relational.TdColumn;
import org.talend.dataquality.analysis.Analysis;
import org.talend.dataquality.analysis.ExecutionInformations;
import org.talend.dataquality.indicators.Indicator;
import org.talend.dataquality.indicators.columnset.BlockKeyIndicator;
import org.talend.dataquality.indicators.columnset.RecordMatchingIndicator;
import org.talend.dataquality.matchmerge.Record;
import org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer;
import org.talend.dq.analysis.match.BlockAndMatchManager;
import org.talend.dq.analysis.memory.AnalysisThreadMemoryChangeNotifier;
import org.talend.dq.helper.AnalysisExecutorHelper;
import org.talend.dq.helper.StoreOnDiskUtils;
import org.talend.dq.indicators.Evaluator;
import org.talend.utils.sugars.ReturnCode;
import org.talend.utils.sugars.TypedReturnCode;
import orgomg.cwm.objectmodel.core.ModelElement;
/**
 * Executes a match analysis.
 * <p>
 * The executor looks up the {@link RecordMatchingIndicator} and the {@link BlockKeyIndicator} stored in the analysis
 * results, reads the analysed columns through an {@link ISQLExecutor} and feeds the rows to the blocking/matching
 * process, either through the store-on-disk handler or through a result set iterator.
 */
public class MatchAnalysisExecutor implements IAnalysisExecutor {

    private static final Logger log = Logger.getLogger(MatchAnalysisExecutor.class);

    /** Amount of work reported to the progress monitor after each main phase of {@link #execute(Analysis)}. */
    private static final int WORK_PER_PHASE = 20;

    /** Used heap memory (converted to MB) recorded by {@link #continueRun()} when the usage threshold is exceeded. */
    private long usedMemory;

    /** Set by {@link #continueRun()} once the memory usage threshold has been exceeded. */
    private volatile boolean isLowMemory = false;

    /** Number of times {@link #continueRun()} has been called on this executor. */
    private long checkContinueCount = 0L;

    /** Last decision computed by {@link #continueRun()}. */
    private boolean keepRunning = true;

    /** Optional progress monitor; may be {@code null}. */
    private IProgressMonitor monitor;

    /*
     * (non-Javadoc)
     *
     * @see org.talend.dq.analysis.IAnalysisExecutor#execute(org.talend.dataquality.analysis.Analysis)
     */
    public ReturnCode execute(Analysis analysis) {
        assert analysis != null;

        // --- preconditions
        ReturnCode rc = AnalysisExecutorHelper.check(analysis);
        if (!rc.isOk()) {
            AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
            return rc;
        }

        // --- creation time
        final long startime = AnalysisExecutorHelper.setExecutionDateInAnalysisResult(analysis);

        // Both indicators are required; when several of a kind exist, the last one found is used.
        EList<Indicator> indicators = analysis.getResults().getIndicators();
        RecordMatchingIndicator recordMatchingIndicator = null;
        BlockKeyIndicator blockKeyIndicator = null;
        for (Indicator ind : indicators) {
            if (ind instanceof RecordMatchingIndicator) {
                recordMatchingIndicator = (RecordMatchingIndicator) ind;
            } else if (ind instanceof BlockKeyIndicator) {
                blockKeyIndicator = (BlockKeyIndicator) ind;
            }
        }
        if (recordMatchingIndicator == null || blockKeyIndicator == null) {
            return fail(analysis, rc, Messages.getString("MatchAnalysisExecutor.noIndicators")); //$NON-NLS-1$
        }

        List<ModelElement> anlayzedElements = analysis.getContext().getAnalysedElements();
        if (anlayzedElements == null || anlayzedElements.size() == 0) {
            return fail(analysis, rc, Messages.getString("MatchAnalysisExecutor.EmptyAnalyzedElement")); //$NON-NLS-1$
        }

        // TDQ-9664 msjian: check the store on disk path.
        // NOTE(review): unlike getSQLExectutor(), this check does not look at PluginChecker.isTDQLoaded() - confirm
        // whether the path must be valid even when store on disk ends up not being used.
        Boolean isStoreOnDisk = TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis);
        if (Boolean.TRUE.equals(isStoreOnDisk)) {
            String tempDataPath = TaggedValueHelper.getValueString(SQLExecutor.TEMP_DATA_DIR, analysis);
            // A missing path is reported as an invalid path instead of failing inside the File constructor.
            File tempDataDir = tempDataPath == null ? null : new File(tempDataPath);
            // isDirectory() is false for a path that does not exist, so no separate exists() test is needed.
            if (tempDataDir == null || !tempDataDir.isDirectory()) {
                String shownPath = tempDataDir == null ? String.valueOf(tempDataPath) : tempDataDir.getPath();
                return fail(analysis, rc, Messages.getString("MatchAnalysisExecutor.InvalidPath", shownPath)); //$NON-NLS-1$
            }
        }
        // TDQ-9664~

        // The column map is built by casting every element to MetadataColumn, so unsupported elements must be
        // rejected here; otherwise the cast would throw before the executor check below could report the problem.
        if (!containsOnlyMetadataColumns(anlayzedElements)) {
            return fail(analysis, rc, Messages.getString("MatchAnalysisExecutor.noSqlExecutor")); //$NON-NLS-1$
        }

        Map<MetadataColumn, String> columnMap = getColumn2IndexMap(anlayzedElements);
        ISQLExecutor sqlExecutor = getSQLExectutor(analysis, recordMatchingIndicator, columnMap);
        if (sqlExecutor == null) {
            return fail(analysis, rc, Messages.getString("MatchAnalysisExecutor.noSqlExecutor")); //$NON-NLS-1$
        }
        reportProgress(WORK_PER_PHASE);

        // Set schema for match key.
        TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>();
        MetadataColumn[] completeColumnSchema = AnalysisRecordGroupingUtils.getCompleteColumnSchema(columnMap);
        String[] colSchemaString = new String[completeColumnSchema.length];
        for (int i = 0; i < completeColumnSchema.length; i++) {
            colSchemaString[i] = completeColumnSchema[i].getName();
        }
        recordMatchingIndicator.setMatchRowSchema(colSchemaString);
        recordMatchingIndicator.reset();

        MatchGroupResultConsumer matchResultConsumer = createMatchGroupResultConsumer(recordMatchingIndicator);
        if (sqlExecutor.isStoreOnDisk()) {
            // need to execute the query
            try {
                sqlExecutor.executeQuery(analysis.getContext().getConnection(), analysis.getContext().getAnalysedElements());
            } catch (SQLException e) {
                log.error(e, e);
                return fail(analysis, rc, e.getMessage());
            }
            try {
                TypedReturnCode<Object> result = StoreOnDiskUtils.getDefault().executeWithStoreOnDisk(columnMap,
                        recordMatchingIndicator, blockKeyIndicator, sqlExecutor.getStoreOnDiskHandler(), matchResultConsumer);
                if (result != null) {
                    returnCode.setObject((MatchGroupResultConsumer) result.getObject());
                    returnCode.setOk(result.isOk());
                    returnCode.setMessage(result.getMessage());
                }
            } catch (Exception e) {
                // The failure is carried by returnCode and copied into rc below, so that the result refresh and the
                // execution information update still happen.
                log.error(e, e);
                returnCode.setMessage(e.getMessage());
                returnCode.setOk(false);
            }
        } else {
            // Added TDQ-9320 , use the result set iterator to replace the list of result in the memory.
            try {
                Iterator<Record> resultSetIterator = sqlExecutor.getResultSetIterator(analysis.getContext().getConnection(),
                        anlayzedElements);
                BlockAndMatchManager bAndmManager = new BlockAndMatchManager(resultSetIterator, matchResultConsumer, columnMap,
                        recordMatchingIndicator, blockKeyIndicator);
                bAndmManager.run();
            } catch (SQLException e) {
                log.error(e, e);
                return fail(analysis, rc, e.getMessage());
            } catch (BusinessException e) {
                log.error(e, e);
                return fail(analysis, rc, e.getMessage());
            }
        }

        if (!returnCode.isOk()) {
            rc.setOk(returnCode.isOk());
            rc.setMessage(returnCode.getMessage());
        }
        reportProgress(WORK_PER_PHASE);

        if (isLowMemory) {
            rc.setMessage(Messages.getString("Evaluator.OutOfMomory", usedMemory));//$NON-NLS-1$
        }

        // notify the master page
        refreshTableWithMatchFullResult(analysis);

        // --- set metadata information of analysis
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());

        // --- compute execution duration
        if (this.continueRun()) {
            long endtime = System.currentTimeMillis();
            final ExecutionInformations resultMetadata = analysis.getResults().getResultMetadata();
            resultMetadata.setExecutionDuration((int) (endtime - startime));
            resultMetadata.setOutThreshold(false);
        }
        reportProgress(WORK_PER_PHASE);
        return rc;
    }

    /**
     * Marks the given return code as failed with the given message and stores that outcome in the analysis result.
     *
     * @param analysis the analysis whose execution information is updated
     * @param rc the return code to update
     * @param message the failure message
     * @return the same {@code rc} instance, so that callers can return it directly
     */
    private ReturnCode fail(Analysis analysis, ReturnCode rc, String message) {
        rc.setOk(Boolean.FALSE);
        rc.setMessage(message);
        AnalysisExecutorHelper.setExecutionInfoInAnalysisResult(analysis, rc.isOk(), rc.getMessage());
        return rc;
    }

    /**
     * Reports the given amount of work to the progress monitor, when one is set.
     *
     * @param work the amount of work to report
     */
    private void reportProgress(int work) {
        IProgressMonitor currentMonitor = getMonitor();
        if (currentMonitor != null) {
            currentMonitor.worked(work);
        }
    }

    /**
     * Tells whether every element of the list can be cast to {@link MetadataColumn}.
     *
     * @param elements the analysed elements
     * @return {@code true} when all the elements are instances of {@link MetadataColumn}
     */
    private boolean containsOnlyMetadataColumns(List<ModelElement> elements) {
        for (ModelElement element : elements) {
            if (!(element instanceof MetadataColumn)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Creates a result consumer which forwards every handled row to the given indicator.
     *
     * @param recordMatchingIndicator the indicator receiving the rows
     * @return the new consumer
     */
    private MatchGroupResultConsumer createMatchGroupResultConsumer(final RecordMatchingIndicator recordMatchingIndicator) {
        return new MatchGroupResultConsumer(false) {

            /*
             * (non-Javadoc)
             *
             * @see org.talend.dataquality.record.linkage.grouping. MatchGroupResultConsumer#handle(java.lang.Object)
             */
            @Override
            public void handle(Object row) {
                recordMatchingIndicator.handle(row);
            }
        };
    }

    /**
     * Asks the TDQ repository service, when it is available, to refresh the table with the result of the analysis.
     *
     * @param analysis the analysis which has been executed
     */
    private void refreshTableWithMatchFullResult(Analysis analysis) {
        ITDQRepositoryService tdqRepService = AnalysisExecutorHelper.getTDQService();
        if (tdqRepService != null) {
            tdqRepService.refreshTableWithResult(analysis, null);
        }
    }

    /**
     * Maps each analysed column to its position (as a string) in the given list.
     *
     * @param anlayzedElements the analysed elements; each of them must be a {@link MetadataColumn}
     * @return a map from column to its zero-based index in the list
     */
    private Map<MetadataColumn, String> getColumn2IndexMap(List<ModelElement> anlayzedElements) {
        Map<MetadataColumn, String> columnMap = new HashMap<MetadataColumn, String>();
        int index = 0;
        for (ModelElement column : anlayzedElements) {
            columnMap.put((MetadataColumn) column, String.valueOf(index++));
        }
        return columnMap;
    }

    /**
     * Creates the SQL executor matching the type of the first analysed element, and turns on its store on disk mode
     * when TDQ is loaded and the analysis is tagged for it.
     *
     * @param analysis the analysis to execute; its context must contain at least one analysed element
     * @param recordMatchingIndicator the indicator passed to the store on disk handler
     * @param columnMap the column to index map passed to the store on disk handler
     * @return the executor, or {@code null} when the first analysed element is neither a {@link TdColumn} nor a
     * {@link MetadataColumn}
     */
    private ISQLExecutor getSQLExectutor(Analysis analysis, RecordMatchingIndicator recordMatchingIndicator,
            Map<MetadataColumn, String> columnMap) {
        ModelElement modelElement = analysis.getContext().getAnalysedElements().get(0);
        ISQLExecutor sqlExecutor = null;
        if (modelElement instanceof TdColumn) {
            sqlExecutor = new DatabaseSQLExecutor();
        } else if (modelElement instanceof MetadataColumn) {
            sqlExecutor = new DelimitedFileSQLExecutor();
        }
        if (sqlExecutor == null) {
            return null;
        }
        // Tune on store on disk option when needed. Boolean.TRUE.equals() avoids unboxing a null tagged value.
        boolean storeOnDisk = PluginChecker.isTDQLoaded()
                && Boolean.TRUE.equals(TaggedValueHelper.getValueBoolean(SQLExecutor.STORE_ON_DISK_KEY, analysis));
        if (storeOnDisk) {
            sqlExecutor.setStoreOnDisk(Boolean.TRUE);
            sqlExecutor.initStoreOnDiskHandler(analysis, recordMatchingIndicator, columnMap);
        }
        return sqlExecutor;
    }

    /**
     * Tells whether the execution should go on.
     * <p>
     * Every call increments a counter; the cancellation and memory checks are only performed when that counter is a
     * multiple of {@code Evaluator.CHECK_EVERY_N_COUNT}. On the other calls the last computed decision is returned.
     *
     * @return {@code false} once a check has seen the monitor canceled or the memory usage threshold exceeded,
     * {@code true} otherwise
     */
    // FIXME this method is the same as Evaluator.continueRun(). Factorize code.
    protected boolean continueRun() {
        // MOD scorreia 2013-09-10 avoid checking for each analyzed row. Check only every 1000 rows
        checkContinueCount++;
        if (checkContinueCount % Evaluator.CHECK_EVERY_N_COUNT != 0) {
            return keepRunning;
        }
        if (!Platform.isRunning()) { // Reporting engine is working as library
            return true;
        }
        if (getMonitor() != null && getMonitor().isCanceled()) {
            keepRunning = false;
        } else if (this.isLowMemory) {
            keepRunning = false;
        } else if (AnalysisThreadMemoryChangeNotifier.getInstance().isUsageThresholdExceeded()) {
            // Remember the used heap so that execute() can put it in the low memory message.
            this.usedMemory = AnalysisThreadMemoryChangeNotifier.convertToMB(ManagementFactory.getMemoryMXBean()
                    .getHeapMemoryUsage().getUsed());
            this.isLowMemory = true;
            keepRunning = false;
        }
        return keepRunning;
    }

    /**
     * Returns the progress monitor of this executor.
     *
     * @return the monitor, or {@code null} when none has been set
     */
    public IProgressMonitor getMonitor() {
        return monitor;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.talend.dq.analysis.IAnalysisExecutor#setMonitor(org.eclipse.core.runtime.IProgressMonitor)
     */
    public void setMonitor(IProgressMonitor monitor) {
        this.monitor = monitor;
    }
}