// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dq.analysis.match;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.talend.commons.exception.BusinessException;
import org.talend.core.model.metadata.builder.connection.MetadataColumn;
import org.talend.dataquality.indicators.columnset.BlockKeyIndicator;
import org.talend.dataquality.indicators.columnset.RecordMatchingIndicator;
import org.talend.dataquality.record.linkage.constant.RecordMatcherType;
import org.talend.dataquality.record.linkage.genkey.BlockingKeyHandler;
import org.talend.dataquality.record.linkage.grouping.AnalysisMatchRecordGrouping;
import org.talend.dataquality.record.linkage.grouping.MatchGroupResultConsumer;
import org.talend.dataquality.record.linkage.grouping.swoosh.AnalysisSwooshMatchRecordGrouping;
import org.talend.dq.analysis.AnalysisRecordGroupingUtils;
import org.talend.utils.sugars.ReturnCode;
import org.talend.utils.sugars.TypedReturnCode;
/**
 * Created by zshen on Sep 16, 2013.
 * <p>
 * Used for the "chart" button in the analysis: splits the input rows into blocks by blocking key, runs the match
 * grouping inside every block and records how often each block size occurs.
 */
public class ExecuteMatchRuleHandler {

    /**
     * Runs the match rule on the given rows and reports the outcome.
     *
     * @param columnMap mapping from each analysed column to the string value associated with it (passed through to
     * the blocking key handler and to the grouping utilities)
     * @param recordMatchingIndicator indicator holding the match rule definition
     * @param matchRows the rows to match
     * @param blockKeyIndicator indicator that receives the block-size frequency table
     * @param matchResultConsumer consumer handed to the record grouping; also set as the object of the returned code
     * @return a return code whose "ok" flag and message are copied from the per-block computation and whose object is
     * <code>matchResultConsumer</code>
     */
    public TypedReturnCode<MatchGroupResultConsumer> execute(Map<MetadataColumn, String> columnMap,
            RecordMatchingIndicator recordMatchingIndicator, List<Object[]> matchRows, BlockKeyIndicator blockKeyIndicator,
            MatchGroupResultConsumer matchResultConsumer) {
        TypedReturnCode<MatchGroupResultConsumer> returnCode = new TypedReturnCode<MatchGroupResultConsumer>(false);
        returnCode.setObject(matchResultConsumer);

        // By default for analysis, the applied blocking key will be the key from key generation definition. This
        // will be refined when there is a need to define the applied blocking key manually by user later.
        AnalysisRecordGroupingUtils.createAppliedBlockKeyByGenKey(recordMatchingIndicator);

        // Blocking key specified.
        ReturnCode computeMatchGroupReturnCode = computeMatchGroupWithBlockKey(recordMatchingIndicator, blockKeyIndicator,
                columnMap, matchResultConsumer, matchRows);
        returnCode.setOk(computeMatchGroupReturnCode.isOk());
        returnCode.setMessage(computeMatchGroupReturnCode.getMessage());
        return returnCode;
    }

    /**
     * Computes the blocks, runs the match grouping inside each block and stores the block-size frequency table on
     * <code>blockKeyIndicator</code>.
     * <p>
     * If the grouping of any block fails with a {@link BusinessException}, processing stops immediately: the returned
     * code is marked as failed, carries the exception's additional message, and the frequency table is NOT stored on
     * the indicator.
     *
     * @param recordMatchingIndicator indicator holding the match rule definition
     * @param blockKeyIndicator indicator that receives the block-size frequency table
     * @param columnMap mapping from each analysed column to the string value associated with it
     * @param matchResultConsumer consumer handed to the record grouping
     * @param matchRows the rows to match
     * @return a successful return code, or a failed one when a block could not be grouped
     */
    private ReturnCode computeMatchGroupWithBlockKey(RecordMatchingIndicator recordMatchingIndicator,
            BlockKeyIndicator blockKeyIndicator, Map<MetadataColumn, String> columnMap,
            MatchGroupResultConsumer matchResultConsumer, List<Object[]> matchRows) {
        ReturnCode rc = new ReturnCode(Boolean.TRUE);
        Map<String, List<String[]>> resultWithBlockKey = computeBlockingKey(columnMap, matchRows, recordMatchingIndicator);
        TreeMap<Object, Long> blockSize2Freq = new TreeMap<Object, Long>();

        // Only the rows of each block are needed, so iterate the values instead of looking every key up again.
        for (List<String[]> matchRowsInBlock : resultWithBlockKey.values()) {
            // Match group within each block.
            List<Object[]> objList = new ArrayList<Object[]>(matchRowsInBlock);
            try {
                computeMatchGroupResult(columnMap, matchResultConsumer, objList, recordMatchingIndicator);
            } catch (BusinessException e) {
                rc.setOk(Boolean.FALSE);
                rc.setMessage(e.getAdditonalMessage());
                return rc;
            }

            // Store indicator: count how many blocks have this size.
            Long blockSize = Long.valueOf(matchRowsInBlock.size());
            Long freq = blockSize2Freq.get(blockSize);
            if (freq == null) {
                freq = 0L;
            }
            blockSize2Freq.put(blockSize, freq + 1);
        }
        blockKeyIndicator.setBlockSize2frequency(blockSize2Freq);
        return rc;
    }

    /**
     * Runs the blocking key handler on the input rows.
     * <p>
     * Each column is registered under its name, or under its label when the name is <code>null</code>.
     *
     * @param columnMap mapping from each analysed column to the string value associated with it
     * @param matchRows the rows to split into blocks
     * @param recordMatchingIndicator indicator from which the block key schema is read
     * @return the result data of the blocking key handler
     */
    private Map<String, List<String[]>> computeBlockingKey(Map<MetadataColumn, String> columnMap, List<Object[]> matchRows,
            RecordMatchingIndicator recordMatchingIndicator) {
        List<Map<String, String>> blockKeySchema = AnalysisRecordGroupingUtils.getBlockKeySchema(recordMatchingIndicator);
        Map<String, String> colName2IndexMap = new HashMap<String, String>();
        for (Map.Entry<MetadataColumn, String> entry : columnMap.entrySet()) {
            MetadataColumn metaCol = entry.getKey();
            String columnName = metaCol.getName();
            if (columnName == null) {
                columnName = metaCol.getLabel();
            }
            colName2IndexMap.put(columnName, entry.getValue());
        }
        BlockingKeyHandler blockKeyHandler = new BlockingKeyHandler(blockKeySchema, colName2IndexMap);
        blockKeyHandler.setInputData(matchRows);
        blockKeyHandler.run();
        return blockKeyHandler.getResultDatas();
    }

    /**
     * Runs the match grouping on one set of rows.
     * <p>
     * The swoosh grouping is used when the built-in match rule definition names the swoosh algorithm; any other value
     * (including <code>null</code>) selects the default grouping.
     *
     * @param columnMap mapping from each analysed column to the string value associated with it
     * @param matchResultConsumer consumer handed to the record grouping
     * @param matchRows the rows to group
     * @param recordMatchingIndicator indicator holding the match rule definition
     * @throws BusinessException when initialising or running the grouping fails with an
     * {@link InstantiationException}, {@link IllegalAccessException} or {@link ClassNotFoundException}; the original
     * exception is attached as the cause
     */
    private void computeMatchGroupResult(Map<MetadataColumn, String> columnMap, MatchGroupResultConsumer matchResultConsumer,
            List<Object[]> matchRows, RecordMatchingIndicator recordMatchingIndicator) throws BusinessException {
        AnalysisMatchRecordGrouping analysisMatchRecordGrouping;
        // Constant-first comparison so that an unset algorithm falls back to the default grouping instead of an NPE.
        if (RecordMatcherType.T_SwooshAlgorithm.name()
                .equals(recordMatchingIndicator.getBuiltInMatchRuleDefinition().getRecordLinkageAlgorithm())) {
            analysisMatchRecordGrouping = new AnalysisSwooshMatchRecordGrouping(matchResultConsumer);
        } else {
            analysisMatchRecordGrouping = new AnalysisMatchRecordGrouping(matchResultConsumer);
        }
        AnalysisRecordGroupingUtils.setRuleMatcher(columnMap, recordMatchingIndicator, analysisMatchRecordGrouping);
        analysisMatchRecordGrouping.setMatchRows(matchRows);
        try {
            AnalysisRecordGroupingUtils.initialMatchGrouping(columnMap, recordMatchingIndicator, analysisMatchRecordGrouping);
            analysisMatchRecordGrouping.run();
        } catch (InstantiationException e) {
            throw toBusinessException(e);
        } catch (IllegalAccessException e) {
            throw toBusinessException(e);
        } catch (ClassNotFoundException e) {
            throw toBusinessException(e);
        }
    }

    /**
     * Wraps a low-level failure into a {@link BusinessException} while keeping the original exception as its cause.
     *
     * @param cause the exception that made the grouping fail
     * @return a new business exception (without message) whose cause is <code>cause</code>
     */
    private static BusinessException toBusinessException(Exception cause) {
        BusinessException businessException = new BusinessException();
        businessException.initCause(cause);
        return businessException;
    }
}