// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.record.linkage.ui.composite.utils;
import java.sql.Types;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.eclipse.emf.common.util.EList;
import org.talend.core.GlobalServiceRegister;
import org.talend.core.ITDQRepositoryService;
import org.talend.core.model.metadata.types.JavaTypesManager;
import org.talend.cwm.relational.TdColumn;
import org.talend.dataquality.PluginConstant;
import org.talend.dataquality.analysis.Analysis;
import org.talend.dataquality.indicators.Indicator;
import org.talend.dataquality.indicators.columnset.BlockKeyIndicator;
import org.talend.dataquality.indicators.columnset.ColumnsetPackage;
import org.talend.dataquality.indicators.columnset.RecordMatchingIndicator;
import org.talend.dataquality.record.linkage.constant.AttributeMatcherType;
import org.talend.dataquality.record.linkage.constant.TokenizedResolutionMethod;
import org.talend.dataquality.record.linkage.ui.composite.table.ISortComparator;
import org.talend.dataquality.record.linkage.ui.composite.table.SortComparator;
import org.talend.dataquality.record.linkage.ui.composite.table.SortState;
import org.talend.dataquality.record.linkage.utils.DefaultSurvivorShipDataTypeEnum;
import org.talend.dataquality.record.linkage.utils.HandleNullEnum;
import org.talend.dataquality.record.linkage.utils.MatchAnalysisConstant;
import org.talend.dataquality.record.linkage.utils.SurvivorShipAlgorithmEnum;
import org.talend.dataquality.rules.AlgorithmDefinition;
import org.talend.dataquality.rules.BlockKeyDefinition;
import org.talend.dataquality.rules.KeyDefinition;
import org.talend.dataquality.rules.MatchKeyDefinition;
import org.talend.dataquality.rules.MatchRule;
import org.talend.dataquality.rules.RulesFactory;
import org.talend.dq.analysis.AnalysisRecordGroupingUtils;
import orgomg.cwm.objectmodel.core.ModelElement;
/**
 * Static helper methods for the match analysis UI: creating default match keys, converting blocking key
 * definitions into maps, looking up the matching indicators of an analysis and sorting result rows.
 * <p>
 * created by zshen on Aug 6, 2013
 */
public class MatchRuleAnlaysisUtils {

    /**
     * Get the column names of a match rule.
     * <p>
     * NOTE(review): the current implementation ignores {@code ruleMatcher} and always returns a new, empty,
     * mutable list -- confirm whether the match keys of the rule were meant to be collected here.
     *
     * @param ruleMatcher the match rule (currently unused)
     * @return a new empty list
     */
    public static List<String> getColumnFromRuleMatcher(MatchRule ruleMatcher) {
        return new ArrayList<String>();
    }

    /**
     * Get the column names of a blocking key definition.
     * <p>
     * NOTE(review): the current implementation ignores {@code blockKeyDefinition} and always returns a new,
     * empty, mutable list -- confirm whether the column of the definition was meant to be returned here.
     *
     * @param blockKeyDefinition the blocking key definition (currently unused)
     * @return a new empty list
     */
    public static List<String> getColumnFromBlockKey(BlockKeyDefinition blockKeyDefinition) {
        return new ArrayList<String>();
    }

    /**
     * Create a match key definition for the given column, filled with default values.
     * <p>
     * The defaults are: confidence weight 1, threshold 1.0, {@link HandleNullEnum#NULL_MATCH_NULL},
     * {@link TokenizedResolutionMethod#NO}, and an algorithm with empty parameters whose type is the first
     * constant declared in {@link AttributeMatcherType}.
     *
     * @param columnName the column name; it is used both as the key name and as the key column
     * @return the newly created match key definition
     */
    public static MatchKeyDefinition createDefaultMatchRow(String columnName) {
        AlgorithmDefinition algorithm = RulesFactory.eINSTANCE.createAlgorithmDefinition();
        algorithm.setAlgorithmParameters(StringUtils.EMPTY);
        algorithm.setAlgorithmType(AttributeMatcherType.values()[0].name());

        MatchKeyDefinition matchKey = RulesFactory.eINSTANCE.createMatchKeyDefinition();
        // by default the name of the match attribute rule is the name of the selected column
        matchKey.setName(columnName);
        matchKey.setColumn(columnName);
        matchKey.setConfidenceWeight(1);
        matchKey.setHandleNull(HandleNullEnum.NULL_MATCH_NULL.getValue());
        matchKey.setTokenizationType(TokenizedResolutionMethod.NO.getComponentValue());
        matchKey.setAlgorithm(algorithm);
        matchKey.setThreshold(1.0);
        return matchKey;
    }

    /**
     * Build a single match rule holding one default match key per key of the given map.
     *
     * @param columnMap map whose keys are the column names; the values are not read. May be null.
     * @return an empty list when {@code columnMap} is null, otherwise a list containing exactly one match rule
     */
    public static List<MatchRule> convertDataMapToRuleMatcher(Map<String, String> columnMap) {
        List<MatchRule> matcherList = new ArrayList<MatchRule>();
        if (columnMap == null) {
            return matcherList;
        }
        MatchRule rule = RulesFactory.eINSTANCE.createMatchRule();
        for (String columnName : columnMap.keySet()) {
            rule.getMatchKeys().add(createDefaultMatchRow(columnName));
        }
        matcherList.add(rule);
        return matcherList;
    }

    /**
     * Convert blocking key definitions into the map representation produced by
     * {@link AnalysisRecordGroupingUtils#getBlockingKeyMap}.
     *
     * @param blockKeyDefList the key definitions; every element is cast to {@link BlockKeyDefinition} and its
     * pre-algorithm, algorithm and post-algorithm are dereferenced, so they are expected to be non-null. The
     * list itself may be null.
     * @return one map per key definition, in list order; an empty list when {@code blockKeyDefList} is null
     */
    public static List<Map<String, String>> blockingKeyDataConvert(List<KeyDefinition> blockKeyDefList) {
        List<Map<String, String>> resultListData = new ArrayList<Map<String, String>>();
        if (blockKeyDefList == null) {
            // same tolerance for a missing input as convertDataMapToRuleMatcher
            return resultListData;
        }
        for (KeyDefinition keyDef : blockKeyDefList) {
            BlockKeyDefinition blockKey = (BlockKeyDefinition) keyDef;
            AlgorithmDefinition pre = blockKey.getPreAlgorithm();
            AlgorithmDefinition main = blockKey.getAlgorithm();
            AlgorithmDefinition post = blockKey.getPostAlgorithm();
            Map<String, String> blockKeyMap = AnalysisRecordGroupingUtils.getBlockingKeyMap(blockKey.getColumn(),
                    pre.getAlgorithmType(), pre.getAlgorithmParameters(), main.getAlgorithmType(),
                    main.getAlgorithmParameters(), post.getAlgorithmType(), post.getAlgorithmParameters());
            resultListData.add(blockKeyMap);
        }
        return resultListData;
    }

    /**
     * Get the record matching indicator from an analysis.
     *
     * @param analysis the analysis whose result indicators are searched
     * @return the first {@link RecordMatchingIndicator} found, or null when there is none
     */
    public static RecordMatchingIndicator getRecordMatchIndicatorFromAna(Analysis analysis) {
        for (Indicator indicator : analysis.getResults().getIndicators()) {
            if (indicator instanceof RecordMatchingIndicator) {
                return (RecordMatchingIndicator) indicator;
            }
        }
        return null;
    }

    /**
     * Get the record matching indicator and the blocking key indicator from an analysis. When several
     * indicators of the same kind exist the last one wins; a missing indicator is replaced by a newly created
     * one, so neither slot of the result is null.
     *
     * @param analysis the analysis whose result indicators are searched
     * @return the index 0 will be RecordMatchingIndicator and index 1 will be BlockKeyIndicator
     */
    public static Object[] getNeedIndicatorFromAna(Analysis analysis) {
        Object[] found = new Object[2];
        EList<Indicator> indicators = analysis.getResults().getIndicators();
        for (Indicator indicator : indicators) {
            if (indicator instanceof RecordMatchingIndicator) {
                found[0] = indicator;
            } else if (indicator instanceof BlockKeyIndicator) {
                found[1] = indicator;
            }
        }
        // If the record matching indicator is missing, create a default one.
        if (found[0] == null) {
            found[0] = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createRecordMatchingIndicator();
        }
        // If the blocking key indicator is missing, create a default one.
        if (found[1] == null) {
            found[1] = ColumnsetPackage.eINSTANCE.getColumnsetFactory().createBlockKeyIndicator();
        }
        return found;
    }

    /**
     * Check whether the column name equals one of the additional special columns (GID, GRP_QUALITY,
     * GROUP_SIZE, SCORE, ATTRIBUTE_SCORES or BLOCK_KEY of {@link MatchAnalysisConstant}).
     *
     * @param columnName the name to test, may be null
     * @return true when the name is exactly one of the special column names
     */
    public static boolean isEqualsToAdditionalColumn(String columnName) {
        return MatchAnalysisConstant.GID.equals(columnName) || MatchAnalysisConstant.GRP_QUALITY.equals(columnName)
                || MatchAnalysisConstant.GROUP_SIZE.equals(columnName) || MatchAnalysisConstant.SCORE.equals(columnName)
                || MatchAnalysisConstant.ATTRIBUTE_SCORES.equals(columnName)
                || MatchAnalysisConstant.BLOCK_KEY.equals(columnName);
    }

    /**
     * Refresh the data table through {@link ITDQRepositoryService}. Nothing happens when that service is not
     * registered or cannot be obtained.
     *
     * @param analysis the model element handed to the service
     * @param resultData the rows handed to the service
     */
    public static void refreshDataTable(ModelElement analysis, List<Object[]> resultData) {
        if (!GlobalServiceRegister.getDefault().isServiceRegistered(ITDQRepositoryService.class)) {
            return;
        }
        ITDQRepositoryService tdqRepService = (ITDQRepositoryService) GlobalServiceRegister.getDefault().getService(
                ITDQRepositoryService.class);
        if (tdqRepService != null) {
            tdqRepService.refreshTableWithResult(analysis, resultData);
        }
    }

    /**
     * Sort the result data in place by master flag and then regroup it by GID.
     * <p>
     * The GID and MASTER columns are the last columns whose name ends (ignoring case) with
     * {@link MatchAnalysisConstant#GID} respectively {@link MatchAnalysisConstant#MASTER}. Rows are first
     * ordered by the MASTER cell in descending string order and then regrouped with
     * {@link #insertionSort(List, int)}. A step whose column cannot be found is skipped instead of failing on
     * an index of -1.
     *
     * @param allColumns the column names, in the same order as the cells of each row
     * @param resultData the rows to sort; the MASTER cells are cast to String
     * @return the same list instance as {@code resultData}
     */
    public static List<Object[]> sortResultByGID(String[] allColumns, List<Object[]> resultData) {
        int gidIndex = -1;
        int masterIndex = -1;
        for (int i = 0; i < allColumns.length; i++) {
            if (StringUtils.endsWithIgnoreCase(allColumns[i], MatchAnalysisConstant.GID)) {
                gidIndex = i;
            } else if (StringUtils.endsWithIgnoreCase(allColumns[i], MatchAnalysisConstant.MASTER)) {
                masterIndex = i;
            }
        }
        // Sort by master first
        if (masterIndex >= 0) {
            final int masterIdx = masterIndex;
            Comparator<Object[]> byMasterDescending = new Comparator<Object[]>() {

                @Override
                public int compare(Object[] row1, Object[] row2) {
                    return ((String) row2[masterIdx]).compareTo((String) row1[masterIdx]);
                }
            };
            java.util.Collections.sort(resultData, byMasterDescending);
        }
        if (gidIndex >= 0) {
            insertionSort(resultData, gidIndex);
        }
        return resultData;
    }

    /**
     * Regroup the rows in place so that rows of the same group become neighbours.
     * <p>
     * Each row, from the second one on, is moved towards the front until the row directly before it belongs
     * to the same group (see {@link #isSameGroup(String, String)}) or until it reaches index 0. A null group
     * id cell never belongs to any group.
     *
     * @param data the rows to regroup
     * @param gidIdx the index of the group id cell inside each row
     */
    public static void insertionSort(List<Object[]> data, int gidIdx) {
        for (int out = 1; out < data.size(); out++) {
            Object[] moving = data.get(out);
            // invariant for the whole inner loop, so convert it only once
            String movingGroupId = toGroupId(moving[gidIdx]);
            int in = out;
            while (in > 0 && !isSameGroup(toGroupId(data.get(in - 1)[gidIdx]), movingGroupId)) {
                data.set(in, data.get(in - 1));
                --in;
            }
            data.set(in, moving);
        }
    }

    /**
     * Null-safe string form of a group id cell; null stays null so that {@link #isSameGroup} rejects it.
     */
    private static String toGroupId(Object cell) {
        return cell == null ? null : cell.toString();
    }

    /**
     * Check whether two group ids designate the same group. A group id may hold several comma separated ids
     * (merged groups); the two are the same group as soon as they share one id, ignoring case.
     *
     * @param groupID1 group ID one
     * @param groupID2 group ID two
     * @return true if they are the same group; false when either id is null or empty after trimming
     */
    public static boolean isSameGroup(String groupID1, String groupID2) {
        if (groupID1 == null || groupID2 == null) {
            return false;
        }
        if (groupID1.trim().equals(StringUtils.EMPTY) || groupID2.trim().equals(StringUtils.EMPTY)) {
            return false;
        }
        String[] ids1 = StringUtils.splitByWholeSeparatorPreserveAllTokens(groupID1, PluginConstant.COMMA_STRING);
        String[] ids2 = StringUtils.splitByWholeSeparatorPreserveAllTokens(groupID2, PluginConstant.COMMA_STRING);
        for (String id1 : ids1) {
            for (String id2 : ids2) {
                if (id1.equalsIgnoreCase(id2)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Sort the rows in place on the column selected in the sort state.
     * <p>
     * The comparator is chosen once, on the first comparison: VARCHAR when {@code columns} is empty or its
     * first element is not a {@link TdColumn}; for an index beyond {@code columns} (special columns) INTEGER
     * for the group size column and VARCHAR otherwise; else the java data type of the column's sql data type.
     * Rows are treated as equal when either of them has no cell at the selected index, when the comparator
     * cannot be created, or when the sort direction is neither ASC nor DESC.
     *
     * @param sortState gives the selected column index and the sort direction
     * @param resultData the rows to sort
     * @param columns the model elements of the regular (non special) columns
     * @return the same list instance as {@code resultData}
     */
    public static List<Object[]> sortDataByColumn(final SortState sortState, List<Object[]> resultData,
            final List<ModelElement> columns) {
        Comparator<Object[]> comparator = new Comparator<Object[]>() {

            // created lazily on the first comparison and reused afterwards
            private ISortComparator sortComparator = null;

            @Override
            public int compare(Object[] row1, Object[] row2) {
                int columnIndex = sortState.getSelectedColumnIndex();
                // when the user select the special column which has no result yet
                if (columnIndex >= row1.length || columnIndex >= row2.length) {
                    return 0;
                }
                Object value1 = row1[columnIndex];
                Object value2 = row2[columnIndex];
                // get the sort comparator according to its type.
                try {
                    if (sortComparator == null) {
                        sortComparator = getSortComparator(columnIndex);
                    }
                } catch (ParseException e) {
                    // no usable comparator: leave the rows in their current relative order
                    return 0;
                }
                if (sortComparator == null) {
                    return 0;
                }
                switch (sortState.getCurrentSortDirection()) {
                case ASC:
                    return sortComparator.compareTwo(value1, value2);
                case DESC:
                    return sortComparator.compareTwo(value2, value1);
                default:
                    return 0;
                }
            }

            // when the columns is TdColumn, use its sqlType to compare
            private ISortComparator getSortComparator(int index) throws ParseException {
                if (columns.isEmpty() || !(columns.get(0) instanceof TdColumn)) {// if not a db column
                    return SortComparator.getSortComparator(Types.VARCHAR);
                }
                if (columns.size() <= index) {// special columns
                    // group size is integer type
                    int sqlType = sortState.isGroupSizeColumn() ? Types.INTEGER : Types.VARCHAR;
                    return SortComparator.getSortComparator(sqlType);
                }
                TdColumn element = (TdColumn) columns.get(index);
                return SortComparator.getSortComparator(element.getSqlDataType().getJavaDataType());
            }
        };
        java.util.Collections.sort(resultData, comparator);
        return resultData;
    }

    /**
     * Check whether a survivorship function can be applied to a data type: LARGEST/SMALLEST need a number,
     * LONGEST/SHORTEST a string, PREFER_TRUE/PREFER_FALSE a boolean; every other function accepts any type.
     * The data type is accepted either as a talend type (checked with {@link JavaTypesManager}) or as the
     * value of the matching {@link DefaultSurvivorShipDataTypeEnum} constant.
     *
     * @param algorithmType the saved value of the survivorship function
     * @param dataType the data type to check
     * @return true when the function is consistent with the data type
     */
    public static boolean isSurvivorShipFunctionConsistentWithType(String algorithmType, String dataType) {
        SurvivorShipAlgorithmEnum survivorShipAlgorithm = SurvivorShipAlgorithmEnum.getTypeBySavedValue(algorithmType);
        if (survivorShipAlgorithm == null) {
            // NOTE(review): assumes the lookup may return null for an unknown saved value -- confirm. It is
            // handled like the default branch because switching on null would throw a NullPointerException.
            return true;
        }
        switch (survivorShipAlgorithm) {
        case LARGEST:
        case SMALLEST:
            // compare column's talend type || compare DefaultSurvivorShipDataTypeEnum own type
            return JavaTypesManager.isNumber(dataType) || DefaultSurvivorShipDataTypeEnum.NUMBER.getValue().equals(dataType);
        case LONGEST:
        case SHORTEST:
            return JavaTypesManager.isString(dataType) || DefaultSurvivorShipDataTypeEnum.STRING.getValue().equals(dataType);
        case PREFER_TRUE:
        case PREFER_FALSE:
            return JavaTypesManager.isBoolean(dataType) || DefaultSurvivorShipDataTypeEnum.BOOLEAN.getValue().equals(dataType);
        default:
            return true;
        }
    }
}