/** * <copyright> </copyright> * * $Id$ */ package org.talend.dataquality.indicators.columnset.impl; import java.util.Arrays; import java.util.List; import java.util.TreeMap; import org.apache.commons.lang.StringUtils; import org.eclipse.emf.common.notify.Notification; import org.eclipse.emf.common.notify.NotificationChain; import org.eclipse.emf.ecore.EClass; import org.eclipse.emf.ecore.InternalEObject; import org.eclipse.emf.ecore.impl.ENotificationImpl; import org.talend.dataquality.PluginConstant; import org.talend.dataquality.indicators.columnset.ColumnsetPackage; import org.talend.dataquality.indicators.columnset.RecordMatchingIndicator; import org.talend.dataquality.rules.MatchRuleDefinition; /** * <!-- begin-user-doc --> An implementation of the model object '<em><b>Record Matching Indicator</b></em>'. <!-- * end-user-doc --> * <p> * The following features are implemented: * <ul> * <li>{@link org.talend.dataquality.indicators.columnset.impl.RecordMatchingIndicatorImpl#getGroupSize2groupFrequency * <em>Group Size2group Frequency</em>}</li> * <li>{@link org.talend.dataquality.indicators.columnset.impl.RecordMatchingIndicatorImpl#getMatchedRecordCount <em> * Matched Record Count</em>}</li> * <li>{@link org.talend.dataquality.indicators.columnset.impl.RecordMatchingIndicatorImpl#getSuspectRecordCount <em> * Suspect Record Count</em>}</li> * <li> * {@link org.talend.dataquality.indicators.columnset.impl.RecordMatchingIndicatorImpl#getBuiltInMatchRuleDefinition * <em>Built In Match Rule Definition</em>}</li> * </ul> * </p> * * @generated */ public class RecordMatchingIndicatorImpl extends ColumnSetMultiValueIndicatorImpl implements RecordMatchingIndicator { // To be syn with RecordMatcherType#T_SwooshAlgorithm private final static String T_SWOOSH_ALG_NAME = "T_SwooshAlgorithm"; /** * The default value of the '{@link #getGroupSize2groupFrequency() <em>Group Size2group Frequency</em>}' attribute. * <!-- begin-user-doc --> <!-- end-user-doc --> * * @see #getGroupSize2groupFrequency() * @generate NOT * @ordered */ protected static final TreeMap<Object, Long> GROUP_SIZE2GROUP_FREQUENCY_EDEFAULT = new TreeMap<Object, Long>(); /** * The cached value of the '{@link #getGroupSize2groupFrequency() <em>Group Size2group Frequency</em>}' attribute. * <!-- begin-user-doc --> <!-- end-user-doc --> * * @see #getGroupSize2groupFrequency() * @generated * @ordered */ protected TreeMap<Object, Long> groupSize2groupFrequency = GROUP_SIZE2GROUP_FREQUENCY_EDEFAULT; /** * The default value of the '{@link #getMatchedRecordCount() <em>Matched Record Count</em>}' attribute. <!-- * begin-user-doc --> <!-- end-user-doc --> * * @see #getMatchedRecordCount() * @generated * @ordered */ protected static final long MATCHED_RECORD_COUNT_EDEFAULT = 0L; /** * The cached value of the '{@link #getMatchedRecordCount() <em>Matched Record Count</em>}' attribute. <!-- * begin-user-doc --> <!-- end-user-doc --> * * @see #getMatchedRecordCount() * @generated * @ordered */ protected long matchedRecordCount = MATCHED_RECORD_COUNT_EDEFAULT; /** * The default value of the '{@link #getSuspectRecordCount() <em>Suspect Record Count</em>}' attribute. <!-- * begin-user-doc --> <!-- end-user-doc --> * * @see #getSuspectRecordCount() * @generated * @ordered */ protected static final long SUSPECT_RECORD_COUNT_EDEFAULT = 0L; /** * The cached value of the '{@link #getSuspectRecordCount() <em>Suspect Record Count</em>}' attribute. <!-- * begin-user-doc --> <!-- end-user-doc --> * * @see #getSuspectRecordCount() * @generated * @ordered */ protected long suspectRecordCount = SUSPECT_RECORD_COUNT_EDEFAULT; /** * The cached value of the '{@link #getBuiltInMatchRuleDefinition() <em>Built In Match Rule Definition</em>}' * containment reference. <!-- begin-user-doc --> <!-- end-user-doc --> * * @see #getBuiltInMatchRuleDefinition() * @generated * @ordered */ protected MatchRuleDefinition builtInMatchRuleDefinition; private int groupSizeColumnIndex = 0;// Column index of group size private int masterColumnIndex = 0; // Column index of master private int groupQualityColumnIndex = 0;// Column index of group quality private String[] matchRowSchema = null;// Schema of a match row /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ protected RecordMatchingIndicatorImpl() { super(); } @Override public void setMatchRowSchema(String[] matchRowSchema) { this.matchRowSchema = matchRowSchema; } @Override public String[] getMatchRowSchema() { return this.matchRowSchema; } /* * (non-Javadoc) * * @see org.talend.dataquality.indicators.columnset.impl.ColumnSetMultiValueIndicatorImpl#reset() */ @Override public boolean reset() { List<String> matchRowSchemaList = Arrays.asList(matchRowSchema); this.groupSizeColumnIndex = matchRowSchemaList.indexOf(PluginConstant.GRP_SIZE); this.masterColumnIndex = matchRowSchemaList.indexOf(PluginConstant.MASTER); this.groupQualityColumnIndex = matchRowSchemaList.indexOf(PluginConstant.GRP_QUALITY); matchedRecordCount = MATCHED_RECORD_COUNT_EDEFAULT; suspectRecordCount = SUSPECT_RECORD_COUNT_EDEFAULT; groupSize2groupFrequency = new TreeMap<Object, Long>(); return super.reset(); } /* * (non-Javadoc) * * @see * org.talend.dataquality.indicators.columnset.impl.ColumnSetMultiValueIndicatorImpl#handle(org.eclipse.emf.common * .util.EList) */ @Override public boolean handle(Object data) { String[] values = null; if (Object[].class.isInstance(data)) { values = new String[((Object[]) data).length]; for (int index = 0; index < values.length; index++) { Object value = ((Object[]) data)[index]; values[index] = value == null ? null : value.toString(); } } else if (String[].class.isInstance(data)) { values = (String[]) data; } else { return false; } Boolean isMaster = Boolean.valueOf(values[masterColumnIndex]); Integer groupSize = StringUtils.isEmpty(values[groupSizeColumnIndex]) ? 0 : Integer.valueOf(values[groupSizeColumnIndex]); if (isMaster) { // Find the master row if (null == groupSize2groupFrequency.get(values[groupSizeColumnIndex])) { groupSize2groupFrequency.put(values[groupSizeColumnIndex], 1l); } else { long freq = groupSize2groupFrequency.get(values[groupSizeColumnIndex]) + 1; groupSize2groupFrequency.put(values[groupSizeColumnIndex], freq); } // Compute matched record count if (builtInMatchRuleDefinition != null && groupSize > 1) { // Group quality score >= confidence threshold then it's a confident match group double groupScore = Double.valueOf(values[groupQualityColumnIndex]); double confidenceThreshold = builtInMatchRuleDefinition.getMatchGroupQualityThreshold(); if (groupScore >= confidenceThreshold) { matchedRecordCount = matchedRecordCount + groupSize; } else { suspectRecordCount = suspectRecordCount + groupSize; } } } if (T_SWOOSH_ALG_NAME.equals(getBuiltInMatchRuleDefinition().getRecordLinkageAlgorithm())) { // masters with group size greater than 1 should NOT be taken into account when compute row count in case of // t-swoosh algorithm. if (!(isMaster && groupSize > 1)) { count++; } } else { count++; } return Boolean.TRUE; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override protected EClass eStaticClass() { return ColumnsetPackage.Literals.RECORD_MATCHING_INDICATOR; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public TreeMap<Object, Long> getGroupSize2groupFrequency() { return groupSize2groupFrequency; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public void setGroupSize2groupFrequency(TreeMap<Object, Long> newGroupSize2groupFrequency) { TreeMap<Object, Long> oldGroupSize2groupFrequency = groupSize2groupFrequency; groupSize2groupFrequency = newGroupSize2groupFrequency; if (eNotificationRequired()) { eNotify(new ENotificationImpl(this, Notification.SET, ColumnsetPackage.RECORD_MATCHING_INDICATOR__GROUP_SIZE2GROUP_FREQUENCY, oldGroupSize2groupFrequency, groupSize2groupFrequency)); } } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public long getMatchedRecordCount() { return matchedRecordCount; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public void setMatchedRecordCount(long newMatchedRecordCount) { long oldMatchedRecordCount = matchedRecordCount; matchedRecordCount = newMatchedRecordCount; if (eNotificationRequired()) { eNotify(new ENotificationImpl(this, Notification.SET, ColumnsetPackage.RECORD_MATCHING_INDICATOR__MATCHED_RECORD_COUNT, oldMatchedRecordCount, matchedRecordCount)); } } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public long getSuspectRecordCount() { return suspectRecordCount; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public void setSuspectRecordCount(long newSuspectRecordCount) { long oldSuspectRecordCount = suspectRecordCount; suspectRecordCount = newSuspectRecordCount; if (eNotificationRequired()) { eNotify(new ENotificationImpl(this, Notification.SET, ColumnsetPackage.RECORD_MATCHING_INDICATOR__SUSPECT_RECORD_COUNT, oldSuspectRecordCount, suspectRecordCount)); } } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public MatchRuleDefinition getBuiltInMatchRuleDefinition() { return builtInMatchRuleDefinition; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ public NotificationChain basicSetBuiltInMatchRuleDefinition(MatchRuleDefinition newBuiltInMatchRuleDefinition, NotificationChain msgs) { MatchRuleDefinition oldBuiltInMatchRuleDefinition = builtInMatchRuleDefinition; builtInMatchRuleDefinition = newBuiltInMatchRuleDefinition; if (eNotificationRequired()) { ENotificationImpl notification = new ENotificationImpl(this, Notification.SET, ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION, oldBuiltInMatchRuleDefinition, newBuiltInMatchRuleDefinition); if (msgs == null) { msgs = notification; } else { msgs.add(notification); } } return msgs; } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public void setBuiltInMatchRuleDefinition(MatchRuleDefinition newBuiltInMatchRuleDefinition) { if (newBuiltInMatchRuleDefinition != builtInMatchRuleDefinition) { NotificationChain msgs = null; if (builtInMatchRuleDefinition != null) { msgs = ((InternalEObject) builtInMatchRuleDefinition).eInverseRemove(this, EOPPOSITE_FEATURE_BASE - ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION, null, msgs); } if (newBuiltInMatchRuleDefinition != null) { msgs = ((InternalEObject) newBuiltInMatchRuleDefinition).eInverseAdd(this, EOPPOSITE_FEATURE_BASE - ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION, null, msgs); } msgs = basicSetBuiltInMatchRuleDefinition(newBuiltInMatchRuleDefinition, msgs); if (msgs != null) { msgs.dispatch(); } } else if (eNotificationRequired()) { eNotify(new ENotificationImpl(this, Notification.SET, ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION, newBuiltInMatchRuleDefinition, newBuiltInMatchRuleDefinition)); } } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public NotificationChain eInverseRemove(InternalEObject otherEnd, int featureID, NotificationChain msgs) { switch (featureID) { case ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION: return basicSetBuiltInMatchRuleDefinition(null, msgs); } return super.eInverseRemove(otherEnd, featureID, msgs); } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public Object eGet(int featureID, boolean resolve, boolean coreType) { switch (featureID) { case ColumnsetPackage.RECORD_MATCHING_INDICATOR__GROUP_SIZE2GROUP_FREQUENCY: return getGroupSize2groupFrequency(); case ColumnsetPackage.RECORD_MATCHING_INDICATOR__MATCHED_RECORD_COUNT: return getMatchedRecordCount(); case ColumnsetPackage.RECORD_MATCHING_INDICATOR__SUSPECT_RECORD_COUNT: return getSuspectRecordCount(); case ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION: return getBuiltInMatchRuleDefinition(); } return super.eGet(featureID, resolve, coreType); } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @SuppressWarnings("unchecked") @Override public void eSet(int featureID, Object newValue) { switch (featureID) { case ColumnsetPackage.RECORD_MATCHING_INDICATOR__GROUP_SIZE2GROUP_FREQUENCY: setGroupSize2groupFrequency((TreeMap<Object, Long>) newValue); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__MATCHED_RECORD_COUNT: setMatchedRecordCount((Long) newValue); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__SUSPECT_RECORD_COUNT: setSuspectRecordCount((Long) newValue); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION: setBuiltInMatchRuleDefinition((MatchRuleDefinition) newValue); return; } super.eSet(featureID, newValue); } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public void eUnset(int featureID) { switch (featureID) { case ColumnsetPackage.RECORD_MATCHING_INDICATOR__GROUP_SIZE2GROUP_FREQUENCY: setGroupSize2groupFrequency(GROUP_SIZE2GROUP_FREQUENCY_EDEFAULT); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__MATCHED_RECORD_COUNT: setMatchedRecordCount(MATCHED_RECORD_COUNT_EDEFAULT); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__SUSPECT_RECORD_COUNT: setSuspectRecordCount(SUSPECT_RECORD_COUNT_EDEFAULT); return; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION: setBuiltInMatchRuleDefinition((MatchRuleDefinition) null); return; } super.eUnset(featureID); } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public boolean eIsSet(int featureID) { switch (featureID) { case ColumnsetPackage.RECORD_MATCHING_INDICATOR__GROUP_SIZE2GROUP_FREQUENCY: return GROUP_SIZE2GROUP_FREQUENCY_EDEFAULT == null ? groupSize2groupFrequency != null : !GROUP_SIZE2GROUP_FREQUENCY_EDEFAULT.equals(groupSize2groupFrequency); case ColumnsetPackage.RECORD_MATCHING_INDICATOR__MATCHED_RECORD_COUNT: return matchedRecordCount != MATCHED_RECORD_COUNT_EDEFAULT; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__SUSPECT_RECORD_COUNT: return suspectRecordCount != SUSPECT_RECORD_COUNT_EDEFAULT; case ColumnsetPackage.RECORD_MATCHING_INDICATOR__BUILT_IN_MATCH_RULE_DEFINITION: return builtInMatchRuleDefinition != null; } return super.eIsSet(featureID); } /** * <!-- begin-user-doc --> <!-- end-user-doc --> * * @generated */ @Override public String toString() { if (eIsProxy()) { return super.toString(); } StringBuffer result = new StringBuffer(super.toString()); result.append(" (groupSize2groupFrequency: "); result.append(groupSize2groupFrequency); result.append(", matchedRecordCount: "); result.append(matchedRecordCount); result.append(", suspectRecordCount: "); result.append(suspectRecordCount); result.append(')'); return result.toString(); } /* * (non-Javadoc) * * @see org.talend.dataquality.indicators.impl.IndicatorImpl#isSaveTempDataToFile() */ @Override public boolean isUsedMapDBMode() { return false; } } // RecordMatchingIndicatorImpl