/**
* <copyright> </copyright>
*
* $Id$
*/
package org.talend.dataquality.indicators.impl;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.eclipse.emf.ecore.EClass;
import org.talend.dataquality.indicators.BenfordLawFrequencyIndicator;
import org.talend.dataquality.indicators.IndicatorParameters;
import org.talend.dataquality.indicators.IndicatorsFactory;
import org.talend.dataquality.indicators.IndicatorsPackage;
/**
* <!-- begin-user-doc --> An implementation of the model object '<em><b>Benford Law Frequency Indicator</b></em>'. <!--
* end-user-doc -->
* <p>
* </p>
*
* @generated
*/
public class BenfordLawFrequencyIndicatorImpl extends FrequencyIndicatorImpl implements BenfordLawFrequencyIndicator {

    /**
     * Set once {@link #handleSpecialCharacterAndMissingValues()} has normalised the frequency map; cleared again by
     * {@link #reset()}.
     */
    private boolean isChecked = false;

    /** Key under which every value without a leading digit 0~9 is counted. */
    public static String INVALID = "invalid";//$NON-NLS-1$

    /**
     * <!-- begin-user-doc --> <!-- end-user-doc -->
     *
     * @generated
     */
    protected BenfordLawFrequencyIndicatorImpl() {
        super();
    }

    /**
     * Normalises the frequency map after results have been stored by the SQL engine. It handles these special cases:
     * <ul>
     * <li>keys that do not start with a digit 0~9 (including null values) are merged into the single
     * {@link #INVALID} entry;</li>
     * <li>keys longer than one character are cut to their leading digit, and their count is added to the count
     * already stored for that digit;</li>
     * <li>digits between 1~9 that are missing are added with a count of 0;</li>
     * <li>a leading 0 (e.g. for a double column) is kept under the key "0".</li>
     * </ul>
     * Nothing is done when the map is empty or when it has already been normalised since the last {@link #reset()}.
     */
    private void handleSpecialCharacterAndMissingValues() {
        // skip when already normalised OR when there is nothing to normalise
        if (isChecked || this.valueToFreq.size() < 1) {
            return;
        }

        // classify the keys first; the map must not be modified while its key set is being iterated
        long invalidCount = 0L;
        List<Object> invalidKeys = new ArrayList<Object>();
        List<Object> longKeys = new ArrayList<Object>();
        for (Object key : this.valueToFreq.keySet()) {
            if (isInvalid(key) < 0) {
                invalidKeys.add(key);
                Long freq = this.valueToFreq.get(key);
                if (freq != null) {
                    invalidCount += freq;
                }
            } else if (String.valueOf(key).length() > 1) {
                longKeys.add(key);
            }
        }

        // combine all invalid keys into one <"invalid", count> entry
        if (!invalidKeys.isEmpty()) {
            for (Object key : invalidKeys) {
                this.valueToFreq.remove(key);
            }
            this.valueToFreq.put(INVALID, invalidCount);
        }

        // a key must be exactly one digit: cut longer keys and ADD their count to the digit's existing count
        // (a plain put would overwrite the count of e.g. "1" when both "1" and "12" are present)
        for (Object key : longKeys) {
            String digit = String.valueOf(key).substring(0, 1);
            Long freq = this.valueToFreq.remove(key);
            addToFrequency(digit, freq == null ? 0L : freq.longValue());
        }

        // check from 1~9, if a digit is missing, add it as <digit, 0L>
        for (int i = 1; i < 10; i++) {
            String digit = String.valueOf(i);
            if (this.valueToFreq.get(digit) == null) {
                this.valueToFreq.put(digit, 0L);
            }
        }
        isChecked = true;
    }

    /**
     * Classifies a value by the first character of its string form.
     *
     * @param val the value to classify; {@code null} is converted to the text "null" and is therefore invalid
     * @return 1 if the first character is a digit 1~9, 0 if it is '0', and -1 otherwise (empty text or any other
     * leading character)
     */
    private int isInvalid(Object val) {
        // MOD msjian TDQ-6123: fix a IndexOutOfBoundsException
        String text = String.valueOf(val);
        if (text.length() < 1) {
            return -1;
        }
        char lead = text.charAt(0);
        // TDQ-6123~
        // only the ASCII digits are accepted: Character.isDigit would also accept other Unicode digits, which are
        // not among the keys "0".."9" kept by finalizeComputation()
        if (lead < '0' || lead > '9') {
            return -1;
        }
        return lead == '0' ? 0 : 1;
    }

    /**
     * <!-- begin-user-doc --> <!-- end-user-doc -->
     *
     * @generated
     */
    @Override
    protected EClass eStaticClass() {
        return IndicatorsPackage.Literals.BENFORD_LAW_FREQUENCY_INDICATOR;
    }

    /**
     * <!-- begin-user-doc --> count the occur times for 1~9 at the beginning of every numbers. <!-- end-user-doc -->
     * <p>
     * The row counter is incremented for every call. A {@code null} value, or a value whose string form does not
     * start with a digit 0~9, is counted under {@link #INVALID}; any other value is counted under its leading digit.
     *
     * @param data the value to count, may be {@code null}
     * @return always {@code true}
     *
     * @generated NOT
     */
    @Override
    public boolean handle(Object data) {
        this.count++;
        if (data == null || isInvalid(data) < 0) {
            setValue(INVALID);
        } else {
            // the leading character is a digit 0~9 here, so it is used directly as the key
            setValue(String.valueOf(data).substring(0, 1));
        }
        return true;
    }

    /**
     * Increments the frequency stored for the given key by one, creating the entry when needed.
     *
     * @param key a leading digit or {@link #INVALID}
     */
    private void setValue(String key) {
        addToFrequency(key, 1L);
    }

    /**
     * Adds {@code delta} to the frequency stored for {@code key}; a missing (or null) frequency counts as 0.
     *
     * @param key the frequency map key
     * @param delta the amount to add
     */
    private void addToFrequency(String key, long delta) {
        Long current = this.valueToFreq.get(key);
        long updated = (current == null) ? delta : current.longValue() + delta;
        this.valueToFreq.put(key, updated);
    }

    /**
     * Added yyin 20121008, TDQ-6233, the default limit=10, change it to 50 (0-9, a-z,null)
     * <p>
     * The parameters are created, with a top-N of 50, only when none are set yet.
     */
    @Override
    public IndicatorParameters getParameters() {
        if (parameters == null) {
            parameters = IndicatorsFactory.eINSTANCE.createIndicatorParameters();
            parameters.setTopN(50);
        }
        return parameters;
    }

    /*
     * (non-Javadoc)
     *
     * Lets the super class store the results, then normalises the frequency map when storing succeeded.
     *
     * @see org.talend.dataquality.indicators.impl.FrequencyIndicatorImpl#storeSqlResults(java.util.List)
     */
    @Override
    public boolean storeSqlResults(List<Object[]> objects) {
        boolean ok = super.storeSqlResults(objects);
        if (ok) {
            this.handleSpecialCharacterAndMissingValues();
        }
        return ok;
    }

    /*
     * (non-Javadoc)
     *
     * Besides the super class reset, clears the "already normalised" flag and pre-fills the frequency map.
     *
     * @see org.talend.dataquality.indicators.impl.FrequencyIndicatorImpl#reset()
     */
    @Override
    public boolean reset() {
        boolean ok = super.reset();
        // results stored after a reset have to be normalised again
        this.isChecked = false;
        // initialize map with expected digits 1..9
        for (int i = 1; i <= 9; i++) {
            this.valueToFreq.put(String.valueOf(i), 0L);
        }
        this.valueToFreq.put(INVALID, 0L);
        return ok;
    }

    /*
     * (non-Javadoc)
     *
     * Keeps only the entries for the keys "0".."9" and INVALID; every other entry of the frequency map is dropped.
     *
     * @see org.talend.dataquality.indicators.impl.FrequencyIndicatorImpl#finalizeComputation()
     */
    @Override
    public boolean finalizeComputation() {
        HashMap<Object, Long> kept = new HashMap<Object, Long>();
        for (int i = 0; i < 10; i++) {
            String digit = String.valueOf(i);
            Long freq = this.valueToFreq.get(digit);
            if (freq != null) {
                kept.put(digit, freq);
            }
        }
        Long invalidFreq = this.valueToFreq.get(INVALID);
        if (invalidFreq != null) {
            kept.put(INVALID, invalidFreq);
        }
        this.valueToFreq.clear();
        this.setValueToFreq(kept);
        return true;
    }
} // BenfordLawFrequencyIndicatorImpl