// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.indicator.userdefine;
import java.util.HashMap;
import org.talend.dataquality.indicators.sql.impl.UserDefIndicatorImpl;
/**
* DOC yyin 201204 This class computes, for a group of data, the distribution of the leading digits as described by
* Benford's Law. The user can then compare this distribution with the standard one in order to detect possible
* cases of fraud.
*
* related SQL: SELECT cast(LEFT(<%=__COLUMN_NAMES__%>,1) as char), COUNT(*) c FROM <%=__TABLE_NAME__%> t
* <%=__WHERE_CLAUSE__%> GROUP BY LEFT(<%=__COLUMN_NAMES__%>,1) order by LEFT(<%=__COLUMN_NAMES__%>,1)
*/
public class BenfordLawFrequencyIndicator extends UserDefIndicatorImpl {

    /**
     * Finalizes the computation. This indicator accumulates its result while handling the data, so there is
     * nothing left to post-process here.
     *
     * @return always {@code true}
     * @see org.talend.dataquality.indicators.Indicator#finalizeComputation()
     */
    public boolean finalizeComputation() {
        return true;
    }

    /**
     * Handles one value: increments the row count and, when the textual form of the value starts with a decimal
     * digit, increments the frequency stored for that digit in {@code valueToFreq} (keyed by {@code Integer}).
     * <p>
     * The row count is incremented for every call. Values that carry no leading digit ({@code null}, an empty
     * string, or a text whose first character is not a digit such as {@code "-12"} or {@code ".5"}) are counted as
     * rows but are not added to the frequency table, instead of aborting the computation with an exception.
     *
     * @param data the value to analyze, may be {@code null}
     * @return always {@code true}
     * @see org.talend.dataquality.indicators.Indicator#handle(java.lang.Object)
     */
    public boolean handle(Object data) {
        if (this.valueToFreq == null) {
            this.valueToFreq = new HashMap<Object, Long>();
        }
        this.count++;
        if (data == null) {
            return true;
        }
        String text = String.valueOf(data);
        if (text.length() == 0) {
            // no first character at all: nothing to classify
            return true;
        }
        // Character.digit is the same conversion Integer.parseInt applies to each character, so every input
        // that was accepted before yields the same key; it returns -1 instead of throwing for non-digits.
        int digit = Character.digit(text.charAt(0), 10);
        if (digit < 0) {
            return true;
        }
        Integer leadDigit = Integer.valueOf(digit);
        // increment the number of occurrences of the leading digit (starting at 1 on first sight)
        Long occurrences = this.valueToFreq.get(leadDigit);
        this.valueToFreq.put(leadDigit, occurrences == null ? 1L : occurrences + 1L);
        return true;
    }

    /**
     * Resets this indicator: delegates to the super class and replaces the frequency table with a new empty map.
     *
     * @return always {@code true}; the result of {@code super.reset()} is not propagated
     * @see org.talend.dataquality.indicators.Indicator#reset()
     */
    public boolean reset() {
        super.reset(); // reset the number of count
        this.valueToFreq = new HashMap<Object, Long>(); // should be done in super class
        return true;
    }
}