// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dq.analysis.explore;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.eclipse.emf.common.util.EList;
import org.talend.cwm.relational.TdColumn;
import org.talend.dataquality.PluginConstant;
import org.talend.dataquality.indicators.IndicatorsPackage;
import org.talend.dataquality.indicators.definition.CharactersMapping;
import org.talend.dataquality.indicators.definition.IndicatorDefinition;
import org.talend.dq.dbms.DB2DbmsLanguage;
import org.talend.dq.dbms.DbmsLanguageFactory;
import org.talend.dq.dbms.InfomixDbmsLanguage;
import org.talend.dq.indicators.definitions.DefinitionHandler;
import org.talend.utils.sql.Java2SqlType;
/**
* @author scorreia
*
* This class generate select statement for any function found in an instantiated query of the type:
* "SELECT function(col), COUNT(*) FROM ..."
*/
public class FunctionFrequencyStatExplorer extends FrequencyStatisticsExplorer {
// MOD mzhao 2010-04-12 bug 11554
private static final String REGEX = ".*\\s*SELECT.*\\s* (YEAR.*|REPLACE.*|TRANSLATE.*|TO_NUMBER.*|DATEPART.*)\\s*, COUNT\\(\\*\\)\\s*(AS|as)?\\s*\\w*\\s* FROM"; //$NON-NLS-1$
private static final String REGEX_INFORMIX = ".*\\s*SELECT (REPLACE.*)\\s*(AS|as)+\\s*\\w+\\s* FROM"; //$NON-NLS-1$
/*
* (non-Javadoc)
*
* @see org.talend.dq.analysis.explore.FrequencyStatisticsExplorer#getFreqRowsStatement()
*/
@Override
protected String getFreqRowsStatement() {
// generate SELECT * FROM TABLE WHERE function(columnName) = labelToFind
TdColumn column = (TdColumn) indicator.getAnalyzedElement();
String clause = getInstantiatedClause();
return "SELECT * FROM " + getFullyQualifiedTableName(column) + dbmsLanguage.where() + inBrackets(clause) //$NON-NLS-1$
+ andDataFilterClause();
}
@Override
protected String getInstantiatedClause() {
// get function which convert data into a pattern
String colName = columnName;
String function = null;
TdColumn column = (TdColumn) indicator.getAnalyzedElement();
int javaType = column.getSqlDataType().getJavaDataType();
if (!Java2SqlType.isNumbericInSQL(javaType)) {
function = getFunction();
} else {
colName = columnNameConvert();
}
// MOD mzhao bug 9681 2009-11-09
Object value = null;
if (Java2SqlType.isNumbericInSQL(javaType) && dbmsLanguage instanceof DB2DbmsLanguage) {
value = entity.getKey();
} else {
value = "'" + entity.getKey() + "'";//$NON-NLS-1$//$NON-NLS-2$
}
String clause = entity.isLabelNull() ? columnName + dbmsLanguage.isNull() : ((function == null ? colName : function)
+ dbmsLanguage.equal() + value);
// ADD xqliu 2011-06-03 bug 20600's note 86482
if (dbmsLanguage instanceof InfomixDbmsLanguage) {
clause = clause.replaceAll(InfomixDbmsLanguage.AS_REPLACE_COLUMN, PluginConstant.EMPTY_STRING);
}
// ~ 20600
return clause;
}
private String getFunction() {
String instantiatedSQL = getIndicatorExpressionSQL();
if (instantiatedSQL == null) {
return instantiatedSQL;
}
Pattern p = Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
// MOD mzhao 2010-04-12 bug 11554
String dbmsName = this.dbmsLanguage.getDbmsName();
if (DbmsLanguageFactory.isInfomix(dbmsName)) {
p = Pattern.compile(REGEX_INFORMIX, Pattern.CASE_INSENSITIVE);
}
// ~
Matcher matcher = p.matcher(instantiatedSQL);
matcher.find();
// MOD mzhao 2009-11-09 bug 9681: Catch the possibility that the sql body contains "TOP" keywords.
// MOD MOD mzhao 2010-04-12 bug 11554
String group = matcher.group(1);
return group;
}
private String addFunctionTypeConvert(String colName) {
if (colName == null) {
return colName;
}
colName = " CAST(" + colName + " AS CHAR(20)) ";//$NON-NLS-1$//$NON-NLS-2$
return colName;
}
@SuppressWarnings("deprecation")
private String columnNameConvert() {
String colName = columnName;
TdColumn column = (TdColumn) indicator.getAnalyzedElement();
IndicatorDefinition indicatorDefinition;
String label = indicator.getIndicatorDefinition().getLabel();
if (label == null || PluginConstant.EMPTY_STRING.equals(label)) {
indicatorDefinition = indicator.getIndicatorDefinition();
} else {
indicatorDefinition = DefinitionHandler.getInstance().getIndicatorDefinition(label);
}
if (indicator.eClass().equals(IndicatorsPackage.eINSTANCE.getPatternFreqIndicator())
|| indicator.eClass().equals(IndicatorsPackage.eINSTANCE.getPatternLowFreqIndicator())) {
// MOD zshen for bug 12675 2010-05-12
if (Java2SqlType.isNumbericInSQL(column.getSqlDataType().getJavaDataType())) {
colName = addFunctionTypeConvert(colName);
}
// ~12675
final EList<CharactersMapping> charactersMapping = indicatorDefinition.getCharactersMapping();
colName = dbmsLanguage.getPatternFinderFunction(colName, charactersMapping);
if (columnName == null) { // no replacement found, try the default one
colName = dbmsLanguage.getPatternFinderDefaultFunction(colName);
}
}
return colName;
}
}