// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataprofiler.core.migration.impl;
import java.util.Date;
import org.apache.commons.lang.StringUtils;
import org.talend.core.model.metadata.builder.database.dburl.SupportDBUrlType;
import org.talend.dataprofiler.core.migration.AbstractWorksapceUpdateTask;
import org.talend.dataprofiler.core.migration.helper.IndicatorDefinitionFileHelper;
import org.talend.dataquality.indicators.definition.IndicatorDefinition;
import org.talend.dq.indicators.definitions.DefinitionHandler;
/**
* created by yyin on 2014-2-18 : add the sql expressions for the database:Netezza, in the system indicators of: some
* Text indicators, Pattern Frequency Statistics, Soundex, and Benford indicator; and add the default one in pattern
* finder and soundex who does not have the default one before
*
*/
public class AddNetezzaExpressionInIndicatorsTask extends AbstractWorksapceUpdateTask {
private static final String AVERAGE_LENGTH = "Average Length"; //$NON-NLS-1$
// related sql expression
private final String AVERAGE_LENGTH_SQL = "SELECT SUM(CHAR_LENGTH(<%=__COLUMN_NAMES__%>)), COUNT(<%=__COLUMN_NAMES__%>) FROM <%=__TABLE_NAME__%> WHERE (<%=__COLUMN_NAMES__%> IS NOT NULL ) AND (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String AVERAGE_LENGTH_WITH_BLANK_AND_NULL = "Average Length With Blank and Null"; //$NON-NLS-1$
private static final String AVERAGE_LENGTH_WITH_BLANK_AND_NULL_SQL = "SELECT SUM(CHAR_LENGTH(CASE WHEN CHAR_LENGTH( TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'')) ) =0 THEN '' ELSE ISNULL(<%=__COLUMN_NAMES__%>,'') END)), COUNT(*) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String AVERAGE_LENGTH_WITH_NULL = "Average Length With Null"; //$NON-NLS-1$
private static final String AVERAGE_LENGTH_WITH_NULL_SQL = "SELECT SUM(CHAR_LENGTH(ISNULL(<%=__COLUMN_NAMES__%>,''))), COUNT(*) FROM <%=__TABLE_NAME__%> WHERE (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String MAXIMAL_LENGTH = "Maximal Length"; //$NON-NLS-1$
private static final String MAXIMAL_LENGTH_SQL = "SELECT MAX(CHAR_LENGTH(<%=__COLUMN_NAMES__%>)) FROM <%=__TABLE_NAME__%> WHERE (<%=__COLUMN_NAMES__%> IS NOT NULL ) AND (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String MAXIMAL_LENGTH_WITH_NULL = "Maximal Length With Null"; //$NON-NLS-1$
private static final String MAXIMAL_LENGTH_WITH_NULL_SQL = "SELECT MAX(CHAR_LENGTH(ISNULL(<%=__COLUMN_NAMES__%>,''))) FROM <%=__TABLE_NAME__%> WHERE (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String MAXIMAL_LENGTH_WITH_BLANK_AND_NULL = "Maximal Length With Blank and Null";//$NON-NLS-1$
private static final String MAXIMAL_LENGTH_WITH_BLANK_AND_NULL_SQL = "SELECT MAX(CHAR_LENGTH(ISNULL(<%=__COLUMN_NAMES__%>,''))) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>";//$NON-NLS-1$
private static final String MINIMAL_LENGTH = "Minimal Length"; //$NON-NLS-1$
private static final String MINIMAL_LENGTH_SQL = "SELECT MIN(CHAR_LENGTH(<%=__COLUMN_NAMES__%>)) FROM <%=__TABLE_NAME__%> WHERE (<%=__COLUMN_NAMES__%> IS NOT NULL ) AND (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String MINIMAL_LENGTH_WITH_BLANK_AND_NULL = "Minimal Length With Blank and Null"; //$NON-NLS-1$
private static final String MINIMAL_LENGTH_WITH_BLANK_AND_NULL_SQL = "SELECT MIN(CHAR_LENGTH(ISNULL(<%=__COLUMN_NAMES__%>,''))) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String MINIMAL_LENGTH_WITH_NULL = "Minimal Length With Null"; //$NON-NLS-1$
private static final String MINIMAL_LENGTH_WITH_NULL_SQL = "SELECT MIN(CHAR_LENGTH(ISNULL(<%=__COLUMN_NAMES__%>,''))) FROM <%=__TABLE_NAME__%> WHERE (TRIM(ISNULL(<%=__COLUMN_NAMES__%>,'NULL TALEND')) <> '' ) <%=__AND_WHERE_CLAUSE__%>"; //$NON-NLS-1$
private static final String BENFORD_LAW = "Benford Law Frequency";//$NON-NLS-1$
private static final String BENFORD_LAW_SQL = "SELECT cast(SUBSTR(<%=__COLUMN_NAMES__%>,1,1) as char), COUNT(*) c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY 1 order by 1";//$NON-NLS-1$
private static final String SOUNDEX_LOW_FREQUENCY = "Soundex Low Frequency Table";//$NON-NLS-1$
private static final String SOUNDEX_LOW_FREQUENCY_DEFAULT = "SELECT MAX(<%=__COLUMN_NAMES__%>), SOUNDEX(<%=__COLUMN_NAMES__%>), COUNT(*) c, COUNT(DISTINCT <%=__COLUMN_NAMES__%>) d FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY SOUNDEX(<%=__COLUMN_NAMES__%>) ORDER BY d,c ASC"; //$NON-NLS-1$;
private static final String SOUNDEX_LOW_FREQUENCY_SQL = "SELECT MAX(<%=__COLUMN_NAMES__%>), NYSIIS(<%=__COLUMN_NAMES__%>), COUNT(*) c, COUNT(DISTINCT <%=__COLUMN_NAMES__%>) d FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY NYSIIS(<%=__COLUMN_NAMES__%>) ORDER BY d,c ASC";//$NON-NLS-1$
private static final String SOUNDEX_FREQUENCY = "Soundex Frequency Table";//$NON-NLS-1$
private static final String SOUNDEX_FREQUENCY_DEFAULT = "SELECT MAX(<%=__COLUMN_NAMES__%>), SOUNDEX(<%=__COLUMN_NAMES__%>), COUNT(*) c, COUNT(DISTINCT <%=__COLUMN_NAMES__%>) d FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY SOUNDEX(<%=__COLUMN_NAMES__%>) ORDER BY d DESC,c DESC";//$NON-NLS-1$
private static final String SOUNDEX_FREQUENCY_SQL = "SELECT MAX(<%=__COLUMN_NAMES__%>), NYSIIS(<%=__COLUMN_NAMES__%>) , COUNT(*) c, COUNT(DISTINCT <%=__COLUMN_NAMES__%>) d FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY 2 ORDER BY d DESC,c DESC";//$NON-NLS-1$
private static final String PATTERN_LOW_FREQUENCY = "Pattern Low Frequency Table"; //$NON-NLS-1$
private static final String PATTERN_LOW_FREQUENCY_DEFAULT = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__GROUP_BY_ALIAS__%> ORDER BY c ASC"; //$NON-NLS-1$
private static final String PATTERN_LOW_FREQUENCY_SQL = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__GROUP_BY_ALIAS__%> ORDER BY c ASC";//$NON-NLS-1$
private static final String PATTERN_FREQUENCY = "Pattern Frequency Table";//$NON-NLS-1$
private static final String PATTERN_FREQUENCY_DEFAULT = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__GROUP_BY_ALIAS__%> ORDER BY c DESC";//$NON-NLS-1$
private static final String PATTERN_FREQUENCY_SQL = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) AS c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> ORDER BY c DESC";//$NON-NLS-1$
private final String Netezza = SupportDBUrlType.NETEZZADEFAULTURL.getLanguage();
private final String SQL = "SQL";//$NON-NLS-1$
private final String CHAR_TOREPLACE = "abcdefghijklmnopqrstuvwxyzçâêîôûéèùïöüABCDEFGHIJKLMNOPQRSTUVWXYZÇÂÊÎÔÛÉÈÙÏÖÜ0123456789";//$NON-NLS-1$
private final String CHAR_REPLACE = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9999999999";//$NON-NLS-1$
/*
* (non-Javadoc)
*
* @see org.talend.dataprofiler.core.migration.AMigrationTask#doExecute()
*/
@Override
protected boolean doExecute() throws Exception {
boolean result = true;
// the following indicator only need to add Netezza expression
result = result && addExpression(AVERAGE_LENGTH, AVERAGE_LENGTH_SQL, Netezza, false);
result = result
&& addExpression(AVERAGE_LENGTH_WITH_BLANK_AND_NULL, AVERAGE_LENGTH_WITH_BLANK_AND_NULL_SQL, Netezza, false);
result = result && addExpression(AVERAGE_LENGTH_WITH_NULL, AVERAGE_LENGTH_WITH_NULL_SQL, Netezza, false);
result = result && addExpression(MAXIMAL_LENGTH, MAXIMAL_LENGTH_SQL, Netezza, false);
result = result && addExpression(MAXIMAL_LENGTH_WITH_NULL, MAXIMAL_LENGTH_WITH_NULL_SQL, Netezza, false);
result = result
&& addExpression(MAXIMAL_LENGTH_WITH_BLANK_AND_NULL, MAXIMAL_LENGTH_WITH_BLANK_AND_NULL_SQL, Netezza, false);
result = result && addExpression(MINIMAL_LENGTH, MINIMAL_LENGTH_SQL, Netezza, false);
result = result
&& addExpression(MINIMAL_LENGTH_WITH_BLANK_AND_NULL, MINIMAL_LENGTH_WITH_BLANK_AND_NULL_SQL, Netezza, false);
result = result && addExpression(MINIMAL_LENGTH_WITH_NULL, MINIMAL_LENGTH_WITH_NULL_SQL, Netezza, false);
result = result && addExpression(BENFORD_LAW, BENFORD_LAW_SQL, Netezza, false);
// next: the following indicators need to add default expression, and Netezza expression
result = result && addExpression(SOUNDEX_LOW_FREQUENCY, SOUNDEX_LOW_FREQUENCY_DEFAULT, SQL, false);
result = result && addExpression(SOUNDEX_FREQUENCY, SOUNDEX_FREQUENCY_DEFAULT, SQL, false);
result = result && addExpression(SOUNDEX_LOW_FREQUENCY, SOUNDEX_LOW_FREQUENCY_SQL, Netezza, false);
result = result && addExpression(SOUNDEX_FREQUENCY, SOUNDEX_FREQUENCY_SQL, Netezza, false);
// for pattern frequency, also need to add character map(default, and Netezza)
result = result && addExpression(PATTERN_LOW_FREQUENCY, PATTERN_LOW_FREQUENCY_DEFAULT, SQL, true);
result = result && addExpression(PATTERN_FREQUENCY, PATTERN_FREQUENCY_DEFAULT, SQL, true);
result = result && addExpression(PATTERN_LOW_FREQUENCY, PATTERN_LOW_FREQUENCY_SQL, Netezza, true);
result = result && addExpression(PATTERN_FREQUENCY, PATTERN_FREQUENCY_SQL, Netezza, true);
DefinitionHandler.getInstance().reloadIndicatorsDefinitions();
return result;
}
private boolean addExpression(String indicatorName, String body, String language, boolean withMap) {
IndicatorDefinition indiDefinition = IndicatorDefinitionFileHelper.getSystemIndicatorByName(indicatorName);
if (indiDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(indiDefinition, language)) {
IndicatorDefinitionFileHelper.addSqlExpression(indiDefinition, language, body);
if (withMap) {
IndicatorDefinitionFileHelper.addCharacterMapping(indiDefinition, language, StringUtils.EMPTY, CHAR_TOREPLACE,
CHAR_REPLACE);
}
return IndicatorDefinitionFileHelper.save(indiDefinition);
}
return true;
}
/*
* (non-Javadoc)
*
* @see org.talend.dataprofiler.core.migration.IWorkspaceMigrationTask#getMigrationTaskType()
*/
public MigrationTaskType getMigrationTaskType() {
return MigrationTaskType.FILE;
}
/*
* (non-Javadoc)
*
* @see org.talend.dataprofiler.core.migration.IWorkspaceMigrationTask#getOrder()
*/
public Date getOrder() {
return createDate(2014, 02, 18);
}
}