// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataprofiler.core.migration.impl;
import java.util.Date;
import org.talend.core.model.metadata.builder.database.dburl.SupportDBUrlType;
import org.talend.dataprofiler.core.migration.AbstractWorksapceUpdateTask;
import org.talend.dataprofiler.core.migration.helper.IndicatorDefinitionFileHelper;
import org.talend.dataquality.indicators.definition.IndicatorDefinition;
import org.talend.dq.indicators.definitions.DefinitionHandler;
public class UpdateIndicatorsForVertica extends AbstractWorksapceUpdateTask {
private final String patternMatchSQL = "SELECT COUNT(CASE WHEN REGEXP_LIKE(TO_CHAR(<%=__COLUMN_NAMES__%>),<%=__PATTERN_EXPR__%>) THEN 1 END), COUNT(*) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>"; //$NON-NLS-1$
private final String patternFreqSQL = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) AS c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> ORDER BY c DESC"; //$NON-NLS-1$
private final String benFordSQL = "SELECT SUBSTR(TO_CHAR(<%=__COLUMN_NAMES__%>),1,1) , COUNT(*) c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY SUBSTR(TO_CHAR(<%=__COLUMN_NAMES__%>),1,1) order by SUBSTR(TO_CHAR(<%=__COLUMN_NAMES__%>),1,1)"; //$NON-NLS-1$
private final String lowPatternFreqSQL = "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) AS c FROM <%=__TABLE_NAME__%> t <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> ORDER BY c ASC"; //$NON-NLS-1$
private final String patternMatchIndiDefId = "_yb-_8Dh8Ed2XmO7pl5Yuyg"; //$NON-NLS-1$
private final String patternFreqIndiDefId = "_kQzTsJR-Ed2XO-JvLwVAwg"; //$NON-NLS-1$
private final String lowPatternFreqIndiDefId = "_OCTbwJR_Ed2XO-JvLwVAwg"; //$NON-NLS-1$
private final String benfordId = "_yRkFIezIEeG0fbygDv6UrQ"; //$NON-NLS-1$
private final String charToReplace = "abcdefghijklmnopqrstuvwxyzçâêîôûéèùïöüABCDEFGHIJKLMNOPQRSTUVWXYZÇÂÊÎÔÛÉÈÙÏÖÜ0123456789"; //$NON-NLS-1$
private final String newReplaceChar = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9999999999"; //$NON-NLS-1$
private final String name = "Characters mapping on Vertica"; //$NON-NLS-1$
private final String language = SupportDBUrlType.VERTICA.getLanguage();
/*
* (non-Javadoc)
*
* @see org.talend.dataprofiler.migration.IMigrationTask#getOrder()
*/
public Date getOrder() {
return createDate(2013, 8, 26);
}
/*
* (non-Javadoc)
*
* @see org.talend.dataprofiler.migration.IMigrationTask#getMigrationTaskType()
*/
public MigrationTaskType getMigrationTaskType() {
return MigrationTaskType.FILE;
}
/**
* migrate these indicators for Vertica database:Pattern Frequency Table,Pattern Low Frequency Table,Regular
* Expression Matching.
*/
@Override
protected boolean doExecute() throws Exception {
DefinitionHandler definitionHandler = DefinitionHandler.getInstance();
IndicatorDefinition patternMatchIndDef = definitionHandler.getDefinitionById(patternMatchIndiDefId);
IndicatorDefinition patternFreqIndDef = definitionHandler.getDefinitionById(patternFreqIndiDefId);
IndicatorDefinition patternLowPatternFreqIndDef = definitionHandler.getDefinitionById(lowPatternFreqIndiDefId);
IndicatorDefinition benFordIndDef = definitionHandler.getDefinitionById(benfordId);
boolean isNeedReload = false;
if (patternMatchIndDef != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(patternMatchIndDef, language)) {
IndicatorDefinitionFileHelper.addSqlExpression(patternMatchIndDef, language, patternMatchSQL);
IndicatorDefinitionFileHelper.save(patternMatchIndDef);
isNeedReload = true;
}
if (patternFreqIndDef != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(patternFreqIndDef, language)) {
IndicatorDefinitionFileHelper.addSqlExpression(patternFreqIndDef, language, patternFreqSQL);
IndicatorDefinitionFileHelper.addCharacterMapping(patternFreqIndDef, language, name, charToReplace, newReplaceChar);
IndicatorDefinitionFileHelper.save(patternFreqIndDef);
if (!isNeedReload) {
isNeedReload = true;
}
}
if (patternLowPatternFreqIndDef != null
&& !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(patternLowPatternFreqIndDef, language)) {
IndicatorDefinitionFileHelper.addSqlExpression(patternLowPatternFreqIndDef, language, lowPatternFreqSQL);
IndicatorDefinitionFileHelper.addCharacterMapping(patternLowPatternFreqIndDef, language, name, charToReplace,
newReplaceChar);
IndicatorDefinitionFileHelper.save(patternLowPatternFreqIndDef);
if (!isNeedReload) {
isNeedReload = true;
}
}
if (benFordIndDef != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(benFordIndDef, language)) {
IndicatorDefinitionFileHelper.addSqlExpression(benFordIndDef, language, benFordSQL);
IndicatorDefinitionFileHelper.save(benFordIndDef);
if (!isNeedReload) {
isNeedReload = true;
}
}
if (isNeedReload) {
definitionHandler.reloadIndicatorsDefinitions();
}
return true;
}
}