// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataprofiler.core.migration.impl; import java.util.Date; import org.eclipse.emf.common.util.EList; import org.talend.core.model.metadata.builder.database.dburl.SupportDBUrlType; import org.talend.cwm.helper.ResourceHelper; import org.talend.dataprofiler.core.migration.AbstractWorksapceUpdateTask; import org.talend.dataprofiler.core.migration.helper.IndicatorDefinitionFileHelper; import org.talend.dataquality.indicators.definition.IndicatorDefinition; import org.talend.dq.indicators.definitions.DefinitionHandler; /** * * Add Sql Expression in some indicators for hive language. * * $Id: talend.epf 55206 2011-02-15 17:32:14Z mhirt $ * */ public class UpdateIndicatorForHiveTask extends AbstractWorksapceUpdateTask { private final String AVERAGE_LENGTH_UUID = "_ccIR4BF2Ed2PKb6nEJEvhw";//$NON-NLS-1$ private final String MODE_UUID = "_ccIR4RF2Ed2PKb6nEJEvhw";//$NON-NLS-1$ private final String ROWCOMPARISON_UUID = "_jMh4gJE3Ed2HGNmGoaS";//$NON-NLS-1$ private final String UNIQUE_UUID = "_ccHq0RF2Ed2PKb6nEJEvhw";//$NON-NLS-1$ private final String DUPLICATE_UUID = "_ccHq0hF2Ed2PKb6nEJEvhw";//$NON-NLS-1$ private final String REGEXPATTERNMATCH_UUID = "_yb-_8Dh8Ed2XmO7pl5Yuyg";//$NON-NLS-1$ private final String HIVE = SupportDBUrlType.HIVEDEFAULTURL.getLanguage(); private DefinitionHandler definitionHandler; /* * (non-Javadoc) * * @see org.talend.dataprofiler.migration.IMigrationTask#getOrder() */ public Date getOrder() { return createDate(2012, 8, 10); } /* * (non-Javadoc) * * @see org.talend.dataprofiler.migration.IMigrationTask#getMigrationTaskType() */ public MigrationTaskType getMigrationTaskType() { return MigrationTaskType.FILE; } /** * updatae some indicator definition by uuid. */ @Override protected boolean doExecute() throws Exception { definitionHandler = DefinitionHandler.getInstance(); IndicatorDefinition uniqueDefinition = definitionHandler.getDefinitionById(UNIQUE_UUID); if (uniqueDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(uniqueDefinition, HIVE)) { IndicatorDefinitionFileHelper .addSqlExpression( uniqueDefinition, HIVE, "SELECT COUNT(*) FROM (SELECT <%=__COLUMN_NAMES__%>, COUNT(*) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> HAVING COUNT(*) = 1) myquery"); //$NON-NLS-1$ IndicatorDefinitionFileHelper.save(uniqueDefinition); } IndicatorDefinition duplicateDefinition = definitionHandler.getDefinitionById(DUPLICATE_UUID); if (duplicateDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(duplicateDefinition, HIVE)) { IndicatorDefinitionFileHelper .addSqlExpression( duplicateDefinition, HIVE, "SELECT COUNT(*) FROM (SELECT <%=__COLUMN_NAMES__%>, COUNT(*) FROM <%=__TABLE_NAME__%> m <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> HAVING COUNT(*) > 1) myquery"); //$NON-NLS-1$ IndicatorDefinitionFileHelper.save(duplicateDefinition); } IndicatorDefinition regexPatternDefinition = definitionHandler.getDefinitionById(REGEXPATTERNMATCH_UUID); if (regexPatternDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(regexPatternDefinition, HIVE)) { IndicatorDefinitionFileHelper .addSqlExpression( regexPatternDefinition, HIVE, "SELECT COUNT(CASE WHEN <%=__COLUMN_NAMES__%> REGEXP <%=__PATTERN_EXPR__%> THEN 1 END), COUNT(*) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>"); //$NON-NLS-1$ IndicatorDefinitionFileHelper.save(regexPatternDefinition); } IndicatorDefinition rowCompareDefinition = definitionHandler.getDefinitionById(ROWCOMPARISON_UUID); if (rowCompareDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(rowCompareDefinition, HIVE)) { IndicatorDefinitionFileHelper .addSqlExpression( rowCompareDefinition, HIVE, "SELECT COUNT(*) FROM <%=__TABLE_NAME__%> LEFT OUTER JOIN <%=__TABLE_NAME_2__%> ON (<%=__JOIN_CLAUSE__%>) WHERE (<%=__WHERE_CLAUSE__%>)"); //$NON-NLS-1$ IndicatorDefinitionFileHelper.save(rowCompareDefinition); } IndicatorDefinition modeDefinition = definitionHandler.getDefinitionById(MODE_UUID); if (modeDefinition != null && !IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(modeDefinition, HIVE)) { IndicatorDefinitionFileHelper .addSqlExpression( modeDefinition, HIVE, "SELECT <%=__COLUMN_NAMES__%>, COUNT(*) c FROM <%=__TABLE_NAME__%> m <%=__WHERE_CLAUSE__%> GROUP BY <%=__COLUMN_NAMES__%> ORDER BY c DESC "); //$NON-NLS-1$ IndicatorDefinitionFileHelper.save(modeDefinition); } updateTextIndicators(); return true; } private void updateTextIndicators() { String minLengthSql = "SELECT MIN(LENGTH(<%=__COLUMN_NAMES__%>)) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>";//$NON-NLS-1$ String maxLengthSql = "SELECT MAX(LENGTH(<%=__COLUMN_NAMES__%>)) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>";//$NON-NLS-1$ String averageLengthSql = "SELECT SUM(LENGTH(<%=__COLUMN_NAMES__%>)), COUNT(<%=__COLUMN_NAMES__%>) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>";//$NON-NLS-1$ String averageLengthWithSql = "SELECT SUM(LENGTH(<%=__COLUMN_NAMES__%>)), COUNT(*) FROM <%=__TABLE_NAME__%> <%=__WHERE_CLAUSE__%>";//$NON-NLS-1$ IndicatorDefinition textIndicatorDefinition = definitionHandler.getDefinitionById("_yb9x0zh8Ed2XmO7pl5Yuyg");//$NON-NLS-1$ if(textIndicatorDefinition!=null){ EList<IndicatorDefinition> aggregatedDefinitions = textIndicatorDefinition.getAggregatedDefinitions(); if(!aggregatedDefinitions.isEmpty()){ for (IndicatorDefinition indiDef : aggregatedDefinitions) { if (indiDef == null || indiDef.eIsProxy() || indiDef.getLabel() == null || IndicatorDefinitionFileHelper.isExistSqlExprWithLanguage(indiDef, HIVE)) { continue; } if (indiDef.getLabel().startsWith("Minimal")) {//$NON-NLS-1$ IndicatorDefinitionFileHelper.addSqlExpression(indiDef, HIVE, minLengthSql); } else if (indiDef.getLabel().startsWith("Maximal")) {//$NON-NLS-1$ IndicatorDefinitionFileHelper.addSqlExpression(indiDef, HIVE, maxLengthSql); } else if (AVERAGE_LENGTH_UUID.equals(ResourceHelper.getUUID(indiDef))) { // Average Length IndicatorDefinitionFileHelper.addSqlExpression(indiDef, HIVE, averageLengthSql); } else if (indiDef.getLabel().startsWith("Average")) {//$NON-NLS-1$ IndicatorDefinitionFileHelper.addSqlExpression(indiDef, HIVE, averageLengthWithSql); } IndicatorDefinitionFileHelper.save(indiDef); } } } } }