// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataprofiler.core.ui.utils; import java.sql.Types; import org.talend.core.model.metadata.builder.connection.Connection; import org.talend.core.model.metadata.builder.connection.MetadataColumn; import org.talend.cwm.helper.ConnectionHelper; import org.talend.cwm.relational.TdColumn; import org.talend.dataprofiler.core.model.ModelElementIndicator; import org.talend.dataquality.analysis.ExecutionLanguage; import org.talend.dataquality.domain.pattern.Pattern; import org.talend.dataquality.helpers.IndicatorHelper; import org.talend.dataquality.helpers.MetadataHelper; import org.talend.dataquality.indicators.DataminingType; import org.talend.dataquality.indicators.Indicator; import org.talend.dataquality.indicators.definition.CharactersMapping; import org.talend.dq.dbms.DbmsLanguage; import org.talend.dq.dbms.DbmsLanguageFactory; import org.talend.dq.helper.RepositoryNodeHelper; import org.talend.dq.helper.UDIHelper; import org.talend.dq.nodes.indicator.IIndicatorNode; import org.talend.dq.nodes.indicator.type.IndicatorEnum; import org.talend.repository.model.IRepositoryNode; import org.talend.utils.sql.Java2SqlType; import org.talend.utils.sql.TalendTypeConvert; import orgomg.cwm.objectmodel.core.Expression; import orgomg.cwm.objectmodel.core.ModelElement; /** * DOC xqliu class global comment. Detailled comment */ public final class ModelElementIndicatorRule { private ModelElementIndicatorRule() { } public static boolean match(IIndicatorNode node, ModelElementIndicator meIndicator, ExecutionLanguage language) { IndicatorEnum indicatorType = node.getIndicatorEnum(); if (indicatorType == null) { for (IIndicatorNode one : node.getChildren()) { if (match(one, meIndicator, language)) { return true; } } return false; } IRepositoryNode rd = meIndicator.getModelElementRepositoryNode(); // return patternRule(indicatorType, ((MetadataColumnRepositoryObject) rd.getObject()).getTdColumn(), language); return patternRule(indicatorType, RepositoryNodeHelper.getSubModelElement(rd), language, node); } public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language) { return patternRule(indicatorType, me, language, null); } public static boolean patternRule(IndicatorEnum indicatorType, ModelElement me, ExecutionLanguage language, IIndicatorNode node) { int javaType = 0; boolean isDeliFileColumn = !(me instanceof TdColumn) && me instanceof MetadataColumn; int isTeradataInterval = -1; if (me instanceof TdColumn) { javaType = ((TdColumn) me).getSqlDataType().getJavaDataType(); // Added yyin 20121211 TDQ-6099: isTeradataInterval = Java2SqlType.isTeradataIntervalType(((TdColumn) me).getSqlDataType().getName()); // ~ } else if (isDeliFileColumn) { javaType = TalendTypeConvert.convertToJDBCType(((MetadataColumn) me).getTalendType()); } DataminingType dataminingType = MetadataHelper.getDataminingType(me); if (dataminingType == null || isDeliFileColumn) { dataminingType = MetadataHelper.getDefaultDataminingType(javaType); } // MOD qiongli 2012-4-25 TDQ-2699 Connection connection = null; if (me instanceof TdColumn) { connection = ConnectionHelper.getTdDataProvider((TdColumn) me); } else if (me instanceof MetadataColumn) { connection = ConnectionHelper.getTdDataProvider((MetadataColumn) me); } Indicator indicator = null; if (node != null) { indicator = node.getIndicatorInstance(); } boolean isSQLEngine = ExecutionLanguage.SQL.equals(language); boolean isJavaEngine = ExecutionLanguage.JAVA.equals(language); DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(connection, language); if (javaType == Types.LONGVARCHAR && isSQLEngine) { if (connection != null && ConnectionHelper.isDb2(connection)) { return enableLongVarchar(indicatorType, dataminingType, me); } } // MOD qiongli 2012-8-10 TDQ-5907 need to disabled indicators for hive with sql engine. boolean isHiveSQL = connection == null ? false : ConnectionHelper.isHive(connection) && isSQLEngine; // MOD msjian 2013-5-15 TDQ-7275 need to disabled indicators for teradata with sql engine. boolean isTeradataSQL = connection == null ? false : ConnectionHelper.isTeradata(connection) && isSQLEngine; // MOD msjian 2016-8-25 TDQ-12349 need to disabled indicators for ingres with sql engine. boolean isIngres = connection == null ? false : ConnectionHelper.isIngress(connection) && isSQLEngine; // MOD msjian 2016-8-25 TDQ-12464 need to disabled indicators for ingres with sql engine. boolean isSybase = connection == null ? false : ConnectionHelper.isSybase(connection) && isSQLEngine; // MOD qiongli 2013-8-27 TDQ-2104 disabled soundex indicators for hive with sql engine. boolean isVerticaSQL = connection == null ? false : ConnectionHelper.isVertica(connection) && isSQLEngine; switch (indicatorType) { case CountsIndicatorEnum: case RowCountIndicatorEnum: case NullCountIndicatorEnum: case DistinctCountIndicatorEnum: case UniqueIndicatorEnum: case DuplicateCountIndicatorEnum: // MOD scorreia 2008-06-04 enable distinct count, unique count and duplicate count for all types // if (dataminingType == DataminingType.NOMINAL) { return true; case DefValueCountIndicatorEnum: Expression initialValue = null; if (me instanceof TdColumn) { initialValue = ((TdColumn) me).getInitialValue(); } if (initialValue != null && initialValue.getBody() != null) { // MOD scorreia 2009-04-21 bug 6979 // non nullable numeric column give a non null default value as '' return initialValue.getBody().length() != 0 || Java2SqlType.isTextInSQL(javaType); } break; case BlankCountIndicatorEnum: // MOD klliu 2011-07-19 bug 22980 from repository as same as indicator dialog // MOD xwang 2011-07-29 bug TDQ-1731 disable blank count checkable for other data type but Text if (!Java2SqlType.isTextInSQL(javaType)) { return false; } else if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) { // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type. return false; } else { return true; } case TextIndicatorEnum: case MinLengthIndicatorEnum: case MinLengthWithNullIndicatorEnum: case MinLengthWithBlankIndicatorEnum: case MinLengthWithBlankNullIndicatorEnum: case MaxLengthIndicatorEnum: case MaxLengthWithNullIndicatorEnum: case MaxLengthWithBlankIndicatorEnum: case MaxLengthWithBlankNullIndicatorEnum: case AverageLengthIndicatorEnum: case AverageLengthWithNullIndicatorEnum: case AverageLengthWithBlankIndicatorEnum: case AverageLengthWithNullBlankIndicatorEnum: if (Java2SqlType.isTextInSQL(javaType)) { // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type. if (isTeradataInterval > 0) { return false; } if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) { return true; } } break; case EastAsiaPatternFreqIndicatorEnum: case EastAsiaPatternLowFreqIndicatorEnum: if (isSQLEngine && isEmpryExpression(indicator, dbmsLanguage)) { return false; } else if (isJavaEngine) { return true; } case BenfordLawFrequencyIndicatorEnum: // Added yyin 20121211 TDQ-6099: disable these three for INTERVAL type of Teradata // disable the benford for interval type: both sql and java if (isTeradataInterval > 0) { return false; } case PatternFreqIndicatorEnum: case PatternLowFreqIndicatorEnum: if (isTeradataSQL || isIngres || isSybase) { return false; } case ModeIndicatorEnum: // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type. if (isTeradataInterval == Java2SqlType.TERADATA_INTERVAL_TO && isSQLEngine) { return false; } case FrequencyIndicatorEnum: case LowFrequencyIndicatorEnum: if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) { return true; } break; // MOD zshen 2010-01-27 Date Pattern frequency indicator case DatePatternFreqIndicatorEnum: if (isJavaEngine && (Java2SqlType.isDateInSQL(javaType) || Java2SqlType.isTextInSQL(javaType))) { return true; } break; // MOD mzhao 2009-03-05 Soundex frequency indicator case SoundexIndicatorEnum: case SoundexLowIndicatorEnum: if (!Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) { if (isHiveSQL || isVerticaSQL) { return false; } // Added yyin 20121212 TDQ-6099: disable for Teradata's interval_xx_to_xx type. if (isTeradataInterval > 0 && isSQLEngine) { return false; } return true; } break; case MeanIndicatorEnum: case MedianIndicatorEnum: case IQRIndicatorEnum: case LowerQuartileIndicatorEnum: case UpperQuartileIndicatorEnum: // MOD scorreia 2008-09-19 do not allow box plot on date fields because it is not correctly handled in the // graphics and database yet. if (Java2SqlType.isNumbericInSQL(javaType) /* || Java2SqlType.isDateInSQL(javaType) */) { if (dataminingType == DataminingType.INTERVAL) { if (isHiveSQL && !(indicatorType == IndicatorEnum.MeanIndicatorEnum)) { return false; } return true; } } break; case BoxIIndicatorEnum: case RangeIndicatorEnum: case MinValueIndicatorEnum: case MaxValueIndicatorEnum: // MOD scorreia 2008-09-25 do not allow min and max on date fields because it is not correctly handled in // the graphics and database yet. if (Java2SqlType.isNumbericInSQL(javaType) || Java2SqlType.isDateInSQL(javaType)) { if (dataminingType == DataminingType.INTERVAL) { return true; } } break; case DateFrequencyIndicatorEnum: case WeekFrequencyIndicatorEnum: case MonthFrequencyIndicatorEnum: case QuarterFrequencyIndicatorEnum: case YearFrequencyIndicatorEnum: case DateLowFrequencyIndicatorEnum: case WeekLowFrequencyIndicatorEnum: case MonthLowFrequencyIndicatorEnum: case QuarterLowFrequencyIndicatorEnum: case YearLowFrequencyIndicatorEnum: // ADD yyi 2010-07-23 13676 // Mod yyin 20120511 TDQ-5241 if (Java2SqlType.isDateInSQL(javaType) && !Java2SqlType.isTimeSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) { return true; } break; case BinFrequencyIndicatorEnum: case BinLowFrequencyIndicatorEnum: if (Java2SqlType.isNumbericInSQL(javaType) && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) { return true; } break; case ValidPhoneCountIndicatorEnum: case PossiblePhoneCountIndicatorEnum: case ValidRegCodeCountIndicatorEnum: case InvalidRegCodeCountIndicatorEnum: case WellFormE164PhoneCountIndicatorEnum: case WellFormIntePhoneCountIndicatorEnum: case WellFormNationalPhoneCountIndicatorEnum: case PhoneNumbStatisticsIndicatorEnum: case FormatFreqPieIndictorEnum: if (isJavaEngine && (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL)) { return true; } break; case SqlPatternMatchingIndicatorEnum: if (node == null) { return false; } if (!isSQLEngine) { return false; } Pattern pattern = IndicatorHelper.getPattern(indicator); Expression returnExpression = dbmsLanguage.getRegexp(pattern); if (returnExpression != null) { return true; } break; case RegexpMatchingIndicatorEnum: if (node == null) { return false; } pattern = IndicatorHelper.getPattern(indicator); returnExpression = dbmsLanguage.getRegexp(pattern); if (returnExpression != null) { return true; } break; case UserDefinedIndicatorEnum: // judge language if (node == null) { return false; } Indicator judi = null; try { judi = UDIHelper.adaptToJavaUDI(indicator); } catch (Throwable e) { return false; } if (judi != null) { indicator = judi; } returnExpression = dbmsLanguage.getExpression(indicator); if (isJavaEngine && judi == null || isSQLEngine && returnExpression == null) { return false; } return true; default: return false; } return false; } /** * DOC talend Comment method "isEmpryExpression". * * @param indicator * @return */ private static boolean isEmpryExpression(Indicator indicator, DbmsLanguage dbmsLanguage) { Expression sqlExpression = dbmsLanguage.getSqlExpression(indicator.getIndicatorDefinition()); CharactersMapping characterMappingExpression = dbmsLanguage.getChartacterMappingExpression(indicator .getIndicatorDefinition()); return sqlExpression == null || characterMappingExpression == null; } /** * * just several indicator support longvarchar.because longvarchar dosn't support some sql query,.eg.,distinct,group * by,function... * * @param indicatorType * @param dataminingType * @return */ private static boolean enableLongVarchar(IndicatorEnum indicatorType, DataminingType dataminingType, ModelElement mod) { switch (indicatorType) { case CountsIndicatorEnum: case RowCountIndicatorEnum: return true; case TextIndicatorEnum: case MinLengthWithBlankIndicatorEnum: case MinLengthWithNullIndicatorEnum: case MinLengthWithBlankNullIndicatorEnum: case MaxLengthWithBlankIndicatorEnum: case MaxLengthWithBlankNullIndicatorEnum: case MaxLengthWithNullIndicatorEnum: case AverageLengthIndicatorEnum: case AverageLengthWithBlankIndicatorEnum: case AverageLengthWithNullBlankIndicatorEnum: case AverageLengthWithNullIndicatorEnum: case MinLengthIndicatorEnum: case MaxLengthIndicatorEnum: if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.UNSTRUCTURED_TEXT) { return true; } break; case PatternFreqIndicatorEnum: case PatternLowFreqIndicatorEnum: if (dataminingType == DataminingType.NOMINAL || dataminingType == DataminingType.INTERVAL) { return true; } break; default: return false; } return false; } }