// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dq.analysis; import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import org.eclipse.emf.common.util.EList; import org.eclipse.jface.dialogs.MessageDialog; import org.eclipse.swt.widgets.Display; import org.eclipse.ui.PlatformUI; import org.talend.cwm.helper.SwitchHelpers; import org.talend.cwm.management.i18n.Messages; import org.talend.cwm.relational.TdColumn; import org.talend.dataquality.PluginConstant; import org.talend.dataquality.analysis.Analysis; import org.talend.dataquality.analysis.AnalysisContext; import org.talend.dataquality.analysis.AnalysisType; import org.talend.dataquality.helpers.BooleanExpressionHelper; import org.talend.dataquality.indicators.Indicator; import org.talend.dataquality.indicators.columnset.AllMatchIndicator; import org.talend.dataquality.indicators.columnset.ColumnSetMultiValueIndicator; import org.talend.dataquality.indicators.columnset.ColumnsetPackage; import org.talend.dataquality.indicators.columnset.SimpleStatIndicator; import org.talend.dataquality.indicators.definition.IndicatorDefinition; import org.talend.dq.helper.AnalysisExecutorHelper; import org.talend.dq.helper.ContextHelper; import org.talend.utils.sugars.ReturnCode; import org.talend.utils.sugars.TypedReturnCode; import orgomg.cwm.objectmodel.core.Expression; import orgomg.cwm.objectmodel.core.ModelElement; /** * DOC scorreia class global comment. Detailled comment */ public class MultiColumnAnalysisExecutor extends ColumnAnalysisSqlExecutor { private static Logger log = Logger.getLogger(MultiColumnAnalysisExecutor.class); private String catalogOrSchema = null; /* * (non-Javadoc) * * @see * org.talend.dq.analysis.ColumnAnalysisSqlExecutor#createSqlStatement(org.talend.dataquality.analysis.Analysis) */ @Override public String createSqlStatement(Analysis analysis) { this.cachedAnalysis = analysis; EList<Indicator> indicators = analysis.getResults().getIndicators(); for (Indicator indicator : indicators) { instantiateQuery(indicator); } // no query to return, here we only instantiate several SQL queries return PluginConstant.EMPTY_STRING; } /** * DOC scorreia Comment method "instantiateQuery". * * @param indicator */ private void instantiateQuery(Indicator indicator) { if (ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) { ColumnSetMultiValueIndicator colSetMultValIndicator = (ColumnSetMultiValueIndicator) indicator; final EList<ModelElement> analyzedColumns = colSetMultValIndicator.getAnalyzedColumns(); final EList<String> numericFunctions = initializeNumericFunctions(colSetMultValIndicator); final EList<String> dateFunctions = initializeDateFunctions(colSetMultValIndicator); // ADD msjian 2011-5-30 17479: Excel Odbc connection can not run well on the correlation analysis // note: this feature is not supported now, if support, delete this final String caseStr = "SUM(CASE WHEN {0} IS NULL THEN 1 ELSE 0 END)";//$NON-NLS-1$ if ("EXCEL".equals(dbms().getDbmsName()) //$NON-NLS-1$ && (dateFunctions.contains(caseStr) || numericFunctions.contains(caseStr))) { setError(Messages.getString("MultiColumnAnalysisExecutor.errMessage")); //$NON-NLS-1$ Display.getDefault().syncExec(new Runnable() { public void run() { MessageDialog.openWarning(PlatformUI.getWorkbench().getActiveWorkbenchWindow().getShell(), Messages.getString("MultiColumnAnalysisExecutor.errTitle"), getErrorMessage()); //$NON-NLS-1$ return; } }); } // ~ // get indicator definition final Expression sqlGenericExpression = dbms().getSqlExpression(indicator.getIndicatorDefinition()); // separate nominal from numeric columns List<String> nominalColumns = new ArrayList<String>(); for (ModelElement column : colSetMultValIndicator.getNominalColumns()) { nominalColumns.add(getQuotedColumnName(column)); } List<String> computedColumns = new ArrayList<String>(); for (ModelElement column : colSetMultValIndicator.getNumericColumns()) { // call functions for each column for (String f : numericFunctions) { computedColumns.add(replaceVariablesLow(f, getQuotedColumnName(column))); } } for (ModelElement column : colSetMultValIndicator.getDateColumns()) { // call functions for each column for (String f : dateFunctions) { computedColumns.add(replaceVariablesLow(f, getQuotedColumnName(column))); } } // add count(*) computedColumns.add(colSetMultValIndicator.getCountAll()); // MOD msjian TDQ-7254: fix the columnset analysis run get error. the columnset analysis don't need to // consider the datamining type. List<String> columns = new ArrayList<String>(); // if the analysis type is columnset, use analyzed columns if (AnalysisType.COLUMN_SET == cachedAnalysis.getParameters().getAnalysisType()) { for (ModelElement column : analyzedColumns) { columns.add(getQuotedColumnName(column)); } } else { columns = nominalColumns; } // TDQ-7254~ String selectItems = createSelect(columns, computedColumns); String grpByClause = createGroupBy(columns); // all columns must belong to the same table TdColumn firstColumn = SwitchHelpers.COLUMN_SWITCH.doSwitch(analyzedColumns.get(0)); String tableName = dbms().getQueryColumnSetWithPrefix(firstColumn); this.catalogOrSchema = dbms().getCatalogOrSchemaName(firstColumn); // definition is SELECT <%=__COLUMN_NAMES__%> FROM <%=__TABLE_NAME__%> GROUP BY // <%=__GROUP_BY_ALIAS__%> String sqlExpr = dbms().fillGenericQueryWithColumnTableAndAlias(sqlGenericExpression.getBody(), selectItems, tableName, grpByClause); // handle data filter String stringDataFilter = ContextHelper.getDataFilterWithoutContext(cachedAnalysis); if (stringDataFilter == null) { stringDataFilter = PluginConstant.EMPTY_STRING; } sqlExpr = dbms().addWhereToStatement(sqlExpr, stringDataFilter); indicator.setInstantiatedExpression(BooleanExpressionHelper.createTdExpression(sqlGenericExpression.getLanguage(), sqlExpr)); // MOD qiongli 2011-3-30 feature 19192.allow drill down for sql engine. if (ColumnsetPackage.eINSTANCE.getSimpleStatIndicator().isSuperTypeOf(indicator.eClass())) { SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator; // MOD TDQ-7287 lost some columns(type!=norminal) when view values in column set ana. yyin 20130514 String columnsName = createSelect(columns, new ArrayList<String>()); for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) { final Expression leafSqlGenericExpression = dbms().getSqlExpression(leafIndicator.getIndicatorDefinition()); String leafSqlExpr = dbms().fillGenericQueryWithColumnTableAndAlias(leafSqlGenericExpression.getBody(), columnsName, tableName, grpByClause); leafSqlExpr = dbms().addWhereToStatement(leafSqlExpr, stringDataFilter); leafIndicator.setInstantiatedExpression(BooleanExpressionHelper.createTdExpression( leafSqlGenericExpression.getLanguage(), leafSqlExpr)); } } } } /** * DOC scorreia Comment method "initializeNumericFunctions". * * @param indicator * @return */ private EList<String> initializeNumericFunctions(ColumnSetMultiValueIndicator indicator) { final EList<String> numericFunctions = indicator.getNumericFunctions(); if (!numericFunctions.isEmpty()) { // could be already set return numericFunctions; } final IndicatorDefinition indicatorDefinition = indicator.getIndicatorDefinition(); final List<String> aggregate1argFunctions = dbms().getAggregate1argFunctions(indicatorDefinition); numericFunctions.addAll(aggregate1argFunctions); return numericFunctions; } /** * DOC scorreia Comment method "initializeNumericFunctions". * * @param indicator * @return */ private EList<String> initializeDateFunctions(ColumnSetMultiValueIndicator indicator) { final EList<String> dateFunctions = indicator.getDateFunctions(); if (!dateFunctions.isEmpty()) { // could be already set return dateFunctions; } final IndicatorDefinition indicatorDefinition = indicator.getIndicatorDefinition(); final List<String> date1argFunctions = dbms().getDate1argFunctions(indicatorDefinition); dateFunctions.addAll(date1argFunctions); return dateFunctions; } /** * DOC scorreia Comment method "getTableName". * * @param analyzedColumns * @return the quoted table name */ /* * tableName = columnSetOwner.getName(); Package pack = PackageHelper.getCatalogOrSchema(columnSetOwner); if (pack * == null) { log.error("No Catalog or Schema found for column set owner: " + tableName); continue; // do not break * until we find the owner } this.catalogOrSchema = pack.getName(); break; // all columns should belong to the same * table */ /* * (non-Javadoc) * * @see org.talend.dq.analysis.AnalysisExecutor#runAnalysis(org.talend.dataquality.analysis.Analysis, * java.lang.String) */ @Override protected boolean runAnalysis(Analysis analysis, String sqlStatement) { boolean isSuccess = true; TypedReturnCode<java.sql.Connection> trc = this.getConnectionBeforeRun(analysis); if (!trc.isOk()) { log.error(trc.getMessage()); setError(trc.getMessage()); traceError(Messages.getString( "FunctionalDependencyExecutor.CANNOTEXECUTEANALYSIS", analysis.getName(), trc.getMessage()));//$NON-NLS-1$ return Boolean.FALSE; } Connection connection = trc.getObject(); try { // execute the sql statement for each indicator EList<Indicator> indicators = analysis.getResults().getIndicators(); for (Indicator indicator : indicators) { indicator.prepare(); // set the connection's catalog if (this.catalogOrSchema != null) { // check whether null argument can be given changeCatalog(this.catalogOrSchema, connection); } Expression query = dbms().getInstantiatedExpression(indicator); if (query == null) { traceError("Query not executed for indicator: \"" + AnalysisExecutorHelper.getIndicatorName(indicator) + "\" "//$NON-NLS-1$//$NON-NLS-2$ + "query is null");//$NON-NLS-1$ isSuccess = Boolean.FALSE; continue; } try { Boolean isExeSuccess = executeQuery(indicator, connection, query); if (!isExeSuccess) { traceError("Query not executed for indicator: \"" + AnalysisExecutorHelper.getIndicatorName(indicator) + "\" "//$NON-NLS-1$//$NON-NLS-2$ + ((query == null) ? "query is null" : "SQL query: " + query.getBody()));//$NON-NLS-1$//$NON-NLS-2$ isSuccess = Boolean.FALSE; continue; } } catch (Exception e) { traceError(e.getMessage()); isSuccess = Boolean.FALSE; continue; } indicator.setComputed(true); } } finally { ReturnCode rc = closeConnection(analysis, connection); if (!rc.isOk()) { isSuccess = Boolean.FALSE; } } return isSuccess; } /** * DOC scorreia Comment method "createGroupBy". * * @param nominalColumns * @return */ private String createGroupBy(List<String> nominalColumns) { StringBuilder builder = new StringBuilder(); final int nbNomColumns = nominalColumns.size(); for (int i = 0; i < nbNomColumns; i++) { builder.append(nominalColumns.get(i)); if (i != nbNomColumns - 1) { builder.append(COMMA); } } return builder.toString(); } private static final String COMMA = ","; //$NON-NLS-1$ /** * DOC scorreia Comment method "createSelect". * * @param nominalColumns * @param computedColumns * @return */ private String createSelect(List<String> nominalColumns, List<String> computedColumns) { StringBuilder builder = new StringBuilder(); final int nbNomColumns = nominalColumns.size(); final int nbComColumns = computedColumns.size(); for (int i = 0; i < nbNomColumns; i++) { builder.append(nominalColumns.get(i)); if (i != nbNomColumns - 1 || nbComColumns != 0) { builder.append(COMMA); } } for (int i = 0; i < nbComColumns; i++) { builder.append(computedColumns.get(i)); if (i != nbComColumns - 1) { builder.append(COMMA); } } return builder.toString(); } /** * DOC scorreia Comment method "executeQuery". * * @param indicator * @param connection * @param query * @return */ private boolean executeQuery(Indicator indicator, Connection connection, Expression query) { try { List<Object[]> myResultSet = executeQuery(catalogOrSchema, connection, query.getBody()); // give result to indicator so that it handles the results return indicator.storeSqlResults(myResultSet); } catch (SQLException e) { log.error(e, e); return false; } } /* * (non-Javadoc) * * @see * org.talend.dq.analysis.ColumnAnalysisExecutor#checkAnalyzedElements(org.talend.dataquality.analysis.Analysis, * org.talend.dataquality.analysis.AnalysisContext) */ @Override protected boolean checkAnalyzedElements(Analysis analysis, AnalysisContext context) { return true; } /* * (non-Javadoc) * * @see org.talend.dq.analysis.ColumnAnalysisExecutor#check(org.talend.dataquality.analysis.Analysis) */ @Override protected boolean check(Analysis analysis) { boolean check = super.check(analysis); if (!check) { return false; } else { EList<Indicator> indicators = analysis.getResults().getIndicators(); for (Indicator indicator : indicators) { if (indicator instanceof AllMatchIndicator) { // MOD qiongli 2011-6-16 bug 21768,column set dosen't support pattern in sql engine. setError(Messages.getString("MultiColumnAnalysisExecutor.noSupportSqlEngine")); //$NON-NLS-1$ return false; } } } return true; } }