// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dq.analysis;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.log4j.Logger;
import org.eclipse.emf.common.util.EList;
import org.talend.cwm.exception.AnalysisExecutionException;
import org.talend.cwm.helper.CatalogHelper;
import org.talend.cwm.helper.ColumnHelper;
import org.talend.cwm.helper.SchemaHelper;
import org.talend.cwm.management.i18n.Messages;
import org.talend.cwm.relational.TdColumn;
import org.talend.dataquality.PluginConstant;
import org.talend.dataquality.analysis.Analysis;
import org.talend.dataquality.analysis.AnalysisContext;
import org.talend.dataquality.helpers.AnalysisHelper;
import org.talend.dataquality.indicators.Indicator;
import org.talend.dataquality.indicators.columnset.ColumnsetPackage;
import org.talend.dataquality.indicators.columnset.RowMatchingIndicator;
import org.talend.dataquality.indicators.definition.IndicatorDefinition;
import org.talend.dq.helper.AnalysisExecutorHelper;
import org.talend.dq.helper.ContextHelper;
import org.talend.dq.helper.EObjectHelper;
import org.talend.utils.sugars.ReturnCode;
import org.talend.utils.sugars.TypedReturnCode;
import orgomg.cwm.objectmodel.core.CoreFactory;
import orgomg.cwm.objectmodel.core.Expression;
import orgomg.cwm.resource.relational.Catalog;
import orgomg.cwm.resource.relational.ColumnSet;
import orgomg.cwm.resource.relational.Schema;
/**
* @author scorreia
*
* executes multicolumn comparison.
*/
public class RowMatchingAnalysisExecutor extends ColumnAnalysisSqlExecutor {
private static Logger log = Logger.getLogger(RowMatchingAnalysisExecutor.class);
private String catalogOrSchema = null;
private boolean reversion = false;
private HashMap<Indicator, Boolean> indiReversionMap;
private void reset() {
catalogOrSchema = null;
}
/*
* (non-Javadoc)
*
* @see org.talend.dq.analysis.AnalysisExecutor#createSqlStatement(org.talend.dataquality.analysis.Analysis)
*/
@Override
public String createSqlStatement(Analysis analysis) {
this.cachedAnalysis = analysis;
this.reset();
EList<Indicator> indicators = analysis.getResults().getIndicators();
if (indiReversionMap == null) {
indiReversionMap = new HashMap<Indicator, Boolean>();
}
for (int i = 0; i < indicators.size(); i++) {
Indicator indicator = indicators.get(i);
if (i == 1) {
indiReversionMap.put(indicator, Boolean.TRUE);
} else {
indiReversionMap.put(indicator, Boolean.FALSE);
}
instantiateQuery(indicator);
}
// no query to return, here we only instantiate several SQL queries
return PluginConstant.EMPTY_STRING;
}
/**
* DOC scorreia Comment method "instantiateQuery".
*
* @param indicator
*/
private boolean instantiateQuery(Indicator indicator) {
// indicator.reset(); // rli reset will clear columnSetA and columnSetB, we will lost our analysedElement
// information. So comment it. // scorreia -> changed the implementation of reset() so that it can now be called
// (but is not need, hence we keep it commented)
if (ColumnsetPackage.eINSTANCE.getRowMatchingIndicator().equals(indicator.eClass())) {
RowMatchingIndicator rowMatchingIndicator = (RowMatchingIndicator) indicator;
EList<TdColumn> columnSetA = rowMatchingIndicator.getColumnSetA();
EList<TdColumn> columnSetB = rowMatchingIndicator.getColumnSetB();
if (columnSetA.size() != columnSetB.size()) {
traceError("Cannot compare two column sets with different size");//$NON-NLS-1$
return Boolean.FALSE;
// break;
}
IndicatorDefinition indicatorDefinition = indicator.getIndicatorDefinition();
Expression sqlGenericExpression = dbms().getSqlExpression(indicatorDefinition);
boolean useNulls = false; // TODO scorreia create an indicator for each option
Expression instantiatedSqlExpression = createInstantiatedSqlExpression(sqlGenericExpression, columnSetA, columnSetB,
useNulls, indicator);
indicator.setInstantiatedExpression(instantiatedSqlExpression);
return true;
}
traceError("Unhandled given indicator: " + AnalysisExecutorHelper.getIndicatorName(indicator));//$NON-NLS-1$
return Boolean.FALSE;
}
/**
* DOC scorreia Comment method "createInstantiatedSqlExpression".
*
* @param sqlGenericExpression
* @param columnSetA
* @param columnSetB
* @param useNulls
* @return
*/
private Expression createInstantiatedSqlExpression(Expression sqlGenericExpression, EList<TdColumn> columnSetA,
EList<TdColumn> columnSetB, boolean useNulls, Indicator indicator) {
// MOD scorreia 2009-05-25 allow to compare elements from the same table
// aliases of tables
String aliasA = "A"; //$NON-NLS-1$
String aliasB = "B"; //$NON-NLS-1$
// MOD xqliu 2009-06-16 bug 7334
String dataFilterA = ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis, AnalysisHelper.DATA_FILTER_A);
String dataFilterB = ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis, AnalysisHelper.DATA_FILTER_B);
// MOD qiongli 2011-12-28 TDQ-4240.get the reversion value from a Map before generating sqlExpression.
reversion = indiReversionMap != null && indiReversionMap.get(indicator) != null ? indiReversionMap.get(indicator)
.booleanValue() : false;
if (reversion) {
dataFilterA = ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis, AnalysisHelper.DATA_FILTER_B);
dataFilterB = ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis, AnalysisHelper.DATA_FILTER_A);
}
String tableNameA = addDataFilterWithTableName(getTableName(columnSetA), dataFilterA) + " " + aliasA; //$NON-NLS-1$
String tableNameB = addDataFilterWithTableName(getTableName(columnSetB), dataFilterB) + " " + aliasB; //$NON-NLS-1$
// ~
// Generic SQL expression is something like:
// SELECT COUNT(*) FROM <%=__TABLE_NAME__%> LEFT JOIN <%=__TABLE_NAME_2__%> ON (<%=__JOIN_CLAUSE__%>) WHERE
// (<%=__WHERE_CLAUSE__%>)
String genericSQL = sqlGenericExpression.getBody();
String joinClause = createJoinClause(aliasA, columnSetA, aliasB, columnSetB, useNulls);
String whereClause = createWhereClause(aliasB, columnSetB);
if (useNulls) {
// add a where clause to avoid the equality of rows fully null (i.e. rows like "null,null,null"
whereClause += dbms().and() + '(' + createNotNullCondition(aliasA, columnSetA) + ')';
}
String instantiatedSQL = dbms().fillGenericQueryWithJoin(genericSQL, tableNameA, tableNameB, joinClause, whereClause);
Expression instantiatedExpression = CoreFactory.eINSTANCE.createExpression();
instantiatedExpression.setLanguage(sqlGenericExpression.getLanguage());
instantiatedExpression.setBody(instantiatedSQL);
return instantiatedExpression;
}
private String addDataFilterWithTableName(String tableName, String dataFilter) {
if (dataFilter == null || dataFilter.trim().equals(PluginConstant.EMPTY_STRING)) {
return tableName;
}
return "(SELECT * FROM " + tableName + " WHERE ( " + dataFilter + " ))"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
}
/**
* DOC scorreia Comment method "createWhereClause".
*
* @param tableNameB
* @param columnSetB
* @return
*/
private String createWhereClause(String tableNameB, EList<TdColumn> columnSetB) {
final String isNull = dbms().isNull();
final String and = dbms().and();
return conditionOnAllColumns(tableNameB, columnSetB, isNull, and);
}
private String createNotNullCondition(String tableNameB, EList<TdColumn> columnSetB) {
final String isNotNull = dbms().isNotNull();
final String or = dbms().or();
return conditionOnAllColumns(tableNameB, columnSetB, isNotNull, or);
}
/**
* DOC scorreia Comment method "conditionOnAllColumns".
*
* @param tableName
* @param columnSet
* @param isNull
* @param and
* @return
*/
private String conditionOnAllColumns(String tableName, EList<TdColumn> columnSet, final String isNull, final String and) {
int size = columnSet.size();
StringBuilder builder = new StringBuilder();
for (int i = 0; i < size; i++) {
builder.append(tableName).append(PluginConstant.DOT_STRING).append(getQuotedColumnName(columnSet.get(i)))
.append(isNull);
if (i != size - 1) {
builder.append(and);
}
}
return builder.toString();
}
/**
* Method "createJoinClause".
*
* @param tableNameA already quoted table name
* @param columnSetA
* @param tableNameB already quoted table name
* @param columnSetB
* @return
*/
private String createJoinClause(String tableNameA, EList<TdColumn> columnSetA, String tableNameB, EList<TdColumn> columnSetB,
final boolean useNulls) {
StringBuilder builder = new StringBuilder();
int size = columnSetA.size();
for (int i = 0; i < size; i++) {
String colA = tableNameA + PluginConstant.DOT_STRING + getQuotedColumnName(columnSetA.get(i));
String colB = tableNameB + PluginConstant.DOT_STRING + getQuotedColumnName(columnSetB.get(i));
builder.append(" (").append(colA).append(dbms().equal()).append(colB); //$NON-NLS-1$
if (useNulls) { // allow to identify rows like ('a', null) = ('a', null)
builder.append(dbms().or()).append(bothNull(colA, colB));
}
builder.append(") "); //$NON-NLS-1$
if (i != size - 1) {
builder.append(dbms().and());
}
}
return builder.toString();
}
/**
* Method "bothNull".
*
* @param colA a column name from table A
* @param colB a column name from table B
* @return colA IS NULL AND colB IS NULL
*/
private String bothNull(String colA, String colB) {
StringBuilder builder = new StringBuilder();
builder.append(" (").append(colA).append(dbms().isNull()).append(dbms().and()).append(colB).append(dbms().isNull()) //$NON-NLS-1$
.append(") "); //$NON-NLS-1$
return builder.toString();
}
/**
* DOC scorreia Comment method "getTableName".
*
* @param columnSetA
* @return
*/
private String getTableName(EList<TdColumn> columnSetA) {
String tableName = null;
for (TdColumn column : columnSetA) {
if (column == null) {
continue;
}
if (column.eIsProxy()) {
column = (TdColumn) EObjectHelper.resolveObject(column);
}
if (belongToSameSchemata(column)) {
ColumnSet columnSetOwner = ColumnHelper.getColumnOwnerAsColumnSet(column);
if (columnSetOwner == null) {
log.error(Messages.getString("FunctionalDependencyExecutor.COLUMNSETOWNERISNULL", column.getName()));//$NON-NLS-1$
continue;
} else {
tableName = dbms().getQueryColumnSetWithPrefix(column);
// ~11005
this.catalogOrSchema = getCatalogOrSchemaName(column);
break; // all columns should belong to the same table
}
} else {
log.error(getErrorMessage());
}
}
return quote(tableName);
}
/*
* (non-Javadoc)
*
* @see org.talend.dq.analysis.AnalysisExecutor#runAnalysis(org.talend.dataquality.analysis.Analysis,
* java.lang.String)
*/
@Override
protected boolean runAnalysis(Analysis analysis, String sqlStatement) {
boolean isSuccess = Boolean.TRUE;
TypedReturnCode<java.sql.Connection> trc = this.getConnectionBeforeRun(analysis);
if (!trc.isOk()) {
log.error(trc.getMessage());
setError(trc.getMessage());
traceError(Messages.getString(
"FunctionalDependencyExecutor.CANNOTEXECUTEANALYSIS", analysis.getName(), trc.getMessage()));//$NON-NLS-1$
return Boolean.FALSE;
}
Connection connection = trc.getObject();
try {
// execute the sql statement for each indicator
EList<Indicator> indicators = analysis.getResults().getIndicators();
EList<Indicator> deactivatedIndicators = analysis.getParameters().getDeactivatedIndicators();
for (Indicator indicator : indicators) {
if (deactivatedIndicators.contains(indicator)) {
continue; // do not evaluate this indicator
}
// set the connection's catalog
if (this.catalogOrSchema != null) { // check whether null argument can be given
changeCatalog(this.catalogOrSchema, connection);
}
Expression query = dbms().getInstantiatedExpression(indicator);
if (query == null) {
traceError("Query not executed for indicator: \"" + AnalysisExecutorHelper.getIndicatorName(indicator) + "\" "//$NON-NLS-1$//$NON-NLS-2$
+ "query is null");//$NON-NLS-1$
isSuccess = Boolean.FALSE;
continue;
}
try {
Boolean isExecSuccess = executeQuery(indicator, connection, query);
indicator.setComputed(true);
if (!isExecSuccess) {
traceError("Query not executed for indicator: \"" + AnalysisExecutorHelper.getIndicatorName(indicator) + "\" "//$NON-NLS-1$//$NON-NLS-2$
+ "SQL query: " + query.getBody());//$NON-NLS-1$
isSuccess = Boolean.FALSE;
continue;
}
} catch (AnalysisExecutionException e) {
traceError(e.getMessage());
isSuccess = Boolean.FALSE;
continue;
}
}
} finally {
ReturnCode rc = closeConnection(analysis, connection);
if (!rc.isOk()) {
isSuccess = Boolean.FALSE;
}
}
return isSuccess;
}
/**
* DOC scorreia Comment method "executeQuery".
*
* @param indicator
* @param connection
* @param query
* @return
* @throws AnalysisExecutionException
*/
private boolean executeQuery(Indicator indicator, Connection connection, Expression query) throws AnalysisExecutionException {
try {
List<Object[]> myResultSet = executeQuery(catalogOrSchema, connection, query.getBody());
String tableName = getAnalyzedTable(indicator);
// MOD xqliu 2009-06-16 bug 7334
// set data filter here
reversion = indiReversionMap != null && indiReversionMap.get(indicator) != null ? indiReversionMap.get(indicator)
.booleanValue() : false;
String stringDataFilter = reversion ? ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis,
AnalysisHelper.DATA_FILTER_B) : ContextHelper.getDataFilterWithoutContext(this.cachedAnalysis,
AnalysisHelper.DATA_FILTER_A);
List<String> whereClauses = new ArrayList<String>();
if (stringDataFilter != null && !stringDataFilter.trim().equals(PluginConstant.EMPTY_STRING)) {
whereClauses.add(stringDataFilter);
}
// ~
// give result to indicator so that it handles the results
boolean ok = indicator.storeSqlResults(myResultSet);
// get row count and store it in indicator
Long count = getCount(cachedAnalysis, "*", tableName, catalogOrSchema, whereClauses); //$NON-NLS-1$
ok = ok && count != null;
indicator.setCount(count);
// compute matching count
if (ColumnsetPackage.eINSTANCE.getRowMatchingIndicator().equals(indicator.eClass())) {
RowMatchingIndicator rowMatchingIndicator = (RowMatchingIndicator) indicator;
Long notMatchingValueCount = rowMatchingIndicator.getNotMatchingValueCount();
ok = ok && notMatchingValueCount != null;
if (ok) {
rowMatchingIndicator.setMatchingValueCount(count - notMatchingValueCount);
}
}
return ok;
} catch (SQLException e) {
log.error(e, e);
// MOD TDQ-8388 return the exact error message
throw new AnalysisExecutionException(e.getMessage());
}
}
/**
* Method "getAnalyzedTable".
*
* @param indicator
* @return the table name (within quotes)
*/
private String getAnalyzedTable(Indicator indicator) {
// MOD mzhao bug 11481. get table name with catalog or schema prefix.
String analyzedTableName = null;
ColumnSet columnSetOwner = (ColumnSet) indicator.getAnalyzedElement();
if (columnSetOwner == null) {
log.error(Messages.getString(
"RowMatchingAnalysisExecutor.COLUMNSETOWNERISNULL", AnalysisExecutorHelper.getIndicatorName(indicator)));//$NON-NLS-1$
} else {
if (columnSetOwner.eIsProxy()) {
columnSetOwner = (ColumnSet) EObjectHelper.resolveObject(columnSetOwner);
}
String schemaName = getQuotedSchemaName(columnSetOwner);
String table = quote(columnSetOwner.getName());
String catalogName = getQuotedCatalogName(columnSetOwner);
if (catalogName == null && schemaName != null) {
// try to get catalog above schema
final Schema parentSchema = SchemaHelper.getParentSchema(columnSetOwner);
final Catalog parentCatalog = CatalogHelper.getParentCatalog(parentSchema);
catalogName = parentCatalog != null ? parentCatalog.getName() : null;
}
analyzedTableName = dbms().toQualifiedName(catalogName, schemaName, table);
}
return analyzedTableName;
}
@Override
protected boolean checkAnalyzedElements(final Analysis analysis, AnalysisContext context) {
AnalysisHandler analysisHandler = new AnalysisHandler();
analysisHandler.setAnalysis(analysis);
// TODO How to handle the context.getAnalysedElements()???
// for (ModelElement node : context.getAnalysedElements()) {
// TdColumn column = SwitchHelpers.COLUMN_SWITCH.doSwitch(node);
//
// // --- Check that each analyzed element has at least one indicator
// if (analysisHandler.getIndicators(column).size() == 0) {
// this.errorMessage = "Each column must have at least one indicator, "
// + "please select some indicator(s) to compute on each column!";
// return false;
// }
//
// // --- get the data provider
// TdDataProvider dp = DataProviderHelper.getTdDataProvider(column);
// if (!isAccessWith(dp)) {
// this.errorMessage = "All columns must belong to the same connection! Remove column " + column.getName()
// + " from this analysis! It does not belong to \"" + dataprovider.getName() + "\"";
// return false;
// }
// }
return true;
}
}