// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dq.indicators;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Time;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.apache.log4j.Logger;
import org.eclipse.emf.common.util.EMap;
import org.talend.commons.utils.SpecialValueDisplay;
import org.talend.cwm.helper.ColumnSetHelper;
import org.talend.cwm.helper.SwitchHelpers;
import org.talend.cwm.management.i18n.Messages;
import org.talend.cwm.relational.TdColumn;
import org.talend.dataquality.PluginConstant;
import org.talend.dataquality.analysis.Analysis;
import org.talend.dataquality.analysis.AnalysisFactory;
import org.talend.dataquality.analysis.AnalysisResult;
import org.talend.dataquality.analysis.AnalyzedDataSet;
import org.talend.dataquality.analysis.ExecutionLanguage;
import org.talend.dataquality.analysis.impl.AnalyzedDataSetImpl;
import org.talend.dataquality.helpers.IndicatorHelper;
import org.talend.dataquality.indicators.DuplicateCountIndicator;
import org.talend.dataquality.indicators.FormatFreqPieIndicator;
import org.talend.dataquality.indicators.FrequencyIndicator;
import org.talend.dataquality.indicators.Indicator;
import org.talend.dataquality.indicators.MaxLengthIndicator;
import org.talend.dataquality.indicators.MinLengthIndicator;
import org.talend.dataquality.indicators.PatternFreqIndicator;
import org.talend.dataquality.indicators.PatternLowFreqIndicator;
import org.talend.dataquality.indicators.UniqueCountIndicator;
import org.talend.dataquality.indicators.mapdb.MapDBUtils;
import org.talend.dataquality.indicators.mapdb.TalendFormatDate;
import org.talend.dataquality.indicators.mapdb.TalendFormatTime;
import org.talend.dataquality.indicators.sql.UserDefIndicator;
import org.talend.dq.dbms.DbmsLanguage;
import org.talend.dq.dbms.DbmsLanguageFactory;
import org.talend.dq.helper.UDIHelper;
import org.talend.utils.collections.MultiMapHelper;
import org.talend.utils.sql.ResultSetUtils;
import org.talend.utils.sugars.ReturnCode;
import orgomg.cwm.resource.relational.ColumnSet;

/**
 * @author scorreia
 *
 * Computes indicators on columns with the Java engine: every value read from the result set is passed to
 * {@code indicator.handle(object)}.
 */
public class IndicatorEvaluator extends Evaluator<String> {

    private static final Logger log = Logger.getLogger(IndicatorEvaluator.class);

    public IndicatorEvaluator(Analysis analysis) {
        this.analysis = analysis;
    }

    /**
     * Executes the given query and feeds each value of each analyzed column to the indicators registered for that
     * column. When the analysis parameters ask for it, the rows are also stored for drill down.
     *
     * @param sqlStatement the query to execute
     * @return a return code that is false when no analyzed column is defined, when the statement yields no result
     * set, or when invalid Java user defined indicators were found
     * @throws SQLException if the query execution, the reading of the result set or the release of the JDBC
     * resources fails
     */
    @Override
    protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException {
        ReturnCode ok = new ReturnCode(true);

        // feature 0010630: the analyzed columns must be in the same order as in the statement (needed by MSSQL ODBC)
        Set<String> columns = getAnalyzedElements();
        List<String> columnlist = sortColumnName(columns, sqlStatement);
        if (columnlist.isEmpty()) {
            ok.setReturnCode(Messages.getString("IndicatorEvaluator.DefineAnalyzedColumns"), false); //$NON-NLS-1$
            return ok;
        }

        // bug 13826: the result set is read with unqualified column names
        Map<String, String> columnlistMap = buildColumnListMap(columnlist);

        Statement statement = createStatement();
        ResultSet resultSet = null;
        try {
            // bug 6237: do not run the query when the analysis has been stopped
            if (continueRun()) {
                if (log.isInfoEnabled()) {
                    log.info("Executing query: " + sqlStatement); //$NON-NLS-1$
                }
                statement.execute(sqlStatement);
            }

            resultSet = statement.getResultSet();
            if (resultSet == null) {
                String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement); //$NON-NLS-1$
                log.warn(mess);
                ok.setReturnCode(mess, false);
                // the statement is released by the finally block
                return ok;
            }

            // TDQ-7282: invalid Java UDIs are removed from the computation; the message is shown later
            ok = getMessageForInvalidJUDIs();

            int columnCount = resultSet.getMetaData().getColumnCount();
            int maxNumberRows = analysis.getParameters().getMaxNumberRows();

            // feature 12919: drill down data on the Java engine
            AnalysisResult anaResult = analysis.getResults();
            EMap<Indicator, AnalyzedDataSet> indicToRowMap = anaResult.getIndicToRowMap();
            indicToRowMap.clear();

            int columnListSize = columnlist.size();
            rows: while (resultSet.next()) {
                for (int i = 0; i < columnListSize; i++) {
                    String qualifiedName = columnlist.get(i);
                    List<Indicator> indicators = getIndicators(qualifiedName);
                    String col = columnlistMap.get(qualifiedName);

                    Object object = ResultSetUtils.getObject(resultSet, col);
                    // Some drivers return an object whose string form only names the TIMESTAMP type; the real
                    // value must then be read with getTimestamp.
                    // FIXME this will slow down a lot the computation
                    if (object != null && !(object instanceof String) && object.toString().indexOf("TIMESTAMP") > -1) { //$NON-NLS-1$
                        object = resultSet.getTimestamp(col);
                    }
                    // TDQ-11299: avoid ClassCastException (java.sql.Date cannot be cast to java.lang.String)
                    object = wrapTemporalValue(object);

                    for (Indicator indicator : indicators) {
                        // bug 6237: stop the whole computation as soon as the analysis is stopped
                        if (!continueRun()) {
                            break rows;
                        }
                        // TDQ-3589: the duplicate count indicator needs the whole row
                        if (indicator instanceof DuplicateCountIndicator) {
                            ((DuplicateCountIndicator) indicator).handle(object, resultSet, columnCount);
                        } else {
                            indicator.handle(object);
                        }

                        AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
                        if (analyzedDataSet == null) {
                            analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
                            indicToRowMap.put(indicator, analyzedDataSet);
                            analyzedDataSet.setDataCount(maxNumberRows);
                            analyzedDataSet.setRecordSize(0);
                        }

                        if (analysis.getParameters().isStoreData()) {
                            if (indicator.mustStoreRow()) {
                                storeDrillDownRow(indicator, indicToRowMap, object, resultSet, columnCount, maxNumberRows);
                            } else if (indicator instanceof UniqueCountIndicator) {
                                List<Object[]> storedRows = analyzedDataSet.getData();
                                if (storedRows != null) {
                                    removeStoredRow(indicator, storedRows, object);
                                }
                            }
                        }
                    }
                }
            }
        } finally {
            // Release the JDBC resources even when the computation fails; the statement is closed even if closing
            // the result set throws.
            try {
                if (resultSet != null) {
                    resultSet.close();
                }
            } finally {
                statement.close();
            }
        }

        // the connection is closed only once the result set has been processed
        getConnection().close();
        return ok;
    }

    /**
     * Wraps date and time values so that they are handled and displayed by the Talend format classes (TDQ-11299,
     * TDQ-10833). Any other value, including null, is returned unchanged.
     *
     * @param value the value read from the result set
     * @return a {@link TalendFormatTime} for a {@link Time}, a {@link TalendFormatDate} for any other {@link Date},
     * else the given value
     */
    private static Object wrapTemporalValue(Object value) {
        // Time must be tested first because java.sql.Time is a java.util.Date
        if (value instanceof Time) {
            return new TalendFormatTime((Time) value);
        }
        if (value instanceof Date) {
            return new TalendFormatDate((Date) value);
        }
        return value;
    }

    /**
     * Stores the values of the current row of the result set as drill down data of the given indicator.
     *
     * @param indicator the indicator which must store the row
     * @param indicToRowMap the map holding the drill down data of each indicator
     * @param object the value of the analyzed column on the current row
     * @param resultSet the result set positioned on the current row
     * @param columnCount the number of columns of the result set
     * @param maxNumberRows the maximum number of rows to keep when the data are not stored with MapDB
     * @throws SQLException if a value cannot be read from the result set
     */
    private void storeDrillDownRow(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object,
            ResultSet resultSet, int columnCount, int maxNumberRows) throws SQLException {
        List<Object[]> valueObjectList = initDataSet(indicator, indicToRowMap, object);
        // index of the row to fill; it stays the same while the columns of the current row are copied
        int recordIncrement = valueObjectList.size();

        // bug 22523: whichever is Table or View, the columns are found through the column set
        ColumnSet columnSet = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement().eContainer());
        List<TdColumn> tableColumns = ColumnSetHelper.getColumns(columnSet);
        boolean mapDBMode = indicator.isUsedMapDBMode();

        List<Object> inputRowList = new ArrayList<Object>();
        for (int j = 0; j < columnCount; j++) {
            String columnName = tableColumns.get(j).getName();
            Object cell = wrapTemporalValue(ResultSetUtils.getObject(resultSet, columnName));

            if (mapDBMode) {
                inputRowList.add(cell == null ? PluginConstant.NULL_STRING : cell);
                continue;
            }
            if (recordIncrement >= maxNumberRows) {
                // the maximum number of stored rows is reached: nothing more to record
                break;
            }
            if (recordIncrement < valueObjectList.size()) {
                // the array of the current row has already been created by a previous column
                valueObjectList.get(recordIncrement)[j] = cell;
            } else {
                Object[] valueObject = new Object[columnCount];
                valueObject[j] = cell;
                valueObjectList.add(valueObject);
            }
        }

        if (mapDBMode) {
            MapDBUtils.handleDrillDownData(object, inputRowList, indicator);
        }
    }

    /**
     * Removes from the stored rows the first one whose analyzed column holds the given value. Nothing is removed
     * when the analyzed element is not found among the columns of its column set.
     *
     * @param indicator the indicator owning the stored rows
     * @param storedRows the rows stored so far for the indicator
     * @param object the value of the analyzed column on the current row, may be null
     */
    private void removeStoredRow(Indicator indicator, List<Object[]> storedRows, Object object) {
        // bug 22523: whichever is Table or View, the columns are found through the column set
        ColumnSet columnSet = SwitchHelpers.COLUMN_SET_SWITCH.doSwitch(indicator.getAnalyzedElement().eContainer());
        List<TdColumn> columnElementList = ColumnSetHelper.getColumns(columnSet);
        int offsetting = columnElementList.indexOf(indicator.getAnalyzedElement());
        if (offsetting < 0) {
            return;
        }
        for (Object[] dataObject : storedRows) {
            Object storedValue = dataObject[offsetting];
            // null safe comparison: a stored null value only matches a null value
            boolean sameValue = storedValue == null ? object == null : storedValue.equals(object);
            if (sameValue) {
                // leave the loop right after the removal, the iteration must not go on over a modified list
                storedRows.remove(dataObject);
                break;
            }
        }
    }

    /**
     * Maps each column name to the text following its last dot, i.e. the name without qualifier (bug 13826).
     *
     * @param columnlist the column names, qualified or not
     * @return a map from each given name to its unqualified name
     */
    private Map<String, String> buildColumnListMap(List<String> columnlist) {
        Map<String, String> result = new HashMap<String, String>();
        for (String col : columnlist) {
            result.put(col, col.substring(col.lastIndexOf('.') + 1));
        }
        return result;
    }

    /**
     * Gets the list in which the current row must be stored for the given indicator; the analyzed data set of the
     * indicator and the list are created when they do not exist yet.
     *
     * @param indicator the indicator which stores the row
     * @param indicToRowMap the map holding the drill down data of each indicator
     * @param object the value of the analyzed column on the current row, may be null
     * @return the list of rows to fill; null when the pattern data entry of the indicator is not an
     * {@link ArrayList}
     */
    @SuppressWarnings("unchecked")
    protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap, Object object) {
        AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator);
        if (analyzedDataSet == null) {
            analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet();
            indicToRowMap.put(indicator, analyzedDataSet);
            analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows());
            analyzedDataSet.setRecordSize(0);
        }

        boolean lengthIndicator = indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator;
        if (indicator instanceof FrequencyIndicator || lengthIndicator) {
            // the rows are grouped by key
            Map<Object, List<Object[]>> valueObjectListMap = analyzedDataSet.getFrequencyData();
            if (valueObjectListMap == null) {
                valueObjectListMap = new HashMap<Object, List<Object[]>>();
                analyzedDataSet.setFrequencyData(valueObjectListMap);
            }
            String key = frequencyKey(indicator, object);
            List<Object[]> rowsOfKey = valueObjectListMap.get(key);
            if (rowsOfKey == null) {
                rowsOfKey = new ArrayList<Object[]>();
                valueObjectListMap.put(key, rowsOfKey);
            }
            return rowsOfKey;
        }

        if (indicator.isInValidRow() || indicator.isValidRow()) {
            List<Object> patternData = analyzedDataSet.getPatternData();
            if (patternData == null) {
                patternData = new ArrayList<Object>();
                patternData.add(new ArrayList<Object[]>());// mapping with AnalyzedDataSetImpl.VALID_VALUE
                patternData.add(new ArrayList<Object[]>());// mapping with AnalyzedDataSetImpl.INVALID_VALUE
                analyzedDataSet.setPatternData(patternData);
            }
            int index = indicator.isInValidRow() ? AnalyzedDataSetImpl.INVALID_VALUE : AnalyzedDataSetImpl.VALID_VALUE;
            Object listObject = patternData.get(index);
            if (listObject instanceof ArrayList<?>) {
                return (ArrayList<Object[]>) listObject;
            }
            return null;
        }

        List<Object[]> valueObjectList = analyzedDataSet.getData();
        if (valueObjectList == null) {
            valueObjectList = new ArrayList<Object[]>();
            analyzedDataSet.setData(valueObjectList);
        }
        return valueObjectList;
    }

    /**
     * Computes the key under which a row is stored in the frequency data of an indicator. The order of the tests
     * matters: the length indicators use the length of the value even for an empty string.
     *
     * @param indicator a frequency, minimum length or maximum length indicator
     * @param object the value of the analyzed column, may be null
     * @return the key of the value
     */
    private String frequencyKey(Indicator indicator, Object object) {
        if (object == null) {
            return SpecialValueDisplay.NULL_FIELD;
        }
        if (indicator instanceof MinLengthIndicator || indicator instanceof MaxLengthIndicator) {
            return String.valueOf(object.toString().length());
        }
        if (object.equals(PluginConstant.EMPTY_STRING)) {
            return SpecialValueDisplay.EMPTY_FIELD;
        }
        if (indicator instanceof PatternLowFreqIndicator) {
            return ((PatternLowFreqIndicator) indicator).convertCharacters(object.toString());
        }
        if (indicator instanceof PatternFreqIndicator) {
            return ((PatternFreqIndicator) indicator).convertCharacters(object.toString());
        }
        if (indicator instanceof FormatFreqPieIndicator) {
            // feature TDQ-3253
            return ((FormatFreqPieIndicator) indicator).getCurrentKey();
        }
        return object.toString();
    }

    /**
     * Orders the column names by the position of their quoted unqualified name in the statement. Columns found at
     * the same position, or not found at all (position -1, hence placed first), are all kept, in the iteration
     * order of the given set.
     *
     * @author zshen
     * @param columns the names of the analyzed columns, qualified or not
     * @param sqlStatement the statement in which the columns are searched
     * @return the column names in the same order as in the statement
     */
    public List<String> sortColumnName(Set<String> columns, String sqlStatement) {
        List<String> columnNameList = new ArrayList<String>(columns.size());
        if (columns.isEmpty()) {
            return columnNameList;
        }
        // the language only depends on the analysis: create it once for all the columns
        DbmsLanguage dbmsLanguage = DbmsLanguageFactory.createDbmsLanguage(analysis, ExecutionLanguage.SQL);

        // sorted by position; a list per position so that no column is lost when several share a position
        Map<Integer, List<String>> columnsByLocation = new TreeMap<Integer, List<String>>();
        for (String col : columns) {
            String colName = col.substring(col.lastIndexOf('.') + 1);
            Integer location = Integer.valueOf(sqlStatement.indexOf(dbmsLanguage.quote(colName)));
            List<String> sameLocation = columnsByLocation.get(location);
            if (sameLocation == null) {
                sameLocation = new ArrayList<String>();
                columnsByLocation.put(location, sameLocation);
            }
            sameLocation.add(col);
        }

        for (List<String> sameLocation : columnsByLocation.values()) {
            columnNameList.addAll(sameLocation);
        }
        return columnNameList;
    }

    /*
     * (non-Javadoc)
     *
     * @see org.talend.dq.indicators.Evaluator#storeIndicator(java.lang.Object,
     * org.talend.dataquality.indicators.Indicator)
     *
     * MOD scorreia 2009-04-24 overridden to solve bug 7093: the leaves of the indicator are stored, not the
     * indicator itself.
     */
    @Override
    public boolean storeIndicator(String elementToAnalyze, Indicator indicator) {
        boolean ok = true;
        final List<Indicator> indicatorLeaves = IndicatorHelper.getIndicatorLeaves(indicator);
        for (Indicator leaf : indicatorLeaves) {
            if (!this.allIndicators.contains(leaf)
                    && !MultiMapHelper.addUniqueObjectToListMap(elementToAnalyze, leaf, elementToIndicators)) {
                ok = false;
            }
        }
        this.allIndicators.addAll(indicatorLeaves);
        return ok;
    }

    /**
     * Checks, for each user defined indicator, that {@link UDIHelper#isJUDIValid} accepts its definition. The
     * rejected indicators are removed from the indicators to compute and their names are reported in the message
     * of the returned code.
     *
     * @return a true return code when no indicator has been removed, else a false one carrying the message
     */
    protected ReturnCode getMessageForInvalidJUDIs() {
        ReturnCode ret = new ReturnCode(Boolean.TRUE);
        Set<String> invalidUdiNames = new HashSet<String>();

        Iterator<String> colIt = elementToIndicators.keySet().iterator();
        while (colIt.hasNext()) {
            List<Indicator> indicators = elementToIndicators.get(colIt.next());
            // collected first, then removed: the list must not change while it is iterated
            List<Indicator> needRemovedInds = new ArrayList<Indicator>();
            for (Indicator ind : indicators) {
                if (ind instanceof UserDefIndicator && !UDIHelper.isJUDIValid(ind.getIndicatorDefinition())) {
                    invalidUdiNames.add(ind.getName());
                    needRemovedInds.add(ind);
                }
            }
            if (!needRemovedInds.isEmpty()) {
                indicators.removeAll(needRemovedInds);
                allIndicators.removeAll(needRemovedInds);
            }
        }

        if (!invalidUdiNames.isEmpty()) {
            String message = invalidUdiNames.toString();
            ret.setReturnCode(Messages.getString("IndicatorEvaluator.NoExpressionFound", message), false); //$NON-NLS-1$
        }
        return ret;
    }
}