// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dq.indicators; import java.io.File; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.commons.lang.math.NumberUtils; import org.apache.log4j.Logger; import org.eclipse.core.runtime.IPath; import org.eclipse.core.runtime.Path; import org.eclipse.emf.common.util.BasicEList; import org.eclipse.emf.common.util.EList; import org.eclipse.emf.common.util.EMap; import org.eclipse.jface.dialogs.MessageDialog; import org.eclipse.swt.widgets.Display; import org.talend.core.model.metadata.builder.connection.DelimitedFileConnection; import org.talend.core.model.metadata.builder.connection.Escape; import org.talend.core.model.metadata.builder.connection.MetadataColumn; import org.talend.core.model.metadata.builder.database.JavaSqlFactory; import org.talend.cwm.helper.ColumnHelper; import org.talend.cwm.helper.ModelElementHelper; import org.talend.cwm.management.i18n.Messages; import org.talend.cwm.xml.TdXmlSchema; import org.talend.dataquality.PluginConstant; import org.talend.dataquality.analysis.Analysis; import org.talend.dataquality.analysis.AnalysisFactory; import org.talend.dataquality.analysis.AnalysisResult; import org.talend.dataquality.analysis.AnalyzedDataSet; import org.talend.dataquality.helpers.AnalysisHelper; import org.talend.dataquality.indicators.DistinctCountIndicator; import org.talend.dataquality.indicators.DuplicateCountIndicator; import org.talend.dataquality.indicators.Indicator; import org.talend.dataquality.indicators.RowCountIndicator; import org.talend.dataquality.indicators.UniqueCountIndicator; import org.talend.dataquality.indicators.columnset.AllMatchIndicator; import org.talend.dataquality.indicators.columnset.ColumnsetPackage; import org.talend.dataquality.indicators.columnset.SimpleStatIndicator; import org.talend.dq.helper.AnalysisExecutorHelper; import org.talend.dq.helper.FileUtils; import org.talend.fileprocess.FileInputDelimited; import org.talend.utils.sql.ResultSetUtils; import org.talend.utils.sql.TalendTypeConvert; import org.talend.utils.sugars.ReturnCode; import orgomg.cwm.objectmodel.core.ModelElement; import com.talend.csv.CSVReader; /** * DOC qiongli class global comment. Detailled comment */ public class ColumnSetIndicatorEvaluator extends Evaluator<String> { private static Logger log = Logger.getLogger(ColumnSetIndicatorEvaluator.class); // MOD yyi 2011-02-22 17871:delimitefile protected boolean isDelimitedFile = false; protected TdXmlSchema tdXmlDocument; private boolean isBadlyFormFlatFile = false; public ColumnSetIndicatorEvaluator(Analysis analysis) { this.analysis = analysis; this.isDelimitedFile = analysis.getContext().getConnection() instanceof DelimitedFileConnection; } @Override protected ReturnCode executeSqlQuery(String sqlStatement) throws SQLException { ReturnCode ok = new ReturnCode(true); AnalysisResult anaResult = analysis.getResults(); EMap<Indicator, AnalyzedDataSet> indicToRowMap = anaResult.getIndicToRowMap(); indicToRowMap.clear(); if (isDelimitedFile) { ok = evaluateByDelimitedFile(sqlStatement, ok); } else { ok = evaluateBySql(sqlStatement, ok); } return ok; } /** * * DOC qiongli Comment method "getAnalyzedElementsName". * * @return */ private List<String> getAnalyzedElementsName() { List<String> columnsName = new ArrayList<String>(); List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements(); for (ModelElement me : analysisElementList) { String name = ModelElementHelper.getName(me); if (name != null) { columnsName.add(name); } } return columnsName; } /** * * orgnize EList 'objectLs' by SQL. * * @param sqlStatement * @param ok * @return * @throws SQLException */ private ReturnCode evaluateBySql(String sqlStatement, ReturnCode ok) throws SQLException { Statement statement = null; ResultSet resultSet = null; try { statement = createStatement(); if (continueRun()) { if (log.isInfoEnabled()) { log.info("Executing query: " + sqlStatement); //$NON-NLS-1$ } statement.execute(sqlStatement); } // get the results resultSet = statement.getResultSet(); List<String> columnNames = getAnalyzedElementsName(); if (resultSet == null) { String mess = Messages.getString("Evaluator.NoResultSet", sqlStatement); //$NON-NLS-1$ log.warn(mess); ok.setReturnCode(mess, false); return ok; } EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap(); indicToRowMap.clear(); while (resultSet.next()) { // MOD yyi 2012-04-11 TDQ-4916:Add memory control for java analysis. if (!continueRun()) { break; } EList<Object> objectLs = new BasicEList<Object>(); Iterator<String> it = columnNames.iterator(); while (it.hasNext()) { Object obj = ResultSetUtils.getObject(resultSet, it.next()); objectLs.add(obj); } if (objectLs.size() == 0) { continue; } handleObjects(objectLs, resultSet); } } finally { if (resultSet != null) { resultSet.close(); } if (statement != null) { statement.close(); } closeConnection(); } return ok; } /** * * orgnize EList 'objectLs' for DelimitedFile connection. * * @param sqlStatement * @param returnCode * @return */ private ReturnCode evaluateByDelimitedFile(String sqlStatement, ReturnCode returnCode) { DelimitedFileConnection fileConnection = (DelimitedFileConnection) analysis.getContext().getConnection(); String path = JavaSqlFactory.getURL(fileConnection); String rowSeparator = JavaSqlFactory.getRowSeparatorValue(fileConnection); IPath iPath = new Path(path); File file = iPath.toFile(); if (!file.exists()) { returnCode.setReturnCode(Messages.getString("ColumnSetIndicatorEvaluator.FileNotFound", file.getName()), false); //$NON-NLS-1$ return returnCode; } CSVReader csvReader = null; try { List<ModelElement> analysisElementList = this.analysis.getContext().getAnalysedElements(); EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap(); indicToRowMap.clear(); if (Escape.CSV.equals(fileConnection.getEscapeType())) { // use CsvReader to parse. csvReader = FileUtils.createCsvReader(file, fileConnection); this.useCsvReader(csvReader, file, fileConnection, analysisElementList); } else { // use TOSDelimitedReader in FileInputDelimited to parse. FileInputDelimited fileInputDelimited = AnalysisExecutorHelper.createFileInputDelimited(fileConnection); long currentRow = JavaSqlFactory.getHeadValue(fileConnection); int columsCount = 0; while (fileInputDelimited.nextRecord()) { if (!continueRun()) { break; } currentRow++; if (columsCount == 0) { columsCount = fileInputDelimited.getColumnsCountOfCurrentRow(); } String[] rowValues = new String[columsCount]; for (int i = 0; i < columsCount; i++) { rowValues[i] = fileInputDelimited.get(i); } orgnizeObjectsToHandel(path, rowValues, currentRow, analysisElementList, rowSeparator); } // TDQ-5851~ fileInputDelimited.close(); } } catch (Exception e) { log.error(e, e); returnCode.setReturnCode(e.getMessage(), false); } finally { if (csvReader != null) { try { csvReader.close(); } catch (IOException e) { log.error(e, e); } } } return returnCode; } private void useCsvReader(CSVReader csvReader, File file, DelimitedFileConnection dfCon, List<ModelElement> analysisElementList) throws Exception { FileUtils.initializeCsvReader(dfCon, csvReader); long currentRecord = 0; int limitValue = JavaSqlFactory.getLimitValue(dfCon); int headValue = JavaSqlFactory.getHeadValue(dfCon); for (int i = 0; i < headValue && csvReader.readNext(); i++) { // do nothing, just ignore the header part } while (csvReader.readNext()) { currentRecord++; if (!continueRun() || limitValue != -1 && currentRecord > limitValue) { break; } String[] rowValues = csvReader.getValues(); this.orgnizeObjectsToHandel(dfCon.getFilePath(), rowValues, currentRecord, analysisElementList, dfCon.getFieldSeparatorValue()); } } /** * * orgnize a List by a row,then call 'handleObjects(...)'. * * @param rowValues * @param currentRow * @param analysisElementList * @param separator */ private void orgnizeObjectsToHandel(String fileName, String[] rowValues, long currentRow, List<ModelElement> analysisElementList, String separator) { EList<Object> objectLs = new BasicEList<Object>(); MetadataColumn mColumn = null; Object object = null; for (int i = 0; i < analysisElementList.size(); i++) { mColumn = (MetadataColumn) analysisElementList.get(i); Integer position = ColumnHelper.getColumnIndex(mColumn); // MOD qiongli 2011-4-2,bug 20033,warning with a badly form file if (position == null || position >= rowValues.length) { log.warn(Messages.getString("DelimitedFileIndicatorEvaluator.incorrectData", //$NON-NLS-1$ mColumn.getLabel(), currentRow, fileName)); if (!isBadlyFormFlatFile) { isBadlyFormFlatFile = true; Display.getDefault().asyncExec(new Runnable() { public void run() { MessageDialog.openWarning(null, Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Title"), //$NON-NLS-1$ Messages.getString("DelimitedFileIndicatorEvaluator.badlyForm.Message")); //$NON-NLS-1$ } }); } continue; } object = TalendTypeConvert.convertToObject(mColumn.getTalendType(), rowValues[position], mColumn.getPattern()); objectLs.add(object); } if (mColumn != null) { List<MetadataColumn> columnList = ColumnHelper.getColumnOwnerAsMetadataTable(mColumn).getColumns(); handleObjects(objectLs, rowValues, columnList); } } /** * DOC qiongli Comment method "handleObjects". * * @param objectLs * @throws SQLException */ private void handleObjects(EList<Object> objectLs, ResultSet resultSet) throws SQLException { if (objectLs.size() == 0) { return; } EList<Indicator> indicators = analysis.getResults().getIndicators(); // EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap(); // int recordIncrement = 0; for (Indicator indicator : indicators) { if (!this.continueRun()) { break; } if (ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) { indicator.handle(objectLs); } } } /** * handle Objects and store data for delimited file . * * @param objectLs * @param rowValues * @param metadataColumn is one of analysedElements.it is used to get its Table then get the table's columns. */ private void handleObjects(EList<Object> objectLs, String[] rowValues, List<MetadataColumn> columnList) { if (objectLs.size() == 0) { return; } EList<Indicator> indicators = analysis.getResults().getIndicators(); EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap(); int recordIncrement = 0; if (indicators != null) { for (Indicator indicator : indicators) { if (!this.continueRun()) { break; } if (ColumnsetPackage.eINSTANCE.getColumnSetMultiValueIndicator().isSuperTypeOf(indicator.eClass())) { indicator.handle(objectLs); // feature 19192,store all rows value for RowCountIndicator if (indicator instanceof SimpleStatIndicator) { SimpleStatIndicator simpIndi = (SimpleStatIndicator) indicator; for (Indicator leafIndicator : simpIndi.getLeafIndicators()) { if (!this.continueRun()) { break; } // MOD 20130517 yyin: TDQ-7279 Column Set Analysis on FIle/DB - Can't view rows for // (distinct count, unique count, duplicate count, etc). if (!analysis.getParameters().isStoreData()) {// ~ continue; } List<Object[]> valueObjectList = initDataSet(leafIndicator, indicToRowMap); recordIncrement = valueObjectList.size(); Object[] valueObject = new Object[columnList.size()]; if (recordIncrement < analysis.getParameters().getMaxNumberRows()) { for (int j = 0; j < columnList.size(); j++) { if (!this.continueRun()) { break; } Object newobject = PluginConstant.EMPTY_STRING; // if (recordIncrement < analysis.getParameters().getMaxNumberRows()) { if (j < rowValues.length) { newobject = rowValues[j]; } if (recordIncrement < valueObjectList.size()) { valueObjectList.get(recordIncrement)[j] = newobject; } else { valueObject[j] = newobject; valueObjectList.add(valueObject); } // } } } } } } } } } /* * ADD yyi 2011-02-22 17871:delimitefile * * @see org.talend.dq.indicators.Evaluator#checkConnection() */ @Override protected ReturnCode checkConnection() { if (isDelimitedFile) { return new ReturnCode(); } return super.checkConnection(); } /* * ADD yyi 2011-02-24 17871:delimitefile * * @see org.talend.dq.indicators.Evaluator#closeConnection() */ @Override protected ReturnCode closeConnection() { if (isDelimitedFile) { return new ReturnCode(); } return super.closeConnection(); } protected List<Object[]> initDataSet(Indicator indicator, EMap<Indicator, AnalyzedDataSet> indicToRowMap) { AnalyzedDataSet analyzedDataSet = indicToRowMap.get(indicator); List<Object[]> valueObjectList = null; if (analyzedDataSet == null) { analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet(); indicToRowMap.put(indicator, analyzedDataSet); analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows()); analyzedDataSet.setRecordSize(0); } valueObjectList = analyzedDataSet.getData(); if (valueObjectList == null) { valueObjectList = new ArrayList<Object[]>(); analyzedDataSet.setData(valueObjectList); } return valueObjectList; } /** * store data which from 'simpleIndicator.getListRows()' except RowCountIndicator. * * @param indicToRowMap */ private void storeDataSet() { EMap<Indicator, AnalyzedDataSet> indicToRowMap = analysis.getResults().getIndicToRowMap(); for (Indicator indicator : analysis.getResults().getIndicators()) { if (indicator instanceof SimpleStatIndicator) { SimpleStatIndicator simpleIndicator = (SimpleStatIndicator) indicator; if (!analysis.getParameters().isStoreData()) { break; } if (simpleIndicator.isUsedMapDBMode() && AnalysisHelper.isJavaExecutionEngine(analysis)) { // nothing need to do } else { List<Object[]> listRows = simpleIndicator.getListRows(); if (listRows == null || listRows.isEmpty()) { break; } for (Indicator leafIndicator : simpleIndicator.getLeafIndicators()) { if (leafIndicator instanceof RowCountIndicator) { continue; } List<Object[]> dataList = new ArrayList<Object[]>(); AnalyzedDataSet analyzedDataSet = indicToRowMap.get(leafIndicator); if (analyzedDataSet == null) { analyzedDataSet = AnalysisFactory.eINSTANCE.createAnalyzedDataSet(); indicToRowMap.put(leafIndicator, analyzedDataSet); analyzedDataSet.setDataCount(analysis.getParameters().getMaxNumberRows()); analyzedDataSet.setRecordSize(0); } for (int i = 0; i < listRows.size(); i++) { // if (dataList.size() >= analyzedDataSet.getDataCount()) { // break; // } Object[] object = listRows.get(i); // the last element store the count value. Object count = object[object.length > 0 ? object.length - 1 : 0]; if (leafIndicator instanceof DistinctCountIndicator) { dataList.add(object); } else if (leafIndicator instanceof UniqueCountIndicator) { if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) { if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() == 1) { dataList.add(object); } } } else if (leafIndicator instanceof DuplicateCountIndicator) { if (count != null && NumberUtils.isNumber(count + PluginConstant.EMPTY_STRING)) { if (Long.valueOf(count + PluginConstant.EMPTY_STRING).longValue() > 1) { dataList.add(object); } } } } analyzedDataSet.setData(dataList); } // MOD sizhaoliu TDQ-7144 clear the listRows after usage for drill down if (!simpleIndicator.isStoreData()) { simpleIndicator.setListRows(new ArrayList<Object[]>()); } } } if (indicator instanceof AllMatchIndicator) { AllMatchIndicator allMatchIndicator = (AllMatchIndicator) indicator; if (!allMatchIndicator.isStoreData()) { allMatchIndicator.setListRows(new ArrayList<Object[]>()); } } } } public TdXmlSchema getTdXmlDocument() { return this.tdXmlDocument; } public void setTdXmlDocument(TdXmlSchema tdXmlDocument) { this.tdXmlDocument = tdXmlDocument; } @Override public ReturnCode evaluateIndicators(String sqlStatement, boolean closeConnection) { ReturnCode returnCode = super.evaluateIndicators(sqlStatement, closeConnection); storeDataSet(); return returnCode; } }