/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.studio.io.gui.internal.steps.configuration;

import java.text.DateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.swing.table.AbstractTableModel;

import com.rapidminer.core.io.data.ColumnMetaData;
import com.rapidminer.core.io.data.ColumnMetaData.ColumnType;
import com.rapidminer.core.io.data.DataSet;
import com.rapidminer.core.io.data.DataSetException;
import com.rapidminer.core.io.data.DataSetMetaData;
import com.rapidminer.core.io.data.DataSetRow;
import com.rapidminer.core.io.data.ParseException;
import com.rapidminer.core.io.data.source.DataSource;
import com.rapidminer.example.Attribute;
import com.rapidminer.operator.nio.ImportWizardUtils;
import com.rapidminer.operator.nio.model.DefaultPreview;
import com.rapidminer.tools.I18N;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.Tools;


/**
 * A model for the column configuration data table. It loads the model data from the preview
 * {@link DataSet} provided by {@link DataSource#getPreview(int)}, requested with
 * {@link ImportWizardUtils#getPreviewLength()} as its length argument. Stores the
 * {@link ParsingError}s encountered during loading and remembers the erroneous cells.
 * <p>
 * The loading methods ({@code read}, {@link #reread(ProgressListener)} and
 * {@link #rereadColumn(int, ProgressListener)}) synchronize on this instance. The table model
 * getters and {@link #hasError(int, int)} are not synchronized.
 *
 * @author Nils Woehler, Gisa Schaefer
 * @since 7.0.0
 */
final class ConfigureDataTableModel extends AbstractTableModel {

    private static final long serialVersionUID = 1L;

    /** error message for more than two values in binary column */
    private static final String ALREADY_TWO_BINARY_VALUES = I18N
            .getGUILabel("io.dataimport.step.data_column_configuration.error_table.parsing_error.not_binary_column")
            + " ";

    /** the (copied) meta data that holds the column types and date format configured by the user */
    private final DataSetMetaData metaData;

    /** the preview length requested from the data source; also used as the progress total */
    private final int previewSize = ImportWizardUtils.getPreviewLength();

    /** the formatted cell values, indexed as {@code data[row][column]} */
    private String[][] data;

    /** all parsing errors collected by the last (re)read */
    private final List<ParsingError> parsingErrorList = new LinkedList<>();

    /** maps a column index to the set of row indices that failed to parse in that column */
    private Map<Integer, Set<Integer>> errorCells = new HashMap<>();

    /** maps the index of each binary column to the distinct values seen so far (at most two) */
    private final Map<Integer, Set<String>> binaryMapping = new HashMap<>();

    /** the preview data set all (re)reads iterate over */
    private DataSet dataSet;

    /**
     * original meta data of the data set. Is not automatically changed. All changes have to be
     * undone.
     */
    private final DataSetMetaData originalDataSourceMetaData;

    /**
     * Creates a new model instance and immediately reads the preview data.
     *
     * @param dataSource
     *            the data source used to query the preview data
     * @param metaData
     *            the metaData copy to use
     * @param l
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             in case reading the preview fails
     */
    public ConfigureDataTableModel(DataSource dataSource, DataSetMetaData metaData, ProgressListener l)
            throws DataSetException {
        this.metaData = metaData;
        this.originalDataSourceMetaData = dataSource.getMetadata();
        // use the same value that is reported as progress total in read(...)
        this.dataSet = dataSource.getPreview(previewSize);
        read(dataSet, l);
    }

    /**
     * Rereads the data with the current date format.
     * <p>
     * The date format of the original data source meta data is temporarily replaced by the one of
     * the configured meta data and restored afterwards, even if reading fails. The method is
     * synchronized so that this temporary swap cannot interleave with the same swap done in
     * {@link #rereadColumn(int, ProgressListener)}.
     *
     * @param listener
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             propagated from reading the preview data set
     */
    synchronized void reread(ProgressListener listener) throws DataSetException {
        DateFormat originalDateFormat = originalDataSourceMetaData.getDateFormat();
        originalDataSourceMetaData.setDateFormat(metaData.getDateFormat());
        try {
            read(dataSet, listener);
        } finally {
            // all changes to the original meta data have to be undone
            originalDataSourceMetaData.setDateFormat(originalDateFormat);
        }
    }

    /**
     * Reads all rows of the given preview into {@link #data}. Clears the previously collected
     * parsing errors, error cells and binary value sets before reading.
     *
     * @param dataPreview
     *            the preview data set to read from; it is reset before reading
     * @param listener
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             propagated from accessing the preview data set
     */
    private synchronized void read(DataSet dataPreview, ProgressListener listener) throws DataSetException {
        if (listener != null) {
            listener.setTotal(previewSize);
        }

        // rows are only appended and afterwards copied into an array
        List<String[]> dataList = new ArrayList<>();
        parsingErrorList.clear();
        errorCells.clear();
        binaryMapping.clear();

        // prepare an empty value set for every column that is configured as binary
        int columnIndex = 0;
        for (ColumnMetaData column : metaData.getColumnMetaData()) {
            if (column.getType() == ColumnType.BINARY) {
                binaryMapping.put(columnIndex, new HashSet<String>(2));
            }
            columnIndex++;
        }

        // start from the beginning
        dataPreview.reset();

        int numberOfColumns = dataPreview.getNumberOfColumns();

        // read every row the preview provides; the loop itself does not enforce previewSize, the
        // preview was requested with previewSize as its length argument in the constructor
        while (dataPreview.hasNext()) {
            DataSetRow dataRow = dataPreview.nextRow();

            String[] row = new String[numberOfColumns];
            for (int i = 0; i < row.length; i++) {
                if (dataRow.isMissing(i)) {
                    row[i] = Attribute.MISSING_NOMINAL_VALUE;
                } else {
                    final ColumnType columnType = metaData.getColumnMetaData(i).getType();
                    // NOTE(review): getCurrentRowIndex() is presumably the 0-based index of the
                    // row just returned, matching the counter used in rereadColumn - confirm
                    readNotMissingEntry(dataRow, row, i, dataPreview.getCurrentRowIndex(), columnType, errorCells);
                }
            }
            dataList.add(row);
            if (listener != null) {
                listener.setCompleted(dataList.size());
            }
        }

        // copy to array since will be accessed by index
        this.data = dataList.toArray(new String[dataList.size()][]);
        if (listener != null) {
            listener.complete();
        }
    }

    /**
     * Reads the entry specified by columnIndex from the dataRow and stores it in the row array.
     * If the entry cannot be parsed, {@code null} is stored instead, a {@link ParsingError} is
     * added to {@link #parsingErrorList} and the cell is registered in the given errorCells map.
     *
     * @param dataRow
     *            the data row containing the data
     * @param row
     *            the row array where to store the data
     * @param columnIndex
     *            the column to consider
     * @param rowIndex
     *            the current row, used for errors
     * @param columnType
     *            the type of the column
     * @param errorCells
     *            the map where to store the errorCells
     */
    private void readNotMissingEntry(DataSetRow dataRow, String[] row, int columnIndex, int rowIndex,
            final ColumnType columnType, Map<Integer, Set<Integer>> errorCells) {
        try {
            switch (columnType) {
                case DATE:
                    row[columnIndex] = Tools.formatDate(dataRow.getDate(columnIndex));
                    break;
                case DATETIME:
                    row[columnIndex] = Tools.formatDateTime(dataRow.getDate(columnIndex));
                    break;
                case TIME:
                    row[columnIndex] = Tools.formatTime(dataRow.getDate(columnIndex));
                    break;
                case REAL:
                    row[columnIndex] = Tools.formatNumber(dataRow.getDouble(columnIndex));
                    break;
                case INTEGER:
                    // can do round here, since value is not NaN
                    row[columnIndex] = Tools.formatIntegerIfPossible(Math.round(dataRow.getDouble(columnIndex)));
                    break;
                case CATEGORICAL:
                    row[columnIndex] = DefaultPreview.shortenDisplayValue(dataRow.getString(columnIndex));
                    break;
                case BINARY:
                    String value = dataRow.getString(columnIndex);
                    final Set<String> binaryEntries = binaryMapping.get(columnIndex);
                    // a third distinct value in a binary column is reported as a parsing error
                    if (binaryEntries.size() == 2 && !binaryEntries.contains(value)) {
                        throw new ParseException(ALREADY_TWO_BINARY_VALUES + binaryEntries.toString());
                    } else {
                        binaryEntries.add(value);
                        row[columnIndex] = DefaultPreview.shortenDisplayValue(value);
                    }
                    break;
                default:
                    break;
            }
        } catch (ParseException e) {
            row[columnIndex] = null;

            // store error with original value if possible
            String originalValue = null;
            // if the type is categorical, getString was already called above, so there will be a
            // parse exception again
            if (columnType != ColumnType.CATEGORICAL) {
                try {
                    originalValue = dataRow.getString(columnIndex);
                } catch (ParseException e1) {
                    // the raw value is not available either, report the error without it
                    originalValue = null;
                }
            }
            parsingErrorList.add(new ParsingError(columnIndex, rowIndex, originalValue, e.getMessage()));

            // register the cell, creating the row set for this column on first use
            Set<Integer> errorRows = errorCells.get(columnIndex);
            if (errorRows == null) {
                errorRows = new HashSet<>();
                errorCells.put(columnIndex, errorRows);
            }
            errorRows.add(rowIndex);
        }
    }

    /**
     * Reads the column with index columnIndex again and stores the results.
     * <p>
     * As in {@link #reread(ProgressListener)}, the date format of the original data source meta
     * data is temporarily replaced and restored afterwards. Progress is reported as a percentage
     * (total of 100); this method does not call {@code complete()} on the listener.
     *
     * @param columnIndex
     *            the column to reread
     * @param listener
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             propagated from accessing the preview data set
     */
    synchronized void rereadColumn(int columnIndex, ProgressListener listener) throws DataSetException {
        if (listener != null) {
            listener.setTotal(100);
        }
        DateFormat originalDateFormat = originalDataSourceMetaData.getDateFormat();
        originalDataSourceMetaData.setDateFormat(metaData.getDateFormat());
        try {
            dataSet.reset();
            final ColumnType columnType = metaData.getColumnMetaData(columnIndex).getType();
            if (columnType == ColumnType.BINARY && !binaryMapping.containsKey(columnIndex)) {
                binaryMapping.put(columnIndex, new HashSet<String>(2));
            }

            // copy errors cells such that errorCells change all at once
            Map<Integer, Set<Integer>> errorCellsCopy = new HashMap<Integer, Set<Integer>>(errorCells);
            errorCellsCopy.remove(columnIndex);
            removeFromErrors(columnIndex);

            int rowIndex = 0;
            // never read more rows than were stored by the initial read
            while (dataSet.hasNext() && rowIndex < data.length) {
                DataSetRow dataRow = dataSet.nextRow();
                String[] containerRow = data[rowIndex];
                if (dataRow.isMissing(columnIndex)) {
                    containerRow[columnIndex] = Attribute.MISSING_NOMINAL_VALUE;
                } else {
                    readNotMissingEntry(dataRow, containerRow, columnIndex, rowIndex, columnType, errorCellsCopy);
                }
                if (listener != null) {
                    listener.setCompleted(100 * rowIndex / data.length);
                }
                rowIndex++;
            }
            errorCells = errorCellsCopy;
        } finally {
            // all changes to the original meta data have to be undone
            originalDataSourceMetaData.setDateFormat(originalDateFormat);
        }
    }

    /**
     * Removes all entries associated to the column with columnIndex from {@link #parsingErrorList}.
     *
     * @param columnIndex
     *            the index for which to delete the entries
     */
    private void removeFromErrors(int columnIndex) {
        Iterator<ParsingError> iterator = parsingErrorList.iterator();
        while (iterator.hasNext()) {
            if (iterator.next().getColumn() == columnIndex) {
                iterator.remove();
            }
        }
    }

    /**
     * @return the stored list of {@link ParsingError}s that occurred while reading the data into
     *         this table model. This is the internal list itself, not a copy.
     */
    List<ParsingError> getParsingErrors() {
        return parsingErrorList;
    }

    /**
     * @return the number of rows that were read, or {@code 0} if no data is available
     */
    @Override
    public int getRowCount() {
        // same null guard as in getColumnCount()
        return data != null ? data.length : 0;
    }

    /**
     * @return the name of the column as stored in the configured meta data
     */
    @Override
    public String getColumnName(int column) {
        return metaData.getColumnMetaData(column).getName();
    }

    /**
     * @return the length of the first data row, or {@code 0} if there are no rows
     */
    @Override
    public int getColumnCount() {
        if (data != null && data.length > 0) {
            return data[0].length;
        } else {
            return 0;
        }
    }

    /**
     * @return the formatted cell value, or {@code null} if the row is {@code null}, the column
     *         index lies beyond the end of the row or the cell could not be parsed
     */
    @Override
    public Object getValueAt(int rowIndex, int columnIndex) {
        final String[] row = data[rowIndex];
        if (row == null || columnIndex >= row.length) {
            return null;
        }
        return row[columnIndex];
    }

    /**
     * Returns whether a parsing error happened for the cell specified by rowIndex and columnIndex.
     * Cells of columns that are marked as removed in the meta data never report an error.
     *
     * @param rowIndex
     *            the row index
     * @param columnIndex
     *            the column index
     * @return {@code true} if a parsing error happened for this cell
     */
    boolean hasError(int rowIndex, int columnIndex) {
        Set<Integer> errorRows = errorCells.get(columnIndex);
        return errorRows != null && errorRows.contains(rowIndex)
                && !metaData.getColumnMetaData(columnIndex).isRemoved();
    }
}