/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.studio.io.gui.internal.steps.configuration;
import java.text.DateFormat;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.swing.table.AbstractTableModel;
import com.rapidminer.core.io.data.ColumnMetaData;
import com.rapidminer.core.io.data.ColumnMetaData.ColumnType;
import com.rapidminer.core.io.data.DataSet;
import com.rapidminer.core.io.data.DataSetException;
import com.rapidminer.core.io.data.DataSetMetaData;
import com.rapidminer.core.io.data.DataSetRow;
import com.rapidminer.core.io.data.ParseException;
import com.rapidminer.core.io.data.source.DataSource;
import com.rapidminer.example.Attribute;
import com.rapidminer.operator.nio.ImportWizardUtils;
import com.rapidminer.operator.nio.model.DefaultPreview;
import com.rapidminer.tools.I18N;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.Tools;
/**
* A model for the column configuration data table. It loads the model data from preview
* {@link DataSet} provided by {@link DataSource#getPreview()}. It does not load more data than
* defined by {@link ImportWizardUtils#getPreviewLength()}. Stores {@link ParsingError}s encountered
* during loading and the erroneous cells.
*
* @author Nils Woehler, Gisa Schaefer
* @since 7.0.0
*/
final class ConfigureDataTableModel extends AbstractTableModel {

    private static final long serialVersionUID = 1L;

    /** error message for more than two values in binary column */
    private static final String ALREADY_TWO_BINARY_VALUES = I18N
            .getGUILabel("io.dataimport.step.data_column_configuration.error_table.parsing_error.not_binary_column") + " ";

    /** the (editable) meta data copy that defines column names, types and the date format */
    private final DataSetMetaData metaData;

    /** maximum number of rows that are loaded into {@link #data} */
    private final int previewSize = ImportWizardUtils.getPreviewLength();

    /** the formatted cell values, indexed by row and then by column */
    private String[][] data;

    /** all parsing errors that were found by the last (re)read */
    private final List<ParsingError> parsingErrorList = new LinkedList<>();

    /** maps a column index to the set of row indices that contain a parsing error */
    private Map<Integer, Set<Integer>> errorCells = new HashMap<>();

    /** maps the index of a binary column to the distinct values seen so far in that column */
    private final Map<Integer, Set<String>> binaryMapping = new HashMap<>();

    /** the preview data set the table content is read from */
    private final DataSet dataSet;

    /**
     * original meta data of the data set. Is not automatically changed. All changes have to be
     * undone.
     */
    private final DataSetMetaData originalDataSourceMetaData;

    /**
     * Creates a new model instance.
     *
     * @param dataSource
     *            the data source used to query the preview data
     * @param metaData
     *            the metaData copy to use
     * @param l
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             in case reading the preview fails
     */
    public ConfigureDataTableModel(DataSource dataSource, DataSetMetaData metaData, ProgressListener l)
            throws DataSetException {
        this.metaData = metaData;
        this.originalDataSourceMetaData = dataSource.getMetadata();
        this.dataSet = dataSource.getPreview(previewSize);
        read(dataSet, l);
    }

    /**
     * Rereads the data with the current date format.
     * <p>
     * The date format of the original meta data is replaced temporarily by the one of the edited
     * meta data and restored afterwards. The method holds the monitor of this model for the whole
     * swap-read-restore sequence, exactly like {@link #rereadColumn(int, ProgressListener)}, so
     * that the two methods cannot interleave and restore a wrong date format.
     *
     * @param listener
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             in case reading the preview fails
     */
    synchronized void reread(ProgressListener listener) throws DataSetException {
        DateFormat originalDateFormat = originalDataSourceMetaData.getDateFormat();
        originalDataSourceMetaData.setDateFormat(metaData.getDateFormat());
        try {
            read(dataSet, listener);
        } finally {
            // undo the temporary change in any case
            originalDataSourceMetaData.setDateFormat(originalDateFormat);
        }
    }

    /**
     * Reads the whole preview from the beginning, replaces {@link #data} and rebuilds the parsing
     * errors, the error cells and the binary value mapping from scratch.
     *
     * @param dataPreview
     *            the preview data set to read from
     * @param listener
     *            the progress listener to report progress to, may be {@code null}
     * @throws DataSetException
     *             in case reading the preview fails
     */
    private synchronized void read(DataSet dataPreview, ProgressListener listener) throws DataSetException {
        if (listener != null) {
            listener.setTotal(previewSize);
        }

        List<String[]> dataList = new LinkedList<String[]>();
        parsingErrorList.clear();
        errorCells.clear();
        binaryMapping.clear();

        // prepare an empty value set for every binary column
        int columnIndex = 0;
        for (ColumnMetaData column : metaData.getColumnMetaData()) {
            if (column.getType() == ColumnType.BINARY) {
                binaryMapping.put(columnIndex, new HashSet<String>(2));
            }
            columnIndex++;
        }

        // start from the beginning
        dataPreview.reset();
        int numberOfColumns = dataPreview.getNumberOfColumns();

        // read in data until preview size is reached or the preview has no more rows
        while (dataList.size() < previewSize && dataPreview.hasNext()) {
            DataSetRow dataRow = dataPreview.nextRow();
            String[] row = new String[numberOfColumns];
            for (int i = 0; i < row.length; i++) {
                if (dataRow.isMissing(i)) {
                    row[i] = Attribute.MISSING_NOMINAL_VALUE;
                } else {
                    final ColumnType columnType = metaData.getColumnMetaData(i).getType();
                    readNotMissingEntry(dataRow, row, i, dataPreview.getCurrentRowIndex(), columnType, errorCells);
                }
            }
            dataList.add(row);
            if (listener != null) {
                listener.setCompleted(dataList.size());
            }
        }

        // copy to array since will be accessed by index
        this.data = dataList.toArray(new String[dataList.size()][]);
        if (listener != null) {
            listener.complete();
        }
    }

    /**
     * Reads the entry specified by columnIndex from the dataRow and stores it in the row array. If
     * the value cannot be parsed as the given column type, the cell is set to {@code null}, a
     * {@link ParsingError} is added to {@link #parsingErrorList} and the cell is registered in the
     * given error cell map.
     *
     * @param dataRow
     *            the data row containing the data
     * @param row
     *            the row array where to store the data
     * @param columnIndex
     *            the column to consider
     * @param rowIndex
     *            the current row, used for errors
     * @param columnType
     *            the type of the column
     * @param errorCells
     *            the map where to store the errorCells
     */
    private void readNotMissingEntry(DataSetRow dataRow, String[] row, int columnIndex, int rowIndex,
            final ColumnType columnType, Map<Integer, Set<Integer>> errorCells) {
        try {
            switch (columnType) {
                case DATE:
                    row[columnIndex] = Tools.formatDate(dataRow.getDate(columnIndex));
                    break;
                case DATETIME:
                    row[columnIndex] = Tools.formatDateTime(dataRow.getDate(columnIndex));
                    break;
                case TIME:
                    row[columnIndex] = Tools.formatTime(dataRow.getDate(columnIndex));
                    break;
                case REAL:
                    row[columnIndex] = Tools.formatNumber(dataRow.getDouble(columnIndex));
                    break;
                case INTEGER:
                    // can do round here, since value is not NaN
                    row[columnIndex] = Tools.formatIntegerIfPossible(Math.round(dataRow.getDouble(columnIndex)));
                    break;
                case CATEGORICAL:
                    row[columnIndex] = DefaultPreview.shortenDisplayValue(dataRow.getString(columnIndex));
                    break;
                case BINARY:
                    String value = dataRow.getString(columnIndex);
                    final Set<String> binaryEntries = binaryMapping.get(columnIndex);
                    // a third distinct value in a binary column is reported as a parsing error
                    if (binaryEntries.size() == 2 && !binaryEntries.contains(value)) {
                        throw new ParseException(ALREADY_TWO_BINARY_VALUES + binaryEntries.toString());
                    } else {
                        binaryEntries.add(value);
                        row[columnIndex] = DefaultPreview.shortenDisplayValue(value);
                    }
                    break;
                default:
                    break;
            }
        } catch (ParseException e) {
            row[columnIndex] = null;

            // store error with original value if possible
            String originalValue = null;
            // if the type is categorical, getString was already called above, so there will be a
            // parse exception again
            if (columnType != ColumnType.CATEGORICAL) {
                try {
                    originalValue = dataRow.getString(columnIndex);
                } catch (ParseException e1) {
                    // the raw value is not available, report the error without it
                    originalValue = null;
                }
            }
            parsingErrorList.add(new ParsingError(columnIndex, rowIndex, originalValue, e.getMessage()));

            // remember the erroneous cell, creating the row set for the column on first use
            Set<Integer> errorRows = errorCells.get(columnIndex);
            if (errorRows == null) {
                errorRows = new HashSet<>();
                errorCells.put(columnIndex, errorRows);
            }
            errorRows.add(rowIndex);
        }
    }

    /**
     * Reads the column with index columnIndex again and stores the results. Only the rows that are
     * already contained in the table are updated. The parsing errors and error cells of the column
     * are replaced by the ones found during this pass.
     *
     * @param columnIndex
     *            the column to reread
     * @param listener
     *            the progress listener to report progress to (in percent), may be {@code null}
     * @throws DataSetException
     *             in case reading the preview fails
     */
    synchronized void rereadColumn(int columnIndex, ProgressListener listener) throws DataSetException {
        if (listener != null) {
            listener.setTotal(100);
        }
        DateFormat originalDateFormat = originalDataSourceMetaData.getDateFormat();
        originalDataSourceMetaData.setDateFormat(metaData.getDateFormat());
        try {
            dataSet.reset();
            final ColumnType columnType = metaData.getColumnMetaData(columnIndex).getType();
            if (columnType == ColumnType.BINARY) {
                // always start with an empty value set, like a full read does, so that the
                // binary check is not influenced by values collected during an earlier pass
                binaryMapping.put(columnIndex, new HashSet<String>(2));
            }

            // copy errors cells such that errorCells change all at once
            Map<Integer, Set<Integer>> errorCellsCopy = new HashMap<Integer, Set<Integer>>(errorCells);
            errorCellsCopy.remove(columnIndex);
            removeFromErrors(columnIndex);

            int rowIndex = 0;
            while (dataSet.hasNext() && rowIndex < data.length) {
                DataSetRow dataRow = dataSet.nextRow();
                String[] containerRow = data[rowIndex];
                if (dataRow.isMissing(columnIndex)) {
                    containerRow[columnIndex] = Attribute.MISSING_NOMINAL_VALUE;
                } else {
                    readNotMissingEntry(dataRow, containerRow, columnIndex, rowIndex, columnType, errorCellsCopy);
                }
                rowIndex++;
                if (listener != null) {
                    // report the share of rows that are finished, reaches 100 with the last row
                    listener.setCompleted(100 * rowIndex / data.length);
                }
            }
            errorCells = errorCellsCopy;
        } finally {
            // undo the temporary change in any case
            originalDataSourceMetaData.setDateFormat(originalDateFormat);
        }
    }

    /**
     * Removes all entries associated to the column with columnIndex from {@link #parsingErrorList}.
     *
     * @param columnIndex
     *            the index for which to delete the entries
     */
    private void removeFromErrors(int columnIndex) {
        Iterator<ParsingError> iterator = parsingErrorList.iterator();
        while (iterator.hasNext()) {
            if (iterator.next().getColumn() == columnIndex) {
                iterator.remove();
            }
        }
    }

    /**
     * @return the stored list of {@link ParsingError}s that occurred during the last read of this
     *         table model. The returned list is the internal list, not a copy.
     */
    List<ParsingError> getParsingErrors() {
        return parsingErrorList;
    }

    /**
     * @return the number of loaded preview rows or {@code 0} if no data has been loaded
     */
    @Override
    public int getRowCount() {
        return data != null ? data.length : 0;
    }

    /**
     * @return the name of the column as defined by the edited meta data
     */
    @Override
    public String getColumnName(int column) {
        return metaData.getColumnMetaData(column).getName();
    }

    /**
     * @return the length of the first loaded row or {@code 0} if no rows have been loaded
     */
    @Override
    public int getColumnCount() {
        if (data != null && data.length > 0) {
            return data[0].length;
        } else {
            return 0;
        }
    }

    /**
     * @return the formatted cell value or {@code null} if the row is {@code null}, the column
     *         index lies behind the end of the row or the cell could not be parsed
     */
    @Override
    public Object getValueAt(int rowIndex, int columnIndex) {
        final String[] row = data[rowIndex];
        if (row == null || columnIndex >= row.length) {
            return null;
        }
        return row[columnIndex];
    }

    /**
     * Returns whether a parsing error happened for the cell specified by rowIndex and columnIndex.
     * Cells of columns that are marked as removed never count as erroneous.
     *
     * @param rowIndex
     *            the row index
     * @param columnIndex
     *            the column index
     * @return {@code true} if a parsing error happened for this cell
     */
    boolean hasError(int rowIndex, int columnIndex) {
        Set<Integer> errorRows = errorCells.get(columnIndex);
        return errorRows != null && errorRows.contains(rowIndex)
                && !metaData.getColumnMetaData(columnIndex).isRemoved();
    }
}