/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.studio.io.data.internal.file.excel;
import java.nio.file.Path;
import java.text.DateFormat;
import com.rapidminer.core.io.data.DataSet;
import com.rapidminer.core.io.data.DataSetException;
import com.rapidminer.core.io.data.DataSetMetaData;
import com.rapidminer.core.io.data.source.DataSource;
import com.rapidminer.core.io.data.source.DataSourceConfiguration;
import com.rapidminer.core.io.data.source.FileDataSource;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.nio.model.DataResultSet;
import com.rapidminer.operator.nio.model.DateFormatProvider;
import com.rapidminer.operator.nio.model.ExcelResultSetConfiguration;
import com.rapidminer.operator.nio.model.xlsx.XlsxResultSet.XlsxReadMode;
import com.rapidminer.operator.nio.model.xlsx.XlsxSheetMetaDataParser;
import com.rapidminer.operator.nio.model.xlsx.XlsxUtilities.XlsxCellCoordinates;
import com.rapidminer.studio.io.data.internal.ResultSetAdapter;
import com.rapidminer.studio.io.data.internal.ResultSetAdapterUtils;
/**
* A {@link DataSource} implementation for Excel files.
*
* @author Nils Woehler
* @since 7.0.0
*/
final class ExcelDataSource extends FileDataSource {

	/** Meta data created by {@link #createMetaData()}; {@code null} until that method has run. */
	private DataSetMetaData metaData = null;

	/** Holds the workbook file, the selected sheet and the cell range that should be imported. */
	private ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration();

	/** Index of the header row; initially the first row (index 0). */
	private int headerRowIndex = 0;

	/** Cached preview data set, see {@link #getPreview(int)}. */
	private transient DataSet previewDataSet = null;

	/** Configuration snapshot taken when {@link #previewDataSet} was created. */
	private transient DataSourceConfiguration previewConfiguration = null;

	/** Preview size limit that {@link #previewDataSet} was created with. */
	private transient int previewSize = -1;

	/** Cached full data set, see {@link #getData()}. */
	private transient DataSet dataSet = null;

	/** Configuration snapshot taken when {@link #dataSet} was created. */
	private transient DataSourceConfiguration dataSetConfiguration = null;

	/**
	 * Stores the new location and, if it is not {@code null}, points the Excel result set
	 * configuration to the new workbook file.
	 *
	 * @param newLocation
	 *            the new file location, may be {@code null}
	 */
	@Override
	public void setLocation(Path newLocation) {
		super.setLocation(newLocation);
		if (newLocation == null) {
			return;
		}
		configuration.setWorkbookFile(newLocation.toFile());

		/*
		 * Setting the workbook file will reset the row and column offset to 0. We want the row
		 * offset to be -1 by default as this means we want to import the whole column from the
		 * beginning.
		 */
		ExcelResultSetConfiguration resultSetConfiguration = getResultSetConfiguration();
		resultSetConfiguration.setRowOffset(XlsxCellCoordinates.NO_ROW_NUMBER);
		resultSetConfiguration.setRowLast(XlsxSheetMetaDataParser.MAXIMUM_XLSX_ROW_INDEX);
	}

	/**
	 * Returns the data set for the current configuration. The instance is cached and only
	 * re-created if no data set exists yet or if the configuration parameters have changed since
	 * the cached instance was created.
	 *
	 * @return the (possibly cached) data set
	 * @throws DataSetException
	 *             if closing the outdated data set or creating the new one fails
	 */
	@Override
	public DataSet getData() throws DataSetException {
		boolean configurationChanged = dataSetConfiguration != null
				&& !dataSetConfiguration.getParameters().equals(getConfiguration().getParameters());
		if (dataSet == null || configurationChanged) {
			// close the old data set in case a new one is created; drop the reference even if
			// closing fails so that a broken instance is never handed out again
			if (dataSet != null) {
				try {
					dataSet.close();
				} finally {
					dataSet = null;
				}
			}
			try {
				this.dataSet = createDataSet(XlsxReadMode.OPERATOR);
				this.dataSetConfiguration = getConfiguration();
			} catch (OperatorException e) {
				// keep the OperatorException itself as cause so its stack trace is not lost
				throw new DataSetException(e.getMessage(), e);
			}
		}
		return dataSet;
	}

	/**
	 * Returns a preview of the data for the current configuration. The instance is cached and only
	 * re-created if no preview exists yet, if the configuration parameters have changed, or if a
	 * different preview size is requested than the one the cached preview was created with.
	 *
	 * @param maxPreviewSize
	 *            the maximal number of rows the preview should contain
	 * @return the (possibly cached) preview data set
	 * @throws DataSetException
	 *             if closing the outdated preview or creating the new one fails
	 */
	@Override
	public DataSet getPreview(int maxPreviewSize) throws DataSetException {
		boolean configChange = previewConfiguration != null
				&& !previewConfiguration.getParameters().equals(getConfiguration().getParameters());
		// a cached preview that was cut off at another length does not satisfy this request
		boolean sizeChange = previewSize != maxPreviewSize;
		if (previewDataSet == null || configChange || sizeChange) {
			// close the old preview set in case a new one is created; drop the reference even if
			// closing fails so that a broken instance is never handed out again
			if (previewDataSet != null) {
				try {
					previewDataSet.close();
				} finally {
					previewDataSet = null;
				}
			}
			try {
				this.previewDataSet = createDataSet(XlsxReadMode.WIZARD_PREVIEW, maxPreviewSize);
				this.previewConfiguration = getConfiguration();
				this.previewSize = maxPreviewSize;
			} catch (OperatorException e) {
				// keep the OperatorException itself as cause so its stack trace is not lost
				throw new DataSetException(e.getMessage(), e);
			}
		}
		return previewDataSet;
	}

	/**
	 * Creates a data set without a preview size limit.
	 *
	 * @param readMode
	 *            the mode the Excel file should be read with
	 * @return the new data set adapter
	 */
	private ExcelResultSetAdapter createDataSet(XlsxReadMode readMode) throws OperatorException, DataSetException {
		// the preview size is only evaluated for XlsxReadMode.WIZARD_PREVIEW
		return createDataSet(readMode, -1);
	}

	/**
	 * Creates a new data set adapter that covers the configured row range. In
	 * {@link XlsxReadMode#WIZARD_PREVIEW} mode the end row is additionally limited so that at most
	 * {@code maxPreviewSize} rows are covered.
	 *
	 * @param readMode
	 *            the mode the Excel file should be read with
	 * @param maxPreviewSize
	 *            the maximal number of preview rows; only used in preview mode
	 * @return the new data set adapter
	 */
	private ExcelResultSetAdapter createDataSet(XlsxReadMode readMode, int maxPreviewSize)
			throws OperatorException, DataSetException {
		final int startRow = getStartRowIndex();
		int endRow = getEndRowIndex();

		if (readMode == XlsxReadMode.WIZARD_PREVIEW) {
			// set end row such that length is maximal preview length; computed as long and
			// clamped so that a very large preview size cannot overflow into a negative end row
			final int endRowByLength = (int) Math.min(Integer.MAX_VALUE, (long) startRow + maxPreviewSize - 1);
			if (endRow > ResultSetAdapter.NO_END_ROW) {
				endRow = Math.min(endRow, endRowByLength);
			} else {
				endRow = endRowByLength;
			}
		}

		// the date format is looked up lazily from whatever meta data is set at that time
		DateFormatProvider provider = new DateFormatProvider() {

			@Override
			public DateFormat geDateFormat() {
				return getMetadata().getDateFormat();
			}
		};

		DataResultSet resultSet = getResultSetConfiguration().makeDataResultSet(null, readMode, provider);
		return new ExcelResultSetAdapter(resultSet, startRow, endRow);
	}

	/**
	 * @return the {@link ExcelResultSetConfiguration} for this {@link ExcelDataSource}. It is
	 *         holding information about the Excel file path and the configured sheet and cell range
	 *         to be imported.
	 */
	ExcelResultSetConfiguration getResultSetConfiguration() {
		return configuration;
	}

	/**
	 * @return the actual data content start index (without the header row if defined)
	 */
	private int getStartRowIndex() {
		int rowOffset = getResultSetConfiguration().getRowOffset();

		// adjust start row to first row if rowOffset is set to NO_ROW_NUMBER
		int startRow = rowOffset == XlsxCellCoordinates.NO_ROW_NUMBER ? 0 : rowOffset;

		// a header row that sits on the first selected row is not part of the data content
		int headerRow = getHeaderRowIndex();
		if (headerRow > ResultSetAdapter.NO_HEADER_ROW && headerRow == startRow) {
			startRow++;
		}
		return startRow;
	}

	/**
	 * @return the index of the last row to be imported or {@link ResultSetAdapter#NO_END_ROW} in
	 *         case the whole Excel file should be imported
	 */
	private int getEndRowIndex() {
		// NOTE(review): Integer.MAX_VALUE is treated as "no end row" here, while setLocation()
		// initializes the last row with XlsxSheetMetaDataParser.MAXIMUM_XLSX_ROW_INDEX - confirm
		// that both sentinels are meant to be handled the same way.
		int rowLast = getResultSetConfiguration().getRowLast();
		return rowLast == Integer.MAX_VALUE ? ResultSetAdapter.NO_END_ROW : rowLast;
	}

	/**
	 * Returns the index of the header row.
	 *
	 * @return the index of the header row or {@link ResultSetAdapter#NO_HEADER_ROW} if no header
	 *         row is specified.
	 */
	public int getHeaderRowIndex() {
		return headerRowIndex;
	}

	/**
	 * Updates the header row index.
	 *
	 * @param headerRowIndex
	 *            the new header row index
	 */
	public void setHeaderRowIndex(int headerRowIndex) {
		this.headerRowIndex = headerRowIndex;
	}

	/**
	 * @return the meta data created by the last call of {@link #createMetaData()} or {@code null}
	 *         if no meta data has been created yet
	 */
	@Override
	public DataSetMetaData getMetadata() {
		return metaData;
	}

	/**
	 * Creates a new meta data instance with the results of the
	 * {@link ExcelSheetSelectionWizardStep} and assigns it to the {@link #metaData} field of the
	 * {@link ExcelDataSource}.
	 * <p>
	 * The meta data is created via {@link ResultSetAdapterUtils} from a separate result set that
	 * reads the whole selected sheet (restricted to the configured column range), together with
	 * the current start row and header row index.
	 *
	 * @throws DataSetException
	 *             in case the guessing failed (e.g. because of file reading errors, wrong file
	 *             path, etc.)
	 */
	void createMetaData() throws DataSetException {
		// create a new Excel ResultSet configuration which reads the whole selected sheet
		// we cannot call getData() here as it might already skip the first lines
		ExcelResultSetConfiguration selection = getResultSetConfiguration();
		try (ExcelResultSetConfiguration sheetConfiguration = new ExcelResultSetConfiguration()) {
			sheetConfiguration.setWorkbookFile(getLocation().toFile());
			sheetConfiguration.setSheet(selection.getSheet());
			sheetConfiguration.setColumnOffset(selection.getColumnOffset());
			sheetConfiguration.setColumnLast(selection.getColumnLast());
			sheetConfiguration.setEncoding(selection.getEncoding());

			try (DataResultSet resultSet = sheetConfiguration.makeDataResultSet(null)) {
				this.metaData = ResultSetAdapterUtils.createMetaData(resultSet, null, getStartRowIndex(),
						getHeaderRowIndex());
			} catch (OperatorException e) {
				throw new DataSetException(e.getMessage(), e);
			}
		}
	}

	/**
	 * @return a new {@link ExcelDataSourceConfiguration} created from this data source
	 */
	@Override
	public DataSourceConfiguration getConfiguration() {
		return new ExcelDataSourceConfiguration(this);
	}

	/**
	 * Does nothing; the provided configuration is ignored by this data source.
	 */
	@Override
	public void configure(DataSourceConfiguration configuration) throws DataSetException {}

	/**
	 * Closes the Excel result set configuration and the cached preview and data set instances.
	 * Every resource is closed even if closing a previous one fails, and the cached references
	 * are always cleared. If more than one close operation fails, the exception of the last
	 * failing one is the one that propagates.
	 *
	 * @throws DataSetException
	 *             if closing one of the cached data sets fails
	 */
	@Override
	public void close() throws DataSetException {
		try {
			configuration.close();
		} finally {
			try {
				if (previewDataSet != null) {
					previewDataSet.close();
				}
			} finally {
				previewDataSet = null;
				try {
					if (dataSet != null) {
						dataSet.close();
					}
				} finally {
					dataSet = null;
				}
			}
		}
	}
}