/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.nio.model;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import java.util.NoSuchElementException;
import jxl.Cell;
import jxl.CellType;
import jxl.DateCell;
import jxl.NumberCell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.tools.ProgressListener;
import com.rapidminer.tools.Tools;
/**
* A DataResultSet for an Excel File.
*
* @author Sebastian Land
*
*/
public class ExcelResultSet implements DataResultSet {
private Sheet sheet = null;
private boolean[] emptyRows;
private boolean[] emptyColumns;
private int rowOffset = 0;
private int columnOffset = 0;
private int totalNumberOfRows = 0;
private int totalNumberOfColumns = 0;
private int currentRow;
private Cell[] currentRowCells;
private Workbook workbook;
//private ExcelResultSetConfiguration configuration;
private String[] attributeNames;
/**
* The constructor to build an ExcelResultSet from the given configuration. The calling operator might be null. It
* is only needed for error handling.
*/
public ExcelResultSet(Operator callingOperator, ExcelResultSetConfiguration configuration) throws OperatorException {
//this.configuration = configuration;
// reading configuration
columnOffset = configuration.getColumnOffset();
rowOffset = configuration.getRowOffset();
currentRow = configuration.getRowOffset() - 1;
// check range
if (columnOffset > configuration.getColumnLast() || rowOffset > configuration.getRowLast() || columnOffset < 0 || rowOffset < 0)
throw new UserError(callingOperator, 223, Tools.getExcelColumnName(columnOffset) + rowOffset + ":" + Tools.getExcelColumnName(configuration.getColumnLast()) + configuration.getRowLast());
// check file presence
if (configuration.getFile() == null) {
throw new UserError(callingOperator, "file_consumer.no_file_defined");
}
// load the excelWorkbook if it is not set
try {
workbook = configuration.getWorkbook();
} catch (IOException e) {
throw new UserError(callingOperator, 302, configuration.getFile().getPath(), e.getMessage());
} catch (BiffException e) {
throw new UserError(callingOperator, 302, configuration.getFile().getPath(), e.getMessage());
}
try {
sheet = workbook.getSheet(configuration.getSheet());
} catch (IndexOutOfBoundsException e) {
throw new UserError(callingOperator, 953, configuration.getSheet());
}
totalNumberOfColumns = Math.min(configuration.getColumnLast(), sheet.getColumns() - 1) - columnOffset + 1;
totalNumberOfRows = Math.min(configuration.getRowLast(), sheet.getRows() - 1) - rowOffset + 1;
if (totalNumberOfColumns < 0 || totalNumberOfRows < 0)
throw new UserError(callingOperator, 404);
emptyColumns = new boolean[totalNumberOfColumns];
emptyRows = new boolean[totalNumberOfRows];
// filling offsets
Arrays.fill(emptyColumns, true);
Arrays.fill(emptyRows, true);
// determine offsets and emptiness
boolean foundAny = false;
for (int r = 0; r < totalNumberOfRows; r++) {
for (int c = 0; c < totalNumberOfColumns; c++) {
if (emptyRows[r] || emptyColumns[c]) {
final Cell cell = sheet.getCell(c + columnOffset, r + rowOffset);
if (cell.getType() != CellType.EMPTY && !"".equals(cell.getContents().trim())) {
foundAny = true;
emptyRows[r] = false;
emptyColumns[c] = false;
}
}
}
}
if (!foundAny) {
throw new UserError(callingOperator, 302, configuration.getFile().getPath(), "spreadsheet seems to be empty");
}
// retrieve attribute names: first count columns
int numberOfAttributes = 0;
for (int i = 0; i < totalNumberOfColumns; i++) {
if (!emptyColumns[i])
numberOfAttributes++;
}
// retrieve or generate attribute names
attributeNames = new String[numberOfAttributes];
if (!configuration.isEmulatingOldNames()) {
for (int i = 0; i < numberOfAttributes; i++) {
if (!emptyColumns[i]) {
attributeNames[i] = Tools.getExcelColumnName(i);
}
}
} else {
// emulate old 5.0.x style
for (int i = 0; i < numberOfAttributes; i++) {
if (!emptyColumns[i]) {
attributeNames[i] = "attribute_" + i;
}
}
}
}
@Override
public void reset(ProgressListener listener) {
currentRow = rowOffset - 1;
if (listener != null) {
listener.setTotal(totalNumberOfRows);
listener.setCompleted(0);
}
}
@Override
public boolean hasNext() {
int nextRow = currentRow + 1;
while (nextRow < totalNumberOfRows + rowOffset && emptyRows[nextRow - rowOffset])
nextRow++;
return nextRow < totalNumberOfRows + rowOffset;
}
@Override
public void next(ProgressListener listener) {
currentRow++;
while (currentRow < totalNumberOfRows + rowOffset && emptyRows[currentRow - rowOffset]) {
currentRow++;
}
if (currentRow >= totalNumberOfRows + rowOffset) {
throw new NoSuchElementException("No further row in excel sheet.");
}
currentRowCells = new Cell[attributeNames.length];
int columnCounter = 0;
for (int c = 0; c < totalNumberOfColumns; c++) {
if (!emptyColumns[c]) {
currentRowCells[columnCounter] = sheet.getCell(c + columnOffset, currentRow);
columnCounter++;
}
}
// notifying progress listener
if (listener != null) {
listener.setCompleted(currentRow);
}
}
@Override
public void close() throws OperatorException {
//configuration.closeWorkbook();
}
@Override
public int getNumberOfColumns() {
return attributeNames.length;
}
@Override
public String[] getColumnNames() {
return attributeNames;
}
@Override
public boolean isMissing(int columnIndex) {
Cell cell = getCurrentCell(columnIndex);
return cell.getType() == CellType.EMPTY ||
cell.getType() == CellType.ERROR ||
cell.getType() == CellType.FORMULA_ERROR ||
cell.getContents() == null ||
"".equals(cell.getContents().trim());
}
private Cell getCurrentCell(int index) {
// return currentRowCells[index + columnOffset];
return currentRowCells[index];
}
@Override
public Number getNumber(int columnIndex) throws ParseException {
final Cell cell = getCurrentCell(columnIndex);
if ((cell.getType() == CellType.NUMBER) ||
(cell.getType() == CellType.NUMBER_FORMULA)) {
final double value = ((NumberCell) cell).getValue();
return Double.valueOf(value);
} else {
String valueString = cell.getContents();
try {
return Double.valueOf(valueString);
} catch (NumberFormatException e) {
throw new ParseException(new ParsingError(currentRow, columnIndex, ParsingError.ErrorCode.UNPARSEABLE_REAL, valueString));
}
}
}
@Override
public Date getDate(int columnIndex) throws ParseException {
final Cell cell = getCurrentCell(columnIndex);
if ((cell.getType() == CellType.DATE) ||
(cell.getType() == CellType.DATE_FORMULA)) {
return ((DateCell) cell).getDate();
} else {
String valueString = cell.getContents();
throw new ParseException(new ParsingError(currentRow, columnIndex, ParsingError.ErrorCode.UNPARSEABLE_DATE, valueString));
}
// // TODO: Why is that???
// int offset = TimeZone.getDefault().getOffset(date.getTime());
// return new Date(date.getTime() - offset);
}
@Override
public String getString(int columnIndex) {
return getCurrentCell(columnIndex).getContents();
}
@Override
public int[] getValueTypes() {
return new int[this.attributeNames.length];
}
@Override
public ValueType getNativeValueType(int columnIndex) throws ParseException {
final CellType type = getCurrentCell(columnIndex).getType();
if (type == CellType.EMPTY) {
return ValueType.EMPTY;
} else if ((type == CellType.NUMBER) || (type == CellType.NUMBER_FORMULA)) {
return ValueType.NUMBER;
} else if ((type == CellType.DATE) || (type == CellType.DATE_FORMULA)) {
return ValueType.DATE;
} else {
return ValueType.STRING;
}
}
@Override
public int getCurrentRow() {
return currentRow;
}
}