/* * RapidMiner * * Copyright (C) 2001-2014 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.nio.model; import static com.rapidminer.operator.nio.ExcelExampleSource.PARAMETER_SHEET_NUMBER; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.charset.Charset; import java.util.logging.Level; import javax.swing.table.AbstractTableModel; import javax.swing.table.TableModel; import jxl.Workbook; import jxl.WorkbookSettings; import jxl.read.biff.BiffException; import org.apache.poi.POIXMLException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.ss.usermodel.WorkbookFactory; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.nio.Excel2007SheetTableModel; import com.rapidminer.operator.nio.ExcelExampleSource; import com.rapidminer.operator.nio.ExcelSheetTableModel; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.I18N; import com.rapidminer.tools.LogService; import com.rapidminer.tools.ProgressListener; import com.rapidminer.tools.Tools; import com.rapidminer.tools.io.Encoding; /** * A class holding information about configuration of the Excel Result Set * * @author Sebastian Land, Marco Boeck */ public class ExcelResultSetConfiguration implements DataResultSetFactory { private int rowOffset = -1; private int columnOffset = -1; private int rowLast = Integer.MAX_VALUE; private int columnLast = Integer.MAX_VALUE; /** Numbering starts at 0. */ private int sheet = -1; private Charset encoding; private org.apache.poi.ss.usermodel.Workbook workbookPOI; private InputStream workbookPOIInputStream; private jxl.Workbook workbookJXL; private File workbookFile; private boolean isEmulatingOldNames; private String timezone; private String datePattern; /** * This constructor must read in all settings from the parameters of the given operator. * * @throws OperatorException */ public ExcelResultSetConfiguration(ExcelExampleSource excelExampleSource) throws OperatorException { if (excelExampleSource.isParameterSet(ExcelExampleSource.PARAMETER_IMPORTED_CELL_RANGE)) { parseExcelRange(excelExampleSource.getParameterAsString(ExcelExampleSource.PARAMETER_IMPORTED_CELL_RANGE)); } // else { // throw new UserError(null, 205, ExcelExampleSource.PARAMETER_IMPORTED_CELL_RANGE, excelExampleSource.getName()); // } if (excelExampleSource.isParameterSet(PARAMETER_SHEET_NUMBER)) { this.sheet = excelExampleSource.getParameterAsInt(PARAMETER_SHEET_NUMBER) - 1; } if (excelExampleSource.isFileSpecified()) { this.workbookFile = excelExampleSource.getSelectedFile(); } else { String excelParamter; try { excelParamter = excelExampleSource.getParameter(ExcelExampleSource.PARAMETER_EXCEL_FILE); } catch (UndefinedParameterError e) { excelParamter = null; } if (excelParamter != null && !"".equals(excelParamter)) { File excelFile = new File(excelParamter); if (excelFile.exists()) { this.workbookFile = excelFile; } } } // if (excelExampleSource.isParameterSet(PARAMETER_EXCEL_FILE)) { // this.workbookFile = excelExampleSource.getParameterAsFile(PARAMETER_EXCEL_FILE); // } if (excelExampleSource.isParameterSet(AbstractDataResultSetReader.PARAMETER_DATE_FORMAT)) { datePattern = excelExampleSource.getParameterAsString(AbstractDataResultSetReader.PARAMETER_DATE_FORMAT); } if (excelExampleSource.isParameterSet(AbstractDataResultSetReader.PARAMETER_TIME_ZONE)) { timezone = excelExampleSource.getParameterAsString(AbstractDataResultSetReader.PARAMETER_TIME_ZONE); } encoding = Encoding.getEncoding(excelExampleSource); isEmulatingOldNames = excelExampleSource.getCompatibilityLevel().isAtMost(ExcelExampleSource.CHANGE_5_0_11_NAME_SCHEMA); } /** * This will create a completely empty result set configuration */ public ExcelResultSetConfiguration() { } /** * Returns the RowOffset */ public int getRowOffset() { return rowOffset; } /** * Returns the ColumnOffset */ public int getColumnOffset() { return columnOffset; } /** * Returns if there is an active workbook. * */ public boolean hasWorkbook() { return workbookJXL != null || workbookPOI != null; } /** * Creates an excel table model (either {@link ExcelSheetTableModel} or {@link Excel2007SheetTableModel}, depending on file). * @param sheetIndex the index of the sheet (0-based) * @return * @throws BiffException * @throws IOException * @throws InvalidFormatException */ public AbstractTableModel createExcelTableModel(int sheetIndex) throws BiffException, IOException, InvalidFormatException { if (getFile().getAbsolutePath().endsWith(".xlsx")) { // excel 2007 file if (workbookPOI == null) { createWorkbookPOI(); } Excel2007SheetTableModel excelSheetTableModel = new Excel2007SheetTableModel(workbookPOI.getSheetAt(sheetIndex)); return excelSheetTableModel; } else { // excel pre 2007 file if (workbookJXL == null) { createWorkbookJXL(); } ExcelSheetTableModel excelSheetTableModel = new ExcelSheetTableModel(workbookJXL.getSheet(sheetIndex)); return excelSheetTableModel; } } /** * Returns the number of sheets in the excel file * @return * @throws IOException * @throws BiffException * @throws InvalidFormatException */ public int getNumberOfSheets() throws BiffException, IOException, InvalidFormatException { if (getFile().getAbsolutePath().endsWith(".xlsx")) { // excel 2007 file if (workbookPOI == null) { createWorkbookPOI(); } return workbookPOI.getNumberOfSheets(); } else { // excel pre 2007 file if (workbookJXL == null) { createWorkbookJXL(); } return workbookJXL.getNumberOfSheets(); } } /** * Returns the names of all sheets in the excel file * @return * @throws IOException * @throws BiffException * @throws InvalidFormatException */ public String[] getSheetNames() throws BiffException, IOException, InvalidFormatException { if (getFile().getAbsolutePath().endsWith(".xlsx")) { // excel 2007 file if (workbookPOI == null) { createWorkbookPOI(); } String[] sheetNames = new String[getNumberOfSheets()]; for (int i = 0; i < getNumberOfSheets(); i++) { sheetNames[i] = workbookPOI.getSheetName(i); } return sheetNames; } else { // excel pre 2007 file if (workbookJXL == null) { createWorkbookJXL(); } return workbookJXL.getSheetNames(); } } /** * Returns the encoding for this configuration. * @return */ public Charset getEncoding() { return this.encoding; } /** * This returns the file of the referenced excel file */ public File getFile() { return workbookFile; } /** * This will set the workbook file. It will assure that an existing preopened workbook will be closed if files * differ. */ public void setWorkbookFile(File selectedFile) { if (selectedFile.equals(this.workbookFile)) { return; } if (workbookJXL != null) { workbookJXL.close(); workbookJXL = null; } workbookFile = selectedFile; try { if (workbookPOIInputStream != null) { workbookPOIInputStream.close(); } } catch (IOException e) { LogService.getRoot().log(Level.WARNING, I18N.getMessage(LogService.getRoot().getResourceBundle(), "com.rapidminer.operator.nio.model.ExcelResultSetConfiguration.close_workbook_error", e.getMessage()), e); } workbookPOI = null; workbookPOIInputStream = null; rowOffset = 0; columnOffset = 0; rowLast = Integer.MAX_VALUE; columnLast = Integer.MAX_VALUE; sheet = 0; } public int getRowLast() { return rowLast; } public void setRowLast(int rowLast) { this.rowLast = rowLast; } public int getColumnLast() { return columnLast; } public void setColumnLast(int columnLast) { this.columnLast = columnLast; } public int getSheet() { return sheet; } public void setSheet(int sheet) { this.sheet = sheet; } public void setRowOffset(int rowOffset) { this.rowOffset = rowOffset; } public void setColumnOffset(int columnOffset) { this.columnOffset = columnOffset; } @Override public DataResultSet makeDataResultSet(Operator operator) throws OperatorException { if (getFile() == null) { throw new UserError(operator, 205, ExcelExampleSource.PARAMETER_EXCEL_FILE, ""); } if (getFile().getAbsolutePath().endsWith(".xlsx")) { // excel 2007 file return new Excel2007ResultSet(operator, this); } else if (getFile().getAbsolutePath().endsWith(".xls")) { // excel pre 2007 file return new ExcelResultSet(operator, this); } else { // we might also get a file object that has neither .xlsx nor .xls as file ending, // so we have no choice but to try and open the file with the pre 2007 JXL lib to see if it works. // If it does not work, it's an excel 2007 file. try { Workbook.getWorkbook(getFile()); return new ExcelResultSet(operator, this); } catch (Exception e) { return new Excel2007ResultSet(operator, this); } } } /** See class comment on {@link ExcelSheetTableModel} for a comment why that class is not used here. * In fact we are using a {@link DefaultPreview} here as well. */ @Override public TableModel makePreviewTableModel(ProgressListener listener) throws OperatorException { final DataResultSet resultSet = makeDataResultSet(null); try { return new DefaultPreview(resultSet, listener); } catch (ParseException e) { throw new UserError(null, 302, getFile().getPath(), e.getMessage()); } finally { if(resultSet != null) { resultSet.close(); } } } public void closeWorkbook() { if (workbookJXL != null) { workbookJXL.close(); workbookJXL = null; } try { if (workbookPOIInputStream != null) { workbookPOIInputStream.close(); } } catch (IOException e) { LogService.getRoot().log(Level.WARNING, I18N.getMessage(LogService.getRoot().getResourceBundle(), "com.rapidminer.operator.nio.model.ExcelResultSetConfiguration.close_workbook_error", e.getMessage()), e); } workbookPOI = null; workbookPOIInputStream = null; } @Override public void setParameters(AbstractDataResultSetReader source) { String range = Tools.getExcelColumnName(columnOffset) + (rowOffset + 1) + ":" + Tools.getExcelColumnName(columnLast) + (rowLast + 1); source.setParameter(ExcelExampleSource.PARAMETER_IMPORTED_CELL_RANGE, range); source.setParameter(PARAMETER_SHEET_NUMBER, String.valueOf(sheet + 1)); source.setParameter(ExcelExampleSource.PARAMETER_EXCEL_FILE, workbookFile.getAbsolutePath()); } public void parseExcelRange(String range) throws OperatorException { String[] split = range.split(":", 2); try { int[] topLeft = parseExcelCell(split[0]); columnOffset = topLeft[0]; rowOffset = topLeft[1]; if (split.length < 2) { rowLast = Integer.MAX_VALUE; columnLast = Integer.MAX_VALUE; } else { int[] bottomRight = parseExcelCell(split[1]); columnLast = bottomRight[0]; rowLast = bottomRight[1]; } } catch (OperatorException e) { throw new UserError(null, e, 223, range); } } private static int[] parseExcelCell(String string) throws OperatorException { int i = 0; int column = 0; int row = 0; while (i < string.length() && (Character.isLetter(string.charAt(i)))) { char c = string.charAt(i); c = Character.toUpperCase(c); if (c < 'A' || c > 'Z') throw new UserError(null, 224, string); column *= 26; column += (c - 'A') + 1; i++; } if (i < string.length()) { // at least one digit left String columnStr = string.substring(i); try { row = Integer.parseInt(columnStr); } catch (NumberFormatException e) { throw new UserError(null, 224, string); } } return new int[] { column - 1, row - 1 }; } @Override public String getResourceName() { return workbookFile.getAbsolutePath(); } @Override public ExampleSetMetaData makeMetaData() { final ExampleSetMetaData result = new ExampleSetMetaData(); if (rowLast != Integer.MAX_VALUE) { result.setNumberOfExamples(rowLast - rowOffset + 1); } return result; } /** * This returns whether the old naming style should be kept from prior to 5.1.000 versions. */ public boolean isEmulatingOldNames() { return isEmulatingOldNames; } @Override public void close() { if (workbookJXL != null) { workbookJXL.close(); } try { if (workbookPOIInputStream != null) { workbookPOIInputStream.close(); workbookPOIInputStream = null; } } catch (IOException e) { LogService.getRoot().log(Level.WARNING, I18N.getMessage(LogService.getRoot().getResourceBundle(), "com.rapidminer.operator.nio.model.ExcelResultSetConfiguration.close_workbook_error", e.getMessage()), e); } } /** * Creates the JXL workbook. * @throws BiffException * @throws IOException */ private void createWorkbookJXL() throws BiffException, IOException { File file = getFile(); WorkbookSettings workbookSettings = new WorkbookSettings(); if (encoding != null) { workbookSettings.setEncoding(encoding.name()); } workbookJXL = Workbook.getWorkbook(file, workbookSettings); } /** * Creates the POI workbook. * @throws InvalidFormatException * @throws IOException */ private void createWorkbookPOI() throws InvalidFormatException, IOException { workbookPOIInputStream = new FileInputStream(getFile()); try { workbookPOI = WorkbookFactory.create(workbookPOIInputStream); } catch (IllegalArgumentException e) { // Thrown if the selected file is not a valid .xlsx file at all throw new IOException(e.getMessage()); } catch (POIXMLException e) { // Thrown if the selected file is a partially broken .xlsx file throw new IOException(I18N.getMessage(I18N.getErrorBundle(), "import.excel.excel_file_broken")); } } /** * @return the timezone */ public String getTimezone() { return this.timezone; } /** * @return the datePattern */ public String getDatePattern() { return this.datePattern; } /** * @param timezone the timezone to set */ public void setTimezone(String timezone) { this.timezone = timezone; } /** * @param datePattern the datePattern to set */ public void setDatePattern(String datePattern) { this.datePattern = datePattern; } }