/* * RapidMiner * * Copyright (C) 2001-2014 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.io; import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.nio.charset.Charset; import java.util.Date; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Locale; import jxl.Workbook; import jxl.WorkbookSettings; import jxl.write.DateFormat; import jxl.write.DateTime; import jxl.write.Label; import jxl.write.Number; import jxl.write.NumberFormat; import jxl.write.WritableCellFormat; import jxl.write.WritableFont; import jxl.write.WritableSheet; import jxl.write.WritableWorkbook; import jxl.write.WriteException; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.CellStyle; import org.apache.poi.ss.usermodel.CreationHelper; import org.apache.poi.ss.usermodel.Font; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.util.WorkbookUtil; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeDateFormat; import com.rapidminer.parameter.ParameterTypeString; import com.rapidminer.parameter.conditions.EqualTypeCondition; import com.rapidminer.tools.DateParser; import com.rapidminer.tools.I18N; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.io.Encoding; /** * <p>This operator can be used to write data into Microsoft Excel spreadsheets. * This operator creates Excel files readable by Excel 95, 97, 2000, XP, 2003 * and newer. Missing data values are indicated by empty cells.</p> * * @author Ingo Mierswa, Nils Woehler */ public class ExcelExampleSetWriter extends AbstractStreamWriter { private static final String RAPID_MINER_DATA = "RapidMiner Data"; /** The parameter name for "The Excel spreadsheet file which should be written." */ public static final String PARAMETER_EXCEL_FILE = "excel_file"; public static final String FILE_FORMAT_XLS = "xls"; public static final String FILE_FORMAT_XLSX = "xlsx"; public static final String[] FILE_FORMAT_CATEGORIES = new String[] { FILE_FORMAT_XLS, FILE_FORMAT_XLSX }; public static final int FILE_FORMAT_XLS_INDEX = 0; public static final int FILE_FORMAT_XLSX_INDEX = 1; public static final String PARAMETER_FILE_FORMAT = "file_format"; public static final String PARAMETER_DATE_FORMAT = "date_format"; public static final String PARAMETER_NUMBER_FORMAT = "number_format"; public static final String PARAMETER_SHEET_NAME = "sheet_name"; public static final String DEFAULT_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"; public static final String DEFAULT_NUMBER_FORMAT = "#.0"; public ExcelExampleSetWriter(OperatorDescription description) { super(description); } /** * Writes the example set into a excel file with XLS format. * If you want to write it in XLSX format use {@link #writeXLSX(ExampleSet, String, String, String, OutputStream)} */ public static void write(ExampleSet exampleSet, Charset encoding, OutputStream out) throws IOException, WriteException { try { // .xls files can only store up to 256 columns, so throw error in case of more if (exampleSet.getAttributes().allSize() > 256) { throw new IllegalArgumentException(I18N.getMessage(I18N.getErrorBundle(), "export.excel.excel_xls_file_exceeds_column_limit")); } WorkbookSettings ws = new WorkbookSettings(); ws.setEncoding(encoding.name()); ws.setLocale(Locale.US); WritableWorkbook workbook = Workbook.createWorkbook(out, ws); WritableSheet s = workbook.createSheet(RAPID_MINER_DATA, 0); writeDataSheet(s, exampleSet); workbook.write(); workbook.close(); } finally { try { out.close(); } catch (Exception e) { // silent. exception will trigger warning anyway } } } private static void writeDataSheet(WritableSheet s, ExampleSet exampleSet) throws WriteException { // Format the Font WritableFont wf = new WritableFont(WritableFont.ARIAL, 10, WritableFont.BOLD); WritableCellFormat cf = new WritableCellFormat(wf); Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); int counter = 0; while (a.hasNext()) { Attribute attribute = a.next(); s.addCell(new Label(counter++, 0, attribute.getName(), cf)); } NumberFormat nf = new NumberFormat(DEFAULT_NUMBER_FORMAT); WritableCellFormat nfCell = new WritableCellFormat(nf); WritableFont wf2 = new WritableFont(WritableFont.ARIAL, 10, WritableFont.NO_BOLD); WritableCellFormat cf2 = new WritableCellFormat(wf2); DateFormat df = new DateFormat(DateParser.DEFAULT_DATE_TIME_FORMAT); WritableCellFormat dfCell = new WritableCellFormat(df); int rowCounter = 1; for (Example example : exampleSet) { a = exampleSet.getAttributes().allAttributes(); int columnCounter = 0; while (a.hasNext()) { Attribute attribute = a.next(); if (!Double.isNaN(example.getValue(attribute))) { if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NOMINAL)) { s.addCell(new Label(columnCounter, rowCounter, replaceForbiddenChars(example.getValueAsString(attribute)), cf2)); } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { DateTime dateTime = new DateTime(columnCounter, rowCounter, new Date((long) example.getValue(attribute)), dfCell); s.addCell(dateTime); } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL)) { Number number = new Number(columnCounter, rowCounter, example.getValue(attribute), nfCell); s.addCell(number); } else { // default: write as a String s.addCell(new Label(columnCounter, rowCounter, replaceForbiddenChars(example.getValueAsString(attribute)), cf2)); } } columnCounter++; } rowCounter++; } } private static String replaceForbiddenChars(String originalValue) { return originalValue.replace((char) 0, ' '); } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(makeFileParameterType()); types.add(new ParameterTypeCategory(PARAMETER_FILE_FORMAT, "Defines the file format the excel file should be saved as.", FILE_FORMAT_CATEGORIES, FILE_FORMAT_XLS_INDEX, false)); List<ParameterType> encodingTypes = Encoding.getParameterTypes(this); for (ParameterType type : encodingTypes) { type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_FILE_FORMAT, FILE_FORMAT_CATEGORIES, false, new int[] { FILE_FORMAT_XLS_INDEX })); } types.addAll(encodingTypes); List<ParameterType> xlsxTypes = new LinkedList<ParameterType>(); ParameterTypeString sheetName = new ParameterTypeString(PARAMETER_SHEET_NAME, "The name of the created sheet. Note that sheet name is Excel must not exceed 31 characters.", RAPID_MINER_DATA); sheetName.setExpert(false); xlsxTypes.add(sheetName); xlsxTypes.add(new ParameterTypeDateFormat(PARAMETER_DATE_FORMAT, "The parse format of the date values. Default: for example \"yyyy-MM-dd HH:mm:ss\".", DEFAULT_DATE_FORMAT, true)); xlsxTypes.add(new ParameterTypeString(PARAMETER_NUMBER_FORMAT, "Specifies the number format for date entries. Default: \"#.0\"", DEFAULT_NUMBER_FORMAT, true)); for (ParameterType type : xlsxTypes) { type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_FILE_FORMAT, FILE_FORMAT_CATEGORIES, false, new int[] { FILE_FORMAT_XLSX_INDEX })); } types.addAll(xlsxTypes); return types; } @Override String[] getFileExtensions() { return new String[] { FILE_FORMAT_XLS, FILE_FORMAT_XLSX }; } @Override String getFileParameterName() { return PARAMETER_EXCEL_FILE; } @Override void writeStream(ExampleSet exampleSet, OutputStream outputStream) throws OperatorException { File file = getParameterAsFile(PARAMETER_EXCEL_FILE, true); if (getParameterAsString(PARAMETER_FILE_FORMAT).equals(FILE_FORMAT_XLSX)) { String dateFormat = isParameterSet(PARAMETER_DATE_FORMAT) ? getParameterAsString(PARAMETER_DATE_FORMAT) : null; String numberFormat = isParameterSet(PARAMETER_NUMBER_FORMAT) ? getParameterAsString(PARAMETER_NUMBER_FORMAT) : null; String sheetName = getParameterAsString(PARAMETER_SHEET_NAME); if (sheetName.length() > 31) { throw new UserError(this, "excel_sheet_name_too_long", sheetName, sheetName.length()); } try { writeXLSX(exampleSet, sheetName, dateFormat, numberFormat, outputStream); } catch (Exception e) { throw new UserError(this, 303, file.getName(), e.getMessage()); } } else { WorkbookSettings ws = new WorkbookSettings(); Charset encoding = Encoding.getEncoding(this); ws.setEncoding(encoding.name()); ws.setLocale(Locale.US); try { write(exampleSet, encoding, outputStream); } catch (Exception e) { throw new UserError(this, 303, file.getName(), e.getMessage()); } } } /** * Writes the example set into a excel file with XLSX format. * If you want to write it in XLS format use {@link #write(ExampleSet, Charset, OutputStream)}. */ public static void writeXLSX(ExampleSet exampleSet, String sheetName, String dateFormat, String numberFormat, OutputStream outputStream) throws WriteException, IOException { // .xlsx files can only store up to 16384 columns, so throw error in case of more if (exampleSet.getAttributes().allSize() > 16384) { throw new IllegalArgumentException(I18N.getMessage(I18N.getErrorBundle(), "export.excel.excel_xlsx_file_exceeds_column_limit")); } try { XSSFWorkbook workbook = new XSSFWorkbook(); Sheet sheet = workbook.createSheet(WorkbookUtil.createSafeSheetName(sheetName)); dateFormat = dateFormat == null ? DEFAULT_DATE_FORMAT : dateFormat; numberFormat = numberFormat == null ? "#.0" : numberFormat; writeXLSXDataSheet(workbook, sheet, dateFormat, numberFormat, exampleSet); workbook.write(outputStream); } finally { outputStream.flush(); outputStream.close(); } } private static void writeXLSXDataSheet(org.apache.poi.ss.usermodel.Workbook wb, Sheet sheet, String dateFormat, String numberFormat, ExampleSet exampleSet) throws WriteException { Font headerFont = wb.createFont(); headerFont.setBoldweight(Font.BOLDWEIGHT_BOLD); CellStyle headerStyle = wb.createCellStyle(); headerStyle.setFont(headerFont); // create the header Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); int columnCounter = 0; int rowCounter = 0; Row headerRow = sheet.createRow(rowCounter); while (a.hasNext()) { Attribute attribute = a.next(); Cell headerCell = headerRow.createCell(columnCounter); headerCell.setCellValue(attribute.getName()); headerCell.setCellStyle(headerStyle); columnCounter++; } rowCounter++; // body font Font bodyFont = wb.createFont(); bodyFont.setBoldweight(Font.BOLDWEIGHT_NORMAL); CreationHelper createHelper = wb.getCreationHelper(); // number format CellStyle numericalStyle = wb.createCellStyle(); numericalStyle.setDataFormat(createHelper.createDataFormat().getFormat(numberFormat)); numericalStyle.setFont(bodyFont); // date format CellStyle dateStyle = wb.createCellStyle(); dateStyle.setDataFormat(createHelper.createDataFormat().getFormat(dateFormat)); dateStyle.setFont(bodyFont); // create nominal cell style CellStyle nominalStyle = wb.createCellStyle(); nominalStyle.setFont(bodyFont); // fill body for (Example example : exampleSet) { // create new row Row bodyRow = sheet.createRow(rowCounter); // iterate over attributes and save examples a = exampleSet.getAttributes().allAttributes(); columnCounter = 0; while (a.hasNext()) { Attribute attribute = a.next(); Cell currentCell = bodyRow.createCell(columnCounter); if (!Double.isNaN(example.getValue(attribute))) { if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { Date dateValue = example.getDateValue(attribute); currentCell.setCellValue(dateValue); currentCell.setCellStyle(dateStyle); } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.NUMERICAL)) { double numericalValue = example.getNumericalValue(attribute); currentCell.setCellValue(numericalValue); currentCell.setCellStyle(numericalStyle); } else { currentCell.setCellValue(replaceForbiddenChars(example.getValueAsString(attribute))); currentCell.setCellStyle(nominalStyle); } } columnCounter++; } rowCounter++; } } }