/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.nio.model.xlsx;

import java.util.LinkedList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;

import com.rapidminer.operator.nio.model.xlsx.XlsxWorkbookParser.XlsxWorkbook;


/**
 * SAX handler for the XLSX workbook part. It collects every {@code sheet} element and the
 * {@code date1904} flag of the {@code workbookPr} element into a {@link XlsxWorkbook} container.
 *
 * @see "ECMA-376, 4th Edition, 18.2 Workbook (pp. 1537 ff.)"
 *
 * @author Adrian Wilke, Nils Woehler
 * @since 6.3.0
 */
public class XlsxWorkbookParser extends AbstractXlsxSAXHandler<XlsxWorkbook> {

    /**
     * Container for XLSX workbook.
     *
     * @see "ECMA-376, 4th Edition, 18.2 Workbook (pp. 1537 ff.)"
     */
    public static final class XlsxWorkbook {

        /** List of parsed Workbook Sheet elements, in the order they were encountered */
        public List<XlsxWorkbookSheet> xlsxWorkbookSheets = new LinkedList<>();

        /**
         * Indicates whether the date system used in the workbook starts in 1904.
         *
         * The default value is false, meaning that the workbook uses the 1900 date system, where
         * 1/1/1900 is the first day in the system.
         */
        public boolean isDate1904 = false;
    }

    /**
     * Container for XLSX workbook sheets.
     *
     * @see "ECMA-376, 4th Edition, 18.2.19 sheet (pp. 1563 ff.)"
     */
    public static final class XlsxWorkbookSheet {

        /** Sheet name (required); {@code null} if the attribute was missing in the file */
        public String name;

        /** Relationship ID (required); {@code null} if the attribute was missing in the file */
        public String rId;

        /** Sheet Tab ID (required); stays {@code 0} if the attribute was missing in the file */
        public int sheetId;
    }

    /** Path of the embedded workbook file */
    private static final String FILE_WORKBOOK = "xl/workbook.xml";

    private static final String ATT_SHEET_ID = "sheetId";
    private static final String ATT_SHEET_NAME = "name";
    private static final String ATT_SHEET_RID = "r:id";
    private static final String TAG_SHEET = "sheet";

    /**
     * This element defines a collection of workbook properties. <br>
     * [Example:
     *
     * <pre>
     * {@code
     * <workbookPr showObjects="none" saveExternalLinkValues="0" defaultThemeVersion="123820"/>
     * }
     * </pre>
     *
     * end example]
     */
    private static final String TAG_WORKBOOK_PR = "workbookPr";

    /**
     * Value that indicates whether to use a 1900 or 1904 date system when converting serial
     * date-times in the workbook to dates. <br>
     * A value of 1 or true indicates the workbook uses the 1904 date system. <br>
     * A value of 0 or false indicates the workbook uses the 1900 date system. <br>
     * (See 18.17.4.1 for the definition of the date systems.) <br>
     * The default value for this attribute is false.
     */
    private static final String ATT_DATE_1904 = "date1904";

    /** The container to access results */
    private final XlsxWorkbook xlsxWorkbook = new XlsxWorkbook();

    /**
     * @return the container that is filled while parsing; the same instance is returned on every
     *         call
     */
    @Override
    public XlsxWorkbook getResult() {
        return xlsxWorkbook;
    }

    /**
     * Reacts to the {@code workbookPr} and {@code sheet} elements and ignores every other element.
     * Elements are matched by their qualified name.
     *
     * @param uri
     *            the namespace URI (not used)
     * @param localName
     *            the local name (not used)
     * @param qName
     *            the qualified name that is compared against the known tag names
     * @param attributes
     *            the attributes attached to the element
     * @throws SAXException
     *             if a {@code sheet} element carries a {@code sheetId} that is not a valid integer
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (qName.equals(TAG_WORKBOOK_PR)) {
            String date1904 = attributes.getValue(ATT_DATE_1904);
            if (date1904 != null) {
                xlsxWorkbook.isDate1904 = parseDate1904Flag(date1904);
            }
            // a missing attribute keeps the default (1900 date system)
        } else if (qName.equals(TAG_SHEET)) {
            xlsxWorkbook.xlsxWorkbookSheets.add(parseSheetElement(attributes));
        }
    }

    /**
     * Interprets the value of the {@code date1904} attribute.
     *
     * @param value
     *            the non-null attribute value
     * @return {@code true} if the value denotes the 1904 date system
     */
    private static boolean parseDate1904Flag(String value) {
        /*
         * Even though the specification says only 0/1 are allowed as date1904 value some writers
         * (e.g. Write Excel) write true/false instead so we need to check whether we have a digit
         * or a string.
         */
        if (value.length() == 1 && Character.isDigit(value.charAt(0))) {
            return Integer.parseInt(value) == 1;
        }
        return Boolean.parseBoolean(value);
    }

    /**
     * Builds a sheet container from the attributes of a {@code sheet} element.
     *
     * @param attributes
     *            the attributes of the {@code sheet} element
     * @return the populated sheet container
     * @throws SAXException
     *             if the {@code sheetId} attribute is present but cannot be parsed as an integer
     */
    private static XlsxWorkbookSheet parseSheetElement(Attributes attributes) throws SAXException {
        XlsxWorkbookSheet sheet = new XlsxWorkbookSheet();
        sheet.name = attributes.getValue(ATT_SHEET_NAME);
        sheet.rId = attributes.getValue(ATT_SHEET_RID);

        String sheetId = attributes.getValue(ATT_SHEET_ID);
        if (sheetId != null) {
            try {
                sheet.sheetId = Integer.parseInt(sheetId);
            } catch (NumberFormatException e) {
                // Report broken files through the declared SAX error channel instead of letting
                // an unchecked exception escape from the handler.
                throw new SAXException("Invalid value '" + sheetId + "' for attribute '" + ATT_SHEET_ID
                        + "' of sheet '" + sheet.name + "' in " + FILE_WORKBOOK, e);
            }
        }
        return sheet;
    }

    /**
     * @return the path of the workbook file inside the XLSX archive
     */
    @Override
    protected String getZipEntryPath() {
        return FILE_WORKBOOK;
    }
}