/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.metamodel.excel; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.List; import org.apache.poi.hssf.usermodel.HSSFDateUtil; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.FillPatternType; import org.apache.poi.ss.usermodel.FontUnderline; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFColor; import org.apache.poi.xssf.usermodel.XSSFFont; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.metamodel.data.Style; import org.apache.metamodel.data.Style.SizeUnit; import org.apache.metamodel.data.StyleBuilder; import org.apache.metamodel.util.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; /** * XML handler for transforming a sheet into rows. Uses an * {@link XlsxRowCallback} to publish identified rows. */ final class XlsxSheetToRowsHandler extends DefaultHandler { private static final Logger logger = LoggerFactory.getLogger(XlsxSheetToRowsHandler.class); private static enum XssfDataType { BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, } // global variables private final XlsxRowCallback _callback; private final ExcelConfiguration _configuration; private final StylesTable _stylesTable; private final SharedStringsTable _sharedStringTable; // variables used to hold information about the current rows private int _rowNumber; private final List<String> _rowValues; private final List<Style> _styles; // variables used to hold information about the current visited cells private final StringBuilder _value; private final StyleBuilder _style; private boolean _inCell; private boolean _inFormula; private int _columnNumber; private XssfDataType _dataType; private int _formatIndex; private String _formatString; public XlsxSheetToRowsHandler(XlsxRowCallback callback, XSSFReader xssfReader, ExcelConfiguration configuration) throws Exception { _callback = callback; _configuration = configuration; _sharedStringTable = xssfReader.getSharedStringsTable(); _stylesTable = xssfReader.getStylesTable(); _value = new StringBuilder(); _style = new StyleBuilder(); _rowValues = new ArrayList<String>(); _styles = new ArrayList<Style>(); _rowNumber = -1; _inCell = false; _inFormula = false; } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("row".equals(qName)) { // element is a row // excel row numbers are 1-based int rowNumber = Integer.parseInt(attributes.getValue("r")); rowNumber = rowNumber - 1; if (_configuration.isSkipEmptyLines()) { _rowNumber++; } else { while (_rowNumber + 1 < rowNumber) { // empty lines are not skipped, so dispatch empty lines _rowNumber++; List<String> emptyValues = Collections.emptyList(); List<Style> emptyStyles = Collections.emptyList(); _callback.row(_rowNumber, emptyValues, emptyStyles); } _rowNumber = rowNumber; } } else if ("c".equals(qName)) { // element is a cell _inCell = true; final String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } _columnNumber = nameToColumn(r.substring(0, firstDigit)); // Set up defaults. _dataType = XssfDataType.NUMBER; _formatIndex = -1; _formatString = null; final String cellType = attributes.getValue("t"); if ("b".equals(cellType)) { _dataType = XssfDataType.BOOL; } else if ("e".equals(cellType)) { _dataType = XssfDataType.ERROR; } else if ("inlineStr".equals(cellType)) { _dataType = XssfDataType.INLINESTR; } else if ("s".equals(cellType)) { _dataType = XssfDataType.SSTINDEX; } else if ("str".equals(cellType)) { _dataType = XssfDataType.FORMULA; } String cellStyleStr = attributes.getValue("s"); if (cellStyleStr != null) { // It's a number, but almost certainly one // with a special style or format int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = _stylesTable.getStyleAt(styleIndex); configureStyle(style); if (_dataType == XssfDataType.NUMBER) { this._formatIndex = style.getDataFormat(); this._formatString = style.getDataFormatString(); if (this._formatString == null) { this._formatString = BuiltinFormats.getBuiltinFormat(this._formatIndex); } } } } else if (_inCell && "f".equals(qName)) { // skip the actual formula line _inFormula = true; } } private void configureStyle(XSSFCellStyle style) { XSSFFont font = style.getFont(); if (font.getBold()) { _style.bold(); } if (font.getItalic()) { _style.italic(); } if (font.getUnderline() != FontUnderline.NONE.getByteValue()) { _style.underline(); } if (style.getFillPatternEnum() == FillPatternType.SOLID_FOREGROUND) { XSSFColor fillForegroundXSSFColor = style.getFillForegroundXSSFColor(); String argb = fillForegroundXSSFColor.getARGBHex(); if (argb != null) { _style.background(argb.substring(2)); } } final XSSFFont stdFont = _stylesTable.getStyleAt(0).getFont(); final short fontHeight = style.getFont().getFontHeightInPoints(); if (stdFont.getFontHeightInPoints() != fontHeight) { _style.fontSize(fontHeight, SizeUnit.PT); } XSSFColor fontColor = style.getFont().getXSSFColor(); if (fontColor != null) { String argbHex = fontColor.getARGBHex(); if (argbHex != null) { _style.foreground(argbHex.substring(2)); } } switch (style.getAlignmentEnum()) { case LEFT: _style.leftAligned(); break; case RIGHT: _style.rightAligned(); break; case CENTER: _style.centerAligned(); break; case JUSTIFY: _style.justifyAligned(); break; default: // do nothing break; } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("row".equals(qName)) { // element was a row boolean next = _callback.row(_rowNumber, _rowValues, _styles); if (!next) { throw new XlsxStopParsingException(); } _rowValues.clear(); _styles.clear(); } else if ("c".equals(qName)) { // element was a cell _inCell = false; while (_rowValues.size() < _columnNumber) { _rowValues.add(null); _styles.add(Style.NO_STYLE); } _rowValues.add(createValue()); _styles.add(_style.create()); _value.setLength(0); _style.reset(); } else if (_inFormula && "f".equals(qName)) { // skip the actual formula line _inFormula = false; } } private String createValue() { if (_value.length() == 0) { return null; } switch (_dataType) { case BOOL: char first = _value.charAt(0); return first == '0' ? "false" : "true"; case ERROR: logger.warn("Error-cell occurred: {}", _value); return _value.toString(); case FORMULA: return _value.toString(); case INLINESTR: XSSFRichTextString rtsi = new XSSFRichTextString(_value.toString()); return rtsi.toString(); case SSTINDEX: String sstIndex = _value.toString(); int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString(_sharedStringTable.getEntryAt(idx)); return rtss.toString(); case NUMBER: final String numberString = _value.toString(); if (_formatString != null) { DataFormatter formatter = getDataFormatter(); if (HSSFDateUtil.isADateFormat(_formatIndex, _formatString)) { Date date = DateUtil.getJavaDate(Double.parseDouble(numberString)); return DateUtils.createDateFormat().format(date); } return formatter.formatRawCellContents(Double.parseDouble(numberString), _formatIndex, _formatString); } else { if (numberString.endsWith(".0")) { // xlsx only stores doubles, so integers get ".0" appended // to them return numberString.substring(0, numberString.length() - 2); } return numberString; } default: logger.error("Unsupported data type: {}", _dataType); return ""; } } private DataFormatter getDataFormatter() { return new DataFormatter(); } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (_inCell && !_inFormula) { _value.append(ch, start, length); } } /** * Converts an Excel column name like "C" to a zero-based index. * * @param name * @return Index corresponding to the specified name */ private int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } }