/*! ****************************************************************************** * * Pentaho Data Integration * * Copyright (C) 2002-2015 by Pentaho : http://www.pentaho.com * ******************************************************************************* * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ******************************************************************************/ /** * Author = Shailesh Ahuja */ package org.pentaho.di.trans.steps.excelinput.staxpoi; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.pentaho.di.core.exception.KettleException; import org.pentaho.di.core.logging.KettleLogStore; import org.pentaho.di.core.logging.LogChannelInterface; import org.pentaho.di.core.spreadsheet.KSheet; import org.pentaho.di.core.spreadsheet.KWorkbook; /** * Streaming reader for XLSX files.<br> * Does not open XLS. */ public class StaxPoiWorkbook implements KWorkbook { private static final String RELATION_NS_URI = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"; private LogChannelInterface log; private XSSFReader reader; // maintain the mapping of the sheet name to its ID private Map<String, String> sheetNameIDMap; // sheet names in order private String[] sheetNames; // mapping of the sheet object with its ID/Name private Map<String, StaxPoiSheet> openSheetsMap; private OPCPackage opcpkg; protected StaxPoiWorkbook() { openSheetsMap = new HashMap<String, StaxPoiSheet>(); this.log = KettleLogStore.getLogChannelInterfaceFactory().create( this ); } public StaxPoiWorkbook( String filename, String encoding ) throws KettleException { this(); try { opcpkg = OPCPackage.open( filename ); openFile( opcpkg, encoding ); } catch ( Exception e ) { throw new KettleException( e ); } } public StaxPoiWorkbook( InputStream inputStream, String encoding ) throws KettleException { this(); try { opcpkg = OPCPackage.open( inputStream ); openFile( opcpkg, encoding ); } catch ( Exception e ) { throw new KettleException( e ); } } private void openFile( OPCPackage pkg, String encoding ) throws KettleException { InputStream workbookData = null; XMLStreamReader workbookReader = null; try { reader = new XSSFReader( pkg ); sheetNameIDMap = new LinkedHashMap<String, String>(); workbookData = reader.getWorkbookData(); XMLInputFactory factory = XMLInputFactory.newInstance(); workbookReader = factory.createXMLStreamReader( workbookData ); while ( workbookReader.hasNext() ) { if ( workbookReader.next() == XMLStreamConstants.START_ELEMENT && workbookReader.getLocalName().equals( "sheet" ) ) { String sheetName = workbookReader.getAttributeValue( null, "name" ); String sheetID = workbookReader.getAttributeValue( RELATION_NS_URI, "id" ); sheetNameIDMap.put( sheetName, sheetID ); } } sheetNames = new String[ sheetNameIDMap.size() ]; int i = 0; for ( String sheetName : sheetNameIDMap.keySet() ) { sheetNames[i++] = sheetName; } } catch ( Exception e ) { throw new KettleException( e ); } finally { if ( workbookReader != null ) { try { workbookReader.close(); } catch ( XMLStreamException e ) { throw new KettleException( e ); } } if ( workbookData != null ) { try { workbookData.close(); } catch ( IOException e ) { throw new KettleException( e ); } } } } @Override /** * return the same sheet if it already is created otherwise instantiate a new one */ public KSheet getSheet( String sheetName ) { String sheetID = sheetNameIDMap.get( sheetName ); if ( sheetID == null ) { return null; } StaxPoiSheet sheet = openSheetsMap.get( sheetID ); if ( sheet == null ) { try { sheet = new StaxPoiSheet( reader, sheetName, sheetID ); openSheetsMap.put( sheetID, sheet ); } catch ( Exception e ) { log.logError( sheetName, e ); } } return sheet; } @Override public String[] getSheetNames() { String[] sheets = new String[sheetNameIDMap.size()]; return sheetNameIDMap.keySet().toArray( sheets ); } @Override public void close() { // close all the sheets for ( StaxPoiSheet sheet : openSheetsMap.values() ) { try { sheet.close(); } catch ( IOException e ) { log.logError( "Could not close workbook", e ); } catch ( XMLStreamException e ) { log.logError( "Could not close xmlstream", e ); } } if ( opcpkg != null ) { //We should not save change in xlsx because it is input step. opcpkg.revert(); } } @Override public int getNumberOfSheets() { return sheetNameIDMap.size(); } @Override public KSheet getSheet( int sheetNr ) { if ( sheetNr >= 0 && sheetNr < sheetNames.length ) { return getSheet( sheetNames[sheetNr] ); } return null; } @Override public String getSheetName( int sheetNr ) { if ( sheetNr >= 0 && sheetNr < sheetNames.length ) { return sheetNames[sheetNr]; } return null; } }