/*******************************************************************************
* Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser Public License 3.0
* which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/lgpl.html
*
******************************************************************************/
package com.opendoorlogistics.core.tables.io;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import com.opendoorlogistics.api.ExecutionReport;
import com.opendoorlogistics.api.ODLApi;
import com.opendoorlogistics.api.components.ProcessingApi;
import com.opendoorlogistics.api.tables.ODLColumnType;
import com.opendoorlogistics.api.tables.ODLDatastoreAlterable;
import com.opendoorlogistics.api.tables.ODLTable;
import com.opendoorlogistics.api.tables.ODLTableAlterable;
import com.opendoorlogistics.api.tables.ODLTableDefinition;
import com.opendoorlogistics.api.tables.ODLTableDefinitionAlterable;
import com.opendoorlogistics.api.tables.ODLTableReadOnly;
import com.opendoorlogistics.core.scripts.execution.ExecutionReportImpl;
import com.opendoorlogistics.core.tables.ODLFactory;
import com.opendoorlogistics.core.tables.io.PoiIO.SchemaSheetInformation;
import com.opendoorlogistics.core.tables.io.SchemaIO.SchemaColumnDefinition;
import com.opendoorlogistics.core.tables.utils.DatastoreCopier;
import com.opendoorlogistics.core.utils.UpdateTimer;
import com.opendoorlogistics.core.utils.strings.Strings;
/**
* Based on example at http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
* Also http://blogs.msdn.com/b/brian_jones/archive/2007/05/29/simple-spreadsheetml-file-part-3-formatting.aspx
* @author Phil
*
*/
public class XmlParserLoader {
private final UpdateTimer timer = new UpdateTimer(250);
private final ExecutionReport report;
private final File file;
private final ODLDatastoreAlterable<ODLTableAlterable> ds;
private final ProcessingApi processingApi;
private String baseMessage;
private XmlParserLoader(File file, ODLDatastoreAlterable<ODLTableAlterable> ds,ProcessingApi processingApi,ExecutionReport report ){
this.file = file;
this.ds = ds;
this.report = report;
this.processingApi = processingApi;
}
private void throwIfUserQuit(){
if(processingApi!=null && processingApi.isCancelled()){
throw new RuntimeException("User cancelled Excel loading.");
}
}
private XMLReader createSheetParser(ContentHandler handler) {
try {
XMLReader parser =
XMLReaderFactory.createXMLReader(
"org.apache.xerces.parsers.SAXParser"
);
parser.setContentHandler(handler);
return parser;
} catch (Exception e) {
throw new RuntimeException(e);
}
// ContentHandler handler2 =
// new XSSFSheetXMLHandler(styles, sst, new SheetContentsHandler(){
//
// @Override
// public void startRow(int rowNum) {
// // TODO Auto-generated method stub
//
// }
//
// @Override
// public void endRow() {
// // TODO Auto-generated method stub
//
// }
//
// @Override
// public void cell(String cellReference, String formattedValue) {
// System.out.println(cellReference + " " + formattedValue);
// }
//
// @Override
// public void headerFooter(String text, boolean isHeader, String tagName) {
// // TODO Auto-generated method stub
//
// }}
// , true);
}
//SchemaIO schema#
private class ReadSchemaSheet implements SheetContentsHandler{
private TreeMap<Integer, TreeMap<Integer, String>> rowsMap = new TreeMap<>();
private TreeMap<Integer, String> currentRow ;
@Override
public void startRow(int rowNum) {
currentRow = new TreeMap<>();
rowsMap.put(rowNum, currentRow);
}
@Override
public void endRow(int row) {
// TODO Auto-generated method stub
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
CellReference ref = new CellReference(cellReference);
currentRow.put((int)ref.getCol(), formattedValue);
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// TODO Auto-generated method stub
}
SchemaSheetInformation finish(ExecutionReport report){
// remove any totally empty rows
Iterator<Map.Entry<Integer, TreeMap<Integer, String>>> it = rowsMap.entrySet().iterator();
while(it.hasNext()){
Map.Entry<Integer, TreeMap<Integer, String>> row = it.next();
boolean remove=false;
TreeMap<Integer, String> r= row.getValue();
if(r.size()==0){
remove = true;
}
if(!remove){
remove = true;
for(String v : r.values()){
if(!Strings.isEmpty(v)){
remove = false;
break;
}
}
}
if(remove){
it.remove();
}
}
class ConsecutiveRows extends ArrayList<Map.Entry<Integer, TreeMap<Integer, String>>>{
ODLTableReadOnly read(String tableName){
// create table definition
ReadTableDefinition dfnHandler = new ReadTableDefinition(tableName, null,false);
process(dfnHandler);
ODLTableDefinition dfn = dfnHandler.createTableDefinition();
// read values
ODLDatastoreAlterable<ODLTableAlterable> dsDfn = ODLFactory.createAlterable();
ODLTableAlterable table = dsDfn.createTable(tableName, -1);
DatastoreCopier.copyTableDefinition(dfn, table);
process(new ReadIntoTableHandler(table, dfnHandler.getHeaderRow()+1, Integer.MAX_VALUE));
return table;
}
void process(SheetContentsHandler handler){
for(Map.Entry<Integer, TreeMap<Integer, String>> row:this){
handler.startRow(row.getKey());
for(Map.Entry<Integer, String> colVal : row.getValue().entrySet()){
CellReference ref = new CellReference(row.getKey(), colVal.getKey());
handler.cell(ref.formatAsString(), colVal.getValue(),null);
}
handler.endRow(row.getKey());
}
}
}
// split into tables of consecutive rows
ArrayList<ConsecutiveRows> split = new ArrayList<>();
it = rowsMap.entrySet().iterator();
ConsecutiveRows current=null;
while(it.hasNext()){
Map.Entry<Integer, TreeMap<Integer, String>> row = it.next();
if(current == null || (current.size() > 0 && current.get(current.size()-1).getKey() != row.getKey()-1)){
current = new ConsecutiveRows();
split.add(current);
}
current.add(row);
}
// process each one
ArrayList<ODLTableReadOnly> tables = new ArrayList<>();
for(ConsecutiveRows cr : split){
tables.add(cr.read("SchemaTable"));
}
return PoiIO.readSchemaFromODLTables(tables, report);
}
}
private static class ColInfo{
SchemaColumnDefinition dfn;
ColumnTypeEstimator estimator;
String name;
}
/**
* Handler which estimates the column types or gets them from the schema
* @author Phil
*
*/
private class ReadTableDefinition implements SheetContentsHandler{
private final String sheetName;
private final SchemaIO schemaIO;
private int headerState=-1;
private final ArrayList<ColInfo> colInfos=new ArrayList<>();
private final boolean useEstimators;
private int headerRow;
ReadTableDefinition(String sheetName, SchemaIO schema, boolean useEstimators) {
this.sheetName = sheetName;
this.schemaIO = schema;
this.useEstimators = useEstimators;
}
@Override
public void startRow(int rowNum) {
throwIfUserQuit();
if(headerState==-1){
// now on header row
headerState = 0;
headerRow = rowNum;
}
if(timer.isUpdate()){
postStatus("row " + (rowNum+1));
}
}
int getHeaderRow() {
return headerRow;
}
@Override
public void endRow(int row) {
if(headerState == 0){
// if finished header row, fill in any missing column infos
for(int i =0 ; i< colInfos.size();i++){
if(colInfos.get(i)==null){
colInfos.set(i, new ColInfo());
if(useEstimators){
colInfos.get(i).estimator = new ColumnTypeEstimator();
}
}
}
}
// and flag that we're now on the data
headerState = 1;
}
@Override
public void cell(String cellReference, String formattedValue,XSSFComment comment) {
CellReference ref = new CellReference(cellReference);
int col = ref.getCol();
if(headerState==0){
// init colinfo for this column
while(col>=colInfos.size()){
colInfos.add(new ColInfo());
}
ColInfo info = colInfos.get(col);
if(schemaIO!=null && Strings.isEmpty(formattedValue)==false){
info.dfn = schemaIO.findDefinition(sheetName,formattedValue);
}
if(info.dfn==null && useEstimators){
info.estimator = new ColumnTypeEstimator();
}
info.name = formattedValue;
}else if (headerState==1){
if(!Strings.isEmpty(formattedValue)){
if(col < colInfos.size()){
ColInfo info = colInfos.get(col);
if(info.estimator!=null){
info.estimator.processValue(formattedValue);
}
}
}
}
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// TODO Auto-generated method stub
}
ODLTableDefinition createTableDefinition(){
ODLDatastoreAlterable<ODLTableAlterable> dsDfn = ODLFactory.createAlterable();
ODLTableDefinitionAlterable ret= dsDfn.createTable(sheetName, -1);
for(int i =0 ; i <colInfos.size();i++){
ColInfo info = colInfos.get(i);
String name = PoiIO.getValidNewColumnName(info.name, ret);
if(info.dfn!=null){
PoiIO.addColumnFromDfn(info.dfn, name, i, ret);
}else{
ret.addColumn(i, name,useEstimators? info.estimator.getEstimatedType():ODLColumnType.STRING, 0);
}
}
return ret;
}
}
/**
* Handler to read into predefined table
* @author Phil
*
*/
private class ReadIntoTableHandler implements SheetContentsHandler{
private final ODLTable table;
private final int minRowInclusive;
private final int maxRowExclusive;
private int currentOutputRow=-1;
private ReadIntoTableHandler(ODLTable table, int minRowInclusive, int maxRowExclusive) {
this.table = table;
this.minRowInclusive = minRowInclusive;
this.maxRowExclusive = maxRowExclusive;
}
@Override
public void startRow(int rowNum) {
throwIfUserQuit();
if(rowNum>=minRowInclusive && rowNum < maxRowExclusive){
currentOutputRow = table.createEmptyRow(-1);
}else{
currentOutputRow = -1;
}
if(timer.isUpdate()){
postStatus("row " + (rowNum+1));
}
}
@Override
public void endRow(int row) {
}
@Override
public void cell(String cellReference, String formattedValue,XSSFComment comment) {
if(currentOutputRow!=-1){
CellReference ref = new CellReference(cellReference);
int col = ref.getCol();
if(col < table.getColumnCount()){
table.setValueAt(formattedValue, currentOutputRow, col);
}
}
}
@Override
public void headerFooter(String text, boolean isHeader, String tagName) {
// TODO Auto-generated method stub
}
}
private SchemaSheetInformation importSchema(XSSFReader r,StylesTable styles ,ReadOnlySharedStringsTable sst ){
try {
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.getSheetsData();
while(it.hasNext()) {
try(InputStream sheet = it.next()){
String name = it.getSheetName();
if(Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)){
InputSource sheetSource = new InputSource(sheet);
ReadSchemaSheet readSchemaSheet = new ReadSchemaSheet();
parseSheet(styles, sst, sheetSource, readSchemaSheet);
return readSchemaSheet.finish(report);
}
}
}
} catch (Exception e) {
throw new RuntimeException(e);
}
return null;
}
private void parseSheet(StylesTable styles, ReadOnlySharedStringsTable sst, InputSource sheetSource, SheetContentsHandler handler) {
try {
createSheetParser( new XSSFSheetXMLHandler(styles, sst, handler, false)).parse(sheetSource);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void postStatus(String s){
if(processingApi!=null && baseMessage!=null){
processingApi.postStatusMessage(baseMessage + (s!=null? " - " + s:""));
}
}
private void doImport() {
if(!file.exists()){
throw new RuntimeException("Excel file does not exist: " + file.getAbsolutePath());
}
OPCPackage pkg = null;
try {
pkg =OPCPackage.open(file);
importOPCPackage(pkg);
// revert for read-only closing
pkg.revert();
if(processingApi!=null){
processingApi.postStatusMessage("Finished loading, now opening file...");
}
} catch (Exception e) {
if(pkg!=null){
// revert for read-only closing
pkg.revert();
}
report.setFailed(e);
throw new RuntimeException(e);
}
}
private void doImport(InputStream stream) {
try (OPCPackage pkg =OPCPackage.open(stream)){
importOPCPackage(pkg);
} catch (Exception e) {
report.setFailed(e);
throw new RuntimeException(e);
}
}
/**
* @param pkg
* @throws IOException
* @throws OpenXML4JException
* @throws InvalidFormatException
* @throws SAXException
*/
private void importOPCPackage(OPCPackage pkg) throws IOException, OpenXML4JException, InvalidFormatException, SAXException {
XSSFReader r = new XSSFReader( pkg );
StylesTable styles = r.getStylesTable();
ReadOnlySharedStringsTable sst =new ReadOnlySharedStringsTable(pkg);
SchemaSheetInformation schema = importSchema(r,styles ,sst );
// read table definitions first
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) r.getSheetsData();
ArrayList<Integer> tableIndices = new ArrayList<>();
ArrayList<Integer> headerRows = new ArrayList<>();
while(it.hasNext()) {
try(InputStream sheet = it.next()){
String name = it.getSheetName();
if(!Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)){
baseMessage = "Loading Excel, analysing sheet " + name;
postStatus(null);
InputSource sheetSource = new InputSource(sheet);
ReadTableDefinition rtd = new ReadTableDefinition(name,schema!=null? schema.schema:null,true);
parseSheet(styles, sst, sheetSource, rtd);
tableIndices.add(ds.getTableCount());
headerRows.add(rtd.getHeaderRow());
DatastoreCopier.copyTableDefinition(rtd.createTableDefinition(), ds);
}
}
}
// Then actual tables
it = (XSSFReader.SheetIterator) r.getSheetsData();
int i =0 ;
while(it.hasNext()) {
try(InputStream sheet = it.next()){
InputSource sheetSource = new InputSource(sheet);
String name = it.getSheetName();
if(!Strings.equalsStd(PoiIO.SCHEMA_SHEET_NAME, name)){
baseMessage = "Loading sheet " + name + " into memory";
postStatus(null);
ODLTable table = ds.getTableAt(i);
int headerRow = headerRows.get(i);
ReadIntoTableHandler readerHandler = new ReadIntoTableHandler(table, headerRow+1, Integer.MAX_VALUE);
parseSheet(styles, sst, sheetSource, readerHandler);
i++;
}
}
}
}
static void importExcel(InputStream stream, ODLDatastoreAlterable<ODLTableAlterable> ds,ProcessingApi processingApi, ExecutionReport report) {
new XmlParserLoader(null, ds, processingApi, report).doImport(stream);
}
static ODLDatastoreAlterable<ODLTableAlterable> importExcel(File file,ProcessingApi processingApi, ExecutionReport report) {
ODLDatastoreAlterable<ODLTableAlterable> ret = ODLFactory.createAlterable();
importExcel(file, ret,processingApi, report);
return ret;
}
static void importExcel(File file, ODLDatastoreAlterable<ODLTableAlterable> ds,ProcessingApi processingApi, ExecutionReport report) {
new XmlParserLoader(file,ds,processingApi,report).doImport();
}
public static void main(String[] args) throws Exception {
ODLDatastoreAlterable<ODLTableAlterable> ret = ODLFactory.createAlterable();
ExecutionReport report = new ExecutionReportImpl();
//File file = new File("C:\\Users\\Phil\\Dropbox\\Business\\ODL\\Testing\\VRP\\demoVRP.xlsx");
// File file = new File("C:\\temp\\TestFromLibreOffice.xlsx");
File file = new File("C:\\temp\\testloading.xlsx");
System.out.println("Started loading");
importExcel(file, ret,new ProcessingApi() {
@Override
public ODLApi getApi() {
// TODO Auto-generated method stub
return null;
}
@Override
public boolean isFinishNow() {
// TODO Auto-generated method stub
return false;
}
@Override
public boolean isCancelled() {
// TODO Auto-generated method stub
return false;
}
@Override
public void postStatusMessage(String s) {
System.out.println(s);
}
@Override
public void logWarning(String warning) {
// TODO Auto-generated method stub
}
}, report);
System.out.println("Finished loading");
System.out.println(ret);
// XmlParserLoader example = new XmlParserLoader();
// example.processOneSheet(args[0]);
// example.processAllSheets("C:\\Users\\Phil\\Dropbox\\Business\\ODL\\Testing\\VRP\\demoVRP.xlsx");
//example.processAllSheets("C:\\temp\\NumberFormatting.xlsx");
}
}