/**
* Copyright 2015 Eediom Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.araqne.logdb.xlsx;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.araqne.api.Io;
import org.araqne.log.api.Log;
import org.araqne.log.api.LogPipe;
import org.araqne.log.api.Logger;
import org.araqne.log.api.SimpleLog;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class XlsxExtractor {
private File file;
private String sheetName;
private Logger logger;
private LogPipe pipe;
private long offset;
private long limit;
private long skip;
private volatile boolean cancelled;
private SheetHandler handler;
public static List<String> getSheetNames(File f) throws IOException, InvalidFormatException {
List<String> sheetNames = new ArrayList<String>();
XSSFWorkbook wb = null;
try {
wb = new XSSFWorkbook(f);
for (int i = 0; i < wb.getNumberOfSheets(); i++) {
XSSFSheet sheet = wb.getSheetAt(i);
sheetNames.add(sheet.getSheetName());
}
return sheetNames;
} finally {
if (wb != null)
wb.close();
}
}
public XlsxExtractor(File file, String sheetName, Logger logger, LogPipe pipe, long offset, long limit, long skip) {
this.file = file;
this.sheetName = sheetName;
this.logger = logger;
this.pipe = pipe;
this.offset = offset;
this.limit = limit;
this.skip = skip;
}
public long getTotalCount() {
return handler != null ? handler.totalCount : 0;
}
public long getOutputCount() {
return handler != null ? handler.outputCount : 0;
}
public void cancel() {
cancelled = true;
}
public void run() throws IOException, SAXException, OpenXML4JException {
OPCPackage pkg = null;
XSSFWorkbook wb = null;
InputStream is = null;
try {
pkg = OPCPackage.open(file);
wb = new XSSFWorkbook(file);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XSSFSheet sheet = wb.getSheet(sheetName);
String rId = wb.getRelationId(sheet);
is = r.getSheet(rId);
InputSource sheetSource = new InputSource(is);
XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
handler = new SheetHandler(sst);
parser.setContentHandler(handler);
parser.parse(sheetSource);
// last line (consider header line for offset)
if (++handler.lineIndex > skip + offset + 1) {
Log log = new SimpleLog(new Date(), logger == null ? null : logger.getFullName(), handler.data);
pipe.onLog(logger, log);
handler.outputCount++;
}
handler.totalCount++;
} catch (SAXException e) {
if (e.getMessage() == null || !e.getMessage().contains("max limit"))
throw e;
} finally {
Io.ensureClose(is);
Io.ensureClose(pkg);
Io.ensureClose(wb);
}
}
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String value;
private boolean nextIsString;
private List<String> headers = new ArrayList<String>();
// current line number
private long lineIndex;
// current column number
private int columnIndex;
private long totalCount;
private long outputCount;
private Map<String, Object> data = new HashMap<String, Object>();
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
if (cancelled)
throw new SAXException("cancelled");
// c => cell
if (name.equals("c")) {
// Print the cell reference
String cellId = attributes.getValue("r");
int lineNum = getLineNum(cellId) - 1;
if (lineIndex != lineNum) {
columnIndex = 0;
if (lineIndex >= skip)
totalCount++;
}
if (lineIndex > skip + offset && lineIndex != lineNum) {
Log log = new SimpleLog(new Date(), logger == null ? null : logger.getFullName(), data);
pipe.onLog(logger, log);
outputCount++;
data = new HashMap<String, Object>();
if (limit <= outputCount)
throw new SAXException("max limit reached");
}
lineIndex = lineNum;
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
nextIsString = (cellType != null && cellType.equals("s"));
}
// Clear contents cache
value = null;
}
private int getLineNum(String cellId) {
int len = cellId.length();
int lineNum = 0;
// skip alphabet
int i = 0;
for (i = 0; i < len; i++) {
char c = cellId.charAt(i);
if (c >= '0' && c <= '9')
break;
}
for (; i < len; i++) {
lineNum *= 10;
char c = cellId.charAt(i);
if (c >= '0' && c <= '9')
lineNum += c - '0';
}
return lineNum;
}
public void endElement(String uri, String localName, String name) throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(value);
value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
if (lineIndex == skip) {
if (value == null || value.trim().isEmpty())
value = "column" + columnIndex;
headers.add(value);
} else {
if (columnIndex < headers.size()) {
String key = headers.get(columnIndex);
data.put(key, value);
} else {
data.put("column" + columnIndex, value);
}
}
columnIndex++;
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
if (value == null)
value = new String(ch, start, length);
else
value += new String(ch, start, length);
}
}
}