/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service.document.file;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.lucene.document.Document;
import org.olat.core.gui.util.CSSHelper;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.io.LimitedContentWriter;
import org.olat.core.util.io.ShieldInputStream;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.search.service.SearchResourceContext;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
*
* Description:<br>
* Parse the Microsoft Excel Open XML spreadsheet (.xlsx) with a SAX parser
*
* <P>
* Initial Date: 5 nov. 2012<br>
* @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
*/
public class ExcelOOXMLDocument extends FileDocument {
	private static final long serialVersionUID = 2322994231200065526L;
	private static final OLog log = Tracing.createLoggerFor(ExcelOOXMLDocument.class);

	/** Prefix of the zip entry names which are parsed as worksheets. */
	private static final String SHEET = "xl/worksheets/sheet";
	/** Suffix of the zip entry name which is parsed as the shared strings table. */
	private static final String SHARED_STRINGS = "xl/sharedStrings.xml";

	public static final String EXCEL_FILE_TYPE = "type.file.excel";

	/**
	 * Build the Lucene document of an Excel file.
	 *
	 * @param leafResourceContext The search context handed over to <code>init</code>
	 * @param leaf The file to index
	 * @return The Lucene document produced by <code>getLuceneDocument()</code>
	 * @throws IOException
	 * @throws DocumentException
	 * @throws DocumentAccessException
	 */
	public static Document createDocument(SearchResourceContext leafResourceContext, VFSLeaf leaf) throws IOException, DocumentException,
			DocumentAccessException {
		ExcelOOXMLDocument officeDocument = new ExcelOOXMLDocument();
		officeDocument.init(leafResourceContext, leaf);
		officeDocument.setFileType(EXCEL_FILE_TYPE);
		officeDocument.setCssIcon(CSSHelper.createFiletypeIconCssClassFor(leaf.getName()));
		if (log.isDebug()) {
			log.debug(officeDocument.toString());
		}
		return officeDocument.getLuceneDocument();
	}

	/**
	 * Read the text of the workbook in two passes over the archive: the
	 * shared strings table first, because the cells of the sheets only
	 * hold indexes into that table, then the sheets themselves.
	 *
	 * @param leaf The file to read
	 * @return The extracted text wrapped in a FileContent
	 */
	@Override
	public FileContent readContent(VFSLeaf leaf) throws IOException, DocumentException {
		//first step parse shared strings
		Map<String,String> sharedStrings = parseSharedStrings(leaf);
		//parse sheets
		String content = parseSheets(sharedStrings, leaf);
		return new FileContent(content);
	}

	/**
	 * Parse every zip entry whose name starts with {@link #SHEET} and ends
	 * with ".xml" and collect the text in a single writer. An entry is only
	 * parsed if the writer still accepts content.
	 *
	 * @param sharedStrings The shared strings, mapped by their index
	 * @param leaf The file to read
	 * @return The text collected by the writer
	 * @throws DocumentException If the archive or one of the sheets cannot be read
	 */
	private String parseSheets(Map<String,String> sharedStrings, VFSLeaf leaf) throws IOException, DocumentException {
		try(InputStream stream = leaf.getInputStream();
				ZipInputStream zip = new ZipInputStream(stream)) {
			ZipEntry entry = zip.getNextEntry();
			LimitedContentWriter writer = new LimitedContentWriter(100000, FileDocumentFactory.getMaxFileSize());
			while (entry != null) {
				if(writer.accept()) {
					String name = entry.getName();
					if(name.startsWith(SHEET) && name.endsWith(".xml")) {
						// the shield wraps the zip stream which must stay usable for the next entry
						parse(new ShieldInputStream(zip), new OfficeDocumentHandler(writer, sharedStrings));
					}
				}
				entry = zip.getNextEntry();
			}
			return writer.toString();
		} catch (DocumentException e) {
			throw e;
		} catch (Exception e) {
			// keep the cause, the message alone is often null or not helpful
			throw new DocumentException(e.getMessage(), e);
		}
	}

	/**
	 * Parse the first zip entry whose name ends with
	 * {@link #SHARED_STRINGS}.
	 *
	 * @param leaf The file to read
	 * @return The shared strings mapped by their index (as string), empty if
	 * 		the archive has no shared strings entry
	 * @throws DocumentException If the archive or the entry cannot be read
	 */
	private Map<String,String> parseSharedStrings(VFSLeaf leaf) throws IOException, DocumentException {
		SharedStringsHandler dh = new SharedStringsHandler();
		try(InputStream stream = leaf.getInputStream();
				ZipInputStream zip = new ZipInputStream(stream)) {
			ZipEntry entry = zip.getNextEntry();
			while (entry != null) {
				if(entry.getName().endsWith(SHARED_STRINGS)) {
					parse(new ShieldInputStream(zip), dh);
					break;
				}
				entry = zip.getNextEntry();
			}
			return dh.getMap();
		} catch (DocumentException e) {
			throw e;
		} catch (Exception e) {
			// keep the cause, the message alone is often null or not helpful
			throw new DocumentException(e.getMessage(), e);
		}
	}

	/**
	 * Run a SAX parser over the stream with the specified handler. The
	 * documents come from user files, therefore the resolution of external
	 * entities and external DTDs is switched off (XXE).
	 *
	 * @param stream The XML stream to parse
	 * @param handler The handler which receives the SAX events
	 * @throws DocumentException If the parser cannot be created or the parsing fails
	 */
	private void parse(InputStream stream, DefaultHandler handler) throws DocumentException {
		try {
			XMLReader parser = XMLReaderFactory.createXMLReader();
			parser.setContentHandler(handler);
			parser.setEntityResolver(handler);
			setFeatureQuietly(parser, "http://xml.org/sax/features/validation", false,
					"Cannot deactivate validation");
			setFeatureQuietly(parser, "http://xml.org/sax/features/external-general-entities", false,
					"Cannot deactivate external general entities");
			setFeatureQuietly(parser, "http://xml.org/sax/features/external-parameter-entities", false,
					"Cannot deactivate external parameter entities");
			setFeatureQuietly(parser, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false,
					"Cannot deactivate loading of external DTD");
			parser.parse(new InputSource(stream));
		} catch (Exception e) {
			throw new DocumentException("XML parser configuration error", e);
		}
	}

	/**
	 * Set a feature of the parser on a best effort basis: a parser which
	 * doesn't know or support the feature is only logged, the parsing goes on.
	 *
	 * @param parser The parser to configure
	 * @param feature The URI of the feature
	 * @param value The value of the feature
	 * @param errorMessage The message logged if the feature cannot be set
	 */
	private static void setFeatureQuietly(XMLReader parser, String feature, boolean value, String errorMessage) {
		try {
			parser.setFeature(feature, value);
		} catch(Exception e) {
			log.error(errorMessage, e);
		}
	}

	/**
	 * Collect the text of a worksheet. The text found in a cell of type "s"
	 * is used as key in the shared strings map and replaced by the mapped
	 * string, all other text is written as is. The chunks of text are
	 * separated by a space and the rows by a new line.
	 */
	private static class OfficeDocumentHandler extends DefaultHandler {
		/** true between the start and the end of a <code>row</code> element */
		private boolean row = false;
		/** true within a cell <code>c</code> with the attribute <code>t="s"</code> */
		private boolean sharedString = false;
		private final Map<String,String> strings;
		private final LimitedContentWriter writer;

		public OfficeDocumentHandler(LimitedContentWriter writer, Map<String,String> strings) {
			this.writer = writer;
			this.strings = strings;
		}

		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
			if("row".equals(qName)) {
				row = true;
			} else if (row && "c".equals(qName)) {
				if("s".equals(attributes.getValue("t"))) {
					sharedString = true;
				}
			}
		}

		@Override
		public void endElement(String uri, String localName, String qName) throws SAXException {
			if("row".equals(qName)) {
				row = false;
				appendSeparator('\n');
			} else if (row && "c".equals(qName)) {
				sharedString = false;
			}
		}

		@Override
		public void characters(char[] ch, int start, int length) {
			if(sharedString) {
				String key = new String(ch, start, length);
				String value = strings.get(key);
				// an unknown key writes nothing, not even a separator
				if(value != null) {
					appendSeparator(' ');
					writer.append(value);
				}
			} else {
				appendSeparator(' ');
				writer.write(ch, start, length);
			}
		}

		/**
		 * Append the separator, except if the writer is empty or
		 * already ends with this separator.
		 */
		private void appendSeparator(char separator) {
			int length = writer.length();
			if(length > 0 && writer.charAt(length - 1) != separator) {
				writer.append(separator);
			}
		}
	}

	/**
	 * Collect the shared strings. Every <code>si</code> element is an item
	 * of the table, its key in the map is its position (starting with 0) in
	 * the order of the document. The text of all the <code>t</code> elements
	 * within an item is concatenated, an item without text is mapped to an
	 * empty string.
	 */
	private static class SharedStringsHandler extends DefaultHandler {
		private int position = 0;
		/** true between the start and the end of a <code>t</code> element */
		private boolean text = false;
		private final StringBuilder sb = new StringBuilder();
		private final Map<String,String> strings = new HashMap<>();

		public Map<String,String> getMap() {
			return strings;
		}

		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
			if("si".equals(qName)) {
				// a new item begins, forget the text of the previous one
				sb.setLength(0);
			} else if("t".equals(qName)) {
				text = true;
			}
		}

		@Override
		public void endElement(String uri, String localName, String qName) throws SAXException {
			if("t".equals(qName)) {
				text = false;
			} else if("si".equals(qName)) {
				strings.put(Integer.toString(position), sb.toString().trim());
				position++;
			}
		}

		@Override
		public void characters(char[] ch, int start, int length) {
			// the parser is free to deliver the text in several chunks,
			// they are appended without separator to keep the words in one piece
			if(text) {
				sb.append(ch, start, length);
			}
		}
	}
}