package org.bbaw.wsp.cms.dochandler.parser.text.parser;
import java.io.InputStream;
import java.util.TreeMap;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.odf.OpenDocumentParser;
import org.bbaw.wsp.cms.dochandler.parser.document.CharCodeManager;
import org.bbaw.wsp.cms.dochandler.parser.document.GeneralDocument;
import org.xml.sax.ContentHandler;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
/**
* The ODFParser. It uses the Singleton pattern. Only one instance can exist.
*
* @author Sascha Feldmann (wsp-shk1)
* @date 08.08.2012
*
*/
public class OdfParserImpl extends ResourceParser {
  /** The lazily created sole instance; only touched inside {@link #getInstance()}. */
  private static OdfParserImpl instance;

  /**
   * Return the only existing instance, creating it on the first call. The
   * instance uses an Apache TIKA OpenDocument parser.
   *
   * The method is synchronized so that concurrent first calls cannot create
   * two different instances.
   *
   * @return the shared {@link OdfParserImpl} instance
   */
  public static synchronized OdfParserImpl getInstance() {
    if (instance == null) {
      // Remember the new object; otherwise every call would build a fresh
      // parser and the singleton contract would never hold.
      instance = new OdfParserImpl();
    }
    return instance;
  }

  /**
   * Private constructor (singleton); hands an {@link OpenDocumentParser} to
   * the superclass.
   */
  private OdfParserImpl() {
    super(new OpenDocumentParser());
  }

  /**
   * Parse the resource twice: a first pass with an {@link OdfHandler} collects
   * the footnotes, a second pass through the superclass produces the document
   * with its full text. Each footnote occurrence in the full text (footnote
   * number directly followed by the footnote text) is then replaced by the
   * superscript form of its number, and the list of footnotes is appended to
   * the text, separated by an empty line.
   *
   * @param startUri
   *          passed through unchanged to the superclass' parse method
   * @param uri
   *          the location of the resource to parse; must not be null or empty
   * @return the {@link GeneralDocument} returned by the superclass, with its
   *         original text rewritten as described above
   * @throws IllegalArgumentException
   *           if uri is null or empty
   * @throws IllegalStateException
   *           if no save strategy has been set
   * @throws ApplicationException
   *           if anything fails while reading or parsing the resource
   */
  public Object parse(final String startUri, final String uri) throws ApplicationException {
    if (uri == null || uri.isEmpty()) {
      throw new IllegalArgumentException("The value for the parameter parser in the method parse() in ResourceParser mustn't be empty.");
    }
    if (this.saveStrategy == null) {
      throw new IllegalStateException("You must define a saveStategy before calling the parse()-method in ResourceParser.");
    }
    try {
      // First pass: receive the footnotes.
      final TreeMap<Integer, String> footnotes = readFootnotes(uri);
      // Second pass: the superclass delivers the document including its full text.
      final GeneralDocument doc = (GeneralDocument) super.parse(startUri, uri);
      doc.setTextOrig(mergeFootnotes(doc.getTextOrig(), footnotes));
      return doc;
    } catch (Exception e) {
      throw new ApplicationException("Problem while parsing file " + uri + " -- exception: " + e.getMessage() + "\n");
    }
  }

  /**
   * Run the parser over the resource with an {@link OdfHandler} that fills a
   * map of footnotes. The input stream is closed in all cases, also when the
   * parser fails.
   *
   * Declared with a broad throws clause because the caller wraps every
   * failure into an ApplicationException anyway.
   *
   * @param uri
   *          the location of the resource to read
   * @return the footnotes collected by the handler, keyed by an integer
   *         (used as the footnote number by the caller)
   * @throws Exception
   *           if reading, parsing or closing the stream fails
   */
  private TreeMap<Integer, String> readFootnotes(final String uri) throws Exception {
    final InputStream input = this.resourceReader.read(uri);
    try {
      final TreeMap<Integer, String> footnotes = new TreeMap<Integer, String>();
      final ContentHandler footnoteHandler = new OdfHandler(footnotes);
      this.parser.parse(input, footnoteHandler, new Metadata(), new ParseContext());
      return footnotes;
    } finally {
      // Release the stream even if the parser threw.
      if (input != null) {
        input.close();
      }
    }
  }

  /**
   * Replace the footnote marks in the text by superscript characters and
   * append the footnotes below the text.
   *
   * @param text
   *          the full text of the document
   * @param footnotes
   *          the footnotes, iterated in ascending key order
   * @return the text with replaced footnote marks, followed by an empty line
   *         and one line per footnote
   */
  private static String mergeFootnotes(final String text, final TreeMap<Integer, String> footnotes) {
    String body = text;
    final StringBuilder appendix = new StringBuilder();
    for (final int key : footnotes.keySet()) {
      final String note = footnotes.get(key);
      final String mark = "" + CharCodeManager.returnNumberSuperscript(key);
      // In the full text a footnote shows up as its number directly followed
      // by the footnote text; swap that for the superscript mark.
      body = body.replace(key + note, mark);
      appendix.append(mark).append(" ").append(note).append("\n");
    }
    return body + "\n\n" + appendix;
  }
}