/*
* $Id$
*
* Copyright 2008 Glencoe Software, Inc. All rights reserved.
* Use is subject to license terms supplied in LICENSE.txt
*/
package ome.services.fulltext;
import java.io.File;
import java.io.IOException;
import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.Reader;
import ome.services.messages.ParserOpenFileMessage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
* {@link FileParser} for "application/pdf" files using <a
* href="http://pdfbox.org/">PDFBox</a>.
*
* @author Josh Moore, josh at glencoesoftware.com
*/
public class PdfParser extends FileParser {

    private static final Logger log = LoggerFactory.getLogger(PdfParser.class);

    /**
     * Starts a background {@link PdfThread} for the given file and returns
     * the read end of its pipe.
     *
     * Before the thread is started, a {@link ParserOpenFileMessage} is
     * published whose {@code close()} delegates to {@link PdfThread#close()},
     * logging (not rethrowing) any failure. Presumably a listener uses that
     * message to release the thread's resources later; the listener is not
     * part of this file.
     *
     * @param file
     *            the PDF file to extract text from
     * @return the thread's reader, passed through {@code wrap}
     * @throws Exception
     *             if the thread (and its pipe) cannot be created
     */
    @Override
    public Iterable<Reader> doParse(File file) throws Exception {
        final PdfThread worker = new PdfThread(file);

        // Close hook handed to whoever receives the message.
        final ParserOpenFileMessage openFile = new ParserOpenFileMessage(this,
                worker) {
            @Override
            public void close() {
                try {
                    worker.close();
                } catch (Exception e) {
                    log.warn("Error closing PdfThread " + worker, e);
                }
            }
        };
        this.context.publishEvent(openFile);

        worker.start();
        final Reader text = worker.getReader();
        return wrap(text);
    }
}
/**
 * Thread which loads a PDF file with PDFBox and writes its extracted text
 * into a {@link PipedWriter}; the connected {@link PipedReader} is handed out
 * via {@link #getReader()}.
 *
 * {@link #close()} may be invoked both by this thread (at the end of
 * {@link #run()}) and by other threads, and may therefore be called more than
 * once.
 */
class PdfThread extends Thread {

    private final static Logger log = LoggerFactory.getLogger(PdfThread.class);

    final File file;

    final PipedWriter writer;

    final PipedReader reader;

    /**
     * Assigned by {@link #run()} on this thread and read by {@link #close()},
     * which can be called from a different thread; volatile so that the
     * closing thread sees the loaded document.
     */
    volatile PDDocument document = null;

    /**
     * Creates the thread and the connected pipe. Nothing is read from the
     * file until the thread is started.
     *
     * @param file
     *            the PDF file to extract text from
     * @throws IOException
     *             if the pipe cannot be connected
     */
    PdfThread(File file) throws IOException {
        this.file = file;
        reader = new PipedReader();
        writer = new PipedWriter(reader);
    }

    /**
     * @return the read end of the pipe which receives the extracted text
     */
    Reader getReader() {
        return reader;
    }

    /**
     * Loads the document and, unless it is encrypted, writes its text to the
     * pipe. I/O failures are logged rather than propagated.
     *
     * Whatever happens (load failure, encrypted document, extraction error or
     * an unchecked exception from PDFBox) {@link #close()} is called before
     * the thread ends, so the document is released and the consumer of the
     * reader observes end-of-stream instead of waiting on a pipe that will
     * never be written to.
     */
    @Override
    public void run() {
        try {
            final PDDocument loaded;
            try {
                loaded = PDDocument.load(file);
            } catch (IOException e) {
                log.warn("Could not load Pdf " + file, e);
                return; // the finally block below closes the writer
            }
            // Publish before use so that a concurrent close() can release it.
            document = loaded;

            // Encrypted documents are skipped: no text is written for them.
            if (!loaded.isEncrypted()) {
                PDFTextStripper stripper = new PDFTextStripper();
                stripper.writeText(loaded, writer);
            }
        } catch (IOException e) {
            log.warn("Error reading pdf file", e);
        } finally {
            close();
        }
    }

    /**
     * Closes the writer and, if one has been loaded, the PDF document.
     * Failures are logged and never thrown.
     */
    public void close() {
        try {
            writer.close();
        } catch (Exception e) {
            log.warn("Error closing writer", e);
        }

        // Read the field once so the null check and the call use the same
        // reference.
        final PDDocument doc = document;
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception e) {
                log.warn("Error closing PDF document", e);
            }
        }
    }
}