/**
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at the
* <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a>
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Initial code contributed and copyrighted by<br>
* frentix GmbH, http://www.frentix.com
* <p>
*/
package org.olat.search.service.document.file.pdf;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.vfs.LocalFileImpl;
import org.olat.core.util.vfs.VFSLeaf;
import org.olat.search.SearchModule;
import org.olat.search.service.document.file.DocumentAccessException;
/**
* The extractor calls an external process: command pdf txt
*
*
* Initial date: 19.07.2013<br>
* @author srosse, stephane.rosse@frentix.com, http://www.frentix.com
*
*/
public class PdfExternalExtractor implements PdfExtractor {

	private static final OLog log = Tracing.createLoggerFor(PdfExternalExtractor.class);

	/** Maximum time, in milliseconds, the caller waits for the external command. */
	private static final long TIMEOUT_MS = 3000;

	private SearchModule searchModule;

	/**
	 * [used by Spring]
	 * @param searchModule The module which provides the external indexer command
	 */
	public void setSearchModule(SearchModule searchModule) {
		this.searchModule = searchModule;
	}

	/**
	 * Runs the configured external command with two arguments: the absolute
	 * path of the document and the absolute path of the buffer file. The
	 * command is started in a separate thread and is destroyed if it has not
	 * finished within {@link #TIMEOUT_MS} milliseconds.
	 * <p>
	 * Documents which are not backed by a local file are skipped with a warning.
	 *
	 * @param document The PDF to process, must be a {@link LocalFileImpl} to be handled
	 * @param bufferFile The file whose path is handed to the command as second
	 *        argument (presumably the target of the extracted text)
	 * @throws IOException Declared by the interface
	 * @throws DocumentAccessException Declared by the interface
	 */
	@Override
	public void extract(VFSLeaf document, File bufferFile)
	throws IOException, DocumentAccessException {
		if(!(document instanceof LocalFileImpl)) {
			log.warn("Can only index local file");
			return;
		}

		String sourcePath = ((LocalFileImpl)document).getBasefile().getAbsolutePath();

		List<String> cmds = new ArrayList<String>();
		cmds.add(searchModule.getPdfExternalIndexerCmd());
		cmds.add(sourcePath);
		cmds.add(bufferFile.getAbsolutePath());

		CountDownLatch doneSignal = new CountDownLatch(1);
		ProcessWorker worker = new ProcessWorker(cmds, doneSignal);
		worker.start();

		try {
			if(!doneSignal.await(TIMEOUT_MS, TimeUnit.MILLISECONDS)) {
				log.warn("External PDF extraction timed out after " + TIMEOUT_MS + "ms: " + sourcePath);
			}
		} catch (InterruptedException e) {
			log.error("", e);
			// keep the interrupt visible to the caller
			Thread.currentThread().interrupt();
		} finally {
			// kills a process which is still running; harmless if it has already exited
			worker.destroyProcess();
		}
	}

	/**
	 * Drains the output of the process and waits for its termination.
	 * The process is expected to have been started with its error stream
	 * redirected into its standard output, so that reading a single stream
	 * is enough and the process cannot block on a full pipe buffer.
	 *
	 * @param proc The running process
	 */
	private static void executeProcess(Process proc) {
		StringBuilder output = new StringBuilder();
		BufferedReader reader = new BufferedReader(new InputStreamReader(proc.getInputStream()));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				output.append(line).append('\n');
			}
		} catch (IOException e) {
			// typically raised if the process is destroyed while its output is being read
			if(log.isDebug()) {
				log.debug("Reading output of PDF extraction process aborted: " + e.getMessage());
			}
		} finally {
			try {
				reader.close();
			} catch (IOException e) {
				// nothing more can be done with a stream which fails to close
			}
		}

		try {
			int exitValue = proc.waitFor();
			if(log.isDebug()) {
				log.debug("PDF extracted: " + exitValue);
			}
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();
		}

		if(log.isDebug() && output.length() > 0) {
			log.debug(output.toString());
		}
	}

	/**
	 * Thread which starts the external command, consumes its output and
	 * signals the latch once it is done, whether it succeeded or failed.
	 */
	private static class ProcessWorker extends Thread {

		private volatile Process process;

		private final List<String> cmd;
		private final CountDownLatch doneSignal;

		public ProcessWorker(List<String> cmd, CountDownLatch doneSignal) {
			this.cmd = cmd;
			this.doneSignal = doneSignal;
		}

		/**
		 * Destroys the process if one has been started. Can be called from
		 * any thread and more than once.
		 */
		public void destroyProcess() {
			// work on a snapshot, the field can be cleared by another thread
			Process running = process;
			if (running != null) {
				process = null;
				running.destroy();
			}
		}

		@Override
		public void run() {
			try {
				if(log.isDebug()) {
					log.debug(cmd.toString());
				}

				ProcessBuilder builder = new ProcessBuilder(cmd);
				// merge stderr into stdout so a single reader drains everything
				builder.redirectErrorStream(true);
				Process started = builder.start();
				process = started;
				// use the local reference, the field may be cleared by destroyProcess()
				executeProcess(started);
			} catch (IOException e) {
				log.error ("Could not spawn convert sub process", e);
				destroyProcess();
			} finally {
				// always release the waiting caller, even if the command could not be started
				doneSignal.countDown();
			}
		}
	}
}