package de.uni_goettingen.sub.commons.ocr.abbyy.server; /* Copyright 2010 SUB Goettingen. All rights reserved. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ import java.util.List; import java.util.Properties; import java.util.Queue; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import de.uni_goettingen.sub.commons.ocr.api.AbstractEngine; import de.uni_goettingen.sub.commons.ocr.api.OcrEngine; import de.uni_goettingen.sub.commons.ocr.api.OcrProcess; public class AbbyyEngine extends AbstractEngine implements OcrEngine { private final static Logger logger = LoggerFactory.getLogger(AbbyyEngine.class); private Queue<AbbyyProcess> processesQueue = new ConcurrentLinkedQueue<AbbyyProcess>(); private Properties props; // for unit tests ProcessSplitter createProcessSplitter() { return new ProcessSplitter(); } protected ThreadPoolExecutor createPool(int maxParallelThreads) { return new OcrExecutor(maxParallelThreads); } protected LockFileHandler createLockHandler() { return new LockFileHandler(); } public void initialize(Properties initProps) { props = initProps; } @Override public void addOcrProcess(OcrProcess process) { AbbyyProcess abbyyProcess = (AbbyyProcess) process; if (abbyyProcess.hasImagesAndOutputs()) { processesQueue.add(abbyyProcess); } } @Override public void recognize() { // This is not about multi-threading, it is just to prevent a second call of the method in the same thread if (started) { logger.warn("Recognition is already running and cannot be started a second time."); return; } else { started = true; } if (processesQueue.isEmpty()) { logger.warn("Cannot start recognition, there are no processes."); return; } performRecognition(); } private void performRecognition() { String overwrite = props.getProperty("lock.overwrite"); boolean overwriteLock = "true".equals(overwrite); LockFileHandler lockHandler = createLockHandler(); lockHandler.initConnection(props.getProperty("serverUrl"), props.getProperty("user"), props.getProperty("password")); lockHandler.createOrOverwriteLock(overwriteLock); ThreadPoolExecutor pool = createPool(Integer.parseInt(props.getProperty("maxParallelProcesses"))); while (!processesQueue.isEmpty()) { AbbyyProcess process = processesQueue.poll(); boolean split = "true".equals(props.getProperty("books.split")); if (split) { int splitSize = Integer.parseInt(props.getProperty("maxImagesInSubprocess")); ProcessSplitter processSplitter = createProcessSplitter(); List<AbbyyProcess> subProcesses = processSplitter.split(process, splitSize); for (AbbyyProcess subProcess : subProcesses) { pool.execute(subProcess); } } else { pool.execute(process); } } pool.shutdown(); try { pool.awaitTermination(100, TimeUnit.DAYS); } catch (InterruptedException e) { logger.error("Got a problem with thread pool: ", e); } lockHandler.deleteLockAndCleanUp(); started = false; } @Override public int getEstimatedDurationInSeconds() { long durationInMillis = 0; for (OcrProcess process : processesQueue) { long imagesInProcess = process.getNumberOfImages(); durationInMillis += imagesInProcess * Integer.parseInt(props.getProperty("minMillisPerFile")); } return (int) (durationInMillis / 1000); } }