package de.uni_goettingen.sub.commons.ocr.abbyy.server;

import java.util.ArrayList;
import java.util.List;

import de.uni_goettingen.sub.commons.ocr.api.OcrFormat;
import de.uni_goettingen.sub.commons.ocr.api.OcrImage;

/**
 * Splits an {@link AbbyyProcess} into several sub-processes, each holding at
 * most a given number of images, and wires every sub-process to a
 * {@link ProcessMergingObserver}.
 */
public class ProcessSplitter {

    // NOTE(review): this single observer instance is reused by every call to
    // split() on the same splitter; confirm that a splitter is only ever used
    // for one parent process, or that the observer tolerates being reused.
    private ProcessMergingObserver mergingObserver = new ProcessMergingObserver();

    // for unit tests
    void setProcessMergingObserver(ProcessMergingObserver newObserver) {
        mergingObserver = newObserver;
    }

    /**
     * Splits the given process into sub-processes of at most {@code splitSize}
     * images each.
     *
     * <p>If the process has no more than {@code splitSize} images, it is not
     * split and the returned list contains only the process itself; the
     * merging observer is not touched in that case. Otherwise the process is
     * set as the observer's parent process, and every created sub-process gets
     * the observer as its merger and is added to the observer.
     *
     * @param process the process to split; must contain at least one image
     * @param splitSize maximum number of images per sub-process; must be positive
     * @return the process itself in a single-element list, or the created
     *         sub-processes in the order of their image chunks
     * @throws IllegalArgumentException if the process has no images or if
     *         {@code splitSize} is not positive
     */
    public List<AbbyyProcess> split(AbbyyProcess process, int splitSize) {
        if (process.getNumberOfImages() == 0) {
            throw new IllegalArgumentException("Cannot split the process, it has no images: " + process.getName());
        }
        // A split size of zero would make the chunking loop never advance, and
        // a negative one would only fail deep inside subList(); reject both
        // here, before any state of the merging observer is changed.
        if (splitSize <= 0) {
            throw new IllegalArgumentException("Cannot split the process, split size must be positive: " + splitSize);
        }

        if (process.getNumberOfImages() <= splitSize) {
            List<AbbyyProcess> sp = new ArrayList<AbbyyProcess>();
            sp.add(process);
            return sp;
        }

        mergingObserver.setParentProcess(process);
        List<AbbyyProcess> subProcesses = createSubProcesses(process, splitSize);
        for (AbbyyProcess subProcess : subProcesses) {
            subProcess.setMerger(mergingObserver);
            mergingObserver.addSubProcess(subProcess);
        }
        return subProcesses;
    }

    /**
     * Creates one sub-process per image chunk. Each sub-process receives the
     * local URIs of the images in its chunk, a name of the form
     * {@code <parentName>_<index>of<total>} (index starting at 1), a process
     * id built from the parent's process id followed by that name, and all
     * output formats of the parent.
     */
    private List<AbbyyProcess> createSubProcesses(AbbyyProcess process, int splitSize) {
        List<AbbyyProcess> subProcesses = new ArrayList<AbbyyProcess>();
        List<List<OcrImage>> imageChunks = splitImages(process.getImages(), splitSize);

        int chunkIndex = 1;
        int numberOfChunks = imageChunks.size();
        for (List<OcrImage> chunk : imageChunks) {
            AbbyyProcess subProcess = process.createSubProcess();
            for (OcrImage imageFromChunk : chunk) {
                subProcess.addImage(imageFromChunk.getLocalUri());
            }
            String subProcessName = process.getName() + "_" + chunkIndex + "of" + numberOfChunks;
            subProcess.setName(subProcessName);
            subProcess.setProcessId(process.getProcessId() + subProcessName);
            addOutputsToSubProcess(subProcess, process);
            subProcesses.add(subProcess);
            chunkIndex++;
        }
        return subProcesses;
    }

    /**
     * Cuts the image list into consecutive chunks of {@code chunkSize}
     * elements; the last chunk may be shorter. Each chunk is an independent
     * copy, not a view onto {@code allImages}.
     *
     * @param chunkSize number of images per chunk; callers must pass a
     *        positive value, otherwise the loop below cannot make progress
     */
    private List<List<OcrImage>> splitImages(List<OcrImage> allImages, int chunkSize) {
        List<List<OcrImage>> allChunks = new ArrayList<List<OcrImage>>();
        for (int from = 0; from < allImages.size(); from += chunkSize) {
            int to = Math.min(from + chunkSize, allImages.size());
            List<OcrImage> chunk = new ArrayList<OcrImage>(allImages.subList(from, to));
            allChunks.add(chunk);
        }
        return allChunks;
    }

    /** Adds every output format of {@code process} to {@code subProcess}. */
    private void addOutputsToSubProcess(AbbyyProcess subProcess, AbbyyProcess process) {
        for (OcrFormat outputFormat : process.getAllOutputFormats()) {
            subProcess.addOutput(outputFormat);
        }
    }
}