package mj.ocraptor.extraction.image_processing;
import static mj.ocraptor.database.dao.ResultError.KILLED_FORCED;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.ZipEntry;
import mj.ocraptor.configuration.Config;
import mj.ocraptor.file_handler.utils.FileTools;
import mj.ocraptor.rmi_client.RMIClientImpl;
import mj.ocraptor.rmi_server.RMIServer;
import mj.ocraptor.tools.St;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;
/**
 * Collects text extracted from images of a single document and writes that
 * text into a Tika {@link XHTMLContentHandler}.
 *
 * <p>
 * Images are handed to an {@link ImageTextExtractor}; every extracted text
 * that is long enough is buffered until {@code addTextToHandler} flushes the
 * buffer into the handler.
 */
public class TikaImageHelper {
  /** Value of the {@code class} attribute of the div wrapping all image texts. */
  public static final String IMAGE_CONTAINER_CLASS = "imageContainer";

  /** Texts must be strictly longer than this to be buffered or emitted. */
  private static final int MIN_TEXT_LENGTH = 5;

  /** Size of the copy buffer used by {@link #saveZipEntryToTemp}. */
  private static final int COPY_BUFFER_SIZE = 1024;

  private final List<String> imageText;
  private final ImageTextExtractor ocrEngine;
  private String filePath;

  /**
   * Creates a helper backed by a new {@link ImageTextExtractorTess4j}.
   *
   * @param metadata
   *          metadata of the document being parsed; when non-null, the values
   *          stored under {@code Config.META_FILE_PATH} and
   *          {@code Config.META_FILE_NAME} are joined with
   *          {@link File#separator} and passed to the extractor with every
   *          image. When null, the file path stays {@code null}.
   */
  public TikaImageHelper(Metadata metadata) {
    this.imageText = new ArrayList<String>();
    this.ocrEngine = new ImageTextExtractorTess4j();
    if (metadata != null) {
      this.filePath = metadata.get(Config.META_FILE_PATH) + File.separator
          + metadata.get(Config.META_FILE_NAME);
    }
  }

  /**
   * Closes the underlying text extractor.
   */
  public void close() {
    if (this.ocrEngine != null) {
      this.ocrEngine.close();
    }
  }

  /**
   * Extracts the text of an in-memory image and buffers it. If the client is
   * in delayed shutdown, no extraction takes place and the
   * {@code KILLED_FORCED} error code is buffered instead.
   *
   * @param image
   *          image to process; {@code null} is ignored
   */
  public void addImage(BufferedImage image) {
    if (image == null) {
      return;
    }
    if (Config.inst().isClientDelayedShutdown()) {
      this.addImageText(KILLED_FORCED.getErrorCode());
      return;
    }
    this.addImageText(this.ocrEngine.extractText(image, this.filePath));
    this.reportImageProcessed();
  }

  /**
   * Extracts the text of an image file and buffers it. If the client is in
   * delayed shutdown, no extraction takes place and the {@code KILLED_FORCED}
   * error code is buffered instead.
   *
   * @param image
   *          image file to process; {@code null} is ignored
   */
  public void addImage(File image) {
    if (image == null) {
      return;
    }
    if (Config.inst().isClientDelayedShutdown()) {
      this.addImageText(KILLED_FORCED.getErrorCode());
      return;
    }
    this.addImageText(this.ocrEngine.extractText(image, this.filePath));
    this.reportImageProcessed();
  }

  /**
   * Tells the RMI server that this client processed one more image. A
   * {@link RemoteException} is printed and otherwise ignored so that a failed
   * notification does not abort the extraction.
   */
  private void reportImageProcessed() {
    final RMIClientImpl client = RMIClientImpl.instance();
    final RMIServer server = client.getServer();
    try {
      server.incrementImageCount(client);
    } catch (RemoteException e) {
      e.printStackTrace();
    }
  }

  /**
   * Buffers a text for the next {@code addTextToHandler} call.
   *
   * @param extractedText
   *          text to buffer; ignored when {@code null} or not longer than
   *          five characters
   */
  public void addImageText(String extractedText) {
    if (extractedText != null && extractedText.length() > MIN_TEXT_LENGTH) {
      this.imageText.add(extractedText);
    }
  }

  /**
   * Writes all buffered texts to the handler without page information and
   * clears the buffer.
   *
   * @param handler
   *          handler receiving the image texts
   *
   * @throws SAXException
   *           if the handler rejects an event
   */
  public void addTextToHandler(XHTMLContentHandler handler) throws SAXException {
    this.addTextToHandler(handler, null, null);
  }

  /**
   * Writes all buffered texts to the handler and clears the buffer.
   *
   * <p>
   * Each text is first passed through {@code St.removeRareCharacters}; texts
   * that are not longer than five characters afterwards are skipped. The
   * emitted texts are wrapped in one {@code div} of class
   * {@link #IMAGE_CONTAINER_CLASS} and separated by {@code span} elements of
   * class {@code imageDivider}. Nothing is written if no text qualifies.
   *
   * @param handler
   *          handler receiving the image texts
   * @param page
   *          page the images belong to, or {@code null}
   * @param allPagesCount
   *          total number of pages, or {@code null}; a
   *          {@code <span page="page:allPagesCount">} marker precedes every
   *          text only when both values are non-null
   * @throws SAXException
   *           if the handler rejects an event
   */
  public void addTextToHandler(XHTMLContentHandler handler, Integer page, Integer allPagesCount)
      throws SAXException {
    // TODO: emit a page indicator paragraph ("Images on page: <page>")
    boolean containerOpen = false;
    for (final String rawText : this.imageText) {
      final String text = St.removeRareCharacters(rawText);
      if (text.length() <= MIN_TEXT_LENGTH) {
        continue;
      }
      if (containerOpen) {
        // A text was already emitted, so separate it from this one.
        handler.startElement("span", "class", "imageDivider");
        handler.characters(" ");
        handler.endElement("span");
      } else {
        // Open the container on the first text that is actually emitted,
        // not on list index 0, which may have been filtered out above.
        handler.startElement("div", "class", IMAGE_CONTAINER_CLASS);
        containerOpen = true;
      }
      // TODO: pagination
      if (page != null && allPagesCount != null) {
        handler.startElement("span", "page", page + ":" + allPagesCount);
        handler.characters(" ");
        handler.endElement("span");
      }
      handler.characters(text);
    }
    if (containerOpen) {
      handler.characters(" ");
      handler.endElement("div");
    }
    this.imageText.clear();
  }

  /**
   * Copies the remaining bytes of a stream into a temp file obtained from
   * {@code FileTools.getTempFile}, named after the last path segment of the
   * zip entry.
   *
   * @param stream
   *          stream positioned at the entry's data; it is read until
   *          end-of-stream and is not closed by this method
   * @param zipentry
   *          entry whose name determines the temp file name
   * @return the written temp file, or {@code null} if an argument is
   *         {@code null} or the copy failed (the exception is printed)
   */
  public static File saveZipEntryToTemp(InputStream stream, ZipEntry zipentry) {
    if (stream == null || zipentry == null) {
      return null;
    }
    FileOutputStream output = null;
    try {
      final String entryFileName = new File(zipentry.getName()).getName();
      final File outputFile = FileTools.getTempFile("ocraptor_zip", entryFileName, true);
      output = new FileOutputStream(outputFile);
      final byte[] buf = new byte[COPY_BUFFER_SIZE];
      int n;
      while ((n = stream.read(buf, 0, buf.length)) > -1) {
        output.write(buf, 0, n);
      }
      // Close inside the try so that a failing close is reported as a failed
      // copy instead of returning a possibly incomplete file.
      output.close();
      return outputFile;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      if (output != null) {
        try {
          // No-op after a successful close; releases the handle on failure.
          output.close();
        } catch (java.io.IOException ignored) {
          // Nothing more can be done; the primary failure was already reported.
        }
      }
    }
  }
}