package de.unigoettingen.sub.commons.ocrComponents.webservice;
/*
Copyright 2010 SUB Goettingen. All rights reserved.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Properties;
import javax.annotation.Resource;
import javax.jws.WebService;
import javax.xml.ws.WebServiceContext;
import javax.xml.ws.handler.MessageContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import de.unigoettingen.sub.commons.ocr.util.FileAccess;
import de.unigoettingen.sub.ocr.controller.OcrEngineStarter;
import de.unigoettingen.sub.ocr.controller.OcrParameters;
/**
* IMPACT Abbyy Fine Reader 8.0 Service. This service provides the basic
* functionality of the Abbyy Fine Reader 8.0 text recognition engine for
* applying OCR to an image file.
*
* @author mabergn
*
*/
@WebService(endpointInterface = "de.unigoettingen.sub.commons.ocrComponents.webservice.OcrService")
public class OcrServiceImpl implements OcrService {
@Resource
private WebServiceContext wsContext;
static String ocrEngineId = "abbyy-multiuser";
private final String appName = "ws";
private final static Logger LOGGER = LoggerFactory
.getLogger(OcrServiceImpl.class);
FileAccess getFileAccess() {
return new FileAccess();
}
OcrEngineStarter getEngineStarter() {
return new OcrEngineStarter();
}
String getJobName() {
int randomNumber = Math.abs((int) ((Math.random()*((int) System.currentTimeMillis()))+1));
return "OcrServiceImplService_outputUrl"+ "_" + randomNumber;
}
@Override
public ByUrlResponseType ocrImageFileByUrl(ByUrlRequestType request) {
Date stampStart = new Date();
ByUrlResponseType response = new ByUrlResponseType();
FileAccess fileAccess = getFileAccess();
Properties props = fileAccess.getPropertiesFromFile("webservice-config.properties");
String inputTempDir = props.getProperty("localpath");
if(inputTempDir == null || inputTempDir.equals("")){
inputTempDir = System.getProperty("java.io.tmpdir");
}
if(!inputTempDir.endsWith("/")){
inputTempDir += "/";
}
String webserverPath = props.getProperty("webserverpath");
if(webserverPath == null || webserverPath.equals("")){
webserverPath = System.getProperty("ocrWebservice.root");
}
if(!webserverPath.endsWith("/")){
webserverPath += "/";
}
String jobName = getJobName();
RecognitionLanguages langs = request.getOcrlanguages();
List<String> langStringsList = new ArrayList<String>();
for (RecognitionLanguage lang : langs.getRecognitionLanguage()) {
langStringsList.add(lang.toString());
}
File imageTempFile = new File(inputTempDir + jobName + "/input.tif");
try {
fileAccess.copyUrlToFile(request.getInputUrl(), imageTempFile);
} catch (IOException e) {
String error = "ERROR CANNOT COPY URL: " + request.getInputUrl()
+ " To Local File";
LOGGER.error(error);
return getErrorResponse(webserverPath, error, response);
}
OcrParameters params = new OcrParameters();
params.ocrEngine = ocrEngineId;
params.outputFormats = new String[]{request.getOutputFormat().toString()};
params.inputFolder = inputTempDir + jobName;
params.inputFormats = new String[]{"tif"};
params.inputLanguages = langStringsList.toArray(new String[]{});
params.inputTextType = request.getTextType().toString();
params.priority = "3";
params.outputFolder = webserverPath + "ocrresults";
params.props = new Properties();
getEngineStarter().startOcrWithParams(params);
String resultsDir = "ocrresults";
try {
fileAccess.deleteFile(imageTempFile);
fileAccess.deleteDir(imageTempFile.getParentFile());
} catch (IOException e) {
LOGGER.error("Error while cleaning temp data.", e);
}
File resultFile = new File(webserverPath + resultsDir + "/" + jobName
+ "." + params.outputFormats[0].toLowerCase());
if( !fileAccess.fileExists(resultFile)){
LOGGER.error("ERROR. CANNOT Find File: "+ resultFile.toString());
String error = "File could not be processed: " + request.getInputUrl();
return getErrorResponse(webserverPath, error, response);
}
Date stampFinish = new Date();
long duration = stampFinish.getTime() - stampStart.getTime();
String webserverHostname = "";
if(props.getProperty("hostname") == null || props.getProperty("hostname").equals("no")){
MessageContext mc = wsContext.getMessageContext();
URI url = (URI) mc.get("javax.xml.ws.wsdl.description");
String hostname = url.getHost();
webserverHostname = "http://" + hostname + "/" + appName + "/";
}else {
webserverHostname = props.getProperty("hostname");
}
String newLine = ".\n";
response.setMessage("Process finished successfully after " + duration + " milliseconds.");
response.setOutputUrl(webserverHostname + resultsDir + "/"+ jobName + "." + params.outputFormats[0].toLowerCase());
response.setProcessingLog("========= PROCESSING REQUEST (by URL) =========. "+ "\n" +
"Using service: OcrServiceImplService. "+ "\n" +
"Parameter processingUnit: "+ webserverHostname + newLine +
"URL of input image: "+ request.getInputUrl()+ newLine +
"Wrote file " + imageTempFile.toString()+ newLine +
"OUTFORMAT substitution variable value: "+params.outputFormats[0].toLowerCase()+ newLine +
"OUTFILE substitution variable value: " + resultFile.getAbsolutePath()+ newLine +
"LANGUAGES substitution variable value: "+ params.inputLanguages + newLine +
"INFILE substitution variable value: "+ imageTempFile.toString()+ newLine +
"INTEXTTYPE substitution variable value: "+ request.getTextType()+ newLine +
"Process finished successfully with code 0."+ "\n" +
"Output file has been created successfully.."+ "\n" +
"Output Url: " + webserverHostname + resultsDir + "/" + jobName + "." + params.outputFormats[0].toLowerCase()+ newLine +
"Output Url-Abbyy-Result : " + webserverHostname + resultsDir + "/" + jobName + ".xml.result.xml" + newLine +
"Output Url-Summary-File : " + webserverHostname + resultsDir + "/" + jobName + "-textMD.xml" + newLine +
"Process finished successfully after " + duration + " milliseconds.."
);
response.setProcessingUnit(webserverHostname);
response.setReturncode(0);
response.setSuccess(true);
response.setToolProcessingTime(duration);
return response;
}
private ByUrlResponseType getErrorResponse(String webserverPath, String error, ByUrlResponseType byUrlResponseType) {
byUrlResponseType.setMessage("Process finished unsuccessfully ");
byUrlResponseType.setOutputUrl("");
byUrlResponseType.setProcessingLog(error);
byUrlResponseType.setProcessingUnit(webserverPath);
byUrlResponseType.setReturncode(1);
byUrlResponseType.setSuccess(false);
byUrlResponseType.setToolProcessingTime(0L);
return byUrlResponseType;
}
}