package jav.correctionBackend;
import java.io.File;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import org.h2.jdbcx.JdbcConnectionPool;
/**
* Copyright (c) 2012, IMPACT working group at the Centrum für Informations- und
* Sprachverarbeitung, University of Munich. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer. Redistributions in binary
* form must reproduce the above copyright notice, this list of conditions and
* the following disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* This file is part of the ocr-postcorrection tool developed by the IMPACT
* working group at the Centrum für Informations- und Sprachverarbeitung,
* University of Munich. For further information and contacts visit
* http://ocr.cis.uni-muenchen.de/
*
* @author thorsten (thorsten.vobl@googlemail.com)
*/
public class DefaultDocument extends Document {
public DefaultDocument(JdbcConnectionPool jc) {
super(jc);
}
@Override
protected int addToken(Token t) {
try {
Connection conn = jcp.getConnection();
return this.addToken(t, conn);
} catch (SQLException ex) {
return 0;
}
}
/**
* Adds token to the document. The tokenid is set to auto increment
*
* @param t the {
* @see jav.correctionBackend.Token} to be added
*/
@Override
protected int addToken(Token t, Connection conn) {
try {
try (PreparedStatement prep = conn.prepareStatement("INSERT INTO TOKEN VALUES( null,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,? )")) {
prep.setInt(1, t.getIndexInDocument());
prep.setInt(2, t.getOrigID());
prep.setString(3, t.getWOCR());
prep.setString(4, t.getWCOR());
prep.setBoolean(5, t.isNormal());
prep.setBoolean(6, t.isCorrected());
prep.setInt(7, t.getNumberOfCandidates());
TokenImageInfoBox tiib = t.getTokenImageInfoBox();
if (tiib != null) {
prep.setInt(8, tiib.getCoordinateLeft());
prep.setInt(9, tiib.getCoordinateRight());
prep.setInt(10, tiib.getCoordinateTop());
prep.setInt(11, tiib.getCoordinateBottom());
prep.setString(13, tiib.getImageFileName());
} else {
prep.setInt(8, -1);
prep.setInt(9, -1);
prep.setInt(10, -1);
prep.setInt(11, -1);
prep.setString(13, "");
}
prep.setString(12, t.getSpecialSeq().toString());
prep.setBoolean(14, t.isSuspicious());
prep.setInt(15, t.getPageIndex());
prep.setString(16, t.getTopSuggestion());
prep.setInt(17, t.getTopCandDLev());
prep.addBatch();
prep.executeBatch();
}
Statement psIdentity = conn.createStatement();
ResultSet result = psIdentity.executeQuery("CALL SCOPE_IDENTITY()");
result.next();
int identity = result.getInt(1);
result.close();
psIdentity.close();
conn.close();
return identity;
} catch (SQLException ex) {
ex.printStackTrace();
return 0;
}
}
@Override
protected void loadNumberOfTokensFromDB() {
try {
Connection conn = jcp.getConnection();
Statement s = conn.createStatement();
ResultSet rs = s.executeQuery("SELECT MAX(indexInDocument) FROM TOKEN");
if (rs.next()) {
this.numTokens = rs.getInt(1) + 1;
}
System.out.println("Num of tokens: " + this.numTokens);
rs.close();
s.close();
conn.close();
} catch (SQLException ex) {
this.numTokens = 0;
}
}
@Override
public ArrayList<Integer> deleteToken(int iDFrom, int iDTo) throws SQLException {
Connection conn = null;
PreparedStatement setIndex = null;
PreparedStatement moveIndex = null;
PreparedStatement undo_redo = null;
Token from = this.getTokenByID(iDFrom);
Token to = this.getTokenByID(iDTo);
int indexFrom = from.getIndexInDocument();
int indexTo = to.getIndexInDocument();
try {
ArrayList<Integer> retval = new ArrayList<>();
if (indexTo < indexFrom) {
return null;
// throw new OCRCException("JAV.DOCUMENT.DELETETOKEN invalid range");
}
if (indexFrom == indexTo) {
return null;
}
if ( from.getPageIndex() != to.getPageIndex()) {
return null;
// throw new OCRCException("JAV.DOCUMENT.DELETETOKEN: cannot erase across page borders");
}
conn = jcp.getConnection();
conn.setAutoCommit(false);
//reserve undo_redo_parts for the starting token
setIndex = conn.prepareStatement("UPDATE token SET indexInDocument=-1 WHERE tokenID=?");
undo_redo = conn.prepareStatement("INSERT INTO undoredo VALUES( ?,?,?,?,? )");
int i;
for (i = indexFrom; i <= indexTo; i++) {
Token temp = this.getTokenByIndex(i);
retval.add(temp.getID());
setIndex.setInt(1, temp.getID());
setIndex.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.DELETE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + i + " WHERE tokenID=" + temp.getID());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.DELETE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + temp.getID());
undo_redo.addBatch();
undo_redo_part++;
}
// move token index and prepare undoredo
moveIndex = conn.prepareStatement("UPDATE token SET indexInDocument=indexInDocument-? WHERE indexInDocument>?");
moveIndex.setInt(1, retval.size());
moveIndex.setInt(2, indexFrom);
moveIndex.executeUpdate();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.DELETE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument+" + retval.size() + " WHERE indexInDocument>=" + indexFrom);
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.DELETE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument-" + retval.size() + " WHERE indexInDocument>" + indexFrom);
undo_redo.addBatch();
undo_redo_part = 0;
undo_redo_id++;
this.numTokens -= retval.size();
setIndex.executeBatch();
undo_redo.executeBatch();
conn.commit();
return retval;
} catch (SQLException ex) {
if (conn != null) {
conn.rollback();
}
return null;
} finally {
if (setIndex != null) {
setIndex.close();
}
if (moveIndex != null) {
moveIndex.close();
}
if (undo_redo != null) {
undo_redo.close();
}
conn.setAutoCommit(true);
conn.close();
}
}
@Override
public ArrayList<Integer> splitToken(int tokenID, String editString) throws SQLException {
Connection conn = null;
PreparedStatement setIndex = null;
PreparedStatement undo_redo = null;
PreparedStatement moveIndex = null;
try {
editString = editString.replaceAll("\\s{2,}", " ");
editString = editString.replaceAll("^ ", "");
editString = editString.replaceAll(" $", "");
ArrayList<Integer> retval = new ArrayList<>();
conn = jcp.getConnection();
conn.setAutoCommit(false);
undo_redo = conn.prepareStatement("INSERT INTO undoredo VALUES( ?,?,?,?,? )");
moveIndex = conn.prepareStatement("UPDATE token SET indexInDocument=indexInDocument+? WHERE indexInDocument>?");
setIndex = conn.prepareStatement("UPDATE token SET indexInDocument=? WHERE tokenID=?");
java.util.regex.Pattern myAlnum = java.util.regex.Pattern.compile("[\\pL\\pM\\p{Nd}\\p{Nl}\\p{Pc}[\\p{InEnclosedAlphanumerics}&&\\p{So}]]+");
StringTokenizer strTok = new StringTokenizer(editString, " ", true);
Token temp;
TokenImageInfoBox b;
int tokensAdded = 0;
int imgwidth = 0;
int left = 0;
int charwidth = 0;
int tokensToAdd = strTok.countTokens() - 1;
Token atIndex = this.getTokenByID(tokenID);
if (atIndex.getTokenImageInfoBox() != null) {
imgwidth = atIndex.getTokenImageInfoBox().getCoordinateRight() - atIndex.getTokenImageInfoBox().getCoordinateLeft();
left = atIndex.getTokenImageInfoBox().getCoordinateLeft();
charwidth = imgwidth / editString.length();
}
// reserve undo_redo_part
undo_redo_part = 1;
// move token index and prepare undoredo
moveIndex.setInt(1, tokensToAdd);
moveIndex.setInt(2, atIndex.getIndexInDocument());
moveIndex.executeUpdate();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument-" + tokensToAdd + " WHERE indexInDocument>" + atIndex.getIndexInDocument());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument+" + tokensToAdd + " WHERE indexInDocument>" + atIndex.getIndexInDocument());
undo_redo.addBatch();
undo_redo_part++;
setIndex.setInt(1, -1);
setIndex.setInt(2, atIndex.getID());
setIndex.addBatch();
// undo_redo_for original token
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 0);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + atIndex.getIndexInDocument() + " WHERE tokenID=" + atIndex.getID());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 0);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + atIndex.getID());
undo_redo.addBatch();
while (strTok.hasMoreTokens()) {
String corr = strTok.nextToken();
if (corr.equals(" ")) {
b = null;
left += charwidth;
} else {
if (imgwidth == 0) {
b = null;
} else {
b = new TokenImageInfoBox();
b.setImageFileName(atIndex.getImageFilename().substring(atIndex.getImageFilename().lastIndexOf(File.separator) + 1, atIndex.getImageFilename().length()));
b.setCoordinateBottom(atIndex.getTokenImageInfoBox().getCoordinateBottom());
b.setCoordinateTop(atIndex.getTokenImageInfoBox().getCoordinateTop());
b.setCoordinateLeft(left);
left += charwidth * corr.length();
b.setCoordinateRight(left + 2);
}
}
temp = new Token(atIndex.getWOCR());
temp.setIndexInDocument(-1);
temp.setWCOR(corr);
temp.setIsCorrected(false);
temp.setIsSuspicious(false);
temp.setNumberOfCandidates(0);
temp.setPageIndex(atIndex.getPageIndex());
if (myAlnum.matcher(corr).matches()) {
temp.setIsNormal(true);
} else {
temp.setIsNormal(false);
}
if (corr.equals(" ")) {
temp.setSpecialSeq(SpecialSequenceType.SPACE);
} else {
temp.setSpecialSeq(SpecialSequenceType.NORMAL);
}
temp.setTokenImageInfoBox(b);
int identity = this.addToken(temp);
retval.add(identity);
setIndex.setInt(1, (atIndex.getIndexInDocument() + tokensAdded));
setIndex.setInt(2, identity);
setIndex.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + identity);
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.SPLIT.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + (atIndex.getIndexInDocument() + tokensAdded) + " WHERE tokenID=" + identity);
undo_redo.addBatch();
undo_redo_part++;
tokensAdded++;
}
undo_redo_part = 0;
undo_redo_id++;
this.numTokens += tokensAdded;
setIndex.executeBatch();
undo_redo.executeBatch();
conn.commit();
return retval;
} catch (SQLException ex) {
if (conn != null) {
conn.rollback();
}
return null;
} finally {
if (setIndex != null) {
setIndex.close();
}
if (moveIndex != null) {
moveIndex.close();
}
if (undo_redo != null) {
undo_redo.close();
}
conn.setAutoCommit(true);
conn.close();
}
}
@Override
public ArrayList<Integer> mergeRightward(int tokenID, int numTok) throws SQLException {
System.out.println("Beginning database transaction");
long now = System.currentTimeMillis();
Connection conn = null;
PreparedStatement setIndex = null;
PreparedStatement undo_redo = null;
PreparedStatement moveIndex = null;
try {
ArrayList<Integer> retval = new ArrayList<>();
Token atIndex = getTokenByID(tokenID);
Token newToken = new Token("");
newToken.setWCOR(atIndex.getWDisplay());
TokenImageInfoBox b = null;
Token rightToken;
int i = 0;
conn = jcp.getConnection();
conn.setAutoCommit(false);
setIndex = conn.prepareStatement("UPDATE token SET indexInDocument=? WHERE tokenID=?");
undo_redo = conn.prepareStatement("INSERT INTO undoredo VALUES( ?,?,?,?,? )");
moveIndex = conn.prepareStatement("UPDATE token SET indexInDocument=indexInDocument-? WHERE indexInDocument>?");
// reserve undo_redo_part for unsetting the token indices
undo_redo_part = 2;
for (; i < numTok; ++i) {
rightToken = getTokenByIndex(atIndex.getIndexInDocument() + (i + 1));
if (rightToken.getWDisplay().equals("\n")) {
break;
}
retval.add(rightToken.getID());
setIndex.setInt(1, -1);
setIndex.setInt(2, rightToken.getID());
setIndex.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + rightToken.getIndexInDocument() + " WHERE tokenID=" + rightToken.getID());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + rightToken.getID());
undo_redo.addBatch();
undo_redo_part++;
if ((atIndex.getTokenImageInfoBox() != null) && (rightToken.getTokenImageInfoBox() != null)) {
if (b == null) {
b = new TokenImageInfoBox();
b.setCoordinateBottom(atIndex.getTokenImageInfoBox().getCoordinateBottom());
b.setCoordinateTop(atIndex.getTokenImageInfoBox().getCoordinateTop());
b.setCoordinateLeft(atIndex.getTokenImageInfoBox().getCoordinateLeft());
b.setImageFileName(atIndex.getImageFilename().substring(atIndex.getImageFilename().lastIndexOf(File.separator) + 1, atIndex.getImageFilename().length()));
}
b.setCoordinateRight(java.lang.Math.max(atIndex.getTokenImageInfoBox().getCoordinateRight(), rightToken.getTokenImageInfoBox().getCoordinateRight()));
}
if (!rightToken.getWDisplay().equals(" ")) {
newToken.setWCOR((newToken.getWCOR() + rightToken.getWDisplay()));
}
}
// move token index and prepare undoredo
moveIndex.setInt(1, i);
moveIndex.setInt(2, atIndex.getIndexInDocument());
moveIndex.executeUpdate();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument+" + i + " WHERE indexInDocument>" + atIndex.getIndexInDocument());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, undo_redo_part);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=indexInDocument-" + i + " WHERE indexInDocument>" + atIndex.getIndexInDocument());
undo_redo.addBatch();
undo_redo_part++;
newToken.setIsCorrected(true);
newToken.setIsNormal(true);
newToken.setIsSuspicious(false);
newToken.setNumberOfCandidates(0);
newToken.setPageIndex(atIndex.getPageIndex());
newToken.setSpecialSeq(SpecialSequenceType.NORMAL);
newToken.setTokenImageInfoBox(b);
newToken.setIndexInDocument(-1);
int identity = this.addToken(newToken);
retval.add(0, identity);
setIndex.setInt(1, atIndex.getIndexInDocument());
setIndex.setInt(2, identity);
setIndex.addBatch();
setIndex.setInt(1, -1);
setIndex.setInt(2, atIndex.getID());
setIndex.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 0);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + identity);
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 0);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=-1 WHERE tokenID=" + atIndex.getID());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 1);
undo_redo.setString(3, "undo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + atIndex.getIndexInDocument() + " WHERE tokenID=" + atIndex.getID());
undo_redo.addBatch();
undo_redo.setInt(1, undo_redo_id);
undo_redo.setInt(2, 1);
undo_redo.setString(3, "redo");
undo_redo.setString(4, MyEditType.MERGE.toString());
undo_redo.setString(5, "UPDATE token SET indexInDocument=" + atIndex.getIndexInDocument() + " WHERE tokenID=" + identity);
undo_redo.addBatch();
undo_redo_part = 0;
undo_redo_id++;
this.numTokens -= i;
setIndex.executeBatch();
undo_redo.executeBatch();
conn.commit();
long then = System.currentTimeMillis();
System.out.println("Database transaction finished. Time taken: " + (then - now));
return retval;
} catch (SQLException ex) {
ex.printStackTrace();
if (conn != null) {
conn.rollback();
}
return null;
} finally {
if (setIndex != null) {
setIndex.close();
}
if (moveIndex != null) {
moveIndex.close();
}
if (undo_redo != null) {
undo_redo.close();
}
conn.setAutoCommit(true);
conn.close();
}
}
}