/*
 * NOTE: This copyright does *not* cover user programs that use Hyperic
 * program services by normal system calls through the application
 * program interfaces provided as part of the Hyperic Plug-in Development
 * Kit or the Hyperic Client Development Kit - this is merely considered
 * normal use of the program, and does *not* fall under the heading of
 * "derived work".
 *
 * Copyright (C) [2004-2011], VMware, Inc.
 * This file is part of Hyperic.
 *
 * Hyperic is free software; you can redistribute it and/or modify
 * it under the terms version 2 of the GNU General Public License as
 * published by the Free Software Foundation. This program is distributed
 * in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA.
 */

package org.hyperic.tools.ant.dbupgrade;

import java.io.PrintStream;
import java.io.PrintWriter;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.tools.ant.BuildException;
import org.apache.tools.ant.Project;
import org.hyperic.tools.ant.utils.DatabaseType;
import org.hyperic.util.security.SecurityUtil;
import org.jasypt.encryption.pbe.PBEStringEncryptor;

/**
 * Custom Ant task responsible for encrypting the data of one or more table columns.<br>
 * Encryption uses the standard PBEWithMD5AndDES algorithm, keyed by the database
 * encryption password.<br>
 *
 * Columnar data encryption is a heavyweight process, both because of the potential size
 * of the dataset and because of the cost of encrypting the individual values.<br>
 * To speed it up, the logic partitions the dataset into pages and spawns workers to
 * process them concurrently.<br>
 *
 * <b>Note:</b> At most 4 workers are spawned (fewer if there are fewer partitions),<br>
 * since a small environment would typically have about that many CPUs as well as a
 * possibly co-located<br> database (more workers would max out CPU utilization).<br>
 * <br>
 * <b>Note:</b> Each partition operation is atomic (committed separately); a failure in
 * one<br> partition does not roll back the commits of the others.
 *
 * @author guys
 */
public class SST_ColumnEncyptor extends SchemaSpecTask {

    private String table;
    private String pkColumn;
    private int noOfUpdateColumns;
    private String columnsClause;
    private String updateColumnsClause;
    private int batchSize;
    private int iNoOfchunks;
    private PBEStringEncryptor encryptor;
    private DatabaseType enumDatabaseType;

    private static AtomicInteger pages;
    private static AtomicInteger totalnoOfRecords = new AtomicInteger(0);

    private static final int DEFAULT_BATCH_SIZE = 1000;

    public SST_ColumnEncyptor() {
    }//EOM

    public final void setTable(final String table) {
        this.table = table;
    }//EOM

    public final void setPkColumn(final String pkColumn) {
        this.pkColumn = pkColumn;
    }//EOM

    public final void setBatchSize(final String batchSize) {
        this.batchSize = (batchSize == null ? DEFAULT_BATCH_SIZE : Integer.parseInt(batchSize));
    }//EOM

    /**
     * Processes the columns list and weeds out duplicates.
     *
     * @param sColumns Comma-delimited columns list.
     */
    public final void setColumns(final String sColumns) {
        if (sColumns == null || sColumns.length() == 0) return;

        final String[] arrColumns = sColumns.split(",");
        final int iOrigSize = arrColumns.length;

        if (iOrigSize == 1) {
            this.columnsClause = sColumns;
            this.updateColumnsClause = sColumns + "=?";
            this.noOfUpdateColumns = 1;
        } else {
            final HashSet<String> columns = new HashSet<String>(arrColumns.length);
            final StringBuilder updateStatementBuilder = new StringBuilder();
            final StringBuilder selectStatementBuilder = new StringBuilder();

            for (final String column : arrColumns) {
                //skip empty entries and duplicate column names
                if (column.length() == 0 || columns.contains(column)) continue;
                //else
                columns.add(column);
                selectStatementBuilder.append(column).append(',');
                updateStatementBuilder.append(column).append("=?,");
            }//EO while there are more columns

            selectStatementBuilder.deleteCharAt(selectStatementBuilder.length() - 1);
            updateStatementBuilder.deleteCharAt(updateStatementBuilder.length() - 1);

            this.columnsClause = selectStatementBuilder.toString();
            this.updateColumnsClause = updateStatementBuilder.toString();
            this.noOfUpdateColumns = columns.size();
        }//EO if multiple columns
    }//EOM
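    /*
     * Illustrative trace of setColumns() (not part of the task itself; the column names
     * are hypothetical placeholders): given the attribute value
     * "propvalue,propvalue,encvalue", the duplicate is weeded out and the clauses
     * come out as
     *
     *   columnsClause       = "propvalue,encvalue"
     *   updateColumnsClause = "propvalue=?,encvalue=?"
     *   noOfUpdateColumns   = 2
     *
     * which later yield statements of the shape
     *   SELECT <pk>,propvalue,encvalue FROM <table> ...
     *   UPDATE <table> SET propvalue=?,encvalue=? WHERE <pk>=?
     */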
    @Override
    public final void initialize(final Connection conn, final DBUpgrader upgrader) {
        super.initialize(conn, upgrader);
        try {
            this.encryptor = upgrader.getEncryptor();
            this._conn.setAutoCommit(false);
            //initialize the database type strategy.
            this.enumDatabaseType = DatabaseType.valueOf(conn.getMetaData().getDatabaseProductName());
        } catch (Throwable t) {
            throw new BuildException(t);
        }//EO catch block
    }//EOM
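    /*
     * The encryptor itself comes from DBUpgrader.getEncryptor(); its construction is not
     * shown in this file. A minimal jasypt sketch of an encryptor matching the
     * PBEWithMD5AndDES setup described in the class Javadoc would look roughly like the
     * following (the password variable is an assumption, not taken from DBUpgrader):
     *
     *   StandardPBEStringEncryptor enc = new StandardPBEStringEncryptor();
     *   enc.setAlgorithm("PBEWithMD5AndDES");
     *   enc.setPassword(databaseEncryptionPassword); //hypothetical variable
     *   String cipherText = enc.encrypt("plain value");
     */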
    /**
     * Invoked from the {@link SchemaSpec#execute()}.<br>
     *
     * Partitions the dataset into logical pages by dividing the number of records by the
     * defined batchSize<br> and spawns workers to handle individual pages in separate
     * threads of execution.<br>
     *
     * <b>Note:</b> At most 4 workers are spawned (fewer if there are fewer partitions),<br>
     * since a small environment would typically have about that many CPUs as well as a
     * possibly co-located<br> database (more workers would max out CPU utilization).<br>
     *
     * <b>Note:</b> As there may be more partitions than worker instances,<br>
     * each worker keeps consuming page-processing requests until none are left
     * (i.e. multiple pages<br> per worker instance).
     *
     * <b>Note:</b> Each partition operation is atomic (committed separately); a failure in
     * one<br> partition does not roll back the commits of the others.
     *
     * <br>
     * The main thread awaits the completion of all consumer threads before terminating.
     */
    @Override
    public final void execute() throws BuildException {
        PreparedStatement ps = null;
        ResultSet rs = null;
        NestedBuildException thrownException = null;
        ExecutorService executorPool = null;
        try {
            //ensure batchSize is set if none was defined in the xml.
            if (this.batchSize == 0) this.batchSize = DEFAULT_BATCH_SIZE;

            final long before = System.currentTimeMillis();

            //determine the dataset size first.
            ps = this._conn.prepareStatement("select count(" + this.pkColumn + ") from " + this.table);
            rs = ps.executeQuery();
            rs.next();

            final int iNoOfExistingRecords = rs.getInt(1);
            //if the table is empty, abort.
            if (iNoOfExistingRecords == 0) return;

            //calculate the number of partitions, rounding up to account for the remainder
            //(e.g. 950,000 records with a batch size of 10,000 yield 95 chunks).
            this.iNoOfchunks = (iNoOfExistingRecords + this.batchSize - 1) / this.batchSize;

            this.log("[SST_ColumnEncryptor.execute()]: No of records: " + iNoOfExistingRecords +
                    " No of chunks: " + iNoOfchunks, Project.MSG_WARN);

            rs.close();
            ps.close();
            rs = null;
            ps = null;

            //initialize the decrementing semaphore (wait gate) and the consumers' page buffer.
            final CountDownLatch inverseSemaphore = new CountDownLatch(iNoOfchunks);
            pages = new AtomicInteger(iNoOfchunks - 1);

            Connection conn = null;

            int iNoOfWorkers = 4;
            if (iNoOfWorkers > iNoOfchunks) iNoOfWorkers = iNoOfchunks;

            this.log("[SST_ColumnEncryptor.execute()]: Starting update");

            //construct the paginated select statement for the given database product using
            //the databaseType strategy.
            final String selectStatement = this.enumDatabaseType.generatePagedQuery(
                    this.table, this.columnsClause, this.pkColumn);

            //construct the update statement for the given database product using
            //the databaseType strategy.
            final String updateStatement = this.enumDatabaseType.generateUpdateQuery(
                    this.table, this.updateColumnsClause, this.pkColumn);

            final List<Future<String>> workersFutures = new ArrayList<Future<String>>(iNoOfWorkers);
            executorPool = Executors.newFixedThreadPool(iNoOfWorkers);

            Future<String> workerFuture = null;
            //spawn the workers, ensuring each gets its own database connection and encryptor
            //instance so as to minimize concurrency friction
            for (int i = 0; i < iNoOfWorkers; i++) {
                conn = this.getNewConnection();
                conn.setAutoCommit(false);
                workerFuture = executorPool.submit(new Worker(inverseSemaphore, conn,
                        selectStatement, updateStatement, this.newEncryptor()));
                workersFutures.add(workerFuture);
            }//EO while there are more executors

            //wait until the countdown latch reaches 0 (all workers are finished) before
            //terminating
            inverseSemaphore.await();

            //now verify that no exceptions were returned (thrown) from the workers
            for (Future<String> workerResponse : workersFutures) {
                //throws an ExecutionException if one was thrown from the worker thread
                workerResponse.get();
            }//EO while there are more worker responses

            this.log("[SST_ColumnEncryptor.execute()]: all workers have finished; encrypted " +
                    totalnoOfRecords.get() + " records in an overall time in millis: " +
                    (System.currentTimeMillis() - before));
        } catch (Throwable t) {
            //must keep a record of the exception, as more can occur during the finally block
            thrownException = new NestedBuildException(t);
        } finally {
            try {
                //ensure all threads are killed
                if (executorPool != null) executorPool.shutdown();
                if (rs != null) rs.close();
                if (ps != null) ps.close();
            } catch (Throwable t) {
                //if an exception was previously thrown, add this one as a nested one;
                //otherwise create a new one
                if (thrownException == null) {
                    thrownException = new NestedBuildException(t);
                } else {
                    thrownException.addThrowable(t);
                }//EO if an exception was already thrown
            }//EO catch block

            //if an error had occurred, throw the exception (might contain multiple nested
            //exceptions)
            if (thrownException != null) {
                log(thrownException, Project.MSG_ERR);
                throw thrownException;
            }//EO if there was an error
        }//EO finally block
    }//EOM
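    /*
     * The exact SQL text is produced by the DatabaseType strategy per database product.
     * As an illustration only (the real strings are generated in
     * org.hyperic.tools.ant.utils.DatabaseType and may differ), a PostgreSQL-style pair
     * could resemble:
     *
     *   -- paged select: the pk first, then the data columns, page bounds bound as params
     *   SELECT <pkColumn>,<columnsClause> FROM <table> ORDER BY <pkColumn> LIMIT ? OFFSET ?
     *
     *   -- update: one '?' per data column, the pk value as the last binding parameter
     *   UPDATE <table> SET <updateColumnsClause> WHERE <pkColumn> = ?
     *
     * The update shape is implied by Worker.call() below, which binds the encrypted
     * values at indices 1..n and the pk value at index n+1; the paged-select shape is
     * an assumption.
     */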
    /**
     * Asynchronous worker responsible for the encryption of one or more columnar dataset
     * partitions.<br>
     * Inner instance class (so that it has access to the outer class's instance members).
     */
    private class Worker implements Callable<String> {

        private final CountDownLatch countdownSemaphore;
        private final Connection conn;
        private final PBEStringEncryptor encryptor;
        private final String selectStatement;
        private final String updateStatement;

        Worker(final CountDownLatch countdownSemaphore, final Connection conn,
               final String selectStatement, final String updateStatement,
               final PBEStringEncryptor encryptor) {
            this.countdownSemaphore = countdownSemaphore;
            this.conn = conn;
            this.encryptor = encryptor;
            this.selectStatement = selectStatement;
            this.updateStatement = updateStatement;
        }//EOM

        /**
         * Encrypts one or more columnar dataset partitions.<br>
         * The method acts as a consumer of the {@link SST_ColumnEncyptor#pages} buffer.<br>
         * <br>
         * It iterates over the buffer and processes dataset partitions based on the buffer
         * value.<br>
         * <br>
         * Each partition is processed as an atomic operation which is committed
         * separately.<br>
         * <br>
         * The update is performed by selecting the records from the table using the
         * calculated<br> pagination information and creating an update batch statement
         * for each record.<br>
         * Encryption only occurs if the value was not already encrypted.
         */
        public String call() throws Exception {
            final String msgPrefix = "[Encryptor Worker (" + Thread.currentThread().getName() + ")]: ";

            ResultSet rs = null;
            PreparedStatement selectStatement = null, updateStatement = null;
            NestedBuildException thrownException = null;

            final DatabaseType enumDatabaseType = SST_ColumnEncyptor.this.enumDatabaseType;
            final int iNoOfEncryptableColumns = SST_ColumnEncyptor.this.noOfUpdateColumns;
            int iCurrentPageNumber = 0;
            final int iBatchSize = SST_ColumnEncyptor.this.batchSize;
            final int iNoOfChunks = SST_ColumnEncyptor.this.iNoOfchunks;
            String colVal = null;

            try {
                //iterate over the partitions buffer and process until none are left
                //(buffer < 0).
                //Note: the countDownLatch cannot double as the buffer, as countDown() &
                //getCount() are not bound together as one atomic operation.
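                //SecurityUtil.isMarkedEncrypted() is used below to skip values that were
                //already encrypted. The exact marker convention lives in
                //org.hyperic.util.security.SecurityUtil; presumably the cipher text is
                //wrapped in a recognizable envelope (a jasypt-style "ENC(...)" wrapper is
                //one common convention), so that e.g. a stored "ENC(9Fb...)" would be
                //skipped while a plain "secret" would be encrypted and rewritten. This is
                //an assumption; the marker itself is defined by SecurityUtil, not here.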
                while ((iCurrentPageNumber = pages.getAndDecrement()) >= 0) {
                    long total = 0;
                    try {
                        long before = System.currentTimeMillis();

                        long beforeSelect = System.currentTimeMillis();
                        selectStatement = this.conn.prepareStatement(this.selectStatement);
                        enumDatabaseType.bindPageInfo(selectStatement, iCurrentPageNumber,
                                iBatchSize, iNoOfChunks);
                        rs = selectStatement.executeQuery();
                        rs.setFetchSize(iBatchSize);
                        long afterSelect = (System.currentTimeMillis() - beforeSelect);

                        long beforeBatch = System.currentTimeMillis();
                        updateStatement = conn.prepareStatement(this.updateStatement);

                        long beforeLoop = System.currentTimeMillis();
                        boolean isDirty = false;

                        while (true) {
                            long beforeSingleLoop = System.currentTimeMillis();
                            if (!rs.next()) break;
                            long afterSingleLoop = (System.currentTimeMillis() - beforeSingleLoop);

                            //result set column 1 is the pk; the data columns start at index 2
                            for (int i = 1; i <= iNoOfEncryptableColumns; i++) {
                                colVal = rs.getString(i + 1);
                                if (!SecurityUtil.isMarkedEncrypted(colVal)) {
                                    colVal = encryptor.encrypt(colVal);
                                    isDirty = true;
                                }//EO if should encrypt
                                //always bind the value (encrypted or as-is) so that no
                                //binding parameter is left unset when the row is batched
                                updateStatement.setString(i, colVal);
                            }//EO while there are more columns to encrypt

                            if (isDirty) {
                                //set the where clause binding param to the next binding param index
                                updateStatement.setString(iNoOfEncryptableColumns + 1, rs.getString(1));
                                updateStatement.addBatch();
                            }//EO if dirty

                            isDirty = false;
                        }//EO while there are more records

                        long afterLoop = (System.currentTimeMillis() - beforeLoop);

                        long beforeExecuteBatch = System.currentTimeMillis();
                        final int[] arrResults = updateStatement.executeBatch();
                        long afterExecuteBatch = (System.currentTimeMillis() - beforeExecuteBatch);

                        final int iLength = arrResults.length;
                        for (int i = 0; i < iLength; i++) {
                            if (arrResults[i] == PreparedStatement.EXECUTE_FAILED) {
                                log(msgPrefix + " Failed batch sequence: " + i);
                            }//EO if failure
                        }//EO while there are more results

                        long beforeCommit = System.currentTimeMillis();
                        this.conn.commit();
                        long afterCommit = (System.currentTimeMillis() - beforeCommit);
                        long afterBatch = (System.currentTimeMillis() - beforeBatch);

                        rs.close();
                        selectStatement.close();
                        updateStatement.close();

                        total += (System.currentTimeMillis() - before);
                        totalnoOfRecords.addAndGet(iLength);

                        log(msgPrefix + "Batch No: " + iCurrentPageNumber +
                                " No of Records: " + iLength + " Total millis: " + total +
                                " select: " + afterSelect + " batch update: " + afterBatch +
                                " commit time: " + afterCommit +
                                " execute Batch: " + afterExecuteBatch + " loop: " + afterLoop);
                    } catch (Throwable t) {
                        //must keep a record of the exception, as more can occur during the
                        //finally block
                        thrownException = new NestedBuildException(msgPrefix, t);
                        try {
                            this.conn.rollback();
                        } catch (Throwable innerT) {
                            thrownException.addThrowable(innerT);
                        }//EO catch block
                    } finally {
                        try {
                            if (rs != null) rs.close();
                            if (selectStatement != null) selectStatement.close();
                            if (updateStatement != null) updateStatement.close();
                        } catch (Throwable t) {
                            //if an exception was previously thrown, add this one as a nested
                            //one; otherwise create a new one
                            if (thrownException == null) {
                                thrownException = new NestedBuildException(msgPrefix, t);
                            } else {
                                thrownException.addThrowable(t);
                            }//EO if an exception was already thrown
                        }//EO catch block

                        //decrement the gate semaphore
                        this.countdownSemaphore.countDown();
                        log(msgPrefix + " after chunk countdown " + this.countdownSemaphore.getCount());

                        if (thrownException != null) {
                            log(thrownException, Project.MSG_ERR);
                            throw thrownException;
                        }//EO if there was an error
                    }//EO finally block
                }//EO while there are more pages to work on
            } finally {
                this.conn.close();
            }//EO finally block

            log(msgPrefix + " exiting with chunks left: " + this.countdownSemaphore.getCount());
            return null;
        }//EOM

    }//EO inner class Worker
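    /*
     * Illustrative note on the aggregate message format: NestedBuildException (below)
     * collects every failure encountered during a run, so getMessage() for a worker that
     * failed once on commit and again while closing its statements would render roughly as
     *
     *   [Encryptor Worker (pool-1-thread-2)]:
     *   2 exceptions had occurred:
     *   --- Nested Exception ---
     *   <commit failure message>
     *   --- Nested Exception ---
     *   <close failure message>
     *
     * The worker name and failure messages above are hypothetical.
     */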
    /**
     * Exception container which delegates to its nested exceptions.
     */
    private static final class NestedBuildException extends BuildException {

        private final List<Throwable> nestedExceptions;

        public NestedBuildException(final Throwable t) {
            super();
            this.nestedExceptions = new ArrayList<Throwable>();
            this.nestedExceptions.add(t);
        }//EOC

        public NestedBuildException(final String message, final Throwable t) {
            super(message);
            this.nestedExceptions = new ArrayList<Throwable>();
            this.nestedExceptions.add(t);
        }//EOC

        public final NestedBuildException addThrowable(final Throwable t) {
            this.nestedExceptions.add(t);
            return this;
        }//EOM

        @Override
        public String getMessage() {
            final String origMsg = (super.getMessage() == null ? "" : super.getMessage() + "\n");

            final StringBuilder builder = new StringBuilder(origMsg).
                    append(this.nestedExceptions.size()).append(" exceptions had occurred:");

            for (Throwable nested : this.nestedExceptions) {
                builder.append("\n--- Nested Exception ---\n").append(nested.getMessage());
            }//EO while there are more exceptions

            return builder.toString();
        }//EOM

        @Override
        public void printStackTrace(PrintStream ps) {
            synchronized (ps) {
                super.printStackTrace(ps);
                for (Throwable nested : this.nestedExceptions) {
                    ps.println("--- Nested Exception ---");
                    nested.printStackTrace(ps);
                }//EO while there are more nested exceptions
            }//EO sync block
        }//EOM

        /**
         * Prints the stack trace of this exception and any nested
         * exception to the specified PrintWriter.
         *
         * @param pw The PrintWriter to print the stack trace to.
         *           Must not be <code>null</code>.
         */
        @Override
        public void printStackTrace(PrintWriter pw) {
            synchronized (pw) {
                super.printStackTrace(pw);
                for (Throwable nested : this.nestedExceptions) {
                    pw.println("--- Nested Exception ---");
                    nested.printStackTrace(pw);
                }//EO while there are more nested exceptions
            }//EO sync block
        }//EOM

    }//EO inner class NestedBuildException

    /**
     * Ad-hoc sanity check of the chunk calculation: 950,000 records with a batch size of
     * 10,000 yield 95 chunks.
     */
    public static void main(String[] args) throws Throwable {
        final int iNoOfExistingRecords = 950000;
        final int iBatchSize = 10000;
        final int iNoOfchunks = (iNoOfExistingRecords + iBatchSize - 1) / iBatchSize;
        System.out.println(iNoOfchunks);
    }//EOM

}//EOC
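/*
 * Hypothetical usage sketch (not taken from the Hyperic build files): Ant maps task
 * attributes onto the set<Attr>() methods above, so a schema-spec declaration for this
 * task might look roughly like
 *
 *   <SST_ColumnEncyptor table="EAM_EXAMPLE" pkColumn="ID"
 *                       columns="PROPVALUE,ENCVALUE" batchSize="1000"/>
 *
 * The element, table, and column names are placeholders; the actual task name is
 * whatever the accompanying taskdef/antlib registers for this class.
 */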