package gr.ntua.ivml.mint.persistent; import gr.ntua.ivml.mint.db.DB; import gr.ntua.ivml.mint.util.InputIterator; import gr.ntua.ivml.mint.util.Tuple; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStream; import java.util.ArrayList; import java.util.Date; import java.util.Iterator; import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; import org.hibernate.Hibernate; /** * This class summarizes all information needed to export a selection of * DataUploads. Need to encapsulate special target schema logic * Subclass? How do I do the Hibernate stuff ?? * * @author Arne Stabenau * */ public class Publication { public static final Logger log = Logger.getLogger( Publication.class ); public static final int ERROR=-1; public static final int OK=0; public static final int IDLE=1; public static final int CONSOLIDATE=2; public static final int VERSION=3; public static final int PROCESS=4; public static final int POSTPROCESS=5; Long dbID; // all affected DataUpload objects List<DataUpload> inputUploads = new ArrayList<DataUpload>(); // example stats on the this publication, more could be collected long itemCount; // which user did the publication User publishingUser; Organization publishingOrganization; // status information on the progress of publication String statusMessage; int statusCode; String report; // when the publication was initiated Date lastProcess; // the final output in zipped form // either one or many files, possible millions BlobWrap zippedOutput; // name of output. With this the correct Transformations are selected String targetSchema; // transient only valid while in progress File tmpFile; public Long getDbID() { return dbID; } public void setDbID(Long dbID) { this.dbID = dbID; } public List<DataUpload> getInputUploads() { return inputUploads; } public void setInputUploads(List<DataUpload> inputUploads) { this.inputUploads = inputUploads; } public long getItemCount() { return itemCount; } public void setItemCount(long itemCount) { this.itemCount = itemCount; } public User getPublishingUser() { return publishingUser; } public void setPublishingUser(User publishingUser) { this.publishingUser = publishingUser; } public Organization getPublishingOrganization() { return publishingOrganization; } public void setPublishingOrganization(Organization publishingOrganization) { this.publishingOrganization = publishingOrganization; } public String getStatusMessage() { return statusMessage; } public void setStatusMessage(String statusMessage) { this.statusMessage = statusMessage; } public int getStatusCode() { return statusCode; } public void setStatusCode(int statusCode) { this.statusCode = statusCode; } public Date getLastProcess() { return lastProcess; } public void setLastProcess(Date lastProcess) { this.lastProcess = lastProcess; } public Iterator<Tuple<DataUpload, XMLNode>> inputItemIterator() { return new InputIterator( getInputUploads().iterator()); } public String getReport() { return report; } public void setReport(String report) { this.report = report; } public void appendReport( String report ) { this.report += report; } public void appendReport( String report, int limit ) { if( this.report.length() < limit ) { this.report += report; } } public BlobWrap getZippedOutput() { return zippedOutput; } public void setZippedOutput(BlobWrap zippedOutput) { this.zippedOutput = zippedOutput; } public String getTargetSchema() { return targetSchema; } public void setTargetSchema(String targetSchema) { this.targetSchema = targetSchema; } /** * Call this to check if the publication is still valid. * It should check whether changes in the input data (transformations, mappings) * are not reflected here. * * return true if Publication is still valid. */ public boolean validate() { // go through all relevant transformations and check if any have // dates after the process of this Publication. return true; } /** * Check if the current state is still valid. * Check if a new processing round has to be done. * Do it (version, apply changes, pullup of changes, consolidate in one xml-object ) */ public void process() { /*should be customized for every project */ try { setStatusCode(OK); setStatusMessage("Customized message"); setReport(""); } catch( Exception e ) { if( getStatusCode() != ERROR ) { setStatusCode(ERROR); setStatusMessage("Publication processing failed with: " + e.getMessage()); } // didn't work, remove transformations from upload getInputUploads().clear(); log.error( "processing of Publication failed.", e ); } finally { DB.commit(); } } /** * Overwrite if you want specific behavior in your project. * Is called when the Publication is removed ... */ public void unpublish() { } /** * Convenience function to remove an upload. No processing is started. * @param du */ public void removeUpload( DataUpload du ) { Iterator<DataUpload> i = getInputUploads().iterator(); while( i.hasNext() ) { DataUpload du2 = i.next(); if( du2.getDbID() == du.getDbID()) { i.remove(); return; } } } /** * * @param du */ public boolean containsUpload( DataUpload du ) { Iterator<DataUpload> i = getInputUploads().iterator(); while( i.hasNext() ) { DataUpload du2 = i.next(); if( du2.getDbID() == du.getDbID()) { return true; } } return false; } /** * Convenience function to add an upload, no reprocessing is started. * @param du */ public void addUpload( DataUpload du ) { getInputUploads().add( du ); } /** * Create the List of items with available newer versions. * @throws Exception */ public void version() throws Exception { // do nothing for now } /** * Apply the changeset to the latest version of an item. * @throws Exception */ public void applyChanges() throws Exception { // do nothing for now } public File postProcess( File input ) throws Exception { return input; } public Iterator<XMLNode> itemize() throws Exception { return null; } public long sumInputItems() { long result = 0l; try { for( DataUpload du: getInputUploads()) { result += du.getItemCount(); } } catch( Exception e ) { log.error( "Exception during item counting.", e ); return -1l; } return result; } /** * The given file (which needs to be a ZIP archive) is written back as * BLOB to the database. * @param result */ public void writeBack( File result ) { try { zippedOutput = new BlobWrap(); zippedOutput.data = Hibernate.createBlob( new FileInputStream( result ), (int) result.length()); setStatusCode(OK); DB.commit(); // result.delete(); } catch( Exception e ) { log.error( "Writeback failed!", e ); try { setStatusCode(ERROR); setStatusMessage(e.getMessage()); DB.commit(); } catch( Exception e2 ) { log.error( "Status update failed as well!!", e2 ); } } } public File getTmpFile(){ return this.tmpFile; } public void unloadToTmpFile() { try { tmpFile = File.createTempFile("unloadPublication", ".zip"); tmpFile.deleteOnExit(); log.info( "Unloading to " + tmpFile.getAbsolutePath()); FileOutputStream fos = new FileOutputStream( tmpFile ); BufferedOutputStream bos = new BufferedOutputStream( fos,4096 ); InputStream is = getZippedOutput().getData().getBinaryStream(); IOUtils.copy(is, bos); is.close(); bos.flush(); bos.close(); DB.commit(); } catch( Exception e ) { log.error( "Cannot copy BLOB to tmp file", e ); } } /** * Returns a stream to a zip archive. Please cleanup after finished with the Stream. * @return */ public InputStream getDownloadStream() { InputStream is = null; if( tmpFile == null ) unloadToTmpFile(); try { is = new FileInputStream(tmpFile); } catch( Exception e ) { log.error( "File unload problem", e); } return is; } /** * delete the tmp file after using the Download Stream. This will be automated later. */ public void cleanup() { tmpFile.delete(); } } /* * How should the process work? * a) Collect all the items from the transformations, building an index of each item, which should allow for the following: * - access each item * - score items against each other, the index might contain many columns with scores on certain metrics * scores between items are only build from neighboring items in the index (avoid n^2 complexity) * - the collection is happening as XML in files! - current approach, one ZIP archive, but this might not work for * millions of items * * b) .. skip other steps so far .. * c) post process by XSL transform to ESE * d) final result is uploaded as ZIP archive to database. */