package gr.ntua.ivml.athena.persistent;

import gr.ntua.ivml.athena.concurrent.Ticker;
import gr.ntua.ivml.athena.db.DB;
import gr.ntua.ivml.athena.util.Config;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.hibernate.Hibernate;

import de.schlichtherle.util.zip.ZipEntry;
import de.schlichtherle.util.zip.ZipOutputStream;

/**
 * This class summarizes all information needed to export a selection of
 * DataUploads. Need to encapsulate special target schema logic
 * Subclass? How do I do the Hibernate stuff ??
* * @author Arne Stabenau * */ public class Publication { public static class NodeContainer { public static final int LIDOVS09 = 1; public static final int LIDOVS10 = 2; public XMLNode node; public int schema; } public static class PathIterator implements Iterator<NodeContainer> { private static final XMLNode[] templatePage = new XMLNode[0]; List<Transformation> transformations; Iterator<Transformation> iterTransform; String path; XpathHolder currentHolder; Transformation currentTransformation; XMLNode nextItem; XMLNode[] page; int nextInPage; public PathIterator( List<Transformation> l, String path ) { transformations = l; iterTransform = transformations.iterator(); this.path = path; //next(); nextItem = nextInPage(); } @Override public boolean hasNext() { return nextItem != null; } private boolean nextHolder() { if( iterTransform.hasNext() ) { currentTransformation = iterTransform.next(); XmlObject xo = currentTransformation.getParsedOutput(); currentHolder = xo.getRoot().getByRelativePath(path); if( currentHolder != null ) log.debug( "Current transformation has " + currentHolder.getCount() + " items." ); } else { currentHolder = null; } return currentHolder != null; } /** * Retrieve next page from current holder or first from next * @return if there is stuff left */ private boolean nextPage() { List<XMLNode> l = null; if( page != null ) l= currentHolder.getNodes( page[page.length-1], 100); if(( page== null ) || ( l.size() == 0 )) { nextHolder(); if( currentHolder == null ) return false; l= currentHolder.getNodes( 0, 100); if( l.size() == 0 ) throw new RuntimeException( "Unexpected result, should have nodes"); } page = l.toArray(templatePage); nextInPage = 0; return true; } private XMLNode nextInPage() { XMLNode result=null; if(( page==null ) || ( nextInPage==page.length )) { if( ! 
nextPage()) return null; } result = page[nextInPage]; nextInPage+=1; return result; } @Override public NodeContainer next() { NodeContainer result = new NodeContainer(); result.node = nextItem; if( currentTransformation.getDataUpload().isLido10()) result.schema = NodeContainer.LIDOVS10; else result.schema = NodeContainer.LIDOVS09; nextItem = nextInPage(); return result; } @Override public void remove() { throw new NoSuchMethodError(); } } public static final Logger log = Logger.getLogger( Publication.class ); public static final int ERROR=-1; public static final int OK=0; public static final int IDLE=1; public static final int CONSOLIDATE=2; public static final int VERSION=3; public static final int POSTPROCESS=4; Long dbID; // all affected DataUpload objects List<DataUpload> inputUploads = new ArrayList<DataUpload>(); // example stats on the this publication, more could be collected long itemCount; // which user did the publication User publishingUser; Organization publishingOrganization; // status information on the progress of publication String statusMessage; int statusCode; String report; // when the publication was initiated Date lastProcess; // the final output in zipped form // either one or many files, possible millions BlobWrap zippedOutput; // name of output. 
With this the correct Transformations are selected String targetSchema; // transient only valid while in progress File workdir; File tmpFile; public Long getDbID() { return dbID; } public void setDbID(Long dbID) { this.dbID = dbID; } public List<DataUpload> getInputUploads() { return inputUploads; } public void setInputUploads(List<DataUpload> inputUploads) { this.inputUploads = inputUploads; } public long getItemCount() { return itemCount; } public void setItemCount(long itemCount) { this.itemCount = itemCount; } public User getPublishingUser() { return publishingUser; } public void setPublishingUser(User publishingUser) { this.publishingUser = publishingUser; } public Organization getPublishingOrganization() { return publishingOrganization; } public void setPublishingOrganization(Organization publishingOrganization) { this.publishingOrganization = publishingOrganization; } public String getStatusMessage() { return statusMessage; } public void setStatusMessage(String statusMessage) { this.statusMessage = statusMessage; } public int getStatusCode() { return statusCode; } public void setStatusCode(int statusCode) { this.statusCode = statusCode; } public Date getLastProcess() { return lastProcess; } public void setLastProcess(Date lastProcess) { this.lastProcess = lastProcess; } public String getReport() { return report; } public void setReport(String report) { this.report = report; } public BlobWrap getZippedOutput() { return zippedOutput; } public void setZippedOutput(BlobWrap zippedOutput) { this.zippedOutput = zippedOutput; } public String getTargetSchema() { return targetSchema; } public void setTargetSchema(String targetSchema) { this.targetSchema = targetSchema; } /** * Call this to check if the publication is still valid. * It should check whether changes in the input data (transformations, mappings) * are not reflected here. * * return true if Publication is still valid. 
*/ public boolean validate() { // go through all relevant transformations and check if any have // dates after the process of this Publication. return true; } /** * Check if the current state is still valid. * Check if a new processing round has to be done. * Do it (version, apply changes, pullup of changes, consolidate in one xml-object ) */ public void process() { File consolidated = null; File processed = null; try { if( !upToDateCheck()) { version(); applyChanges(); consolidated = consolidate(); processed = postProcess( consolidated ); writeBack( processed ); setLastProcess(new Date()); setStatusCode(OK); setStatusMessage("Processed and ready for download"); } } catch( Exception e ) { if( getStatusCode() != ERROR ) { setStatusCode(ERROR); setStatusMessage("Publication processing failed with: " + e.getMessage()); } // didn't work, remove transformations from upload getInputUploads().clear(); log.error( "processing of Publication failed.", e ); } finally { if( consolidated != null ) consolidated.delete(); if( processed != null ) processed.delete(); DB.commit(); } } /** * Convenience function to remove an upload. No processing is started. * @param du */ public void removeUpload( DataUpload du ) { Iterator<DataUpload> i = getInputUploads().iterator(); while( i.hasNext() ) { DataUpload du2 = i.next(); if( du2.getDbID() == du.getDbID()) { i.remove(); return; } } } /** * * @param du */ public boolean containsUpload( DataUpload du ) { Iterator<DataUpload> i = getInputUploads().iterator(); while( i.hasNext() ) { DataUpload du2 = i.next(); if( du2.getDbID() == du.getDbID()) { return true; } } return false; } /** * Convenience function to remove an upload, no reprocessing is started. 
* @param du */ public void addUpload( DataUpload du ) { getInputUploads().add( du ); } public List<Transformation> getTransformations() throws Exception { ArrayList<Transformation> al = new ArrayList<Transformation>(); // input uploads need sorting List<DataUpload> l = getInputUploads(); Collections.sort(l, new Comparator<DataUpload>() { public int compare( DataUpload a, DataUpload b ) { if( a.getUploadDate().before(b.getUploadDate())) return -1; if( a.getUploadDate().after( b.getUploadDate())) return 1; return 0; } }); for( DataUpload du: l ) { boolean hasTransformation = false; for( Transformation tr: du.getTransformations()) { String target1 = tr.getMapping().getTargetSchema(); if( target1 != null ) { if( target1.equals( getTargetSchema())) { if( tr.getStatusCode() == Transformation.OK) { al.add( tr ); hasTransformation = true; } } } else { if( getTargetSchema() == null ) { if( tr.getStatusCode() == Transformation.OK) { al.add( tr ); hasTransformation = true; } } } } if( ! hasTransformation ) throw new Exception( "Upload has no suitable Transformation" ); } return al; } // Section with real work /** * All involved Transformations happened before the last process date. * @return */ public boolean upToDateCheck() throws Exception { List<Transformation> l = getTransformations(); Date lastProcess = getLastProcess(); if( lastProcess == null ) return false; for( Transformation tr: l ) { if( ! tr.getEndTransform().before( lastProcess )) return false; } // all happened before last process, we are up to date return true; } /** * Create the List of items with available newer versions. * @throws Exception */ public void version() throws Exception { // do nothing for now } /** * Apply the changeset to the latest version of an item. 
* @throws Exception */ public void applyChanges() throws Exception { // do nothing for now } /** * Create the output XML object / files (not sure yet) * @throws Exception */ public File consolidate() throws Exception { long counter = 0l; File consolidated = File.createTempFile("consolidated", ".zip"); log.debug( "Consolidate into " + consolidated.getAbsolutePath()); ZipOutputStream zos = new ZipOutputStream(new FileOutputStream( consolidated )); Iterator<NodeContainer> i = itemize(); // every 20 seconds is set Ticker t = new Ticker( 20 ); while( i.hasNext() ) { NodeContainer nc = i.next(); XMLNode n = nc.node; XMLNode tree = DB.getXMLNodeDAO().wrappedDOMTree(n); counter += 1; String entryname; if( nc.schema == NodeContainer.LIDOVS09) entryname = "lido09_"+n.getNodeId() + ".xml"; else entryname = "lido10_"+n.getNodeId() + ".xml"; zos.putNextEntry(new ZipEntry(entryname )); PrintWriter pw = new PrintWriter( new OutputStreamWriter(zos, "UTF8" ) { public void close() {}; } ); tree.toXml(pw); pw.flush(); zos.closeEntry(); if( t.isSet() ) { t.reset(); log.debug( "Consolidated " + counter + " items for " + getPublishingOrganization().getName() ); setStatusMessage("Consolidated " + counter + " items." ); DB.commit(); } DB.getSession().evict(n); } zos.flush(); zos.close(); t.cancel(); log.debug( "Consolidated " + counter + " items for " + getPublishingOrganization().getName() ); return consolidated; } /** * Do some processing after the consolidation?? * @throws Exception */ public File postProcess( File input ) throws Exception { return input; } /** * Not sure what I need this yet. Somehow the ZIP approach to store item XML * might be performance desaster, I don't trust it will last. Final version * probably is memory mapped big file with index file. 
* @return * @throws IOException */ public static File createTempDirectory() throws IOException { final File temp; temp = File.createTempFile("temp", Long.toString(System.nanoTime())); if(!(temp.delete())) { throw new IOException("Could not delete temp file: " + temp.getAbsolutePath()); } if(!(temp.mkdir())) { throw new IOException("Could not create temp directory: " + temp.getAbsolutePath()); } return (temp); } /** * Write all input items into the workdir in some form. It could be a million, so files in one dir * is not a good approach ( doesn't scale ). * - try zip archive?? * - subdir approach ? * - one file with index file ? * * @throws Exception */ public Iterator<NodeContainer> itemize() throws Exception { return null; } /** * This method write a line with different mapping of item and item parts * to sortable (float, string) values. Sorting by these values and comparing * adjacient values establishes how similar the items * are for that indexed value and this similarity will be used for item to item * scoring. * * Standard is to write item_id and checksum out, so that duplication can be eliminated. * * @param tree * @param out */ public void writeIndexLine( XMLNode tree, PrintWriter out ) { out.write( tree.getNodeId()+"\t" ); out.write( tree.getChecksum() + "\n"); } /** * Score two values in given column against each other. If they are considered too far * for scoring throw Exception to advance the scoring window. * * Default is identity scoring (1 if identical, Exception if not) * @param column * @param item1Value * @param item2Value * @return * @throws Exception */ public float partialScore( int column, String item1Value, String item2Value ) throws Exception { if( item1Value.equals( item2Value )) return 1f; throw new Exception(); } /** * If a column is numeric, the sorting will happen numerically otherwise * lexically. Usually indices will be numeric unless the score is an identity check. * * The default is the identity check index. 
* @param column * @return */ public boolean isNumericIndex( int column ) { return false; } /** * First creates the scores for each attribute that needs to be scored. * Then combines the scores for each pair. */ public void buildScoringMatrix() { /* * for column=1 to index count * sort index file by column * create scoring file * for line in index file * addLineToWindow( window, line, scoreColumn, scoreWriter ) * sort scoring files on first and second id * open each scoring file and walk through, calling the * score accumulating function with all scores for one pair of * ids. */ } /** * Scores added line against all other lines in the window and removes * lines from the window that are no longer in scoring range. * Writes scores to partialScores and assumes tab delimited files with * node id on first position (0-column) * @param window * @param line * @param scoreColumn * @param partialScores */ private void addLineToWindow( LinkedList<String[]> window, String line, int scoreColumn, PrintWriter partialScores ) { String[] fields = line.split("\\t"); window.add(fields); ListIterator<String[]> i = window.listIterator(); while( i.nextIndex() < window.size()-1 ) { String comp[] = i.next(); try { float score = partialScore( scoreColumn, fields[scoreColumn], comp[scoreColumn]); long id1 = Long.parseLong(fields[0]); long id2 = Long.parseLong( comp[0] ); if( id1 < id2 ) partialScores.println( fields[0]+ "\t"+ comp[0] + "\t" + score ); else partialScores.println( comp[0]+ "\t"+ fields[0] + "\t" + score ); } catch( Exception e ) { // this line is out of the window i.remove(); } } } /** * How many partial scores do you want to produce? Default is * 1 for the checksum identity of item. * @return */ public int getIndexCount() { return 1; } /** * The given file (which needs to be a ZIP archive) is written back as * BLOB to the database. 
* @param result */ public void writeBack( File result ) { try { zippedOutput = new BlobWrap(); zippedOutput.data = Hibernate.createBlob( new FileInputStream( result ), (int) result.length()); setStatusCode(OK); DB.commit(); // result.delete(); } catch( Exception e ) { log.error( "Writeback failed!", e ); try { setStatusCode(ERROR); setStatusMessage(e.getMessage()); DB.commit(); } catch( Exception e2 ) { log.error( "Status update failed as well!!", e2 ); } } } public File getTmpFile(){ return this.tmpFile; } public void unloadToTmpFile() { try { tmpFile = File.createTempFile("unloadPublication", ".zip"); tmpFile.deleteOnExit(); log.info( "Unloading to " + tmpFile.getAbsolutePath()); FileOutputStream fos = new FileOutputStream( tmpFile ); BufferedOutputStream bos = new BufferedOutputStream( fos,4096 ); InputStream is = getZippedOutput().getData().getBinaryStream(); IOUtils.copy(is, bos); is.close(); bos.flush(); bos.close(); DB.commit(); } catch( Exception e ) { log.error( "Cannot copy BLOB to tmp file", e ); } } /** * Returns a stream to a zip archive. Please cleanup after finished with the Stream. * @return */ public InputStream getDownloadStream() { InputStream is = null; if( tmpFile == null ) unloadToTmpFile(); try { is = new FileInputStream(tmpFile); } catch( Exception e ) { log.error( "File unload problem", e); } return is; } /** * delete the tmp file after using the Download Stream. This will be automated later. */ public void cleanup() { tmpFile.delete(); } } /* * How should the process work? * a) Collect all the items from the transformations, building an index of each item, which should allow for the following: * - access each item * - score items against each other, the index might contain many columns with scores on certain metrics * scores between items are only build from neighboring items in the index (avoid n^2 complexity) * - the collection is happening as XML in files! 
- current approach, one ZIP archive, but this might not work for * millions of items * * b) .. skip other steps so far .. * c) post process by XSL transform to ESE * d) final result is uploaded as ZIP archive to database. */