package gr.ntua.ivml.athena.persistent;
import gr.ntua.ivml.athena.concurrent.Queues;
import gr.ntua.ivml.athena.concurrent.Ticker;
import gr.ntua.ivml.athena.db.DB;
import gr.ntua.ivml.athena.persistent.Transformation.MyZipOutputStream;
import gr.ntua.ivml.athena.util.Config;
import gr.ntua.ivml.athena.xml.SchemaValidator;
import gr.ntua.ivml.athena.xml.transform.XMLFormatter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.Enumeration;
import java.util.Iterator;
import javax.xml.validation.Schema;
import javax.xml.validation.ValidatorHandler;
import org.apache.commons.io.FileUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import de.schlichtherle.util.zip.ZipEntry;
import de.schlichtherle.util.zip.ZipFile;
/**
* Publication subclass that implements the Athena LIDO-to-ESE conversion logic.
* Uses superclass where convenient.
*
* The mapping will rely on the name of the targetSchema, see
* Publication.hbm.xml
* @author Arne Stabenau
*
*/
public class EsePublication extends Publication {
    /** Number of processed items after which a run that produced no ESE record at all is aborted. */
    private static final int ABORT_THRESHOLD = 100;

    /** Once the report has grown to this many characters no further per-item problems are appended. */
    private static final int MAX_REPORT_LENGTH = 50000;

    File toProcess;
    /** Stylesheet sources, loaded from the configured files at the start of {@link #postProcess(File)}. */
    String xsl09, xsl10;
    /** Transformer used by {@link #transformEntry(String, InputStream, OutputStream, String)}. */
    gr.ntua.ivml.athena.xml.transform.XSLTransform t = new gr.ntua.ivml.athena.xml.transform.XSLTransform();

    /**
     * Minimal mutable counter; needed because the anonymous SAX handler in
     * {@link #postProcess(File)} can only capture final locals.
     */
    private static class Counter {
        private int count = 0;
        void inc() { count += 1; }
        int get() { return count; }
        void reset() { count = 0; }
    }

    /**
     * Iterates over the nodes matching <code>/lidoWrap/lido</code> in the transformations
     * of this publication.
     */
    @Override
    public Iterator<NodeContainer> itemize() throws Exception {
        return new PathIterator( getTransformations(), "/lidoWrap/lido");
    }

    /**
     * Convert from Lido to ESE.
     *
     * Every non-directory entry of <code>input</code> whose name ends in "xml" is transformed
     * with one of the two configured stylesheets (entries named "lido09_..." use the
     * <code>lido_to_ese_xsl</code> stylesheet, everything else the <code>lido1.0_to_ese_xsl</code>
     * one), parsed through an ESE validator handler and, if no exception was raised, added to
     * the result zip under the same entry name. Items that fail are counted and described in
     * the report; the report is also stored as "lido_to_ese_report.txt" inside the result.
     *
     * The run is aborted when the first {@value #ABORT_THRESHOLD} items have not produced a
     * single ESE record, and it fails when no ESE record was produced at all.
     *
     * @param input zip file with the lido items; entry names are expected to look like
     *              <code>&lt;7 character prefix&gt;&lt;node id&gt;.xml</code>
     * @return a temporary zip file holding the ESE documents and the report
     * @throws Exception on any problem that is not tied to a single item, or when the run is
     *                   aborted. The temporary result file is removed in that case.
     */
    public File postProcess( File input ) throws Exception {
        File result = File.createTempFile("PubPostProcess", ".zip");
        MyZipOutputStream postProcessOutput = new MyZipOutputStream(new FileOutputStream(result));
        ZipFile bz = null;
        Ticker ticker = null;
        boolean succeeded = false;
        try {
            setStatusCode(POSTPROCESS);
            setStatusMessage("Starting post process" );
            DB.commit();

            File xslFile = new File( Config.getRealPath(Config.get( "lido_to_ese_xsl" )));
            xsl09 = FileUtils.readFileToString( xslFile , "UTF-8");
            xslFile = new File( Config.getRealPath( Config.get( "lido1.0_to_ese_xsl" )));
            xsl10 = FileUtils.readFileToString( xslFile , "UTF-8");

            try {
                bz = new ZipFile( input );
                int count = bz.size();
                log.info( "Postprocessing " + count + " items started.");
                int eseItems = 0;
                int lidoItems = 0;
                int failed = 0;
                Enumeration<ZipEntry> entries = bz.entries();
                StringBuilder processReport = new StringBuilder();
                if( report != null ) processReport.append(report);

                // NOTE(review): these two are not reset per entry. A failure that happens before
                // the node id of an entry is parsed is therefore reported against the previous
                // entry's node (or aborts the run if there was none yet) - confirm this is intended.
                long errorNodeId = -1L;
                String errorSrc = "";

                // report every 20 seconds on progress
                ticker = new Ticker( 20 );

                // prepare the validating parser and handler
                Schema eseSchema = SchemaValidator.getEseSchema();
                ValidatorHandler vh = eseSchema.newValidatorHandler();
                final Counter eseCount = new Counter();
                ContentHandler eseCountHandler = new DefaultHandler() {
                    @Override
                    public void startElement( String uri, String localName, String qName, Attributes atts ) {
                        if( "record".equals( localName ) || "record".equals( qName )) eseCount.inc();
                    }
                };
                vh.setContentHandler(eseCountHandler);
                XMLReader parser = org.xml.sax.helpers.XMLReaderFactory.createXMLReader();
                parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
                parser.setContentHandler(vh);

                while( entries.hasMoreElements() ) {
                    ZipEntry ze = (de.schlichtherle.util.zip.ZipEntry) entries.nextElement();
                    // skip before opening a stream, so ignored entries cost nothing
                    if( ze.isDirectory() || !ze.getName().endsWith("xml")) continue;

                    InputStream zis = bz.getInputStream(ze);
                    ByteArrayOutputStream bos = new ByteArrayOutputStream();
                    try {
                        lidoItems += 1;
                        String num = ze.getName();
                        String xsl = num.startsWith("lido09_") ? xsl09 : xsl10;
                        // strip the 7 character prefix and the ".xml" suffix, the rest is the node id
                        num = num.substring(7, num.length()-4);
                        errorNodeId= Long.parseLong(num);

                        errorSrc = "Lido_to_Ese transformation";
                        transformEntry(ze.getName(), zis, bos,xsl);

                        errorSrc = "Ese validation";
                        InputSource ins = new InputSource();
                        ins.setByteStream(new ByteArrayInputStream(bos.toByteArray()));
                        // check ese is valid
                        // I hope this throws when things are not valid
                        parser.parse( ins );

                        postProcessOutput.putNextEntry(new ZipEntry( ze.getName()));
                        bos.writeTo(postProcessOutput);
                        // NOTE(review): close() is called per entry and finished() once at the end, so
                        // close() presumably only ends the current entry - confirm in MyZipOutputStream.
                        postProcessOutput.close();
                        eseItems += eseCount.get();
                    } catch( Exception e ) {
                        failed += 1;
                        if( processReport.length() < MAX_REPORT_LENGTH ) {
                            if( errorNodeId != -1L ) {
                                String[] ids = resolveNode( errorNodeId );
                                processReport.append( "\nItem " + ze.getName() );
                                processReport.append( " URL:(PreviewError?nodeId="+errorNodeId+"&transformationId="+ids[1]);
                                processReport.append( "&uploadId="+ids[2]+"&errorSrc="+errorSrc +")" );
                                processReport.append( " had problems: \n" );
                                processReport.append( e.getMessage() + "\n");
                            } else {
                                // not related to a specific node, we are done with an error
                                setReport( processReport.toString());
                                DB.commit();
                                throw e;
                            }
                        }
                    } finally {
                        eseCount.reset();
                        closeQuietly( zis );
                    }

                    // Some quick way out if things don't go well. This check has to live outside
                    // the per-item try block: thrown from inside, the abort would be caught by
                    // the per-item handler above and recorded as just another failed item.
                    if(( lidoItems >= ABORT_THRESHOLD ) && ( eseItems == 0 )) {
                        setReport( processReport.toString());
                        DB.commit();
                        if( failed == lidoItems ) {
                            throw new Exception( "Publication aborted after " + ABORT_THRESHOLD + " consecutive failures.");
                        }
                        throw new Exception( "Publication aborted, no ese records are produced.");
                    }

                    if( ticker.isSet() ) {
                        ticker.reset();
                        setStatusMessage( "Postprocessed " + lidoItems + " items of " + count + " (failed " + failed + ")");
                        log.debug( "Postprocessed " + lidoItems + " items of " + count + " (failed " + failed + ")");
                        DB.commit();
                    }
                }

                if( eseItems > 0 ) {
                    processReport.append( "\nTransformed " + lidoItems + " lido records to " + eseItems + " ese records.\n" );
                    setItemCount(eseItems);
                    if( failed != 0 ) {
                        processReport.append( failed + " items were excluded due to problems.\n" );
                    }
                    postProcessOutput.putNextEntry( new ZipEntry( "lido_to_ese_report.txt" ));
                    postProcessOutput.write( processReport.toString().getBytes("UTF-8"));
                    postProcessOutput.close();
                    postProcessOutput.finished();
                    // finished already, keep the cleanup below from doing it a second time
                    postProcessOutput = null;
                    log.info( "Finished creating " + result.getAbsolutePath());
                    setStatusMessage( "Postprocessed " + lidoItems + " items.");
                    setReport( processReport.toString());
                    DB.commit();
                } else {
                    setReport( processReport.toString());
                    throw new Exception( "No item could be transformed!" );
                }
                succeeded = true;
                return result;
            } catch( Exception e ) {
                log.error( "General post processing problem ", e );
                if( getStatusCode() != ERROR ) {
                    setStatusMessage(e.getMessage());
                    setStatusCode(ERROR);
                    DB.commit();
                }
                throw e;
            }
        } finally {
            // every cleanup step runs even if an earlier one throws
            try {
                if( ticker != null ) ticker.cancel();
            } finally {
                try {
                    if( bz != null ) bz.close();
                } finally {
                    try {
                        if( postProcessOutput != null ) postProcessOutput.finished();
                    } finally {
                        // nobody receives the file when we leave with an exception, so remove it
                        if( !succeeded && !result.delete()) {
                            log.info( "Could not remove temporary file " + result.getAbsolutePath());
                        }
                    }
                }
            }
        }
    }

    /**
     * Transforms <code>is</code> with the given stylesheet and writes the formatted result
     * to <code>bos</code>.
     *
     * The transformer writes into a pipe while a formatter job, queued on the "now" queue,
     * reads from the other end of that pipe and writes into <code>bos</code>. The method
     * returns after the formatter job has been joined.
     *
     * @param name not used by this implementation
     * @param is   source document
     * @param bos  receives the formatted output; it is closed by this method
     * @param xsl  stylesheet source
     * @throws Exception if the transformation, the pipe handling or closing a stream fails
     */
    public void transformEntry(String name, InputStream is, OutputStream bos, String xsl) throws Exception {
        // transform into pipe (pos) and format from pipe (pis) into the output (os)
        final OutputStream os = bos;
        final PipedOutputStream pos = new PipedOutputStream();
        final PipedInputStream pis = new PipedInputStream( pos );
        Runnable formatter = new Runnable() {
            public void run() {
                XMLFormatter.format(pis, os);
            }
        };
        try {
            Queues.queue(formatter, "now");
            t.transform(is, xsl, pos );
            pos.flush();
            // closing the write end signals end of input to the formatter
            pos.close();
            Queues.join( formatter );
        } finally {
            // nested, so a failing close cannot leave the remaining pipe ends open
            try {
                os.close();
            } finally {
                try {
                    pis.close();
                } finally {
                    pos.close();
                }
            }
        }
    }

    /**
     * Get the relevant data for this node
     * - xml object id
     * - transformation_id
     * - upload_id
     * @param nodeId
     * @return Strings with the ids, in the order listed above; an id that cannot be
     *         resolved is reported as "-1"
     */
    String[] resolveNode( long nodeId ) {
        String[] res = { "-1", "-1", "-1" };
        XmlObject xo = DB.getXmlObjectDAO().findByNodeId(nodeId);
        if( xo == null ) return res;
        res[0] = xo.getDbID().toString();

        Transformation tr = DB.getTransformationDAO().findByXmlObject( xo );
        if( tr == null ) return res;
        res[1] = tr.getDbID().toString();

        // This method runs while an item error is being reported, so it must not fail
        // itself when the transformation has no upload attached.
        DataUpload du = tr.getDataUpload();
        if( du != null ) {
            res[2] = du.getDbID().toString();
        }
        return res;
    }

    /** Closes the stream, ignoring a failing or repeated close; null is accepted. */
    private static void closeQuietly( InputStream in ) {
        if( in == null ) return;
        try {
            in.close();
        } catch( java.io.IOException ignored ) {
            // the entry has been dealt with already, a failing close must not fail the item
        }
    }
}