package gr.ntua.ivml.mint.persistent;
import gr.ntua.ivml.mint.concurrent.Ticker;
import gr.ntua.ivml.mint.db.DB;
import gr.ntua.ivml.mint.util.Config;
import gr.ntua.ivml.mint.xml.PathIterator;
import gr.ntua.ivml.mint.xsd.SchemaValidator;
import gr.ntua.ivml.mint.persistent.Transformation.MyZipOutputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import javax.xml.validation.Schema;
import javax.xml.validation.ValidatorHandler;
import org.apache.commons.io.FileUtils;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import de.schlichtherle.io.FileOutputStream;
import de.schlichtherle.util.zip.ZipEntry;
/**
* What is done when publishing to given schema. Set publication xsl in conf file of transformation schema.
*/
public class SchemaPublication extends Publication {
gr.ntua.ivml.mint.xml.transform.XSLTransform transformXSL = new gr.ntua.ivml.mint.xml.transform.XSLTransform();
private static class Counter {
int count = 0;
void inc() { count += 1; }
int get() { return count; }
void reset() { count = 0; };
};
public List<Transformation> getTransformations() throws Exception {
ArrayList<Transformation> al = new ArrayList<Transformation>();
// input uploads need sorting
List<DataUpload> l = getInputUploads();
Collections.sort(l, new Comparator<DataUpload>() {
public int compare( DataUpload a, DataUpload b ) {
if( a.getUploadDate().before(b.getUploadDate())) return -1;
if( a.getUploadDate().after( b.getUploadDate())) return 1;
return 0;
}
});
// make the List of Transformations
List<Transformation> lt = new ArrayList<Transformation>();
boolean hasTransformation = false;
for(DataUpload du: getInputUploads()) {
for( Transformation tr: du.getTransformations()) {
al.add( tr );
hasTransformation = true;
break;
}
}
if( ! hasTransformation ) throw new Exception( "Upload has no suitable Transformation" );
return al;
}
public void process() {
File processed = null;
try {
processed = postProcess();
writeBack( processed );
setLastProcess(new Date());
setStatusCode(Publication.OK);
setStatusMessage("Processed and ready for download");
} catch( Exception e ) {
if( getStatusCode() != ERROR ) {
setStatusCode(ERROR);
setStatusMessage("Publication processing failed with: " + e.getMessage());
}
// didn't work, remove transformations from upload
getInputUploads().clear();
log.error( "processing of Publication failed.", e );
} finally {
if( processed != null ) processed.delete();
DB.commit();
}
}
/**
* Iterate over involved uploads and
* - check if they have a successful transformation
* - and each transformation has xsl
* - collect all items in a zip together xsl and output.
*/
public File postProcess() throws Exception {
Ticker t = new Ticker(30);
long currentItemNo = 1l;
tmpFile = File.createTempFile("final_pub_", ".zip" );
MyZipOutputStream zos = new MyZipOutputStream(new FileOutputStream(tmpFile));
List<DataUpload> toberemovedUploads = new ArrayList<DataUpload>();
try {
//parser = new Builder();
setStatusCode(Publication.PROCESS);
DB.commit();
StringBuilder processReport = new StringBuilder();
if( report != null ) processReport.append(report);
long totalItemCount = sumInputItems();
setStatusMessage("Publishing " + totalItemCount + " input items.");
if( tmpFile != null ) {
tmpFile.delete();
}
Schema publicationSchema = SchemaValidator.getSchema( Config.getRealPath(Config.get("publicationSchema")));
ValidatorHandler vh = publicationSchema.newValidatorHandler();
final Counter publishedCount = new Counter();
ContentHandler publishedCountHandler = new DefaultHandler() {
public void startElement( String uri, String localName, String qName, Attributes atts ) {
if( "record".equals( localName ) || "record".equals( qName )) publishedCount.inc();
}
};
vh.setContentHandler(publishedCountHandler);
XMLReader parser = org.xml.sax.helpers.XMLReaderFactory.createXMLReader();
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
parser.setContentHandler(vh);
log.debug( "Publication: " + getDbID() + " Items:" + currentItemNo + "/" + totalItemCount );
int publishedItems = 0;
int transformedItems = 0;
int failed = 0;
for( DataUpload du: getInputUploads()) {
Transformation tr = null;
for( Transformation tr2: du.getTransformations()) {
if(( tr2.getStatusCode() == Transformation.OK ) )
tr = tr2;
}
if( tr == null )
{
processReport.append("Upload " + du.getOriginalFilename() + " does not contain suitable Transformation!\n\n");
throw new Exception( "Upload " + du.getOriginalFilename() + " does not contain suitable Transformation!");
}
String xslfile="";
XmlSchema xsch;
if(du.isDirect()){xslfile=du.getDirectSchema().getPublicationXSL();xsch=du.getDirectSchema();}
else{xslfile=tr.getMapping().getTargetSchema().getPublicationXSL();xsch=tr.getMapping().getTargetSchema();}
if(xslfile==null){
throw new Exception( "Can't find a publication target schema for upload " + du.getOriginalFilename() + ".");
}
File xslFile = new File(Config.getXSLPath(xslfile));
String xsl = FileUtils.readFileToString( xslFile , "UTF-8");
Iterator<XMLNode> iter = PathIterator.fromTransform(tr,xsch);
long errorNodeId = -1l;
int published_transformed=0;
int maxerror=100;
int cur_transformed=0;
if(du.getItemCount()<100){maxerror=(int)du.getItemCount();}
while( iter.hasNext()) {
transformedItems++;
cur_transformed++;
OutputStreamWriter writer = new OutputStreamWriter( zos, "UTF8" );
try{
XMLNode node = iter.next();
errorNodeId=node.getNodeId();
XMLNode wrappedNode = XMLNode.buildItemWrapTree(node);
String transformedItem=transformItemEntry(wrappedNode.toXml(),xsl);
//now parse output to see if correct
InputSource ins = new InputSource();
ins.setByteStream(new ByteArrayInputStream(transformedItem.getBytes("UTF-8")));
// check publication is valid
parser.parse( ins );
publishedItems += publishedCount.get();
if(publishedCount.get()>0){
published_transformed++;
}
zos.putNextEntry(new ZipEntry( "output_"+node.getNodeId()+".xml"));
writer.write(transformedItem);
writer.flush();
zos.closeEntry();
zos.close();
} catch( Exception e ) {
failed += 1;
if( processReport.length() < 50000 ) {
if( errorNodeId != -1l ) {
processReport.append( "\nItem output_" +errorNodeId+".xml from import '"+du.getOriginalFilename()+"'" );
processReport.append( " URL:(PreviewError?transformedNodeId="+errorNodeId+")");
processReport.append( " had problems: \n" );
processReport.append( e.getMessage() + "\n");
} else {
// not related to a specific node, we are done with an error
setReport( processReport.toString());
DB.commit();
throw e;
}
}
if(( cur_transformed == maxerror ) && (published_transformed == 0 )) {
processReport.append("\n\nPublication aborted for import '"+du.getOriginalFilename()+"' after "+maxerror+" consecutive failures.\n\n");
failed=failed+(int)(du.getItemCount()-maxerror);
transformedItems=transformedItems+((int)du.getItemCount()-maxerror);
setReport( processReport.toString());
toberemovedUploads.add(du);
DB.commit();
}
}finally {
publishedCount.reset();
}
if( t.isSet()) {
t.reset();
setStatusMessage( "Postprocessed " + transformedItems + " items of " + totalItemCount + " (failed " + failed + ")");
log.debug( "Postprocessed " + transformedItems + " items of " + totalItemCount + " (failed " + failed + ")");
DB.commit();
}
currentItemNo+=1;
if(( cur_transformed == maxerror ) && (published_transformed == 0 )) {break;}
}//end while
}//end for
if( publishedItems > 0 ) {
processReport.append( "\nTransformed " + transformedItems + " records to " + publishedItems + " records.\n" );
setItemCount(publishedItems);
if( failed != 0 ) {
processReport.append( failed + " items were excluded due to problems.\n" );
}
zos.putNextEntry( new ZipEntry( "publication_report.txt" ));
zos.write( processReport.toString().getBytes("UTF-8"));
zos.close();
zos.finished();
zos = null;
log.info( "Finished creating " + tmpFile.getAbsolutePath());
setStatusMessage( "Postprocessed " + transformedItems + " items.");
setReport( processReport.toString());
if(toberemovedUploads.size()>0){
for(DataUpload d:toberemovedUploads){
this.removeUpload(d);
}
DB.getPublicationDAO().makePersistent(this);
}
DB.commit();
} else {
setReport( processReport.toString());
throw new Exception( "No item could be transformed!" );
}
// not sure this is needed
t.cancel();
return tmpFile;
} catch( Exception e ) {
log.error( "Publication: " + getDbID() + "CurrentItemNo:" + currentItemNo + "\nError: " , e );
if( getStatusCode() != Publication.ERROR) {
setStatusCode(Publication.ERROR);
setStatusMessage( "Publication:"+ getDbID() + " Error:" + e.getMessage() );
DB.commit();
}
throw e;
} finally {
t.cancel();
if( zos != null ) zos.finished();
}
}
public String transformItemEntry(String item,String xsl) throws Exception {
String result="";
result=transformXSL.transform(item, xsl);
return result;
}
}