package org.juxtasoftware.service;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.StringReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.input.ReaderInputStream;
import org.juxtasoftware.dao.JuxtaXsltDao;
import org.juxtasoftware.dao.NoteDao;
import org.juxtasoftware.dao.PageMarkDao;
import org.juxtasoftware.dao.SourceDao;
import org.juxtasoftware.dao.WitnessDao;
import org.juxtasoftware.model.JuxtaXslt;
import org.juxtasoftware.model.Note;
import org.juxtasoftware.model.PageMark;
import org.juxtasoftware.model.RevisionInfo;
import org.juxtasoftware.model.Source;
import org.juxtasoftware.model.Witness;
import org.juxtasoftware.util.HtmlUtils;
import org.juxtasoftware.util.WikiTextUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.BeanDefinition;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import eu.interedition.text.Text;
import eu.interedition.text.TextConsumer;
import eu.interedition.text.TextRepository;
@Service
@Scope(BeanDefinition.SCOPE_PROTOTYPE)
@Transactional
public class SourceTransformer {
    @Autowired private TextRepository textRepository;
    @Autowired private WitnessDao witnessDao;
    @Autowired private JuxtaXsltDao xsltDao;
    @Autowired private NoteDao noteDao;
    @Autowired private PageMarkDao pbDao;
    @Autowired private SourceDao sourceDao;

    /**
     * Re-run the transform of <code>srcDoc</code> into the existing witness
     * <code>origWit</code>. The witness notes, page marks and revisions are
     * deleted, the witness content is replaced with the result of transforming
     * the source again and, for XML sources, the special tags are re-extracted.
     *
     * @param srcDoc The source to transform again
     * @param origWit The existing witness whose content is replaced
     * @throws SAXException
     * @throws IOException
     * @throws TransformerException
     */
    public void redoTransform( Source srcDoc, Witness origWit ) throws SAXException, IOException, TransformerException {
        // only XML sources are transformed with a parse template; look up the
        // one that the witness was originally created with
        JuxtaXslt xslt = null;
        if (srcDoc.getType().equals(Source.Type.XML)) {
            xslt = this.xsltDao.find( origWit.getXsltId());
        }

        // clear out old witness stuff; annotations, page breaks and notes - BUT NOT text.
        // The witness still refers to its text at this point, so the text
        // must stay in place until the witness content has been updated.
        this.noteDao.deleteAll( origWit.getId() );
        this.pbDao.deleteAll( origWit.getId() );
        this.witnessDao.clearRevisions( origWit );

        // redo the transform and store the results on the witness
        Text parsedContent = parseContent(srcDoc, xslt);
        this.witnessDao.updateContent(origWit, parsedContent);

        // extract pb, note and revision tags of xml documents. The witness
        // is looked up again so the extraction uses the updated record.
        if ( xslt != null ) {
            extractSpecialTags(srcDoc, this.witnessDao.find(origWit.getId()), xslt );
        }
    }

    /**
     * Transform <code>srcDoc</code> into a new witness with the name
     * <code>finalName</code>. XML sources are transformed using the XSLT
     * contained in <code>xslt</code>. The resulting witness ID is returned.
     *
     * @param srcDoc The source to be transformed into a witness
     * @param xslt The parse template used to do the transform. Must not be
     *        null for XML sources; when null, no special tags are extracted.
     * @param finalName The name of the resulting witness
     * @return The new witness ID
     * @throws SAXException
     * @throws IOException
     * @throws TransformerException
     */
    public Long transform(final Source srcDoc, final JuxtaXslt xslt, final String finalName) throws SAXException, IOException, TransformerException {
        // transform into a new text_content object
        Text parsedContent = parseContent(srcDoc, xslt);

        // use the transformed content to create a juxta witness
        Witness witness = new Witness();
        witness.setName(finalName);
        witness.setSourceId(srcDoc.getId());
        if ( xslt != null ) {
            witness.setXsltId(xslt.getId());
        }
        witness.setText(parsedContent);
        witness.setWorkspaceId( srcDoc.getWorkspaceId() );
        Long id = this.witnessDao.create(witness);
        witness.setId(id);

        // extract pb, note and revision tags of xml documents
        if ( xslt != null ) {
            extractSpecialTags(srcDoc, witness, xslt);
        }
        return id;
    }

    /**
     * Create the witness text for <code>srcDoc</code>, picking the conversion
     * based on the source type: XSLT for XML, text extraction for HTML and
     * WIKI, and a straight copy of the source text for everything else.
     *
     * @param srcDoc The source to convert
     * @param xslt The parse template; only used for XML sources
     * @return The newly created text
     */
    private Text parseContent(Source srcDoc, JuxtaXslt xslt) throws SAXException, IOException, TransformerException {
        if (srcDoc.getType().equals(Source.Type.XML)) {
            return doTransform(srcDoc, xslt);
        }
        if ( srcDoc.getType().equals(Source.Type.HTML) ) {
            return doHtmlTransform(srcDoc);
        }
        if ( srcDoc.getType().equals(Source.Type.WIKI) ) {
            return doWikiTransform(srcDoc);
        }
        NullTransformReader rdr = new NullTransformReader();
        this.textRepository.read(srcDoc.getText(), rdr);
        return rdr.getContent();
    }

    /**
     * Convert the wiki content of <code>srcDoc</code> to text with
     * {@link WikiTextUtils#toTxt} and store the result in the text repository.
     */
    private Text doWikiTransform(Source srcDoc) throws IOException {
        File txtFile;
        Reader srcReader = this.sourceDao.getContentReader(srcDoc);
        try {
            txtFile = WikiTextUtils.toTxt( new ReaderInputStream(srcReader, "UTF-8") );
        } finally {
            closeQuietly(srcReader);
        }
        try {
            return readTextFile(txtFile);
        } finally {
            discardTempFile(txtFile);
        }
    }

    /**
     * Convert the HTML content of <code>srcDoc</code> to text with
     * {@link HtmlUtils#toTxt} and store the result in the text repository.
     */
    private Text doHtmlTransform(Source srcDoc) throws IOException {
        File txtFile;
        Reader srcReader = this.sourceDao.getContentReader(srcDoc);
        try {
            txtFile = HtmlUtils.toTxt( new ReaderInputStream(srcReader, "UTF-8") );
        } finally {
            closeQuietly(srcReader);
        }
        try {
            return readTextFile(txtFile);
        } finally {
            discardTempFile(txtFile);
        }
    }

    /**
     * Apply <code>xslt</code> to the XML content of <code>srcDoc</code> and
     * store the output in the text repository. The output is staged in a
     * temporary file that is removed again whether or not the transform succeeds.
     *
     * @param srcDoc The XML source
     * @param xslt The parse template; must not be null
     * @return The newly created text
     */
    private Text doTransform(Source srcDoc, JuxtaXslt xslt) throws IOException, TransformerException, FileNotFoundException, SAXException {
        File outFile = File.createTempFile("xform"+srcDoc.getId(), ".xml");
        try {
            writeTransformResult(srcDoc, xslt, outFile);
            // create a text repo entry for the new text
            return readTextFile(outFile);
        } finally {
            discardTempFile(outFile);
        }
    }

    /**
     * Run the XSLT transform of <code>srcDoc</code>, writing the UTF-8 encoded
     * result to <code>outFile</code>. Both the source reader and the output
     * writer are opened here, so both are closed here.
     */
    private void writeTransformResult(Source srcDoc, JuxtaXslt xslt, File outFile) throws IOException, TransformerException, SAXException {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        reader.setEntityResolver(new EntityResolver() {
            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
                // Substitute blank content for DTD and entity files so the
                // parse never tries to load them.
                // NOTE(review): any other external entity still gets the
                // parser default handling - confirm that is acceptable for
                // the documents accepted here.
                if (systemId != null && (systemId.endsWith(".dtd") || systemId.endsWith(".ent"))) {
                    return new InputSource(new StringReader(" "));
                }
                return null; // use default behavior
            }
        });

        Reader srcReader = this.sourceDao.getContentReader(srcDoc);
        try {
            OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), "UTF-8");
            try {
                SAXSource xmlSource = new SAXSource(reader, new InputSource( srcReader ));
                javax.xml.transform.Source xsltSource = new StreamSource( new StringReader(xslt.getXslt()) );
                javax.xml.transform.Result result = new StreamResult( writer );

                // create an instance of TransformerFactory and do the transform
                TransformerFactory factory = TransformerFactory.newInstance( );
                Transformer transformer = factory.newTransformer(xsltSource);
                transformer.setOutputProperty(OutputKeys.INDENT, "no");
                transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
                transformer.setOutputProperty(OutputKeys.MEDIA_TYPE, "text");
                transformer.transform(xmlSource, result);

                // Close on the success path without swallowing errors: a
                // failed flush means the file content is incomplete.
                writer.close();
            } finally {
                // no-op after the close above; releases the file on failure
                closeQuietly(writer);
            }
        } finally {
            closeQuietly(srcReader);
        }
    }

    /**
     * Create a text repository entry from the UTF-8 content of <code>txtFile</code>.
     */
    private Text readTextFile(File txtFile) throws IOException {
        InputStreamReader isr = new InputStreamReader(new FileInputStream(txtFile), "UTF-8");
        try {
            return this.textRepository.create( isr );
        } finally {
            closeQuietly(isr);
        }
    }

    /**
     * Delete a temporary file. Deletion at JVM exit is only requested when
     * the immediate delete fails, so successful deletes do not add entries
     * to the JVM delete-on-exit list.
     */
    private static void discardTempFile(File tmpFile) {
        if ( tmpFile != null && !tmpFile.delete() ) {
            tmpFile.deleteOnExit();
        }
    }

    /**
     * Close <code>resource</code>, ignoring a null argument and any failure
     * of the close itself.
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if ( resource == null ) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Used for cleanup only; a close failure must not hide the
            // result or the exception of the work that came before it.
        }
    }

    /**
     * Extract tags that require special handling in juxta: Notes, PageBreaks and revisions.
     * Each extracted item is assigned the ID of <code>w</code> and then saved.
     *
     * @param source The source whose content is scanned for tags
     * @param w The witness that the extracted items are attached to
     * @param xslt The parse template passed to the tag extractor
     * @throws SAXException
     * @throws IOException
     */
    public void extractSpecialTags(final Source source, final Witness w, final JuxtaXslt xslt ) throws SAXException, IOException {
        JuxtaTagExtractor extractor = new JuxtaTagExtractor( );
        Reader srcReader = this.sourceDao.getContentReader(source);
        try {
            extractor.extract( srcReader, xslt);
        } finally {
            closeQuietly(srcReader);
        }

        for (Note note : extractor.getNotes() ) {
            note.setWitnessId(w.getId());
        }
        this.noteDao.create(extractor.getNotes());

        for (PageMark pb : extractor.getPageMarks() ) {
            pb.setWitnessId(w.getId());
        }
        this.pbDao.create( extractor.getPageMarks() );

        for (RevisionInfo rev : extractor.getRevisions() ) {
            rev.setWitnessId(w.getId());
        }
        this.witnessDao.addRevisions( extractor.getRevisions() );
    }

    /**
     * Helper class to stream content from an existing plain txt source to
     * a new text_content entry. The new TXT entry is created on construction
     * and filled when the repository calls {@link #read(Reader, long)}.
     *
     * @author loufoster
     */
    private class NullTransformReader implements TextConsumer {
        private Text content;

        public NullTransformReader() {
            this.content = textRepository.create( Text.Type.TXT );
        }

        public Text getContent() {
            return this.content;
        }

        @Override
        public void read(Reader rdrContent, long contentLength) throws IOException {
            textRepository.write(this.content, rdrContent, contentLength);
        }
    }
}