package org.juxtasoftware.service; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Reader; import java.io.StringReader; import javax.xml.stream.XMLStreamException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactoryConfigurationError; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.apache.commons.io.input.ReaderInputStream; import org.juxtasoftware.dao.JuxtaXsltDao; import org.juxtasoftware.dao.NoteDao; import org.juxtasoftware.dao.PageMarkDao; import org.juxtasoftware.dao.SourceDao; import org.juxtasoftware.dao.WitnessDao; import org.juxtasoftware.model.JuxtaXslt; import org.juxtasoftware.model.Note; import org.juxtasoftware.model.PageMark; import org.juxtasoftware.model.RevisionInfo; import org.juxtasoftware.model.Source; import org.juxtasoftware.model.Witness; import org.juxtasoftware.util.HtmlUtils; import org.juxtasoftware.util.WikiTextUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.config.BeanDefinition; import org.springframework.context.annotation.Scope; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.xml.sax.EntityResolver; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; import eu.interedition.text.Text; import eu.interedition.text.TextConsumer; import eu.interedition.text.TextRepository; @Service @Scope(BeanDefinition.SCOPE_PROTOTYPE) @Transactional public class SourceTransformer { @Autowired private TextRepository textRepository; @Autowired private WitnessDao witnessDao; @Autowired private JuxtaXsltDao xsltDao; @Autowired private NoteDao noteDao; @Autowired private PageMarkDao pbDao; @Autowired private SourceDao sourceDao; /** * RE-run the transform of <code>srcDoc</code> into a prior witness * identified as <code>witness</>. The orignal witness text and annotations * will be replaced with new versions that result from appling the parse template * to the source again. * * @param srcDoc * @param witness * @throws XMLStreamException * @throws IOException * @throws TransformerException * @throws TransformerFactoryConfigurationError * @throws TransformerConfigurationException */ public void redoTransform( Source srcDoc, Witness origWit ) throws SAXException, IOException, TransformerException { // get original parse template JuxtaXslt xslt = null; if (srcDoc.getType().equals(Source.Type.XML)) { xslt = this.xsltDao.find( origWit.getXsltId()); } // clear out old witness stuff; annotations, page breaks and notes - BUT NOT text // can't kill it yet cuz witness refers to it. Must wait til after witness text is updated! this.noteDao.deleteAll( origWit.getId() ); this.pbDao.deleteAll( origWit.getId() ); this.witnessDao.clearRevisions( origWit ); // redo the transform Text parsedContent = srcDoc.getText(); if (srcDoc.getType().equals(Source.Type.XML)) { parsedContent = doTransform(srcDoc, xslt); } else if ( srcDoc.getType().equals(Source.Type.HTML) ) { parsedContent = doHtmlTransform(srcDoc); } else if ( srcDoc.getType().equals(Source.Type.WIKI) ) { parsedContent = doWikiTransform(srcDoc); }else { NullTransformReader rdr = new NullTransformReader(); this.textRepository.read(srcDoc.getText(), rdr); parsedContent = rdr.getContent(); } // dump the transform results this.witnessDao.updateContent(origWit, parsedContent); // extract pb, note and revision tags of xml documents if ( xslt != null ) { extractSpecialTags(srcDoc, this.witnessDao.find(origWit.getId()), xslt ); } } /** * Transform <code>srcDoc</code> into a witness with the name <code>finalName</code> * using XSLT contained in <code>xslt</code>. The resulting witness ID * is returned. * * @param srcDoc The JuxtaSource to be transformed into a witness * @param template The parse template used to do the transform * @param revSet * @param finalName The name of the resulting witness (optional) * @return The new witness ID * @throws SAXException * @throws IOException * @throws TransformerException */ public Long transform(final Source srcDoc, final JuxtaXslt xslt, final String finalName) throws SAXException, IOException, TransformerException { String witnessName = finalName; // transform into a new text_content object Text parsedContent = null; if (srcDoc.getType().equals(Source.Type.XML)) { parsedContent = doTransform(srcDoc, xslt); } else if ( srcDoc.getType().equals(Source.Type.HTML) ) { parsedContent = doHtmlTransform(srcDoc); } else if ( srcDoc.getType().equals(Source.Type.WIKI) ) { parsedContent = doWikiTransform(srcDoc); } else { NullTransformReader rdr = new NullTransformReader(); this.textRepository.read(srcDoc.getText(), rdr); parsedContent = rdr.getContent(); } // use the transformed content to create a juxta witness Witness witness = new Witness(); witness.setName(witnessName); witness.setSourceId(srcDoc.getId()); if ( xslt != null ) { witness.setXsltId(xslt.getId()); } witness.setText(parsedContent); witness.setWorkspaceId( srcDoc.getWorkspaceId() ); Long id = this.witnessDao.create(witness); witness.setId(id); // extract pb, note and revision tags of xml documents if ( xslt != null ) { extractSpecialTags(srcDoc, witness, xslt); } return id; } private Text doWikiTransform(Source srcDoc) throws IOException { File htmlOut = WikiTextUtils.toTxt( new ReaderInputStream(this.sourceDao.getContentReader(srcDoc), "UTF-8") ); FileInputStream fis = new FileInputStream(htmlOut); InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); Text parsedContent = this.textRepository.create( isr ); isr.close(); htmlOut.delete(); return parsedContent; } private Text doHtmlTransform(Source srcDoc) throws IOException { File htmlOut = HtmlUtils.toTxt( new ReaderInputStream(this.sourceDao.getContentReader(srcDoc), "UTF-8") ); FileInputStream fis = new FileInputStream(htmlOut); InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); Text parsedContent = this.textRepository.create( isr ); isr.close(); htmlOut.delete(); return parsedContent; } private Text doTransform(Source srcDoc, JuxtaXslt xslt) throws IOException, TransformerException, FileNotFoundException, SAXException { // setup source, xslt and result File outFile = File.createTempFile("xform"+srcDoc.getId(), "xml"); outFile.deleteOnExit(); XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setEntityResolver(new EntityResolver() { @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { if (systemId.endsWith(".dtd") || systemId.endsWith(".ent")) { StringReader stringInput = new StringReader(" "); return new InputSource(stringInput); } else { return null; // use default behavior } } }); SAXSource xmlSource = new SAXSource(reader, new InputSource( this.sourceDao.getContentReader(srcDoc) )); //javax.xml.transform.Source xmlSource = new StreamSource( this.sourceDao.getContentReader(srcDoc) ); javax.xml.transform.Source xsltSource = new StreamSource( new StringReader(xslt.getXslt()) ); javax.xml.transform.Result result = new StreamResult( new OutputStreamWriter(new FileOutputStream(outFile), "UTF-8")); // create an instance of TransformerFactory and do the transform TransformerFactory factory = TransformerFactory.newInstance( ); Transformer transformer = factory.newTransformer(xsltSource); transformer.setOutputProperty(OutputKeys.INDENT, "no"); transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8"); transformer.setOutputProperty(OutputKeys.MEDIA_TYPE, "text"); transformer.transform(xmlSource, result); // create a text repo entry for the new text FileInputStream fis = new FileInputStream(outFile); InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); Text parsedContent = this.textRepository.create( isr ); isr.close(); outFile.delete(); return parsedContent; } /** * Extract tags that require special handling in juxta: Notes, PageBreaks and revisions * @param source * @param witnessId * @param xslt * @throws SAXException * @throws IOException */ public void extractSpecialTags(final Source source, final Witness w, final JuxtaXslt xslt ) throws SAXException, IOException { JuxtaTagExtractor extractor = new JuxtaTagExtractor( ); extractor.extract( this.sourceDao.getContentReader(source), xslt); for (Note note : extractor.getNotes() ) { note.setWitnessId(w.getId()); } this.noteDao.create(extractor.getNotes()); for (PageMark pb : extractor.getPageMarks() ) { pb.setWitnessId(w.getId()); } this.pbDao.create( extractor.getPageMarks() ); for (RevisionInfo rev : extractor.getRevisions() ) { rev.setWitnessId(w.getId()); } this.witnessDao.addRevisions( extractor.getRevisions() ); } /** * Helper class to stream content from an existing plain txt source to * a new text_content entry. * @author loufoster * */ private class NullTransformReader implements TextConsumer { private Text content; public NullTransformReader() { this.content = textRepository.create( Text.Type.TXT ); } public Text getContent() { return this.content; } @Override public void read(Reader rdrContent, long contentLength) throws IOException { textRepository.write(this.content, rdrContent, contentLength); } } }