/* * eXist Open Source Native XML Database * Copyright (C) 2001-09 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * * $Id$ */ package org.exist.xquery.modules.compression; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.Iterator; import org.apache.commons.io.output.ByteArrayOutputStream; import org.exist.collections.Collection; import org.exist.dom.BinaryDocument; import org.exist.dom.DefaultDocumentSet; import org.exist.dom.DocumentImpl; import org.exist.dom.MutableDocumentSet; import org.exist.security.PermissionDeniedException; import org.exist.storage.lock.Lock; import org.exist.storage.serializers.Serializer; import org.exist.util.Base64Decoder; import org.exist.util.LockException; import org.exist.xmldb.XmldbURI; import org.exist.xquery.BasicFunction; import org.exist.xquery.Cardinality; import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.AnyURIValue; import org.exist.xquery.value.Base64Binary; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.NodeValue; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.SAXException; /** * Compresses a sequence of resources and/or collections * * @author Adam Retter <adam@exist-db.org> * @version 1.0 */ public abstract class AbstractCompressFunction extends BasicFunction { protected final static SequenceType SOURCES_PARAM = new FunctionParameterSequenceType("sources", Type.ANY_TYPE, Cardinality.ONE_OR_MORE, "The sequence of URI's and/or Entrys. If a URI points to a collection then the collection, its resources and sub-collections are zipped recursively. An Entry takes the format <entry name=\"filename.ext\" type=\"collection|uri|binary|xml|text\">data</entry>"); protected final static SequenceType COLLECTION_HIERARCHY_PARAM = new FunctionParameterSequenceType("use-collection-hierarchy", Type.BOOLEAN, Cardinality.EXACTLY_ONE, "Indicates whether the Collection hierarchy (if any) should be preserved in the zip file."); protected final static SequenceType STRIP_PREFIX_PARAM = new FunctionParameterSequenceType("strip-prefix", Type.STRING, Cardinality.EXACTLY_ONE, "This prefix is stripped from the Entrys name"); public AbstractCompressFunction(XQueryContext context, FunctionSignature signature) { super(context, signature); } private String removeLeadingOffset(String uri, String stripOffset){ // remove leading offset if (uri.startsWith(stripOffset)) { uri = uri.substring(stripOffset.length()); } // remove leading / if (uri.startsWith("/")) { uri = uri.substring(1); } return uri; } @Override public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { // are there some uri's to tar? if (args[0].isEmpty()) { return Sequence.EMPTY_SEQUENCE; } // use a hierarchy in the tar file? boolean useHierarchy = args[1].effectiveBooleanValue(); // Get offset String stripOffset = ""; if (args.length == 3) { stripOffset = args[2].getStringValue(); } ByteArrayOutputStream baos = new ByteArrayOutputStream(); OutputStream os = stream(baos); // iterate through the argument sequence for (SequenceIterator i = args[0].iterate(); i.hasNext();) { Item item = i.nextItem(); if(item instanceof Element) { Element element = (Element) item; compressElement(os, element, useHierarchy, stripOffset); } else { compressFromUri(os, ((AnyURIValue)item).toXmldbURI(), useHierarchy, stripOffset, null); } } try { os.close(); } catch (IOException ioe) { throw new XPathException(this, ioe.getMessage()); } return new Base64Binary(baos.toByteArray()); } private void compressFromUri(OutputStream os, XmldbURI uri, boolean useHierarchy, String stripOffset, String resourceName) throws XPathException { // try for a doc DocumentImpl doc = null; try { doc = context.getBroker().getXMLResource(uri, Lock.READ_LOCK); if(doc == null) { // no doc, try for a collection Collection col = context.getBroker().getCollection(uri); if(col != null) { // got a collection compressCollection(os, col, useHierarchy, stripOffset); } else { // no doc or collection throw new XPathException(this, "Invalid URI: " + uri.toString()); } } else { // got a doc compressResource(os, doc, useHierarchy, stripOffset, resourceName); } } catch(PermissionDeniedException pde) { throw new XPathException(this, pde.getMessage()); } catch(IOException ioe) { throw new XPathException(this, ioe.getMessage()); } catch(SAXException saxe) { throw new XPathException(this, saxe.getMessage()); } catch(LockException le) { throw new XPathException(this, le.getMessage()); } finally { if(doc != null) doc.getUpdateLock().release(Lock.READ_LOCK); } } /** * Adds a element to a archive * * @param os * The Output Stream to add the element to * @param nodeValue * The element to add to the archive * @param useHierarchy * Whether to use a folder hierarchy in the archive file that * reflects the collection hierarchy */ private void compressElement(OutputStream os, Element element, boolean useHierarchy, String stripOffset) throws XPathException { if(!(element.getNodeName().equals("entry") || element.getNamespaceURI().length() > 0)) throw new XPathException(this, "Item must be type of xs:anyURI or element enry."); if(element.getChildNodes().getLength() > 1) throw new XPathException(this, "Entry content is not valid XML fragment."); String name = element.getAttribute("name"); if(name == null) throw new XPathException(this, "Entry must have name attribute."); String type = element.getAttribute("type"); if("uri".equals(type)) { compressFromUri(os, XmldbURI.create(element.getFirstChild().getNodeValue()), useHierarchy, stripOffset, name); return; } if(useHierarchy) { name = removeLeadingOffset(name, stripOffset); } else { name = name.substring(name.lastIndexOf("/") + 1); } if("collection".equals(type)) name += "/"; Object entry = null; try { entry = newEntry(name); putEntry(os, entry); if(!"collection".equals(type)) { byte[] value; Node content = element.getFirstChild(); if(content == null) { value = new byte[0]; } else { if(content.getNodeType() == Node.TEXT_NODE) { String text = content.getNodeValue(); Base64Decoder dec = new Base64Decoder(); if("binary".equals(type)) { //base64 binary dec.translate(text); value = dec.getByteArray(); } else { //text value = text.getBytes(); } } else { //xml Serializer serializer = context.getBroker().getSerializer(); serializer.setUser(context.getUser()); serializer.setProperty("omit-xml-declaration", "no"); value = serializer.serialize((NodeValue) content).getBytes(); } } os.write(value); } } catch(IOException ioe) { throw new XPathException(this, ioe.getMessage(), ioe); } catch(SAXException saxe) { throw new XPathException(this, saxe.getMessage(), saxe); } finally { if(entry != null) try { closeEntry(os); } catch(IOException ioe) { throw new XPathException(this, ioe.getMessage(), ioe); } } } /** * Adds a document to a archive * * @param os * The Output Stream to add the document to * @param doc * The document to add to the archive * @param useHierarchy * Whether to use a folder hierarchy in the archive file that * reflects the collection hierarchy */ private void compressResource(OutputStream os, DocumentImpl doc, boolean useHierarchy, String stripOffset, String name) throws IOException, SAXException { // create an entry in the Tar for the document Object entry = null; if(name != null) { entry = newEntry(name); } else if (useHierarchy) { String docCollection = doc.getCollection().getURI().toString(); XmldbURI collection = XmldbURI.create(removeLeadingOffset(docCollection, stripOffset)); entry = newEntry(collection.append(doc.getFileURI()).toString()); } else { entry = newEntry(doc.getFileURI().toString()); } putEntry(os, entry); if (doc.getResourceType() == DocumentImpl.XML_FILE) { // xml file Serializer serializer = context.getBroker().getSerializer(); serializer.setUser(context.getUser()); serializer.setProperty("omit-xml-declaration", "no"); String strDoc = serializer.serialize(doc); os.write(strDoc.getBytes()); } else if (doc.getResourceType() == DocumentImpl.BINARY_FILE) { // binary file InputStream is = context.getBroker().getBinaryResource((BinaryDocument)doc); byte[] data = new byte[16384]; int len = 0; while ((len=is.read(data,0,data.length))>0) { os.write(data,0,len); } is.close(); } // close the entry closeEntry(os); } /** * Adds a Collection and its child collections and resources recursively to * a archive * * @param os * The Output Stream to add the document to * @param col * The Collection to add to the archive * @param useHierarchy * Whether to use a folder hierarchy in the archive file that * reflects the collection hierarchy */ private void compressCollection(OutputStream os, Collection col, boolean useHierarchy, String stripOffset) throws IOException, SAXException, LockException { // iterate over child documents MutableDocumentSet childDocs = new DefaultDocumentSet(); col.getDocuments(context.getBroker(), childDocs, true); for (Iterator itChildDocs = childDocs.getDocumentIterator(); itChildDocs .hasNext();) { DocumentImpl childDoc = (DocumentImpl) itChildDocs.next(); childDoc.getUpdateLock().acquire(Lock.READ_LOCK); try { compressResource(os, childDoc, useHierarchy, stripOffset, null); } finally { childDoc.getUpdateLock().release(Lock.READ_LOCK); } } // iterate over child collections for (Iterator itChildCols = col.collectionIterator(); itChildCols.hasNext();) { // get the child collection XmldbURI childColURI = (XmldbURI) itChildCols.next(); Collection childCol = context.getBroker().getCollection(col.getURI().append(childColURI)); // recurse compressCollection(os, childCol, useHierarchy, stripOffset); } } protected abstract OutputStream stream(ByteArrayOutputStream baos); protected abstract Object newEntry(String name); protected abstract void putEntry(Object os, Object entry) throws IOException; protected abstract void closeEntry(Object os) throws IOException; }