/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-09 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.xquery.modules.compression;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Iterator;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.exist.collections.Collection;
import org.exist.dom.BinaryDocument;
import org.exist.dom.DefaultDocumentSet;
import org.exist.dom.DocumentImpl;
import org.exist.dom.MutableDocumentSet;
import org.exist.security.PermissionDeniedException;
import org.exist.storage.lock.Lock;
import org.exist.storage.serializers.Serializer;
import org.exist.util.Base64Decoder;
import org.exist.util.LockException;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.AnyURIValue;
import org.exist.xquery.value.Base64Binary;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.Item;
import org.exist.xquery.value.NodeValue;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceIterator;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.Type;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
/**
 * Base class for XQuery functions that compress a sequence of resources
 * and/or collections into an archive.
 *
 * The archive format is supplied by the concrete subclass through
 * {@link #stream(ByteArrayOutputStream)}, {@link #newEntry(String)},
 * {@link #putEntry(Object, Object)} and {@link #closeEntry(Object)}.
 *
 * @author Adam Retter <adam@exist-db.org>
 * @version 1.0
 */
public abstract class AbstractCompressFunction extends BasicFunction
{
    protected final static SequenceType SOURCES_PARAM = new FunctionParameterSequenceType("sources", Type.ANY_TYPE, Cardinality.ONE_OR_MORE, "The sequence of URI's and/or Entrys. If a URI points to a collection then the collection, its resources and sub-collections are zipped recursively. An Entry takes the format <entry name=\"filename.ext\" type=\"collection|uri|binary|xml|text\">data</entry>");
    protected final static SequenceType COLLECTION_HIERARCHY_PARAM = new FunctionParameterSequenceType("use-collection-hierarchy", Type.BOOLEAN, Cardinality.EXACTLY_ONE, "Indicates whether the Collection hierarchy (if any) should be preserved in the zip file.");
    protected final static SequenceType STRIP_PREFIX_PARAM = new FunctionParameterSequenceType("strip-prefix", Type.STRING, Cardinality.EXACTLY_ONE, "This prefix is stripped from the Entrys name");

    /** Message used whenever a source item is neither an xs:anyURI nor an entry element. */
    private final static String INVALID_ITEM_MESSAGE = "Item must be type of xs:anyURI or element entry.";

    public AbstractCompressFunction(XQueryContext context, FunctionSignature signature)
    {
        super(context, signature);
    }

    /**
     * Strips the given prefix (if present) and then a single leading "/"
     * (if present) from a path.
     *
     * @param uri
     *            The path to shorten
     * @param stripOffset
     *            The prefix to remove from the start of the path
     * @return the shortened path
     */
    private String removeLeadingOffset(String uri, String stripOffset)
    {
        // remove leading offset
        if(uri.startsWith(stripOffset))
        {
            uri = uri.substring(stripOffset.length());
        }

        // remove leading /
        if(uri.startsWith("/"))
        {
            uri = uri.substring(1);
        }

        return uri;
    }

    /**
     * Builds the archive from the supplied sources.
     *
     * @param args
     *            args[0] the sources (xs:anyURI values and/or entry
     *            elements), args[1] whether to keep the collection
     *            hierarchy, optional args[2] the prefix to strip from
     *            entry names
     * @param contextSequence
     *            not used by this function
     * @return the archive as Base64Binary, or the empty sequence when there
     *         are no sources
     * @throws XPathException
     *             if a source item is of an unsupported type, cannot be
     *             resolved, or the archive cannot be written
     */
    @Override
    public Sequence eval(Sequence[] args, Sequence contextSequence)
        throws XPathException
    {
        // are there some sources to archive?
        if(args[0].isEmpty())
        {
            return Sequence.EMPTY_SEQUENCE;
        }

        // use a hierarchy in the archive?
        boolean useHierarchy = args[1].effectiveBooleanValue();

        // get the (optional) prefix to strip from entry names
        String stripOffset = "";
        if(args.length == 3)
        {
            stripOffset = args[2].getStringValue();
        }

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        OutputStream os = stream(baos);
        boolean closed = false;

        try
        {
            // iterate through the argument sequence
            for(SequenceIterator i = args[0].iterate(); i.hasNext();)
            {
                Item item = i.nextItem();

                if(item instanceof Element)
                {
                    compressElement(os, (Element) item, useHierarchy, stripOffset);
                }
                else if(item instanceof AnyURIValue)
                {
                    compressFromUri(os, ((AnyURIValue) item).toXmldbURI(), useHierarchy, stripOffset, null);
                }
                else
                {
                    // report unsupported items as an XQuery error rather
                    // than letting a ClassCastException escape
                    throw new XPathException(this, INVALID_ITEM_MESSAGE);
                }
            }

            os.close();
            closed = true;
        }
        catch(IOException ioe)
        {
            throw new XPathException(this, ioe.getMessage(), ioe);
        }
        finally
        {
            if(!closed)
            {
                // Something failed above: still release whatever the archive
                // stream holds, but do not let a secondary close failure
                // replace the exception that is already propagating.
                try
                {
                    os.close();
                }
                catch(IOException ignored)
                {
                    // best effort only
                }
            }
        }

        return new Base64Binary(baos.toByteArray());
    }

    /**
     * Adds the document or collection identified by a URI to the archive.
     * The URI is first tried as a document and, failing that, as a
     * collection.
     *
     * @param os
     *            The Output Stream to add the resource(s) to
     * @param uri
     *            The URI of the document or collection
     * @param useHierarchy
     *            Whether to use a folder hierarchy in the archive file that
     *            reflects the collection hierarchy
     * @param stripOffset
     *            The prefix to strip from entry names
     * @param resourceName
     *            Explicit entry name for a document, or null to derive the
     *            name from the document itself
     * @throws XPathException
     *             if the URI identifies neither a document nor a collection,
     *             or if reading/writing fails
     */
    private void compressFromUri(OutputStream os, XmldbURI uri, boolean useHierarchy, String stripOffset, String resourceName) throws XPathException
    {
        // try for a doc
        DocumentImpl doc = null;
        try
        {
            doc = context.getBroker().getXMLResource(uri, Lock.READ_LOCK);
            if(doc == null)
            {
                // no doc, try for a collection
                Collection col = context.getBroker().getCollection(uri);
                if(col != null)
                {
                    // got a collection
                    compressCollection(os, col, useHierarchy, stripOffset);
                }
                else
                {
                    // no doc or collection
                    throw new XPathException(this, "Invalid URI: " + uri.toString());
                }
            }
            else
            {
                // got a doc
                compressResource(os, doc, useHierarchy, stripOffset, resourceName);
            }
        }
        catch(PermissionDeniedException pde)
        {
            throw new XPathException(this, pde.getMessage(), pde);
        }
        catch(IOException ioe)
        {
            throw new XPathException(this, ioe.getMessage(), ioe);
        }
        catch(SAXException saxe)
        {
            throw new XPathException(this, saxe.getMessage(), saxe);
        }
        catch(LockException le)
        {
            throw new XPathException(this, le.getMessage(), le);
        }
        finally
        {
            // the document was fetched with a read lock; always give it back
            if(doc != null)
            {
                doc.getUpdateLock().release(Lock.READ_LOCK);
            }
        }
    }

    /**
     * Adds an entry element to the archive.
     *
     * An entry of type "uri" is resolved and added through
     * {@link #compressFromUri}; an entry of type "collection" only creates a
     * folder entry; for any other type the content of the element is
     * written: Base64-decoded text when the type is "binary", the raw text
     * for other text content, and the serialized node otherwise.
     *
     * @param os
     *            The Output Stream to add the element to
     * @param element
     *            The entry element to add to the archive
     * @param useHierarchy
     *            Whether to keep the path part of the entry name (minus the
     *            strip prefix) or to reduce the name to its last segment
     * @param stripOffset
     *            The prefix to strip from the entry name
     * @throws XPathException
     *             if the element is not a valid entry or writing fails
     */
    private void compressElement(OutputStream os, Element element, boolean useHierarchy, String stripOffset) throws XPathException
    {
        // getNamespaceURI() may legitimately be null, so guard before using it.
        // NOTE(review): as in the original check, any element that has a
        // namespace is accepted regardless of its name - confirm that this is
        // intended.
        String namespaceURI = element.getNamespaceURI();
        boolean hasNamespace = namespaceURI != null && namespaceURI.length() > 0;
        if(!"entry".equals(element.getNodeName()) && !hasNamespace)
        {
            throw new XPathException(this, INVALID_ITEM_MESSAGE);
        }

        if(element.getChildNodes().getLength() > 1)
        {
            throw new XPathException(this, "Entry content is not valid XML fragment.");
        }

        // A missing attribute is reported as null by some DOM implementations
        // and as the empty string by others; neither is a usable entry name.
        String name = element.getAttribute("name");
        if(name == null || name.length() == 0)
        {
            throw new XPathException(this, "Entry must have name attribute.");
        }

        String type = element.getAttribute("type");

        if("uri".equals(type))
        {
            Node uriNode = element.getFirstChild();
            if(uriNode == null || uriNode.getNodeValue() == null)
            {
                throw new XPathException(this, "Entry of type uri must contain a URI.");
            }
            compressFromUri(os, XmldbURI.create(uriNode.getNodeValue()), useHierarchy, stripOffset, name);
            return;
        }

        if(useHierarchy)
        {
            name = removeLeadingOffset(name, stripOffset);
        }
        else
        {
            // flat archive: keep only the last path segment
            name = name.substring(name.lastIndexOf("/") + 1);
        }

        boolean isCollection = "collection".equals(type);
        if(isCollection)
        {
            name += "/";
        }

        Object entry = null;
        try
        {
            entry = newEntry(name);
            putEntry(os, entry);

            if(!isCollection)
            {
                byte[] value;
                Node content = element.getFirstChild();

                if(content == null)
                {
                    // empty entry
                    value = new byte[0];
                }
                else if(content.getNodeType() == Node.TEXT_NODE)
                {
                    String text = content.getNodeValue();
                    if("binary".equals(type))
                    {
                        // base64 binary
                        Base64Decoder dec = new Base64Decoder();
                        dec.translate(text);
                        value = dec.getByteArray();
                    }
                    else
                    {
                        // text
                        // NOTE(review): uses the platform default charset -
                        // confirm whether a fixed encoding is expected
                        value = text.getBytes();
                    }
                }
                else
                {
                    // xml
                    Serializer serializer = context.getBroker().getSerializer();
                    serializer.setUser(context.getUser());
                    serializer.setProperty("omit-xml-declaration", "no");
                    // NOTE(review): uses the platform default charset -
                    // confirm it matches the encoding the serializer declares
                    value = serializer.serialize((NodeValue) content).getBytes();
                }

                os.write(value);
            }
        }
        catch(IOException ioe)
        {
            throw new XPathException(this, ioe.getMessage(), ioe);
        }
        catch(SAXException saxe)
        {
            throw new XPathException(this, saxe.getMessage(), saxe);
        }
        finally
        {
            if(entry != null)
            {
                try
                {
                    closeEntry(os);
                }
                catch(IOException ioe)
                {
                    throw new XPathException(this, ioe.getMessage(), ioe);
                }
            }
        }
    }

    /**
     * Adds a document to the archive.
     *
     * @param os
     *            The Output Stream to add the document to
     * @param doc
     *            The document to add to the archive
     * @param useHierarchy
     *            Whether to use a folder hierarchy in the archive file that
     *            reflects the collection hierarchy
     * @param stripOffset
     *            The prefix to strip from the collection path when the
     *            hierarchy is used
     * @param name
     *            Explicit entry name; when null the name is derived from the
     *            document (and, with useHierarchy, its collection)
     * @throws IOException
     *             if reading the document or writing the entry fails
     * @throws SAXException
     *             if serializing an XML document fails
     */
    private void compressResource(OutputStream os, DocumentImpl doc, boolean useHierarchy, String stripOffset, String name) throws IOException, SAXException
    {
        // create an entry in the archive for the document
        Object entry;
        if(name != null)
        {
            entry = newEntry(name);
        }
        else if(useHierarchy)
        {
            String docCollection = doc.getCollection().getURI().toString();
            XmldbURI collection = XmldbURI.create(removeLeadingOffset(docCollection, stripOffset));
            entry = newEntry(collection.append(doc.getFileURI()).toString());
        }
        else
        {
            entry = newEntry(doc.getFileURI().toString());
        }

        putEntry(os, entry);

        if(doc.getResourceType() == DocumentImpl.XML_FILE)
        {
            // xml file
            Serializer serializer = context.getBroker().getSerializer();
            serializer.setUser(context.getUser());
            serializer.setProperty("omit-xml-declaration", "no");
            String strDoc = serializer.serialize(doc);
            // NOTE(review): uses the platform default charset - confirm it
            // matches the encoding the serializer declares
            os.write(strDoc.getBytes());
        }
        else if(doc.getResourceType() == DocumentImpl.BINARY_FILE)
        {
            // binary file
            InputStream is = context.getBroker().getBinaryResource((BinaryDocument) doc);
            try
            {
                byte[] data = new byte[16384];
                int len;
                while((len = is.read(data, 0, data.length)) > 0)
                {
                    os.write(data, 0, len);
                }
            }
            finally
            {
                // release the resource stream even when copying fails
                is.close();
            }
        }

        // close the entry
        closeEntry(os);
    }

    /**
     * Adds a Collection and its child collections and resources recursively
     * to the archive.
     *
     * @param os
     *            The Output Stream to add the documents to
     * @param col
     *            The Collection to add to the archive
     * @param useHierarchy
     *            Whether to use a folder hierarchy in the archive file that
     *            reflects the collection hierarchy
     * @param stripOffset
     *            The prefix to strip from collection paths when the
     *            hierarchy is used
     * @throws IOException
     *             if reading a document or writing an entry fails
     * @throws SAXException
     *             if serializing an XML document fails
     * @throws LockException
     *             if a read lock on a child document cannot be acquired
     */
    private void compressCollection(OutputStream os, Collection col, boolean useHierarchy, String stripOffset) throws IOException, SAXException, LockException
    {
        // iterate over child documents
        MutableDocumentSet childDocs = new DefaultDocumentSet();
        col.getDocuments(context.getBroker(), childDocs, true);
        for(Iterator itChildDocs = childDocs.getDocumentIterator(); itChildDocs.hasNext();)
        {
            DocumentImpl childDoc = (DocumentImpl) itChildDocs.next();
            childDoc.getUpdateLock().acquire(Lock.READ_LOCK);
            try
            {
                compressResource(os, childDoc, useHierarchy, stripOffset, null);
            }
            finally
            {
                childDoc.getUpdateLock().release(Lock.READ_LOCK);
            }
        }

        // iterate over child collections
        for(Iterator itChildCols = col.collectionIterator(); itChildCols.hasNext();)
        {
            // get the child collection
            XmldbURI childColURI = (XmldbURI) itChildCols.next();
            Collection childCol = context.getBroker().getCollection(col.getURI().append(childColURI));

            // the broker answers null when it cannot provide the collection;
            // skip it rather than fail with a NullPointerException
            if(childCol != null)
            {
                // recurse
                compressCollection(os, childCol, useHierarchy, stripOffset);
            }
        }
    }

    /**
     * Wraps the in-memory buffer in the archive-format specific stream.
     *
     * @param baos
     *            The buffer that receives the archive bytes
     * @return the stream that entries are written to
     */
    protected abstract OutputStream stream(ByteArrayOutputStream baos);

    /**
     * Creates a new, format specific archive entry.
     *
     * @param name
     *            The name of the entry inside the archive
     * @return the entry object, later handed to {@link #putEntry(Object, Object)}
     */
    protected abstract Object newEntry(String name);

    /**
     * Starts writing the given entry on the archive stream.
     *
     * @param os
     *            The stream returned by {@link #stream(ByteArrayOutputStream)}
     * @param entry
     *            The entry returned by {@link #newEntry(String)}
     * @throws IOException
     *             if the entry cannot be started
     */
    protected abstract void putEntry(Object os, Object entry) throws IOException;

    /**
     * Finishes the entry currently being written on the archive stream.
     *
     * @param os
     *            The stream returned by {@link #stream(ByteArrayOutputStream)}
     * @throws IOException
     *             if the entry cannot be closed
     */
    protected abstract void closeEntry(Object os) throws IOException;
}