/* * eXist Open Source Native XML Database * Copyright (C) 2014 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ package org.exist.mongodb.xquery.gridfs; import com.mongodb.DB; import com.mongodb.DBObject; import com.mongodb.MongoClient; import com.mongodb.MongoException; import com.mongodb.gridfs.GridFS; import com.mongodb.gridfs.GridFSDBFile; import java.io.IOException; import java.io.InputStream; import java.util.zip.GZIPInputStream; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringUtils; import org.bson.types.ObjectId; import org.exist.Namespaces; import org.exist.dom.QName; import org.exist.memtree.DocumentImpl; import org.exist.memtree.SAXAdapter; import org.exist.mongodb.shared.Constants; import static org.exist.mongodb.shared.Constants.EXIST_COMPRESSION; import static org.exist.mongodb.shared.Constants.EXIST_DATATYPE; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_BUCKET; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_DATABASE; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_FILENAME; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_MONGODB_CLIENT; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_OBJECTID; import org.exist.mongodb.shared.MongodbClientStore; import org.exist.mongodb.xquery.GridfsModule; import org.exist.validation.ValidationReport; import org.exist.xquery.BasicFunction; import org.exist.xquery.Cardinality; import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.Base64BinaryDocument; import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; /** * Functions to retrieve documents from GridFS as a stream. * * @author Dannes Wessels */ public class Get extends BasicFunction { private static final String FIND_BY_OBJECTID = "get-by-objectid"; private static final String FIND_BY_FILENAME = "get-by-filename"; private static final FunctionParameterSequenceType PARAMETER_FORCE_BINARY = new FunctionParameterSequenceType("forceBinary", Type.BOOLEAN, Cardinality.ONE, "Set true() to force binary datatype for XML data."); public final static FunctionSignature signatures[] = { new FunctionSignature( new QName(FIND_BY_FILENAME, GridfsModule.NAMESPACE_URI, GridfsModule.PREFIX), "Retrieve document", new SequenceType[]{ PARAMETER_MONGODB_CLIENT, PARAMETER_DATABASE, PARAMETER_BUCKET, PARAMETER_FILENAME, PARAMETER_FORCE_BINARY }, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "The GridFS document") ), new FunctionSignature( new QName(FIND_BY_OBJECTID, GridfsModule.NAMESPACE_URI, GridfsModule.PREFIX), "Retrieve document", new SequenceType[]{ PARAMETER_MONGODB_CLIENT, PARAMETER_DATABASE, PARAMETER_BUCKET, PARAMETER_OBJECTID, PARAMETER_FORCE_BINARY,}, new FunctionReturnSequenceType(Type.ITEM, Cardinality.ZERO_OR_ONE, "The GridFS document") ), }; public Get(XQueryContext context, FunctionSignature signature) { super(context, signature); } @Override public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { try { // Verify clientid and get client String mongodbClientId = args[0].itemAt(0).getStringValue(); MongodbClientStore.getInstance().validate(mongodbClientId); MongoClient client = MongodbClientStore.getInstance().get(mongodbClientId); // Get parameters String dbname = args[1].itemAt(0).getStringValue(); String bucket = args[2].itemAt(0).getStringValue(); String documentId = args[3].itemAt(0).getStringValue(); boolean forceBinary = args[4].itemAt(0).toJavaObject(Boolean.class); // Get database DB db = client.getDB(dbname); // Creates a GridFS instance for the specified bucket GridFS gfs = new GridFS(db, bucket); // Find one document by id or by filename GridFSDBFile gfsFile = (isCalledAs(FIND_BY_OBJECTID)) ? gfs.findOne(new ObjectId(documentId)) : gfs.findOne(documentId); // TODO: find latest if (gfsFile == null) { throw new XPathException(this, GridfsModule.GRFS0004, String.format("Document '%s' could not be found.", documentId)); } Sequence retVal = get(gfsFile, forceBinary); return retVal; } catch (XPathException ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, ex.getMessage(), ex); } catch (MongoException ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, GridfsModule.GRFS0002, ex.getMessage()); } catch (Throwable ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, GridfsModule.GRFS0003, ex.getMessage()); } } /** * Get document from GridFS */ Sequence get(GridFSDBFile gfsFile, boolean forceBinary) throws IOException, XPathException { // Obtain meta-data DBObject metadata = gfsFile.getMetaData(); // Decompress when needed String compression = (metadata == null) ? null : (String) metadata.get(EXIST_COMPRESSION); boolean isGzipped = StringUtils.equals(compression, Constants.GZIP); InputStream is = isGzipped ? new GZIPInputStream(gfsFile.getInputStream()) : gfsFile.getInputStream(); // Find what kind of data is stored int datatype = (metadata == null) ? Type.UNTYPED : (int) metadata.get(EXIST_DATATYPE); boolean hasXMLContentType = StringUtils.contains(gfsFile.getContentType(), "xml"); boolean isXMLtype = (Type.DOCUMENT == datatype || Type.ELEMENT == datatype || hasXMLContentType); // Convert input stream to eXist-db object Sequence retVal; if (forceBinary || !isXMLtype) { retVal = Base64BinaryDocument.getInstance(context, is); } else { retVal = processXML(context, is); } return retVal; } /** * Parse an byte-array containing (compressed) XML data into an eXist-db * document. * * @param data Byte array containing the XML data. * @return Sequence containing the XML as DocumentImpl * * @throws XPathException Something bad happened. */ private Sequence processXML(XQueryContext xqueryContext, InputStream is) throws XPathException { Sequence content = null; try { final ValidationReport validationReport = new ValidationReport(); final SAXAdapter adapter = new SAXAdapter(xqueryContext); final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); final InputSource src = new InputSource(is); final SAXParser parser = factory.newSAXParser(); XMLReader xr = parser.getXMLReader(); xr.setErrorHandler(validationReport); xr.setContentHandler(adapter); xr.setProperty(Namespaces.SAX_LEXICAL_HANDLER, adapter); xr.parse(src); // Cleanup IOUtils.closeQuietly(is); if (validationReport.isValid()) { content = (DocumentImpl) adapter.getDocument(); } else { String txt = String.format("Received document is not valid: %s", validationReport.toString()); LOG.debug(txt); throw new XPathException(txt); } } catch (SAXException | ParserConfigurationException | IOException ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(ex.getMessage()); } return content; } }