/* * eXist Open Source Native XML Database * Copyright (C) 2014 The eXist Project * http://exist-db.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ package org.exist.mongodb.xquery.gridfs; import com.mongodb.BasicDBObject; import com.mongodb.DB; import com.mongodb.MongoClient; import com.mongodb.MongoException; import com.mongodb.gridfs.GridFS; import com.mongodb.gridfs.GridFSInputFile; import java.io.IOException; import java.io.OutputStream; import java.security.DigestOutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.zip.GZIPOutputStream; import org.apache.commons.codec.binary.Hex; import org.apache.commons.io.output.CountingOutputStream; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.exist.dom.QName; import org.exist.mongodb.shared.Constants; import static org.exist.mongodb.shared.Constants.GZIP; import org.exist.mongodb.shared.ContentSerializer; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_BUCKET; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_CONTENT; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_CONTENT_TYPE; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_DATABASE; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_FILENAME; import static org.exist.mongodb.shared.FunctionDefinitions.PARAMETER_MONGODB_CLIENT; import org.exist.mongodb.shared.MongodbClientStore; import org.exist.mongodb.xquery.GridfsModule; import org.exist.util.MimeTable; import org.exist.util.MimeType; import org.exist.xquery.BasicFunction; import org.exist.xquery.Cardinality; import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.FunctionReturnSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.StringValue; import org.exist.xquery.value.Type; /** * Implementation gridfs:store() functions * * @author Dannes Wessels */ public class Store extends BasicFunction { private final static CharSequence[] nonCompressables = { ".zip", ".rar", ".gz", ".7z", ".bz", ".bz2", ".dmg", "gif", ".jpg", ".png", ".svgz", ".mp3", ".mp4", ".mpg", ".mpeg", ".avi", ".mkv", ".wav", ".ogg", ".mov", ".flv", ".wmv" }; public final static FunctionSignature signatures[] = { new FunctionSignature( new QName("store", GridfsModule.NAMESPACE_URI, GridfsModule.PREFIX), "Store document into Gridfs", new SequenceType[]{ PARAMETER_MONGODB_CLIENT, PARAMETER_DATABASE, PARAMETER_BUCKET, PARAMETER_FILENAME, PARAMETER_CONTENT_TYPE, PARAMETER_CONTENT }, new FunctionReturnSequenceType(Type.STRING, Cardinality.ONE, "The document id of the stored document") ), }; public Store(XQueryContext context, FunctionSignature signature) { super(context, signature); } @Override public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { try { // Verify clientid and get client String mongodbClientId = args[0].itemAt(0).getStringValue(); MongodbClientStore.getInstance().validate(mongodbClientId); MongoClient client = MongodbClientStore.getInstance().get(mongodbClientId); // Get parameters String dbname = args[1].itemAt(0).getStringValue(); String bucket = args[2].itemAt(0).getStringValue(); String documentName = args[3].itemAt(0).getStringValue(); String contentType = getMimeType(args[4], documentName); LOG.info(String.format("Storing document %s (%s)", documentName, contentType)); // Actual content: File object, doc() element, base64... Item content = args[5].itemAt(0); // Get database DB db = client.getDB(dbname); // Creates a GridFS instance for the specified bucket GridFS gfs = new GridFS(db, bucket); // Create file GridFSInputFile gfsFile = gfs.createFile(); // Set meta data gfsFile.setFilename(documentName); gfsFile.setContentType(contentType); StopWatch stopWatch = new StopWatch(); // Write data if (StringUtils.endsWithAny(documentName, nonCompressables)) { writeRaw(gfsFile, stopWatch, content); } else { int dataType = content.getType(); writeCompressed(gfsFile, stopWatch, content, dataType); } LOG.info(String.format("serialization time: %s", stopWatch.getTime())); // Report identifier return new StringValue(gfsFile.getId().toString()); } catch (XPathException ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, ex.getMessage(), ex); } catch (MongoException ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, GridfsModule.GRFS0002, ex.getMessage()); } catch (Throwable ex) { LOG.error(ex.getMessage(), ex); throw new XPathException(this, GridfsModule.GRFS0003, ex.getMessage()); } } void writeCompressed(GridFSInputFile gfsFile, StopWatch stopWatch, Item content, int dataType) throws NoSuchAlgorithmException, IOException, XPathException { // Store data compressed, add statistics try (OutputStream stream = gfsFile.getOutputStream()) { MessageDigest md = MessageDigest.getInstance("MD5"); CountingOutputStream cosGZ = new CountingOutputStream(stream); GZIPOutputStream gos = new GZIPOutputStream(cosGZ); DigestOutputStream dos = new DigestOutputStream(gos, md); CountingOutputStream cosRaw = new CountingOutputStream(dos); stopWatch.start(); ContentSerializer.serialize(content, context, cosRaw); cosRaw.flush(); cosRaw.close(); stopWatch.stop(); long nrBytesRaw = cosRaw.getByteCount(); long nrBytesGZ = cosGZ.getByteCount(); String checksum = Hex.encodeHexString(dos.getMessageDigest().digest()); BasicDBObject info = new BasicDBObject(); info.put(Constants.EXIST_COMPRESSION, GZIP); info.put(Constants.EXIST_ORIGINAL_SIZE, nrBytesRaw); info.put(Constants.EXIST_ORIGINAL_MD5, checksum); info.put(Constants.EXIST_DATATYPE, dataType); info.put(Constants.EXIST_DATATYPE_TEXT, Type.getTypeName(dataType)); gfsFile.setMetaData(info); LOG.info("original_md5:" + checksum); LOG.info("compression ratio:" + ((100l * nrBytesGZ) / nrBytesRaw)); } } void writeRaw(GridFSInputFile gfsFile, StopWatch stopWatch, Item content) throws XPathException, IOException { // Write data as is try (OutputStream stream = gfsFile.getOutputStream()) { stopWatch.start(); ContentSerializer.serialize(content, context, stream); stream.flush(); stopWatch.stop(); } } private String getMimeType(Sequence inputValue, String filename) throws XPathException { String mimeType = null; // Use input when provided if (inputValue.hasOne()) { mimeType = inputValue.itemAt(0).getStringValue(); } // When no data is found get from filename if (StringUtils.isBlank(mimeType) && StringUtils.isNotBlank(filename)) { MimeType mime = MimeTable.getInstance().getContentTypeFor(filename); if (mime != null) { mimeType = mime.getName(); } } // Nothing could be found if (StringUtils.isBlank(mimeType)) { throw new XPathException(this, "Content type could not be retrieved from parameter or document name."); } return mimeType; } }