/**
 * Copyright (c) 2012, Thilo Planz. All rights reserved.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package v7db.files.mongodb;

import static v7db.files.mongodb.QueryUtils._ID;

import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.bson.BSONObject;

import v7db.files.Compression;
import v7db.files.MapUtils;
import v7db.files.ZipFile;
import v7db.files.spi.Content;
import v7db.files.spi.ContentConcatenation;
import v7db.files.spi.ContentPointer;
import v7db.files.spi.ContentSHA;
import v7db.files.spi.ContentStorage;
import v7db.files.spi.GzippedContent;
import v7db.files.spi.InlineContent;
import v7db.files.spi.OffsetAndLength;
import v7db.files.spi.StorageScheme;
import v7db.files.spi.StoredContent;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.WriteConcern;
import com.mongodb.gridfs.GridFS;

/**
 * ContentStorage implementation that uses MongoDB documents.
 *
 * <ul>
 * <li>The <code>_id</code> field is the content SHA-1 digest (20 bytes of
 * binary data)
 * <li>"Small" content (that does not need to be chunked) is stored in the
 * binary field <code>in</code>.
 * <li>If the data can be compressed using gzip, it will be stored in compressed
 * form as <code>zin</code>. This mode is indicated by setting the value
 * <code>gz</code> for the <code>store</code> field. The uncompressed length is
 * given in the <code>length</code> field.
 * <li>"Large" content is stored as the concatenation of chunks stored
 * out-of-band (in other documents). For very large documents this can also
 * become nested.
 * <li>Other types of "out-of-band" storage schemes are possible and can be
 * provided by extension code.
 * </ul>
 *
 * @see <a href="https://github.com/thiloplanz/v7files/wiki/StorageFormat">
 *      StorageFormat (v7files wiki)</a>
 */
public class MongoContentStorage implements ContentStorage {

    /** Maximum number of bytes stored in a single content document. */
    private static final int chunkSize = GridFS.DEFAULT_CHUNKSIZE;

    /** Length in bytes of a complete SHA-1 digest, as used for <code>_id</code>. */
    private static final int SHA_LENGTH = 20;

    private final DBCollection contentCollection;

    /** Storage schemes by the name used in the <code>store</code> field. */
    private final Map<String, StorageScheme> storageSchemes = new HashMap<String, StorageScheme>();

    public final static String DEFAULT_CONTENT_COLLECTION_NAME = "v7files.content";

    /**
     * Creates a storage backed by the collection named
     * {@link #DEFAULT_CONTENT_COLLECTION_NAME} in the given database.
     */
    public MongoContentStorage(DB db) {
        this(db.getCollection(DEFAULT_CONTENT_COLLECTION_NAME));
    }

    /**
     * Creates a storage backed by the given collection and registers the
     * "gz", "cat" and "zip" storage schemes.
     */
    public MongoContentStorage(DBCollection contentCollection) {
        this.contentCollection = contentCollection;
        storageSchemes.put("gz", new GzippedContent());
        storageSchemes.put("cat", new ContentConcatenation());
        storageSchemes.put("zip", new ZipFile.ContentFromZipFile());
    }

    /**
     * Looks up content by its complete digest.
     *
     * @param sha
     *            the digest used as the document <code>_id</code>
     * @return the content, or <code>null</code> if no such document exists
     */
    public Content getContent(byte[] sha) throws IOException {
        return getContent(contentCollection.findOne(sha));
    }

    /**
     * Finds the single document whose <code>_id</code> starts with the given
     * prefix, by querying the range between the prefix padded with 0x00 and
     * the prefix padded with 0xFF.
     *
     * @param shaPrefix
     *            at most 20 leading bytes of the digest
     * @return the matching document, or <code>null</code> if there is none
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or matches more than
     *             one document
     */
    private DBObject findUniqueByPrefix(byte[] shaPrefix) {
        if (shaPrefix.length > SHA_LENGTH)
            throw new IllegalArgumentException(
                    "SHA prefix must not be longer than " + SHA_LENGTH
                            + " bytes, but has " + shaPrefix.length);

        byte[] lower = Arrays.copyOf(shaPrefix, SHA_LENGTH); // 0-padded
        byte[] higher = Arrays.copyOf(shaPrefix, SHA_LENGTH); // FF-padded
        Arrays.fill(higher, shaPrefix.length, SHA_LENGTH, (byte) 0xFF);

        // two results are enough to detect an ambiguous prefix
        List<DBObject> files = contentCollection.find(
                QueryUtils.between(_ID, lower, higher), new BasicDBObject())
                .limit(2).toArray();
        if (files.isEmpty())
            return null;
        if (files.size() == 1)
            return files.get(0);
        throw new IllegalArgumentException(Hex.encodeHexString(shaPrefix)
                + " is not a unique SHA prefix");
    }

    /**
     * Resolves a (possibly abbreviated) digest to a pointer carrying the
     * complete digest and the content length.
     *
     * @param shaPrefix
     *            at most 20 leading bytes of the digest
     * @return the pointer, or <code>null</code> if nothing matches
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or is not unique
     */
    public ContentSHA findContentPointerByPrefix(byte[] shaPrefix)
            throws IOException {
        if (shaPrefix.length == SHA_LENGTH) {
            DBObject exact = contentCollection.findOne(shaPrefix);
            if (exact == null)
                return null;
            Content c = getContent(exact);
            return ContentSHA.forDigestAndLength(shaPrefix, c.getLength());
        }

        DBObject match = findUniqueByPrefix(shaPrefix);
        if (match == null)
            return null;
        // The digest has to be read BEFORE the document is deserialized:
        // getContent(BSONObject) removes the "_id" field from it.
        byte[] sha = (byte[]) match.get(_ID);
        Content c = getContent(match);
        return ContentSHA.forDigestAndLength(sha, c.getLength());
    }

    /**
     * Looks up content by a (possibly abbreviated) digest.
     *
     * @param shaPrefix
     *            at most 20 leading bytes of the digest
     * @return the content, or <code>null</code> if nothing matches
     * @throws IllegalArgumentException
     *             if the prefix is longer than 20 bytes or is not unique
     */
    public Content findContentByPrefix(byte[] shaPrefix) throws IOException {
        if (shaPrefix.length == SHA_LENGTH)
            return getContent(shaPrefix);
        return getContent(findUniqueByPrefix(shaPrefix));
    }

    /**
     * Resolves a content pointer to the content it refers to.
     *
     * <ul>
     * <li><code>null</code> yields <code>null</code>
     * <li>{@link InlineContent} is returned as is
     * <li>{@link ContentSHA} and {@link StoredContent} are looked up by their
     * digest; a {@link StoredContent} whose length differs from the stored
     * length is wrapped into an {@link OffsetAndLength} starting at offset 0
     * </ul>
     *
     * @throws IllegalArgumentException
     *             if the referenced digest is not stored here, or if the
     *             pointer is of an unsupported class
     */
    public Content getContent(ContentPointer pointer) throws IOException {
        if (pointer == null)
            return null;
        if (pointer instanceof InlineContent)
            return (Content) pointer;
        if (pointer instanceof ContentSHA) {
            ContentSHA p = (ContentSHA) pointer;
            byte[] sha = p.getSHA();
            Content base = getContent(sha);
            if (base == null)
                throw new IllegalArgumentException("base SHA not found: "
                        + Hex.encodeHexString(sha));
            return base;
        }
        if (pointer instanceof StoredContent) {
            StoredContent p = (StoredContent) pointer;
            byte[] sha = p.getBaseSHA();
            Content base = getContent(sha);
            if (base == null)
                throw new IllegalArgumentException("base SHA not found: "
                        + Hex.encodeHexString(sha));
            if (p.getLength() != base.getLength()) {
                return new OffsetAndLength(base, 0, p.getLength());
            }
            return base;
        }
        throw new IllegalArgumentException(pointer.getClass().toString());
    }

    /**
     * Deserializes a content document.
     *
     * <p>
     * NOTE: this removes the <code>_id</code> field from the passed-in
     * object, so callers that still need the digest must read it first.
     *
     * @return the content, or <code>null</code> if <code>data</code> is
     *         <code>null</code>
     * @throws UnsupportedOperationException
     *             if the <code>store</code> field names an unknown scheme
     */
    @SuppressWarnings("unchecked")
    private Content getContent(BSONObject data) throws IOException {
        if (data == null)
            return null;
        data.removeField("_id");
        String store = BSONUtils.getString(data, "store");
        if (store == null || "raw".equals(store)) {
            return InlineContent.deserialize(data.toMap());
        }
        StorageScheme s = storageSchemes.get(store);
        if (s != null)
            return s.getContent(this, data.toMap());
        throw new UnsupportedOperationException(store);
    }

    /**
     * read into the buffer, continuing until the stream is finished or the
     * buffer is full.
     *
     * @return the number of bytes read, which could be 0 (not -1)
     * @throws IOException
     */
    private static int readFully(InputStream data, byte[] buffer)
            throws IOException {
        int read = data.read(buffer);
        if (read == -1) {
            return 0;
        }
        while (read < buffer.length) {
            int added = data.read(buffer, read, buffer.length - read);
            if (added == -1)
                return read;
            read += added;
        }
        return read;
    }

    /**
     * Stores the complete contents of the stream, split into chunks of at
     * most <code>chunkSize</code> bytes. If more than one chunk is needed, an
     * additional "cat" document that lists the chunks is stored under the
     * digest of the complete data. The stream is always closed.
     *
     * @return digest and length of the complete data
     */
    public ContentSHA storeContent(InputStream data) throws IOException {
        try {
            MessageDigest completeSHA = MessageDigest.getInstance("SHA");
            long completeLength = 0;
            byte[] chunk = new byte[chunkSize];
            int read;
            List<ContentSHA> chunks = new ArrayList<ContentSHA>();
            while (0 < (read = readFully(data, chunk))) {
                completeSHA.update(chunk, 0, read);
                completeLength += read;
                chunks.add(storeContentChunk(chunk, 0, read));
            }
            // empty input is still stored (as an empty chunk)
            if (chunks.isEmpty())
                return storeContentChunk(ArrayUtils.EMPTY_BYTE_ARRAY, 0, 0);
            // a single chunk is the complete content, no "cat" needed
            if (chunks.size() == 1)
                return chunks.get(0);

            List<Map<String, Object>> bases = new ArrayList<Map<String, Object>>(
                    chunks.size());
            for (ContentSHA c : chunks) {
                bases.add(c.serialize());
            }

            ContentSHA result = ContentSHA.forDigestAndLength(completeSHA
                    .digest(), completeLength);

            long existing = contentCollection.count(new BasicDBObject(_ID,
                    result.getSHA()));
            if (existing == 0) {
                contentCollection.insert(new BasicDBObject(_ID, result
                        .getSHA()).append("store", "cat").append("base",
                        bases), WriteConcern.SAFE);
            }
            return result;
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        } finally {
            IOUtils.closeQuietly(data);
        }
    }

    /**
     * Stores a single chunk unless a document with the same digest already
     * exists. The gzipped form ("zin", store "gz") is used when
     * {@link Compression#gzip} returns a result that is not larger than
     * <code>chunkSize</code>; otherwise the raw bytes are stored as "in".
     *
     * @return digest and length of the chunk
     */
    private ContentSHA storeContentChunk(byte[] bytes, final int offset,
            final int length) throws IOException {
        ContentSHA _sha = ContentSHA.calculate(bytes, offset, length);
        byte[] sha = _sha.getSHA();

        long existing = contentCollection.count(new BasicDBObject(_ID, sha));
        if (existing == 0) {
            byte[] gzipped = Compression.gzip(bytes, offset, length);
            if (gzipped != null && gzipped.length > chunkSize)
                gzipped = null;
            if (gzipped != null) {
                contentCollection.insert(new BasicDBObject(_ID, sha).append(
                        "zin", gzipped).append("store", "gz"),
                        WriteConcern.SAFE);
            } else {
                // only store the relevant part of the buffer
                if (offset > 0 || bytes.length != length) {
                    bytes = ArrayUtils.subarray(bytes, offset, offset + length);
                }
                contentCollection.insert(new BasicDBObject(_ID, sha).append(
                        "in", bytes), WriteConcern.SAFE);
            }
        }
        return _sha;
    }

    /**
     * Stores content that is described by a storage scheme representation.
     * The digest is calculated from the data that the scheme produces, and
     * the representation is inserted under that digest unless a document
     * with the same digest already exists.
     *
     * @param storageScheme
     *            must contain a known <code>store</code> value and a
     *            <code>length</code>
     * @return a pointer with the calculated digest and the given length
     * @throws UnsupportedOperationException
     *             if the storage scheme is unknown
     */
    public ContentPointer storeContent(Map<String, Object> storageScheme)
            throws IOException {
        StorageScheme s = storageSchemes.get(storageScheme.get("store"));
        if (s == null)
            throw new UnsupportedOperationException(storageScheme.toString());

        DBObject x = new BasicDBObject();
        for (Map.Entry<String, Object> e : storageScheme.entrySet()) {
            x.put(e.getKey(), e.getValue());
        }
        long length = BSONUtils.getRequiredLong(x, "length");

        // DigestUtils does not close the stream, so do it here
        InputStream in = s.getContent(this, storageScheme).getInputStream();
        byte[] sha;
        try {
            sha = DigestUtils.sha(in);
        } finally {
            IOUtils.closeQuietly(in);
        }

        long existing = contentCollection.count(new BasicDBObject(_ID, sha));
        if (existing == 0) {
            x.put(_ID, sha);
            contentCollection.insert(x, WriteConcern.SAFE);
        }
        return new StoredContent(sha, length);
    }

    /**
     * Supported formats: 1) Serialized ContentPointers, e.g.
     *
     * <pre>
     *   { in: [bytes] }
     * </pre>
     *
     * and
     *
     * <pre>
     *   { sha: &lt;sha&gt;, length: 123 }
     * </pre>
     *
     * 2) Internal StorageScheme representations (must have {store: something})
     *
     * @return the content, or <code>null</code> if <code>data</code> is
     *         <code>null</code>
     * @throws UnsupportedOperationException
     *             if the format or the storage scheme is not supported
     */
    public Content getContent(Map<String, Object> data) throws IOException {
        if (data == null)
            return null;
        String store = MapUtils.getString(data, "store");
        if (store == null || "raw".equals(store)) {
            if (data.containsKey("in"))
                return InlineContent.deserialize(data);
            if (data.containsKey("sha")) {
                return new StoredContent((byte[]) data.get("sha"), MapUtils
                        .getRequiredLong(data, "length")).loadOrLazyLoad(
                        this, 8 * 1024);
            }
            throw new UnsupportedOperationException(data.toString());
        }
        StorageScheme s = storageSchemes.get(store);
        if (s == null)
            throw new UnsupportedOperationException(store);
        return s.getContent(this, data);
    }

}