/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.value.binary;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.modeshape.common.annotation.NotThreadSafe;
import org.modeshape.common.util.IoUtil;
import org.modeshape.common.util.StringUtil;
import org.modeshape.jcr.JcrI18n;
import org.modeshape.jcr.value.BinaryKey;
import org.modeshape.jcr.value.BinaryValue;
import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.MongoCredential;
import com.mongodb.ServerAddress;
import com.mongodb.WriteConcern;
/**
 * A {@link BinaryStore} implementation that uses a MongoDB database for persisting binary values.
 * <p>
 * Each binary value is stored in a dedicated collection named after the value's SHA-1 {@link BinaryKey}. Every
 * collection contains one "header" document carrying metadata (MIME type, extracted text, usage flags, format
 * version) plus a sequence of fixed-size "data" chunk documents ordered by a position field.
 *
 * @author kulikov
 */
public class MongodbBinaryStore extends AbstractBinaryStore {

    // default database name
    private static final String DEFAULT_DB_NAME = "ModeShape_BinaryStore";

    // field names used inside chunk documents
    private static final String FIELD_CHUNK_TYPE = "chunk-type";
    private static final String FIELD_MIME_TYPE = "mime-type";
    private static final String FIELD_EXTRACTED_TEXT = "extracted-text";
    private static final String FIELD_UNUSED_SINCE = "unused-since";
    private static final String FIELD_UNUSED = "unused";
    private static final String FIELD_CHUNK_SIZE = "chunk-size";
    private static final String FIELD_CHUNK_BUFFER = "chunk-buffer";
    // NOTE: the stored field name is "chunk-order" and must stay that way for compatibility with existing data
    private static final String FIELD_CHUNK_POSITION = "chunk-order";
    private static final String FIELD_CHUNK_VERSION = "chunk-version";

    // chunk types (header or data)
    private static final String CHUNK_TYPE_HEADER = "header";
    private static final String CHUNK_TYPE_DATA_CHUNK = "data";

    // chunk format versions
    private static final int VERSION_1 = 1;

    // queries selecting the header chunk or the data chunks of a collection
    protected static final BasicDBObject HEADER_QUERY = new BasicDBObject().append(FIELD_CHUNK_TYPE, CHUNK_TYPE_HEADER);
    protected static final BasicDBObject DATA_CHUNK_QUERY = new BasicDBObject().append(FIELD_CHUNK_TYPE, CHUNK_TYPE_DATA_CHUNK);
    protected static final BasicDBObject DATA_CHUNK_SORT_INDEX = new BasicDBObject().append(FIELD_CHUNK_POSITION, 1);

    // temporary local store used to compute the SHA-1 of incoming content
    private FileSystemBinaryStore cache;

    // database name
    private String database;

    // credentials; may be null/blank in which case no authentication is attempted
    private String username;
    private String password;

    // server address(es) in "host:port" format - note that order is important
    private Set<String> hostAddresses = new LinkedHashSet<>();

    // database instance, initialized in start()
    private DB db;

    // chunk size in bytes
    protected int chunkSize = 1024;

    /**
     * Creates a new mongo binary store instance using the supplied params.
     *
     * @param host the mongo primary host; may be null in which case {@code hostAddresses} has to be provided
     * @param port the port of the primary host; may be null in which case {@code hostAddresses} has to be provided
     * @param database the name of the database; may be null in which case a default will be used
     * @param username the username; may be null
     * @param password the password; may be null
     * @param hostAddresses a {@link List} of (host:port) pairs representing multiple server addresses; may be null
     * @throws IllegalArgumentException if neither (host and port) nor host addresses are provided
     */
    public MongodbBinaryStore( String host,
                               Integer port,
                               String database,
                               String username,
                               String password,
                               List<String> hostAddresses ) {
        this.cache = TransientBinaryStore.get();
        this.database = !StringUtil.isBlank(database) ? database : DEFAULT_DB_NAME;
        this.username = username;
        this.password = password;

        boolean hostAddressesProvided = hostAddresses != null && !hostAddresses.isEmpty();
        this.hostAddresses = new LinkedHashSet<>();
        // the (host, port) pair, when complete, becomes the first address in the set
        String defaultServer = !StringUtil.isBlank(host) && port != null ? host + ":" + port : null;
        if (defaultServer == null && !hostAddressesProvided) {
            throw new IllegalArgumentException("Invalid Mongo binary store configuration: either (host and port) or host addresses have to provided");
        }
        if (defaultServer != null) {
            this.hostAddresses.add(defaultServer);
        }
        if (hostAddressesProvided) {
            this.hostAddresses.addAll(hostAddresses);
        }
    }

    /**
     * Converts list of addresses specified in text format to mongodb specific address.
     *
     * @param addresses list of addresses in text format
     * @return list of mongodb addresses
     * @throws IllegalArgumentException if an address has a bad format or is not valid
     */
    private List<ServerAddress> convertToServerAddresses( Set<String> addresses ) {
        return addresses.stream()
                        .map(this::stringToServerAddress)
                        .filter(Objects::nonNull)
                        .collect(Collectors.toList());
    }

    /**
     * Parses a single "host:port" string into a {@link ServerAddress}.
     * <p>
     * NOTE(review): splitting on ":" assumes IPv4 or hostname addresses; raw IPv6 literals would need bracket
     * handling - confirm whether IPv6 must be supported before extending.
     *
     * @param address the address in "host:port" format; may be null or blank, in which case null is returned
     * @return the parsed address, or null if the input was null/blank
     * @throws IllegalArgumentException if the address has a bad format or the host is unknown
     */
    private ServerAddress stringToServerAddress( String address ) {
        if (address == null || address.trim().length() == 0) {
            return null;
        }
        // address has format <host:port>
        String[] tokens = address.split(":");
        if (tokens.length != 2) {
            throw new IllegalArgumentException("Wrong address format: " + address + " (expected host:port)");
        }
        String host = tokens[0];
        int port;
        try {
            port = Integer.parseInt(tokens[1]);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Wrong address format: " + address + " (expected host:port)");
        }
        try {
            return new ServerAddress(host, port);
        } catch (UnknownHostException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Gets the size of the chunk used to store content.
     *
     * @return chunk size in bytes.
     */
    public int getChunkSize() {
        return chunkSize;
    }

    /**
     * Modifies chunk size used to store content.
     *
     * @param chunkSize chunk size in bytes.
     */
    public void setChunkSize( int chunkSize ) {
        this.chunkSize = chunkSize;
    }

    @Override
    public BinaryValue storeValue( InputStream stream,
                                   boolean markAsUnused ) throws BinaryStoreException {
        // store into temporary file system store first in order to compute the SHA-1 key
        BinaryValue temp = cache.storeValue(stream, markAsUnused);
        try {
            // prepare new binary key based on SHA-1
            BinaryKey key = new BinaryKey(temp.getKey().toString());

            // check for duplicate records: one collection per binary key
            if (db.collectionExists(key.toString())) {
                return new StoredBinaryValue(this, key, temp.getSize());
            }

            // store content
            DBCollection content = db.getCollection(key.toString());
            content.createIndex(DATA_CHUNK_SORT_INDEX);
            // try-with-resources guarantees the trailing partial chunk is flushed even if
            // the writer closes the stream without an explicit flush (see ChunkOutputStream#close)
            try (ChunkOutputStream dbStream = markAsUnused ?
                    new ChunkOutputStream(content, System.currentTimeMillis()) :
                    new ChunkOutputStream(content)) {
                IoUtil.write(temp.getStream(), dbStream);
            } catch (Exception e) {
                throw new BinaryStoreException(e);
            }
            return new StoredBinaryValue(this, key, temp.getSize());
        } finally {
            // clean up temp store
            cache.markAsUnused(temp.getKey());
        }
    }

    @Override
    public InputStream getInputStream( BinaryKey key ) throws BinaryStoreException {
        if (!db.collectionExists(key.toString())) {
            throw new BinaryStoreException(JcrI18n.unableToFindBinaryValue.text(key, db.getName()));
        }
        // callers should close the returned stream so the underlying cursor is released
        return new ChunkInputStream(db.getCollection(key.toString()));
    }

    @Override
    public void markAsUsed( Iterable<BinaryKey> keys ) {
        for (BinaryKey key : keys) {
            // silently ignore if content does not exist
            if (db.collectionExists(key.toString())) {
                DBCollection content = db.getCollection(key.toString());
                setAttribute(content, FIELD_UNUSED, false);
                setAttribute(content, FIELD_UNUSED_SINCE, null);
            }
        }
    }

    @Override
    public void markAsUnused( Iterable<BinaryKey> keys ) {
        for (BinaryKey key : keys) {
            // silently ignore if content does not exist
            if (db.collectionExists(key.toString())) {
                DBCollection content = db.getCollection(key.toString());
                setAttribute(content, FIELD_UNUSED, true);
                setAttribute(content, FIELD_UNUSED_SINCE, System.currentTimeMillis());
            }
        }
    }

    @Override
    public void removeValuesUnusedLongerThan( long minimumAge,
                                              TimeUnit unit ) {
        long deadline = System.currentTimeMillis() - unit.toMillis(minimumAge);
        // consider all stored keys (used and unused); isExpired filters to unused-before-deadline
        for (String key : getStoredKeys(false)) {
            DBCollection content = db.getCollection(key);
            if (isExpired(content, deadline)) {
                content.drop();
            }
        }
    }

    @Override
    protected void storeMimeType( BinaryValue source,
                                  String mimeType ) throws BinaryStoreException {
        if (!db.collectionExists(source.getKey().toString())) {
            throw new BinaryStoreException(JcrI18n.unableToFindBinaryValue.text(source.getKey(), db.getName()));
        }
        DBCollection content = db.getCollection(source.getKey().toString());
        setAttribute(content, FIELD_MIME_TYPE, mimeType);
    }

    @Override
    protected String getStoredMimeType( BinaryValue source ) throws BinaryStoreException {
        if (!db.collectionExists(source.getKey().toString())) {
            throw new BinaryStoreException(JcrI18n.unableToFindBinaryValue.text(source.getKey(), db.getName()));
        }
        DBCollection content = db.getCollection(source.getKey().toString());
        return (String)getAttribute(content, FIELD_MIME_TYPE);
    }

    @Override
    public void storeExtractedText( BinaryValue source,
                                    String extractedText ) throws BinaryStoreException {
        if (!db.collectionExists(source.getKey().toString())) {
            throw new BinaryStoreException(JcrI18n.unableToFindBinaryValue.text(source.getKey(), db.getName()));
        }
        DBCollection content = db.getCollection(source.getKey().toString());
        setAttribute(content, FIELD_EXTRACTED_TEXT, extractedText);
    }

    @Override
    public String getExtractedText( BinaryValue source ) throws BinaryStoreException {
        if (!db.collectionExists(source.getKey().toString())) {
            throw new BinaryStoreException(JcrI18n.unableToFindBinaryValue.text(source.getKey(), db.getName()));
        }
        DBCollection content = db.getCollection(source.getKey().toString());
        return (String)getAttribute(content, FIELD_EXTRACTED_TEXT);
    }

    @Override
    public Iterable<BinaryKey> getAllBinaryKeys() {
        List<BinaryKey> list = new ArrayList<>();
        for (String s : getStoredKeys(true)) {
            list.add(new BinaryKey(s));
        }
        return list;
    }

    /**
     * Lists the names of the collections holding binary values.
     *
     * @param onlyUsed if {@code true}, keys whose header is flagged unused are excluded
     * @return the set of collection names (binary keys in string form); never null
     */
    private Set<String> getStoredKeys( boolean onlyUsed ) {
        Set<String> storedKeys = new HashSet<>();
        for (String collectionName : db.getCollectionNames()) {
            // make sure Mongo predefined collections are not taken into account
            String lowerCaseName = collectionName.toLowerCase();
            if (lowerCaseName.startsWith("system") || lowerCaseName.startsWith("local")) {
                continue;
            }
            DBCollection collection = db.getCollection(collectionName);
            // Boolean.TRUE.equals guards against a missing header or missing flag (e.g. a foreign
            // collection in a shared database), which previously caused an unboxing NPE
            boolean unused = Boolean.TRUE.equals(getAttribute(collection, FIELD_UNUSED));
            if (!unused || !onlyUsed) {
                storedKeys.add(collectionName);
            }
        }
        return storedKeys;
    }

    @Override
    public void start() {
        super.start();
        // check database name
        if (StringUtil.isBlank(database)) {
            throw new RuntimeException("Database name is not specified");
        }
        initMongo(username, password);
    }

    /**
     * Connects to the configured server(s) and initializes the {@link #db} handle.
     *
     * @param username the username; may be null/blank, in which case no credentials are used
     * @param password the password; may be null/blank, in which case no credentials are used
     */
    private void initMongo( String username, String password ) {
        List<MongoCredential> credentials = new ArrayList<>();
        if (!StringUtil.isBlank(username) && !StringUtil.isBlank(password)) {
            credentials.add(MongoCredential.createCredential(username, database, password.toCharArray()));
        }
        // connect to database; with multiple addresses the client operates against a replica set
        MongoClient client = hostAddresses.size() > 1 ?
                new MongoClient(convertToServerAddresses(hostAddresses), credentials) :
                new MongoClient(stringToServerAddress(hostAddresses.iterator().next()), credentials);
        client.setWriteConcern(WriteConcern.ACKNOWLEDGED);
        db = client.getDB(database);
    }

    /**
     * Modifies one attribute of the content header, preserving the other header fields.
     *
     * @param content stored content
     * @param fieldName attribute name
     * @param value new value for the attribute; may be null
     */
    private void setAttribute( DBCollection content,
                               String fieldName,
                               Object value ) {
        DBObject header = content.findOne(HEADER_QUERY);
        // clone the known header fields into a fresh document, then overwrite the requested one
        BasicDBObject newHeader = new BasicDBObject();
        newHeader.put(FIELD_CHUNK_TYPE, header.get(FIELD_CHUNK_TYPE));
        newHeader.put(FIELD_MIME_TYPE, header.get(FIELD_MIME_TYPE));
        newHeader.put(FIELD_EXTRACTED_TEXT, header.get(FIELD_EXTRACTED_TEXT));
        newHeader.put(FIELD_UNUSED, header.get(FIELD_UNUSED));
        newHeader.put(FIELD_UNUSED_SINCE, header.get(FIELD_UNUSED_SINCE));
        newHeader.put(fieldName, value);
        content.update(HEADER_QUERY, newHeader);
    }

    /**
     * Gets attribute's value from the content header.
     *
     * @param content stored content
     * @param fieldName attribute name
     * @return the attribute's value, or null if the header or the field is absent
     */
    private Object getAttribute( DBCollection content,
                                 String fieldName ) {
        DBObject header = content.findOne(HEADER_QUERY);
        return header != null ? header.get(fieldName) : null;
    }

    /**
     * Checks status of unused content.
     *
     * @param content content to check status
     * @param deadline moment of time in the past
     * @return true if content is marked as unused before the deadline
     */
    private boolean isExpired( DBCollection content,
                               long deadline ) {
        Long unusedSince = (Long)getAttribute(content, FIELD_UNUSED_SINCE);
        return unusedSince != null && unusedSince < deadline;
    }

    /**
     * Provides an OutputStream which writes content into database storage as a series of chunks.
     * <p>
     * The constructor inserts the header document; {@link #flush()} inserts one data chunk per full buffer.
     * {@link #close()} flushes the trailing partial chunk, so closing the stream is sufficient to persist
     * all written bytes.
     */
    @NotThreadSafe
    protected class ChunkOutputStream extends OutputStream {
        // stored content
        private DBCollection content;
        // local intermediate chunk buffer
        private byte[] buffer = new byte[chunkSize];
        // current position in the local buffer
        private int offset;
        // the position of a chunk within the series of chunks
        private int position;
        // object for writing chunks into storage
        private BasicDBObject chunk = new BasicDBObject();

        /**
         * Creates new stream whose header marks the content as used.
         *
         * @param content stored content
         */
        public ChunkOutputStream( DBCollection content ) {
            this.content = content;
            // the first chunk is the header; mark the content as used
            BasicDBObject header = new BasicDBObject();
            header.put(FIELD_CHUNK_TYPE, CHUNK_TYPE_HEADER);
            header.put(FIELD_UNUSED, false);
            header.put(FIELD_CHUNK_VERSION, VERSION_1);
            this.content.insert(header);
        }

        /**
         * Creates new stream whose header marks the content as unused.
         *
         * @param content stored content
         * @param unusedSince the number of milliseconds the binary has not been used; this value will be recorded in the
         *        binary value
         */
        public ChunkOutputStream( DBCollection content,
                                  long unusedSince ) {
            this.content = content;
            // the first chunk is the header; mark the content as unused since the given time
            BasicDBObject header = new BasicDBObject();
            header.put(FIELD_CHUNK_TYPE, CHUNK_TYPE_HEADER);
            header.put(FIELD_UNUSED, true);
            header.put(FIELD_UNUSED_SINCE, unusedSince);
            this.content.insert(header);
        }

        @Override
        public void write( int b ) {
            // fill the local buffer first
            if (offset < buffer.length) {
                buffer[offset++] = (byte)b;
            }
            // push full chunk into storage
            if (offset == buffer.length) {
                flush();
            }
        }

        @Override
        public void flush() {
            if (offset > 0) {
                // fill data; FIELD_CHUNK_SIZE records the valid byte count (the final chunk may be partial)
                chunk.put(FIELD_CHUNK_TYPE, CHUNK_TYPE_DATA_CHUNK);
                chunk.put(FIELD_CHUNK_SIZE, offset);
                chunk.put(FIELD_CHUNK_BUFFER, buffer);
                chunk.put(FIELD_CHUNK_POSITION, position);
                // store chunk
                content.insert(chunk);
                // reset; a fresh document is required since the driver does not allow reusing inserted objects
                offset = 0;
                position++;
                chunk = new BasicDBObject();
            }
        }

        @Override
        public void close() {
            // OutputStream.close() is a no-op by default; flush the trailing partial chunk
            // so that data is not lost when callers close without an explicit flush
            flush();
        }
    }

    /**
     * Provides an InputStream which reads content from database storage chunk by chunk.
     * <p>
     * Callers should {@link #close()} the stream to release the underlying database cursor.
     */
    @NotThreadSafe
    protected class ChunkInputStream extends InputStream {
        // cursor over the data chunks
        private final DBCursor cursor;
        // local buffer and current position in the buffer
        private byte[] buffer = new byte[chunkSize];
        private int offset = 0;
        // object for reading chunks from database
        private DBObject chunk = new BasicDBObject();
        // the actual amount of data stored in the current chunk
        private int size = 0;

        public ChunkInputStream( DBCollection chunks ) {
            // dynamically create the cursor based on the version recorded in the header
            cursor = cursorFor(chunks);
        }

        /**
         * Builds the data-chunk cursor, applying the sort order required by the header's format version.
         *
         * @param parent the collection holding the chunks
         * @return the cursor over the data chunks
         * @throws IllegalArgumentException if the header declares an unknown version
         */
        private DBCursor cursorFor( DBCollection parent ) {
            DBObject header = parent.findOne(HEADER_QUERY);
            // we should always have a header
            assert header != null;
            Object version = header.get(FIELD_CHUNK_VERSION);
            // we're always interested in data chunks
            DBCursor result = parent.find(DATA_CHUNK_QUERY);
            if (version == null) {
                // legacy data written before versioning: rely on natural order
                return result;
            }
            switch ((Integer)version) {
                case VERSION_1:
                    return result.sort(DATA_CHUNK_SORT_INDEX);
                default:
                    throw new IllegalArgumentException("Unknown chunk version " + version);
            }
        }

        @Override
        public int read() {
            // read from the current chunk if bytes remain
            if (offset < size) {
                // make sure it's unsigned (see javadoc)
                return 0xff & buffer[offset++];
            }
            // try to pick up the next chunk
            if (cursor.hasNext()) {
                chunk = cursor.next();
                size = (Integer)chunk.get(FIELD_CHUNK_SIZE);
                buffer = (byte[])chunk.get(FIELD_CHUNK_BUFFER);
                offset = 0;
            }
            // start reading from the new chunk
            if (offset < size) {
                return 0xff & buffer[offset++];
            }
            // end of stream reached
            return -1;
        }

        @Override
        public void close() {
            // InputStream.close() is a no-op by default; release the server-side cursor
            // to avoid leaking cursors on the MongoDB server
            cursor.close();
        }
    }
}