/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.mongodb.extractor;
import static com.stratio.deep.commons.utils.Utils.initConfig;
import static com.stratio.deep.commons.utils.Utils.removeAddressPort;
import static com.stratio.deep.mongodb.utils.UtilMongoDB.MONGO_DEFAULT_ID;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.spark.Partition;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.CommandResult;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.QueryBuilder;
import com.mongodb.ReadPreference;
import com.mongodb.ServerAddress;
import com.stratio.deep.commons.config.BaseConfig;
import com.stratio.deep.commons.config.DeepJobConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.exception.DeepGenericException;
import com.stratio.deep.commons.impl.DeepPartition;
import com.stratio.deep.commons.querybuilder.UpdateQueryBuilder;
import com.stratio.deep.commons.rdd.DeepTokenRange;
import com.stratio.deep.commons.rdd.IExtractor;
import com.stratio.deep.commons.utils.Pair;
import com.stratio.deep.mongodb.config.MongoDeepJobConfig;
import com.stratio.deep.mongodb.partition.MongoPartition;
import com.stratio.deep.mongodb.reader.MongoReader;
import com.stratio.deep.mongodb.writer.MongoWriter;
/**
 * Base extractor that reads from and writes to a MongoDB collection through the native Java driver.
 *
 * <p>Partitions are derived either from the chunk metadata stored in the {@code config} database
 * (when the collection is registered there as sharded) or from the result of the
 * {@code splitVector} command (otherwise). Concrete subclasses supply the conversion between
 * {@link DBObject} and the entity type.
 *
 * Created by rcrespo on 7/11/14.
 *
 * @param <T> the entity type produced when reading and consumed when writing
 * @param <S> the configuration type accepted by the extractor
 */
public abstract class MongoNativeExtractor<T, S extends BaseConfig> implements IExtractor<T, S> {

    /**
     * Name of the field of the {@code splitVector} command result that holds the split keys.
     */
    public static final String SPLIT_KEYS = "splitKeys";

    /**
     * The constant serialVersionUID.
     */
    private static final long serialVersionUID = -4020891863696443624L;

    /**
     * Value sent as {@code maxChunkSize} in the {@code splitVector} command.
     * NOTE(review): MongoDB documents this value in megabytes -- confirm against the server version in use.
     */
    private int splitSize = 10;

    /**
     * Reader used by {@link #hasNext()} and {@link #next()}; created by {@link #initIterator}.
     */
    private MongoReader reader;

    /**
     * Writer used by {@link #saveRDD}; created by {@link #initSave}.
     */
    private MongoWriter writer;

    /**
     * The Mongo deep job config, (re)initialised from the configuration passed to each entry point.
     */
    protected MongoDeepJobConfig<T> mongoDeepJobConfig;

    /**
     * Computes the partitions of the configured collection.
     *
     * <p>A temporary client is opened against the configured host and always closed before returning.
     *
     * @param config the extractor configuration
     * @return one partition per chunk for sharded collections, otherwise one per split range
     * @throws DeepGenericException if the configured host cannot be resolved
     */
    @Override
    public Partition[] getPartitions(S config) {
        MongoClient mongoClient = null;

        try {
            mongoDeepJobConfig = initConfig(config, mongoDeepJobConfig);

            ServerAddress address = new ServerAddress(mongoDeepJobConfig.getHost());
            List<ServerAddress> addressList = new ArrayList<>();
            addressList.add(address);

            mongoClient = new MongoClient(addressList);
            mongoClient.setReadPreference(ReadPreference.nearest());

            DB db = mongoClient.getDB(mongoDeepJobConfig.getDatabase());
            DBCollection collection = db.getCollection(mongoDeepJobConfig.getCollection());

            return isShardedCollection(collection) ? calculateShardChunks(collection) : calculateSplits(collection);
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        } finally {
            if (mongoClient != null) {
                mongoClient.close();
            }
        }
    }

    /**
     * Tells whether the collection has an entry in {@code config.collections}.
     *
     * @param collection the collection
     * @return {@code true} if a document whose id is the collection full name exists there
     */
    private boolean isShardedCollection(DBCollection collection) {
        DB config = collection.getDB().getSisterDB("config");
        DBCollection configCollections = config.getCollection("collections");

        DBObject dbObject = configCollections.findOne(new BasicDBObject(MONGO_DEFAULT_ID, collection.getFullName()));
        return dbObject != null;
    }

    /**
     * Reads {@code config.shards} and maps every shard id to the addresses listed in its {@code host} field.
     *
     * <p>A {@code host} value of the form {@code prefix/host1,host2} yields the hosts after the slash;
     * a value without a slash is used as is, so that such shards are not silently dropped
     * (which previously left their chunks without replicas).
     *
     * @param collection any collection reachable through the same connection
     * @return shard id to host addresses
     */
    private Map<String, String[]> getShards(DBCollection collection) {
        DB config = collection.getDB().getSisterDB("config");
        DBCollection configShards = config.getCollection("shards");

        Map<String, String[]> map = new HashMap<>();
        DBCursor cursorShards = configShards.find();
        try {
            while (cursorShards.hasNext()) {
                DBObject currentShard = cursorShards.next();
                String currentHost = (String) currentShard.get("host");
                // indexOf returns -1 when there is no slash, so substring(0) keeps the whole value.
                int slashIndex = currentHost.indexOf('/');
                map.put((String) currentShard.get(MONGO_DEFAULT_ID),
                        currentHost.substring(slashIndex + 1).split(","));
            }
        } finally {
            cursorShards.close();
        }
        return map;
    }

    /**
     * Queries {@code config.chunks} for the chunks whose {@code ns} is the collection full name.
     *
     * @param collection the collection
     * @return an open cursor over the chunk documents; the caller is responsible for closing it
     */
    private DBCursor getChunks(DBCollection collection) {
        DB config = collection.getDB().getSisterDB("config");
        DBCollection configChunks = config.getCollection("chunks");
        return configChunks.find(new BasicDBObject("ns", collection.getFullName()));
    }

    /**
     * Builds the partitions of a collection from the keys returned by {@code splitVector}.
     *
     * <p>If the command yields no split keys on the current connection, every shard is asked in turn.
     * For {@code n} split keys, {@code n + 1} consecutive ranges over the id field are produced; the
     * first range has no lower bound and the last one has no upper bound.
     *
     * @param collection the collection
     * @return the partitions
     * @throws DeepGenericException if no connection could provide split keys
     */
    private DeepPartition[] calculateSplits(DBCollection collection) {
        BasicDBList splitData = getSplitData(collection);
        List<ServerAddress> serverAddressList = collection.getDB().getMongo().getServerAddressList();

        if (splitData == null) {
            Pair<BasicDBList, List<ServerAddress>> pair = getSplitDataCollectionShardEnviroment(getShards(collection),
                    collection.getDB().getName(),
                    collection.getName());
            if (pair == null) {
                // Fail with an explicit cause instead of a NullPointerException on pair.left.
                throw new DeepGenericException(new IllegalStateException(
                        "Unable to obtain split keys for collection " + collection.getFullName()));
            }
            splitData = pair.left;
            serverAddressList = pair.right;
        }

        List<String> stringHosts = new ArrayList<>();
        for (ServerAddress serverAddress : serverAddressList) {
            stringHosts.add(serverAddress.toString());
        }

        MongoPartition[] partitions = new MongoPartition[splitData.size() + 1];
        Object lastKey = null; // Lower boundary of the first min split
        int i = 0;
        for (Object aSplitData : splitData) {
            BasicDBObject currentKey = (BasicDBObject) aSplitData;
            Object currentO = currentKey.get(MONGO_DEFAULT_ID);
            partitions[i] = new MongoPartition(mongoDeepJobConfig.getRddId(), i, new DeepTokenRange(lastKey,
                    currentO, stringHosts), MONGO_DEFAULT_ID);
            lastKey = currentO;
            i++;
        }

        // Trailing open-ended range; it carries the same RDD id as every other partition.
        partitions[i] = new MongoPartition(mongoDeepJobConfig.getRddId(), i,
                new DeepTokenRange(lastKey, null, stringHosts), MONGO_DEFAULT_ID);
        return partitions;
    }

    /**
     * Runs the {@code splitVector} command for the collection on the {@code admin} database.
     *
     * @param collection the collection
     * @return the value of the {@code splitKeys} field of the command result, or {@code null} if absent
     */
    private BasicDBList getSplitData(DBCollection collection) {
        final DBObject cmd = BasicDBObjectBuilder.start("splitVector", collection.getFullName())
                .add("keyPattern", new BasicDBObject(MONGO_DEFAULT_ID, 1))
                .add("force", false)
                .add("maxChunkSize", splitSize)
                .get();

        CommandResult splitVectorResult = collection.getDB().getSisterDB("admin").command(cmd);
        return (BasicDBList) splitVectorResult.get(SPLIT_KEYS);
    }

    /**
     * Connects to each shard in turn and returns the split keys from the first one that provides them.
     *
     * <p>Every client opened here is closed before moving on to the next shard or returning.
     *
     * @param shards         shard id to host addresses
     * @param dbName         the db name
     * @param collectionName the collection name
     * @return the split keys together with the addresses of the shard that produced them,
     *         or {@code null} if no shard returned split keys
     * @throws DeepGenericException if a shard address cannot be resolved
     */
    private Pair<BasicDBList, List<ServerAddress>> getSplitDataCollectionShardEnviroment(Map<String, String[]> shards,
            String dbName,
            String collectionName) {
        try {
            for (String[] shardHosts : shards.values()) {
                List<ServerAddress> addressList = getServerAddressList(Arrays.asList(shardHosts));
                MongoClient mongoClient = new MongoClient(addressList);
                try {
                    BasicDBList dbList = getSplitData(mongoClient.getDB(dbName).getCollection(collectionName));
                    if (dbList != null) {
                        return Pair.create(dbList, addressList);
                    }
                } finally {
                    mongoClient.close();
                }
            }
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        }
        return null;
    }

    /**
     * Returns the replicas of the partition after passing them through {@code removeAddressPort}.
     *
     * @param split the partition; must be a {@link DeepPartition}
     * @return the preferred locations
     */
    @Override
    public List<String> getPreferredLocations(Partition split) {
        return removeAddressPort(((DeepPartition) split).splitWrapper().getReplicas());
    }

    /**
     * Builds one partition per chunk registered in {@code config.chunks} for the collection.
     *
     * <p>The key used for the range bounds is the last field name of the first chunk's {@code min}
     * document. The resulting partitions are returned in random order.
     * NOTE(review): indexes are assigned before shuffling, so the position of a partition in the
     * returned array does not match its index -- confirm the consumer does not rely on that.
     *
     * @param collection the collection
     * @return the partitions, shuffled
     */
    private DeepPartition[] calculateShardChunks(DBCollection collection) {
        Map<String, String[]> shards = getShards(collection);

        // Collected in a list so the result size always matches what the cursor really returned.
        List<MongoPartition> mongoPartitions = new ArrayList<>();
        String key = null;

        DBCursor chunks = getChunks(collection);
        try {
            while (chunks.hasNext()) {
                DBObject chunk = chunks.next();
                DBObject min = (DBObject) chunk.get("min");
                DBObject max = (DBObject) chunk.get("max");

                if (key == null) {
                    for (String s : min.keySet()) {
                        key = s;
                    }
                }

                mongoPartitions.add(new MongoPartition(mongoDeepJobConfig.getRddId(), mongoPartitions.size(),
                        new DeepTokenRange(shards.get(chunk.get("shard")), min.get(key), max.get(key)), key));
            }
        } finally {
            chunks.close();
        }

        Collections.shuffle(mongoPartitions);
        return mongoPartitions.toArray(new MongoPartition[mongoPartitions.size()]);
    }

    /**
     * Converts address strings into {@link ServerAddress} instances, preserving order.
     *
     * @param addressStringList the address string list
     * @return the server address list
     * @throws UnknownHostException if an address cannot be resolved
     */
    private List<ServerAddress> getServerAddressList(List<String> addressStringList) throws UnknownHostException {
        List<ServerAddress> addressList = new ArrayList<>(addressStringList.size());
        for (String addressString : addressStringList) {
            addressList.add(new ServerAddress(addressString));
        }
        return addressList;
    }

    /**
     * Delegates to the reader; {@link #initIterator} must have been called first.
     */
    @Override
    public boolean hasNext() {
        return reader.hasNext();
    }

    /**
     * Reads the next document from the reader and converts it to an entity.
     */
    @Override
    public T next() {
        return transformElement(reader.next());
    }

    /**
     * Closes the reader and the writer, if they were created.
     * The writer is closed even when closing the reader fails.
     */
    @Override
    public void close() {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            if (writer != null) {
                writer.close();
            }
        }
    }

    /**
     * Creates the reader for the given partition.
     *
     * @param dp     the partition to read
     * @param config the extractor configuration
     */
    @Override
    public void initIterator(Partition dp, S config) {
        mongoDeepJobConfig = initConfig(config, mongoDeepJobConfig);
        reader = new MongoReader(mongoDeepJobConfig);
        reader.init(dp);
    }

    /**
     * Converts the entity to a document and hands it to the writer; {@link #initSave} must have been called first.
     *
     * @param entity the entity to save
     */
    @Override
    public void saveRDD(T entity) {
        writer.save(transformElement(entity));
    }

    /**
     * Creates the writer from the configured hosts, database, collection and write concern.
     *
     * @param config       the extractor configuration
     * @param first        not used by this implementation
     * @param queryBuilder not used by this implementation
     * @throws DeepGenericException if a configured host cannot be resolved
     */
    @Override
    public void initSave(S config, T first, UpdateQueryBuilder queryBuilder) {
        mongoDeepJobConfig = initConfig(config, mongoDeepJobConfig);

        try {
            writer = new MongoWriter(getServerAddressList(mongoDeepJobConfig.getHostList()),
                    mongoDeepJobConfig.getDatabase(),
                    mongoDeepJobConfig.getCollection(), mongoDeepJobConfig.getWriteConcern());
        } catch (UnknownHostException e) {
            throw new DeepGenericException(e);
        }
    }

    /**
     * Converts a document read from MongoDB into an entity.
     *
     * @param dbObject the db object
     * @return the entity
     */
    protected abstract T transformElement(DBObject dbObject);

    /**
     * Converts an entity into the document to be written to MongoDB.
     *
     * @param entity the entity
     * @return the dB object
     */
    protected abstract DBObject transformElement(T entity);
}