/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.cassandra.config;
import static com.stratio.deep.cassandra.util.CassandraUtils.createTableQueryGenerator;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.BATCHSIZE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.BISECT_FACTOR;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.CQLPORT;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.CREATE_ON_WRITE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.PAGE_SIZE;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.READ_CONSISTENCY_LEVEL;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.RPCPORT;
import static com.stratio.deep.commons.extractor.utils.ExtractorConstants.WRITE_CONSISTENCY_LEVEL;
import static com.stratio.deep.commons.utils.Utils.quote;
import java.io.Serializable;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.collections.map.HashedMap;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.KeyspaceMetadata;
import com.datastax.driver.core.Metadata;
import com.datastax.driver.core.ProtocolVersion;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.TableMetadata;
import com.stratio.deep.cassandra.filter.value.EqualsInValue;
import com.stratio.deep.commons.config.DeepJobConfig;
import com.stratio.deep.commons.config.ExtractorConfig;
import com.stratio.deep.commons.entity.Cell;
import com.stratio.deep.commons.entity.Cells;
import com.stratio.deep.commons.exception.DeepIOException;
import com.stratio.deep.commons.exception.DeepIndexNotFoundException;
import com.stratio.deep.commons.exception.DeepNoSuchFieldException;
import com.stratio.deep.commons.extractor.utils.ExtractorConstants;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.utils.Constants;
import scala.Tuple2;
/**
* Base class for all config implementations providing default implementations for methods defined in
* {@link ICassandraDeepJobConfig}.
*/
public abstract class CassandraDeepJobConfig<T> extends DeepJobConfig<T, CassandraDeepJobConfig<T>> implements
AutoCloseable,
ICassandraDeepJobConfig<T> {
/**
* Logger used by this class.
*/
private static final Logger LOG = Logger.getLogger(CassandraDeepJobConfig.class);
private static final long serialVersionUID = -7179376653643603038L;
/**
* Fully qualified class name of the partitioner. Defaults to Murmur3Partitioner.
*/
private String partitionerClassName = "org.apache.cassandra.dht.Murmur3Partitioner";
/**
* Cassandra server RPC port.
*/
private Integer rpcPort = Constants.DEFAULT_CASSANDRA_RPC_PORT;
/**
* Cassandra server CQL port. This is the port used by getSession() to build the driver Cluster.
*/
private Integer cqlPort = Constants.DEFAULT_CASSANDRA_CQL_PORT;
/**
* default "where" filter to use to access ColumnFamily's data. Keys are looked up as column names by
* validateAdditionalFilters(); nothing in this class adds entries to the map.
*/
private final Map<String, Serializable> additionalFilters = new TreeMap<>();
/**
* Size of the batch created when writing to Cassandra.
*/
private int batchSize = Constants.DEFAULT_BATCH_SIZE;
/**
* holds columns metadata fetched from Cassandra. Lazily filled by columnDefinitions(); transient, so it is not
* serialized with the config.
*/
private transient Map<String, Cell> columnDefinitionMap;
/**
* Default read consistency level. Defaults to LOCAL_ONE.
*/
private String readConsistencyLevel = ConsistencyLevel.LOCAL_ONE.name();
/**
* Default write consistency level. Defaults to QUORUM.
*/
private String writeConsistencyLevel = ConsistencyLevel.QUORUM.name();
/**
* Enables/Disables auto-creation of column family when writing to Cassandra. Defaults to TRUE, i.e. a missing
* output column family is created by createOutputTableIfNeeded().
*/
protected Boolean createTableOnWrite = Boolean.TRUE;
/**
* Session stored by session(Session) or created by getSession(); closed by close(). Transient.
*/
private transient Session session;
/**
* Set to TRUE at the end of initialize(); checked by checkInitialized().
*/
private Boolean isInitialized = Boolean.FALSE;
/**
* Page size; validate() rejects values lower than or equal to zero.
*/
private int pageSize = Constants.DEFAULT_PAGE_SIZE;
/**
* Defaults to TRUE. Note that createTableOnWrite(Boolean) overwrites this flag with its argument.
*/
protected Boolean isWriteConfig = Boolean.TRUE;
/**
* Bisect factor; validate() requires a power of two unless it equals the default value.
*/
private int bisectFactor = Constants.DEFAULT_BISECT_FACTOR;
/**
* Split size. Final: splitSize(int) is currently a no-op, so this always holds the default.
*/
private final int splitSize = Constants.DEFAULT_SPLIT_SIZE;
/**
* Split/bisect mode flags. validate() requires exactly one of them to be true; bisect is the default.
*/
private boolean isSplitModeSet = false;
private boolean isBisectModeSet = true;
/**
* Optional equals/in filter value, set through setEqualsInValue(); null when not configured.
*/
private EqualsInValue equalsInValue = null;
/**
* {@inheritDoc}
* <p>
* Stores the given session in this config and returns this instance for chaining. The stored session is the one
* closed by close().
* NOTE(review): getSession() does not return the session stored here, it reads from its own static cache --
* confirm this is intended.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> session(Session session) {
this.session = session;
return this;
}
/**
 * Cache of sessions shared by all config instances, keyed by "host:cqlPort".
 * Being static, it must only be accessed while holding its own monitor: the per-instance lock of a
 * synchronized method does not protect it from other instances.
 */
private static final Map<String, Session> cassandraSession = new HashMap<>();

/**
 * {@inheritDoc}
 * <p>
 * Returns the cached session for this config's "host:cqlPort" pair, creating (and caching) a new one when no
 * session is cached yet or the cached one has already been closed. A newly created session is connected to the
 * quoted keyspace of this config and is also stored in the {@code session} field so that {@link #close()} can
 * release it.
 * </p>
 * NOTE(review): the cache key does not include the keyspace, so configs sharing host and port but using different
 * keyspaces share the session bound to the first keyspace -- confirm this is intended.
 *
 * @return an open session for this config's host and CQL port.
 */
@Override
public synchronized Session getSession() {
    String id = this.getHost() + ":" + this.cqlPort;
    synchronized (cassandraSession) {
        Session cached = cassandraSession.get(id);
        // a session closed through close() must not be handed out again
        if (cached == null || cached.isClosed()) {
            Cluster cluster = Cluster.builder()
                    .withPort(this.cqlPort)
                    .addContactPoint(this.getHost())
                    .withCredentials(this.username, this.password)
                    .withProtocolVersion(PROTOCOL_VERSION)
                    .build();
            try {
                session = cluster.connect(quote(this.catalog));
            } catch (RuntimeException e) {
                // do not leak the cluster resources when the connection cannot be established
                cluster.close();
                throw e;
            }
            cassandraSession.put(id, session);
            cached = session;
        }
        return cached;
    }
}
/**
 * {@inheritDoc}
 * <p>
 * Closes the session held by this config, if any, and removes it from the static session cache so that
 * {@link #getSession()} never returns a closed session. Calling this method more than once is harmless.
 * </p>
 * NOTE(review): the Cluster that owns the session is not closed here because the session may have been supplied
 * by the caller through session(Session) -- confirm who owns the cluster.
 */
@Override
public void close() {
    LOG.debug("closing " + getClass().getCanonicalName());
    if (session == null) {
        return;
    }
    synchronized (cassandraSession) {
        // drop every cache entry pointing at the session we are about to close
        cassandraSession.values().removeAll(Collections.singleton(session));
    }
    session.close();
    session = null;
}
/**
* Creates a new config, forwarding the entity class to the superclass constructor.
*
* @param entityClass the entity class handled by this config.
*/
public CassandraDeepJobConfig(Class<T> entityClass) {
super(entityClass);
}
/**
* {@inheritDoc}
*
* @Override protected void finalize() { LOG.debug("finalizing " + getClass().getCanonicalName()); close(); }
*/
/**
 * Makes sure this configuration object has been initialized.
 * <p>
 * When it has not, {@link #initialize()} is invoked and a warning is logged afterwards. Nothing is thrown by this
 * method itself; any exception comes from {@link #initialize()}.
 * </p>
 */
protected void checkInitialized() {
    if (isInitialized) {
        return;
    }
    initialize();
    LOG.warn("CassandraDeepJobConfig has not been initialized!");
}
/**
 * Looks up the metadata of the configured table through the DataStax java driver.
 * <p>
 * Both the keyspace and the table name are quoted before the lookup.
 * </p>
 *
 * @return the table metadata as returned by the driver, or {@code null} when the driver has no metadata for the
 * keyspace.
 */
public TableMetadata fetchTableMetadata() {
    KeyspaceMetadata keyspace = getSession()
            .getCluster()
            .getMetadata()
            .getKeyspace(quote(this.catalog));
    return keyspace == null ? null : keyspace.getTable(quote(this.table));
}
/**
 * Creates the output column family if not exists. <br/>
 * We first check if the column family exists. <br/>
 * If not, the given tuple is used as a template to get columns metadata: its first element is passed to the
 * CREATE TABLE generator as the key structure and its second element as the remaining cells.
 * <p>
 * This is a very heavy operation since to obtain the schema we need to get at least one element of the output RDD.
 * </p>
 * <p>
 * The existence check goes through {@link #fetchTableMetadata()}, which quotes the keyspace name (as the session
 * connection does) and tolerates a keyspace the driver has no metadata for.
 * </p>
 *
 * @param first the tuple used as template for the table schema.
 * @throws DeepIOException if the table does not exist and 'createTableOnWrite' is false.
 * @throws DeepNoSuchFieldException if the table must be created but the tuple carries no key structure.
 */
public void createOutputTableIfNeeded(Tuple2<Cells, Cells> first) {
    if (fetchTableMetadata() != null) {
        // table already exists, nothing to do
        return;
    }

    if (!createTableOnWrite) {
        throw new DeepIOException("Cannot write RDD, output table does not exists and configuration object has " +
                "'createTableOnWrite' = false");
    }

    if (first == null || first._1() == null || first._1().isEmpty()) {
        throw new DeepNoSuchFieldException("no key structure found on row metadata");
    }

    String createTableQuery = createTableQueryGenerator(first._1(), first._2(), this.catalog,
            quote(this.table));
    getSession().execute(createTableQuery);
    waitForNewTableMetadata();
}
/**
 * Waits until the driver exposes metadata for the configured table.
 * <p>
 * The metadata is polled up to ten times, sleeping 100 milliseconds after every miss, plus one last check after
 * the final sleep. If the waiting thread is interrupted its interrupt status is restored, so the remaining
 * sleeps return immediately and the caller can still observe the interruption.
 * </p>
 *
 * @throws DeepIOException if the metadata is still unavailable after all retries.
 */
private void waitForNewTableMetadata() {
    final int waitTime = 100;
    final int maxRetries = 10;

    for (int retries = 0; retries < maxRetries; retries++) {
        if (fetchTableMetadata() != null) {
            return;
        }

        LOG.warn(String.format("Metadata for new table %s.%s NOT FOUND, waiting %d millis", this.catalog,
                this.table, waitTime));
        try {
            Thread.sleep(waitTime);
        } catch (InterruptedException e) {
            LOG.error("Sleep interrupted", e);
            // restore the flag instead of swallowing the interruption
            Thread.currentThread().interrupt();
        }
    }

    // the last sleep may have been enough: check once more before giving up
    if (fetchTableMetadata() == null) {
        throw new DeepIOException("Cannot retrieve metadata for the newly created CF ");
    }
}
/**
 * {@inheritDoc}
 * <p>
 * The definitions are computed once from the table metadata and cached. When the table metadata cannot be
 * fetched {@code null} is returned; a warning is logged in that case only if 'createTableOnWrite' is false.
 * </p>
 */
@Override
public synchronized Map<String, Cell> columnDefinitions() {
    if (columnDefinitionMap == null) {
        TableMetadata tableMetadata = fetchTableMetadata();

        if (tableMetadata == null) {
            if (!createTableOnWrite) {
                LOG.warn("Configuration not suitable for writing RDD: output table does not exists and configuration " +
                        "object has 'createTableOnWrite' = false");
            }
            return null;
        }

        initColumnDefinitionMap(tableMetadata);
    }
    return columnDefinitionMap;
}
/**
 * Builds the column definition map from the given table metadata and publishes it as an unmodifiable map.
 * <p>
 * Partition key columns are created with flags (TRUE, FALSE), clustering columns with (FALSE, TRUE) and every
 * other column with (FALSE, FALSE). The map is assembled in a local variable and assigned to the field only once
 * complete, so a failure half way through never leaves a partial, mutable map cached in the field.
 * </p>
 *
 * @param tableMetadata the metadata of the table; must not be null.
 */
private void initColumnDefinitionMap(TableMetadata tableMetadata) {
    Map<String, Cell> definitions = new HashMap<>();

    for (ColumnMetadata key : tableMetadata.getPartitionKey()) {
        definitions.put(key.getName(), Cell.create(key.getName(), key.getType(), Boolean.TRUE, Boolean.FALSE));
    }

    for (ColumnMetadata key : tableMetadata.getClusteringColumns()) {
        definitions.put(key.getName(), Cell.create(key.getName(), key.getType(), Boolean.FALSE, Boolean.TRUE));
    }

    for (ColumnMetadata column : tableMetadata.getColumns()) {
        // key columns were registered above: only build cells for the remaining columns
        if (!definitions.containsKey(column.getName())) {
            definitions.put(column.getName(),
                    Cell.create(column.getName(), column.getType(), Boolean.FALSE, Boolean.FALSE));
        }
    }

    columnDefinitionMap = Collections.unmodifiableMap(definitions);
}
/*
* (non-Javadoc)
*
* Stores the given column family name in the inherited table field and returns this config for chaining.
*
* @see ICassandraDeepJobConfig#columnFamily(java.lang.String)
*/
@Override
public CassandraDeepJobConfig<T> columnFamily(String columnFamily) {
this.table = columnFamily;
return this;
}
/*
* (non-Javadoc)
*
* Alias of columnFamily(String): delegates to it and returns its result.
*
* @see ICassandraDeepJobConfig#columnFamily(java.lang.String)
*/
@Override
public CassandraDeepJobConfig<T> table(String table) {
return columnFamily(table);
}
/*
* (non-Javadoc)
*
* Returns the configured table name. Calls checkInitialized() first, which runs initialize() when this config
* has not been initialized yet.
*
* @see ICassandraDeepJobConfig#getColumnFamily()
*/
@Override
public String getColumnFamily() {
checkInitialized();
return table;
}
/*
* (non-Javadoc)
*
* Alias of getColumnFamily(): delegates to it and returns its result.
*
* @see ICassandraDeepJobConfig#getColumnFamily()
*/
@Override
public String getTable() {
return getColumnFamily();
}
/*
* (non-Javadoc)
*
* Returns the configured keyspace (the inherited catalog field). Calls checkInitialized() first, which runs
* initialize() when this config has not been initialized yet.
*
* @see ICassandraDeepJobConfig#getKeyspace()
*/
@Override
public String getKeyspace() {
checkInitialized();
return catalog;
}
/**
* Returns the configured partitioner class name. Calls checkInitialized() first, which runs initialize() when
* this config has not been initialized yet.
*
* @return the partitioner class name.
*/
@Override
public String getPartitionerClassName() {
checkInitialized();
return partitionerClassName;
}
/*
* (non-Javadoc)
*
* Returns the configured password. Calls checkInitialized() first, which runs initialize() when this config
* has not been initialized yet.
*
* @see ICassandraDeepJobConfig#getPassword()
*/
@Override
public String getPassword() {
checkInitialized();
return password;
}
/*
* (non-Javadoc)
*
* Returns the configured RPC port. Calls checkInitialized() first, which runs initialize() when this config
* has not been initialized yet.
*
* @see ICassandraDeepJobConfig#getRpcPort()
*/
@Override
public Integer getRpcPort() {
checkInitialized();
return rpcPort;
}
/**
* {@inheritDoc}
* <p>
* Calls checkInitialized() first, which runs initialize() when this config has not been initialized yet.
* </p>
*/
@Override
public Integer getCqlPort() {
checkInitialized();
return cqlPort;
}
/**
 * {@inheritDoc}
 * <p>
 * Does nothing when the config is already initialized. Otherwise, when no host is configured, the canonical name
 * of the local host (or of the loopback address if the local host cannot be resolved) is added to the host
 * list; the configuration is then validated, the column definitions are loaded and the config is flagged as
 * initialized.
 * </p>
 */
@Override
public CassandraDeepJobConfig<T> initialize() {
    if (!isInitialized) {
        if (StringUtils.isEmpty(getHost())) {
            String fallbackHost;
            try {
                fallbackHost = InetAddress.getLocalHost().getCanonicalHostName();
            } catch (UnknownHostException e) {
                LOG.warn("Cannot resolve local host canonical name, using \"localhost\"");
                fallbackHost = InetAddress.getLoopbackAddress().getCanonicalHostName();
            }
            host.add(fallbackHost);
        }

        validate();
        columnDefinitions();
        isInitialized = Boolean.TRUE;
    }
    return this;
}
/**
 * Initializes this config from the given extractor configuration.
 * <p>
 * After delegating to the superclass, every Cassandra specific setting that has a non-null value in the extractor
 * configuration is applied through the matching fluent setter; finally {@link #initialize()} is invoked.
 * </p>
 *
 * @param extractorConfig the extractor configuration to read the settings from.
 * @return this config.
 */
@Override
public CassandraDeepJobConfig<T> initialize(ExtractorConfig extractorConfig) {
    super.initialize(extractorConfig);

    Map<String, Serializable> settings = extractorConfig.getValues();

    if (hasConfiguredValue(settings, BATCHSIZE)) {
        batchSize(extractorConfig.getInteger(BATCHSIZE));
    }
    if (hasConfiguredValue(settings, CQLPORT)) {
        cqlPort(extractorConfig.getInteger(CQLPORT));
    }
    if (hasConfiguredValue(settings, RPCPORT)) {
        rpcPort(extractorConfig.getInteger(RPCPORT));
    }
    if (hasConfiguredValue(settings, CREATE_ON_WRITE)) {
        createTableOnWrite(extractorConfig.getBoolean(CREATE_ON_WRITE));
    }
    if (hasConfiguredValue(settings, PAGE_SIZE)) {
        pageSize(extractorConfig.getInteger(PAGE_SIZE));
    }
    if (hasConfiguredValue(settings, READ_CONSISTENCY_LEVEL)) {
        readConsistencyLevel(extractorConfig.getString(READ_CONSISTENCY_LEVEL));
    }
    if (hasConfiguredValue(settings, WRITE_CONSISTENCY_LEVEL)) {
        writeConsistencyLevel(extractorConfig.getString(WRITE_CONSISTENCY_LEVEL));
    }
    if (hasConfiguredValue(settings, BISECT_FACTOR)) {
        bisectFactor(extractorConfig.getInteger(BISECT_FACTOR));
    }

    // if (values.get(ExtractorConstants.FILTER_FIELD) != null) {
    // Pair<String, Serializable> filterFields = extractorConfig.getPair(ExtractorConstants.FILTER_FIELD,
    // String.class, Serializable.class);
    // filterByField(filterFields.left, filterFields.right);
    // }

    if (hasConfiguredValue(settings, ExtractorConstants.FILTER_QUERY)) {
        filters(extractorConfig.getFilterArray(ExtractorConstants.FILTER_QUERY));
    }
    if (hasConfiguredValue(settings, ExtractorConstants.EQUALS_IN_FILTER)) {
        setEqualsInValue((EqualsInValue) extractorConfig.getValue(EqualsInValue.class,
                ExtractorConstants.EQUALS_IN_FILTER));
    }

    this.initialize();
    return this;
}

/**
 * Tells whether the given settings map holds a non-null value for the given key.
 */
private static boolean hasConfiguredValue(Map<String, Serializable> settings, Object key) {
    return settings.get(key) != null;
}
/**
* Stores the given filters on this config (the array reference is kept as is, no copy is made).
*
* @param filters the filters to use.
* @return this config, for chaining.
*/
@Override
public CassandraDeepJobConfig<T> filters(Filter... filters) {
this.filters = filters;
return this;
}
/**
* Returns the filters array stored on this config (the internal reference, not a copy).
*
* @return the configured filters.
*/
@Override
public Filter[] getFilters() {
return filters;
}
/**
* {@inheritDoc}
* <p>
* Stores the given keyspace in the inherited catalog field and returns this config for chaining.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> keyspace(String keyspace) {
this.catalog = keyspace;
return this;
}
/**
* Sets the bisect factor and switches this config to bisect mode (split mode is turned off). The value is not
* checked here; validate() rejects values that are neither the default nor a power of two.
*
* @param bisectFactor the bisect factor.
* @return this config, for chaining.
*/
@Override
public CassandraDeepJobConfig<T> bisectFactor(int bisectFactor) {
this.isSplitModeSet = false;
this.isBisectModeSet = true;
this.bisectFactor = bisectFactor;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given partitioner class name and returns this config for chaining.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> partitioner(String partitionerClassName) {
this.partitionerClassName = partitionerClassName;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given password and returns this config for chaining. getSession() passes it to the driver as
* credentials.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> password(String password) {
this.password = password;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given RPC port and returns this config for chaining. validate() rejects a null port.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> rpcPort(Integer port) {
this.rpcPort = port;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given CQL port and returns this config for chaining. getSession() uses this port to build the
* driver Cluster.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> cqlPort(Integer port) {
this.cqlPort = port;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given user name and returns this config for chaining. getSession() passes it to the driver as
* credentials.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> username(String username) {
this.username = username;
return this;
}
/**
 * Validates the configuration, throwing an {@link IllegalArgumentException} when a mandatory property is missing
 * or holds an invalid value.
 * <p>
 * Checks, in order: connection parameters, page size, consistency levels, table metadata (fetched from
 * Cassandra), additional filters and finally the split strategy, for which exactly one of bisect mode or split
 * mode must be selected.
 * </p>
 */
void validate() {
    validateCassandraParams();

    if (pageSize <= 0) {
        throw new IllegalArgumentException("pageSize cannot be zero");
    }

    validateConsistencyLevels();

    TableMetadata tableMetadata = fetchTableMetadata();
    validateTableMetadata(tableMetadata);
    validateAdditionalFilters(tableMetadata);

    if (this.isBisectModeSet && this.isSplitModeSet) {
        throw new IllegalArgumentException(
                "Only one split mode can be defined, please choose between Split or Bisect");
    }

    if (this.isBisectModeSet) {
        // the default factor is always accepted; any other value must be a power of two
        boolean customFactor = this.bisectFactor != Constants.DEFAULT_BISECT_FACTOR;
        if (customFactor && !this.checkIsPowerOfTwo(this.bisectFactor)) {
            throw new IllegalArgumentException(
                    "Bisect factor should be greater than zero and a power of 2");
        }
        return;
    }

    if (!this.isSplitModeSet) {
        throw new IllegalArgumentException(
                "One split mode must be defined, please choose between Split or Bisect");
    }

    if (this.splitSize <= 0) {
        throw new IllegalArgumentException(
                "The split size must be a positve integer");
    }
}
/**
 * Validates the basic connection parameters: host, RPC port, CQL port, keyspace and column family.
 * <p>
 * The CQL port is checked as well because it is the port used to open the driver connection; without the check
 * a null CQL port would only surface later as a NullPointerException while building the cluster.
 * </p>
 *
 * @throws IllegalArgumentException if any of the parameters is missing.
 */
private void validateCassandraParams() {
    if (StringUtils.isEmpty(getHost())) {
        throw new IllegalArgumentException("host cannot be null");
    }
    if (rpcPort == null) {
        throw new IllegalArgumentException("rpcPort cannot be null");
    }
    if (cqlPort == null) {
        throw new IllegalArgumentException("cqlPort cannot be null");
    }
    if (StringUtils.isEmpty(catalog)) {
        throw new IllegalArgumentException("keyspace cannot be null");
    }
    if (StringUtils.isEmpty(table)) {
        throw new IllegalArgumentException("columnFamily cannot be null");
    }
}
/**
 * Validates the table metadata against this configuration.
 * <p>
 * A missing table is rejected when this is not a write config or when 'createTableOnWrite' is false. When input
 * columns are configured each of them must exist in the table; this requires the table to exist, which is checked
 * explicitly rather than through an {@code assert} so that the outcome does not depend on the JVM running with
 * assertions enabled.
 * </p>
 *
 * @param tableMetadata the table metadata, or null if the table does not exist.
 * @throws IllegalArgumentException if the table is missing and is required.
 * @throws DeepNoSuchFieldException if an input column does not exist in the table.
 */
private void validateTableMetadata(TableMetadata tableMetadata) {
    if (tableMetadata == null && !isWriteConfig) {
        throw new IllegalArgumentException(String.format("Column family {%s.%s} does not exist", catalog,
                table));
    }

    if (tableMetadata == null && !createTableOnWrite) {
        throw new IllegalArgumentException(String.format("Column family {%s.%s} does not exist and " +
                "createTableOnWrite = false", catalog, table));
    }

    if (ArrayUtils.isEmpty(inputColumns)) {
        return;
    }

    if (tableMetadata == null) {
        throw new IllegalArgumentException(String.format(
                "Cannot validate input columns: column family {%s.%s} does not exist", catalog, table));
    }

    for (String column : inputColumns) {
        if (tableMetadata.getColumn(column) == null) {
            throw new DeepNoSuchFieldException("No column with name " + column + " has been found on table "
                    + this.catalog + "." + this.table);
        }
    }
}
/**
 * Checks that every additional filter refers to an existing column that has an index defined on it.
 *
 * @param tableMetadata the metadata of the table the filters apply to.
 * @throws DeepNoSuchFieldException if a filtered column does not exist in the table.
 * @throws DeepIndexNotFoundException if a filtered column has no index.
 */
private void validateAdditionalFilters(TableMetadata tableMetadata) {
    for (String columnName : additionalFilters.keySet()) {
        ColumnMetadata filteredColumn = tableMetadata.getColumn(columnName);

        if (filteredColumn == null) {
            throw new DeepNoSuchFieldException("No column with name " + columnName + " has been found on " +
                    "table " + this.catalog + "." + this.table);
        }

        /* the filter can only be applied if there's an index specified on the provided column */
        if (filteredColumn.getIndex() == null) {
            throw new DeepIndexNotFoundException("No index has been found on column " + filteredColumn.getName()
                    + " on table " + this.catalog + "." + this.table);
        }
    }
}
/**
 * Validates the configured read and write consistency levels. A null level is accepted; any other value must be
 * the name of a {@link ConsistencyLevel} constant.
 *
 * @throws IllegalArgumentException if a level is not a valid consistency level name.
 */
private void validateConsistencyLevels() {
    checkConsistencyLevel("readConsistencyLevel", readConsistencyLevel);
    checkConsistencyLevel("writeConsistencyLevel", writeConsistencyLevel);
}

/**
 * Checks that the given value, when not null, names a {@link ConsistencyLevel} constant.
 *
 * @param property the name of the validated property, used in the error message.
 * @param level the configured value; null is accepted.
 */
private static void checkConsistencyLevel(String property, String level) {
    if (level == null) {
        return;
    }
    try {
        ConsistencyLevel.valueOf(level);
    } catch (IllegalArgumentException e) {
        throw new IllegalArgumentException(property + " not valid, " +
                "should be one of those defined in org.apache.cassandra.db.ConsistencyLevel", e);
    }
}
/**
 * Tells whether the given number is a positive power of two, i.e. positive with exactly one bit set.
 *
 * @param n the number to check.
 * @return true if n is a power of two greater than zero.
 */
private boolean checkIsPowerOfTwo(int n) {
    if (n <= 0) {
        return false;
    }
    return Integer.bitCount(n) == 1;
}
/**
* {@inheritDoc}
* <p>
* Stores the given batch size and returns this config for chaining. The value is not validated here.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> batchSize(int batchSize) {
this.batchSize = batchSize;
return this;
}
/**
* {@inheritDoc}
* <p>
* Returns the flag as stored; unlike other getters it does not call checkInitialized().
* </p>
*/
@Override
public Boolean isCreateTableOnWrite() {
return createTableOnWrite;
}
/**
* {@inheritDoc}
* <p>
* Besides storing the flag, this setter also overwrites isWriteConfig with the same value, so passing false
* marks this config as a non-write config too.
* NOTE(review): both fields are later unboxed in conditions, so passing null presumably ends in a
* NullPointerException -- confirm callers never pass null.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> createTableOnWrite(Boolean createTableOnWrite) {
this.createTableOnWrite = createTableOnWrite;
this.isWriteConfig = createTableOnWrite;
return this;
}
/**
* {@inheritDoc}
* <p>
* Returns an unmodifiable view of the additional filters map, so callers cannot add or remove entries through it.
* </p>
*/
@Override
public Map<String, Serializable> getAdditionalFilters() {
return Collections.unmodifiableMap(additionalFilters);
}
/**
* Returns the configured page size. Calls checkInitialized() first, which runs initialize() when this config
* has not been initialized yet.
*
* @return the page size.
*/
public int getPageSize() {
checkInitialized();
return this.pageSize;
}
/**
* Stores the given page size. The value is not checked here; validate() rejects values lower than or equal to
* zero.
*
* @param pageSize the page size.
* @return this config, for chaining.
*/
public CassandraDeepJobConfig<T> pageSize(int pageSize) {
this.pageSize = pageSize;
return this;
}
/**
* {@inheritDoc}
* <p>
* Returns the level name as stored; does not call checkInitialized().
* </p>
*/
@Override
public String getReadConsistencyLevel() {
return readConsistencyLevel;
}
/**
* {@inheritDoc}
* <p>
* Returns the level name as stored; does not call checkInitialized().
* </p>
*/
@Override
public String getWriteConsistencyLevel() {
return writeConsistencyLevel;
}
/**
* {@inheritDoc}
* <p>
* Stores the given level name and returns this config for chaining. The name is only checked later, by
* validate(), against the constants of ConsistencyLevel.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> readConsistencyLevel(String level) {
this.readConsistencyLevel = level;
return this;
}
/**
* {@inheritDoc}
* <p>
* Stores the given level name and returns this config for chaining. The name is only checked later, by
* validate(), against the constants of ConsistencyLevel.
* </p>
*/
@Override
public CassandraDeepJobConfig<T> writeConsistencyLevel(String level) {
this.writeConsistencyLevel = level;
return this;
}
/**
* {@inheritDoc}
* <p>
* Returns the batch size as stored; does not call checkInitialized().
* </p>
*/
@Override
public int getBatchSize() {
return batchSize;
}
/**
* {@inheritDoc}
* <p>
* Returns the flag as stored. Note that createTableOnWrite(Boolean) overwrites this flag with its argument.
* </p>
*/
@Override
public Boolean getIsWriteConfig() {
return isWriteConfig;
}
/**
* Returns the bisect factor as stored; does not call checkInitialized().
*
* @return the bisect factor.
*/
@Override
public int getBisectFactor() {
return bisectFactor;
}
// TODO: It will be added in a future release
/**
* Currently a no-op: the argument is ignored, the split size keeps its default value and the split/bisect mode
* flags are left untouched. The intended implementation is kept commented out below.
*
* @param splitSize ignored for now.
* @return this config, for chaining.
*/
@Override
public CassandraDeepJobConfig<T> splitSize(int splitSize) {
// this.isSplitModeSet = true;
// this.isBisectModeSet = false;
// this.splitSize = splitSize;
return this;
}
/**
* Returns the split size. Since splitSize(int) is a no-op and the field is final, this is always the default
* split size.
*
* @return the split size.
*/
@Override
public Integer getSplitSize() {
return this.splitSize;
}
/**
* Tells whether split mode is selected. Nothing in this class currently sets the flag to true.
*
* @return the split mode flag.
*/
@Override
public boolean isSplitModeSet() {
return this.isSplitModeSet;
}
/**
* Tells whether bisect mode is selected. Bisect mode is the default and is (re)selected by bisectFactor(int).
*
* @return the bisect mode flag.
*/
@Override
public boolean isBisectModeSet() {
return this.isBisectModeSet;
}
/**
* Returns the configured equals/in filter value.
*
* @return the value set through setEqualsInValue(), or null when none has been set.
*/
public EqualsInValue getEqualsInValue() {
return equalsInValue;
}
/**
* Stores the equals/in filter value. Unlike the fluent setters of this class it returns nothing.
*
* @param equalsInValue the value to store; may be null.
*/
public void setEqualsInValue(EqualsInValue equalsInValue) {
this.equalsInValue = equalsInValue;
}
/**
* Native protocol version passed to the driver when getSession() builds a Cluster. Defaults to V2.
* The field is public, static and not final, so it can be reassigned by any code and affects every session
* created afterwards.
*/
public static ProtocolVersion PROTOCOL_VERSION = ProtocolVersion.V2;
}