/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.cassandra.thrift;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.dht.Token.TokenFactory;
import org.apache.cassandra.thrift.CfSplit;
import org.apache.cassandra.thrift.TokenRange;
import org.apache.thrift.TApplicationException;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.stratio.deep.cassandra.config.CassandraDeepJobConfig;
import com.stratio.deep.commons.exception.DeepGenericException;
import com.stratio.deep.commons.rdd.DeepTokenRange;
import com.stratio.deep.commons.utils.Utils;
/**
* Class that provides several token range utilities using Cassandra's Thrift RPC API.
*
* @author Andres de la Pena <andres@stratio.com>
*/
public class ThriftRangeUtils {
private static final Logger LOG = LoggerFactory.getLogger(ThriftRangeUtils.class);
private final String host; // The Cassandra contact host name
private final int rpcPort; // The Cassandra contact host RPC port
private final int splitSize; // The number of rows per split
private final String keyspace; // The Cassandra keyspace name
private final String columnFamily; // The Cassandra column family name
private final AbstractType tokenType; // The token validator
private final TokenFactory tokenFactory; // The token factory
private final Comparable minToken; // The partitioner's minimum token
/**
* Builds a new {@link ThriftRangeUtils}.
*
* @param partitioner the partitioner.
* @param host the host address.
* @param rpcPort the host RPC port.
* @param keyspace the keyspace name.
* @param columnFamily the column family name.
* @param splitSize the number of rows per split.
*/
public ThriftRangeUtils(IPartitioner partitioner,
String host,
int rpcPort,
String keyspace,
String columnFamily,
int splitSize) {
this.host = host;
this.rpcPort = rpcPort;
this.splitSize = splitSize;
this.keyspace = keyspace;
this.columnFamily = columnFamily;
tokenType = partitioner.getTokenValidator();
tokenFactory = partitioner.getTokenFactory();
minToken = (Comparable) partitioner.getMinimumToken().token;
}
/**
* Returns a new {@link ThriftRangeUtils} using the specified configuration.
*
* @param config the Deep configuration object.
*/
public static ThriftRangeUtils build(CassandraDeepJobConfig config) {
String host = config.getHost();
int rpcPort = config.getRpcPort();
int splitSize = config.getSplitSize();
String keyspace = config.getKeyspace();
String columnFamily = config.getColumnFamily();
String partitionerClassName = config.getPartitionerClassName();
IPartitioner partitioner = Utils.newTypeInstance(partitionerClassName, IPartitioner.class);
return new ThriftRangeUtils(partitioner, host, rpcPort, keyspace, columnFamily, splitSize);
}
/**
* Returns the token range splits of the Cassandra ring that will be mapped to Spark partitions.
*
* @return the list of computed token ranges.
*/
public List<DeepTokenRange> getSplits() {
// Get the cluster token ranges
List<DeepTokenRange> tokenRanges = getRanges();
// Get the cluster token ranges splits
List<DeepTokenRange> splits = new ArrayList<>();
for (DeepTokenRange tokenRange : tokenRanges) {
List<DeepTokenRange> nodeSplits = getSplits(tokenRange);
splits.addAll(nodeSplits);
}
return splits;
}
/**
* Returns the token ranges of the Cassandra ring that will be mapped to Spark partitions.
* The returned ranges are the Cassandra's physical ones, without any splitting.
*
* @return the list of Cassandra ring token ranges.
*/
public List<DeepTokenRange> getRanges() {
try {
List<TokenRange> tokenRanges;
ThriftClient client = ThriftClient.build(host, rpcPort);
try {
tokenRanges = client.describe_local_ring(keyspace);
} catch (TApplicationException e) {
if (e.getType() == TApplicationException.UNKNOWN_METHOD) {
tokenRanges = client.describe_ring(keyspace);
} else {
throw new DeepGenericException("Unknown server error", e);
}
}
client.close();
List<DeepTokenRange> deepTokenRanges = new ArrayList<>(tokenRanges.size());
for (TokenRange tokenRange : tokenRanges) {
Comparable start = tokenAsComparable(tokenRange.getStart_token());
Comparable end = tokenAsComparable(tokenRange.getEnd_token());
deepTokenRanges.add(new DeepTokenRange(start, end, tokenRange.getEndpoints()));
}
return deepTokenRanges;
} catch (TException e) {
throw new DeepGenericException("No available replicas for get ring token ranges", e);
}
}
/**
* Returns the computed token range splits of the specified token range.
*
* @param deepTokenRange the token range to be splitted.
* @return the list of token range splits, which are also token ranges.
*/
public List<DeepTokenRange> getSplits(DeepTokenRange deepTokenRange) {
String start = tokenAsString((Comparable) deepTokenRange.getStartToken());
String end = tokenAsString((Comparable) deepTokenRange.getEndToken());
List<String> endpoints = deepTokenRange.getReplicas();
for (String endpoint : endpoints) {
try {
ThriftClient client = ThriftClient.build(endpoint, rpcPort, keyspace);
List<CfSplit> splits = client.describe_splits_ex(columnFamily, start, end, splitSize);
client.close();
return deepTokenRanges(splits, endpoints);
} catch (TException e) {
LOG.warn("Endpoint %s failed while splitting range %s", endpoint, deepTokenRange);
}
}
throw new DeepGenericException("No available replicas for splitting range " + deepTokenRange);
}
/**
* Returns the Deep splits represented by the specified Thrift splits using the specified endpoints for all of them.
* Note that the returned list can contain one more ranges than the specified because the range containing the
* partitioner's minimum token are divided into two ranges.
*
* @param splits the Thrift splits to be converted.
* @param endpoints the endpoints list to be set in each generated Deep split
* @return the {@link com.stratio.deep.commons.rdd.DeepTokenRange}s represented by the specified
* {@link org.apache.cassandra.thrift.CfSplit}s
*/
public List<DeepTokenRange> deepTokenRanges(List<CfSplit> splits, List<String> endpoints) {
List<DeepTokenRange> result = new ArrayList<>();
for (CfSplit split : splits) {
Comparable splitStart = tokenAsComparable(split.getStart_token());
Comparable splitEnd = tokenAsComparable(split.getEnd_token());
if (splitStart.equals(splitEnd)) {
result.add(new DeepTokenRange(minToken, minToken, endpoints));
} else if (splitStart.compareTo(splitEnd) > 0) {
result.add(new DeepTokenRange(splitStart, minToken, endpoints));
result.add(new DeepTokenRange(minToken, splitEnd, endpoints));
} else {
result.add(new DeepTokenRange(splitStart, splitEnd, endpoints));
}
}
return result;
}
/**
* Returns the specified token as a {@link java.lang.Comparable}.
*
* @param tokenAsString a token represented as a {@link java.lang.String}.
* @return the specified token as a {@link java.lang.Comparable}.
*/
@SuppressWarnings("unchecked")
public Comparable tokenAsComparable(String tokenAsString) {
Token token = tokenFactory.fromString(tokenAsString);
ByteBuffer bb = tokenFactory.toByteArray(token);
return (Comparable) tokenType.compose(bb);
}
/**
* Returns the specified token as a {@link java.lang.String}.
*
* @param tokenAsComparable a token represented as a {@link java.lang.Comparable}.
* @return the specified token as a {@link java.lang.String}.
*/
@SuppressWarnings("unchecked")
public String tokenAsString(Comparable tokenAsComparable) {
ByteBuffer bb = tokenType.decompose(tokenAsComparable);
Token token = tokenFactory.fromByteArray(bb);
return tokenFactory.toString(token);
}
}