/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.deep.cassandra.cql;
import static com.stratio.deep.cassandra.cql.CassandraClientProvider.trySessionForLocation;
import static com.stratio.deep.cassandra.util.CassandraUtils.isFilterdByKey;
import static com.stratio.deep.cassandra.util.CassandraUtils.isTokenIncludedInRange;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.CompositeType;
import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Token;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.SimpleStatement;
import com.datastax.driver.core.Statement;
import com.datastax.driver.core.TableMetadata;
import com.datastax.driver.core.exceptions.NoHostAvailableException;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.stratio.deep.cassandra.config.CassandraDeepJobConfig;
import com.stratio.deep.cassandra.entity.CellValidator;
import com.stratio.deep.cassandra.filter.value.EqualsInValue;
import com.stratio.deep.cassandra.util.CassandraUtils;
import com.stratio.deep.commons.config.DeepJobConfig;
import com.stratio.deep.commons.exception.DeepGenericException;
import com.stratio.deep.commons.exception.DeepIOException;
import com.stratio.deep.commons.exception.DeepIllegalAccessException;
import com.stratio.deep.commons.filter.Filter;
import com.stratio.deep.commons.impl.DeepPartitionLocationComparator;
import com.stratio.deep.commons.rdd.DeepTokenRange;
import com.stratio.deep.commons.rdd.IDeepRecordReader;
import com.stratio.deep.commons.utils.Pair;
import com.stratio.deep.commons.utils.Utils;
/**
* Implements a cassandra record reader with pagination capabilities. Does not rely on Cassandra's Hadoop
* CqlPagingRecordReader.
* <p/>
* Pagination is outsourced to Datastax Java Driver.
*
* @author Luca Rosellini <luca@strat.io>
*/
public class DeepRecordReader implements IDeepRecordReader {
/**
* Logger for this class.
*/
private static final Logger LOG = LoggerFactory.getLogger(DeepRecordReader.class);
/**
* Token range this reader is based on. Its replicas are used to choose a connection and its
* start/end tokens are bound to the generated range query.
*/
private final DeepTokenRange<?, String> split;
/**
* Iterator over the query results; created at the end of {@code initialize()} and backing
* {@code hasNext()} / {@code next()}.
*/
private RowIterator rowIterator;
/**
* Table (column family) name, read from {@code config.getTable()}.
*/
private String cfName;
/**
* Partition key columns of the table, filled in by {@code retrieveKeys()}.
*/
// partition keys -- key aliases
private final List<BoundColumn> partitionBoundColumns = new ArrayList<>();
/**
* Clustering key columns of the table, filled in by {@code retrieveKeys()}.
*/
// cluster keys -- column aliases
private final List<BoundColumn> clusterColumns = new ArrayList<>();
/**
* Comma-joined {@code config.getInputColumns()}; stays {@code null} when no input columns are
* configured, in which case the query selects {@code *}.
*/
// cql query select columns
private String columns;
/**
* Value of {@code config.getPageSize()}; applied as the fetch size of the executed statement.
*/
// the number of cql rows per page
private final int pageSize;
/**
* Partitioner instantiated from {@code config.getPartitionerClassName()}; used to compute the
* token of a partition key.
*/
private IPartitioner<?> partitioner;
/**
* Type of the partition key: a {@link CompositeType} when the table has more than one partition
* key column, otherwise the type of the single partition key column. Set by {@code retrieveKeys()}.
*/
private AbstractType<?> keyValidator;
/**
* Job configuration, cast to its Cassandra-specific type in the constructor.
*/
private final CassandraDeepJobConfig<?> config;
/**
* Session returned by {@code createConnection()}; used to execute the generated statement.
*/
private Session session;
// Set by RowIterator.whereClause(): when true the token-range restriction is left out of the
// WHERE clause and no start/end token bind values are produced.
private boolean filterByKey = false;
/**
 * Builds a reader for one token range and initializes it immediately (connection, key metadata
 * and first query are all set up by {@code initialize()}).
 *
 * @param config the deep configuration object; must be a {@link CassandraDeepJobConfig}.
 * @param split  the token range on which the new reader will be based.
 */
public DeepRecordReader(DeepJobConfig<?, ?> config, DeepTokenRange<?, String> split) {
    // Cast once and reuse the typed reference for both the field and the page size lookup.
    CassandraDeepJobConfig<?> cassandraConfig = (CassandraDeepJobConfig<?>) config;
    this.config = cassandraConfig;
    this.split = split;
    this.pageSize = cassandraConfig.getPageSize();
    initialize();
}
/**
 * Initializes this reader.
 * <p>
 * Reads the table name and the optional input column list from the configuration, instantiates
 * the partitioner, opens a session, loads the key metadata and finally creates the row iterator
 * (which executes the query).
 * </p>
 */
private void initialize() {
    cfName = config.getTable();

    boolean hasInputColumns = !ArrayUtils.isEmpty(config.getInputColumns());
    if (hasInputColumns) {
        columns = StringUtils.join(config.getInputColumns(), ",");
    }

    partitioner = Utils.newTypeInstance(config.getPartitionerClassName(), IPartitioner.class);

    // Any failure while connecting or reading the key metadata is reported as a DeepIOException.
    try {
        session = createConnection();
        retrieveKeys();
    } catch (Exception e) {
        throw new DeepIOException(e);
    }

    rowIterator = new RowIterator();
}
/**
 * Obtains a session for this split, trying each replica of the split in the order defined by
 * {@link DeepPartitionLocationComparator} and returning the first session that can be obtained.
 *
 * @return the session for the first replica that could be reached.
 * @throws DeepIOException if no replica yields a session; wraps the last failure (the cause is
 *                         {@code null} when the split has no replicas at all).
 */
private Session createConnection() {
    /* reorder locations */
    List<String> locations = Lists.newArrayList(split.getReplicas());
    Collections.sort(locations, new DeepPartitionLocationComparator());

    LOG.debug("createConnection: {}", locations);

    Exception lastException = null;
    for (String location : locations) {
        try {
            return trySessionForLocation(location, config, false).left;
        } catch (Exception e) {
            // Pass the exception as the trailing argument so the stack trace of every failed
            // attempt is logged, not only the one that ends up wrapped below.
            LOG.error("Could not get connection for: {}, replicas: {}", location, locations, e);
            lastException = e;
        }
    }

    throw new DeepIOException(lastException);
}
/**
* Closes this input reader object. This implementation is a no-op: the session obtained in
* {@code createConnection()} is not closed here.
* NOTE(review): presumably the session is cached and owned by CassandraClientProvider - confirm.
*/
@Override
public void close() {
/* dummy close method, no need to close any resource here */
}
/**
 * Creates a new, empty, insertion-ordered map ({@link LinkedHashMap}).
 *
 * @return an empty map to hold associations between row column names and their raw values.
 */
public Map<String, ByteBuffer> createEmptyMap() {
    Map<String, ByteBuffer> emptyMap = new LinkedHashMap<>();
    return emptyMap;
}
/**
* CQL row iterator
*/
class RowIterator extends AbstractIterator<Pair<Map<String, ByteBuffer>, Map<String, ByteBuffer>>> {
/**
* Iterator over the rows of the executed statement. Remains {@code null} when no statement was
* executed or the driver returned no result set; {@code computeNext()} treats that as end of data.
*/
private Iterator<Row> rows;
/**
* Quoted, comma-separated partition key column names; computed lazily by {@code whereClause()}.
*/
private String partitionKeyString; // keys in <key1>, <key2>, <key3> string format
/**
* One {@code ?} per partition key column, comma-separated; computed lazily by {@code whereClause()}.
*/
private String partitionKeyMarkers; // question marks in ? , ? , ? format which matches the number of keys
* Default constructor.
*/
public RowIterator() {
// initial page
executeQuery();
}
/**
 * Tells whether a column should be returned as a value column.
 *
 * @param columnName the column name
 * @return {@code true} when no input columns are configured (everything is wanted) or when the
 * configured input columns contain {@code columnName}.
 */
private boolean isColumnWanted(String columnName) {
    if (ArrayUtils.isEmpty(config.getInputColumns())) {
        return true;
    }
    return ArrayUtils.contains(config.getInputColumns(), columnName);
}
/**
 * Produces the next (key columns, value columns) pair from the current result rows, or signals
 * end of data when there are no rows (left).
 *
 * {@inheritDoc}
 */
@Override
protected Pair<Map<String, ByteBuffer>, Map<String, ByteBuffer>> computeNext() {
    if (rows != null && rows.hasNext()) {
        Map<String, ByteBuffer> valueColumns = createEmptyMap();
        Map<String, ByteBuffer> keyColumns = createEmptyMap();
        initColumns(valueColumns, keyColumns);
        return Pair.create(keyColumns, valueColumns);
    }
    return endOfData();
}
/**
 * Consumes the next row and splits its columns into the two given maps: partition and
 * clustering key columns go into {@code keyColumns}; every other column that is wanted (see
 * {@code isColumnWanted}) goes into {@code valueColumns}.
 *
 * @param valueColumns the map receiving the non-key columns
 * @param keyColumns   the map receiving the partition and clustering key columns
 */
private void initColumns(Map<String, ByteBuffer> valueColumns, Map<String, ByteBuffer> keyColumns) {
    Row currentRow = rows.next();

    TableMetadata metadata = config.fetchTableMetadata();
    List<ColumnMetadata> partitionKeys = metadata.getPartitionKey();
    List<ColumnMetadata> clusteringKeys = metadata.getClusteringColumns();
    List<ColumnMetadata> allColumns = metadata.getColumns();

    // Partition keys first, then clustering keys, so the key map keeps that order.
    for (ColumnMetadata keyColumn : Iterables.concat(partitionKeys, clusteringKeys)) {
        String name = keyColumn.getName();
        keyColumns.put(name, currentRow.getBytesUnsafe(name));
    }

    for (ColumnMetadata column : allColumns) {
        String name = column.getName();
        boolean isValueColumn = !keyColumns.containsKey(name) && isColumnWanted(name);
        if (isValueColumn) {
            valueColumns.put(name, currentRow.getBytesUnsafe(name));
        }
    }
}
/**
 * Builds the CQL SELECT text for this split.
 * <p>
 * The select list is {@code *} when no input columns are configured; otherwise it is the quoted
 * partition keys, then the quoted clustering keys, then the remaining configured columns. The
 * WHERE part is the regular range/filter clause (followed by {@code ALLOW FILTERING}) unless the
 * configuration carries an {@link EqualsInValue}, in which case the equals/IN clause is used.
 * </p>
 *
 * @return the query string, with {@code ?} placeholders for the bind values
 */
//TODO: return id column
private String composeQuery() {
    String selectList;
    if (columns == null) {
        selectList = "*";
    } else {
        // add keys in the front in order
        String partitionKey = keyString(partitionBoundColumns);
        String clusterKey = keyString(clusterColumns);
        String nonKeyColumns = withoutKeyColumns(columns);

        String nonKeySuffix = nonKeyColumns == null ? "" : "," + nonKeyColumns;
        String keyPrefix = StringUtils.isEmpty(clusterKey)
                ? partitionKey
                : partitionKey + "," + clusterKey;
        selectList = keyPrefix + nonKeySuffix;
    }

    EqualsInValue equalsInValue = config.getEqualsInValue();

    // Checking whether the job is a EQUALS_IN special query or not
    if (equalsInValue != null) {
        String equalsInClause = equalsInWhereClause(equalsInValue);
        return String.format("SELECT %s FROM %s %s",
                selectList, Utils.quote(cfName), equalsInClause);
    }

    String whereClause = whereClause();
    return String.format("SELECT %s FROM %s%s ALLOW FILTERING",
            selectList, Utils.quote(cfName), whereClause);
}
/**
 * Builds the statement to execute: the composed query plus its bind values, with the fetch
 * size set to the configured page size.
 * <p>
 * For a regular query the bind values come from {@code preparedQueryBindValues()}. For an
 * equals/IN query they are the equals values followed by the list of IN values whose token
 * falls in this split.
 * </p>
 *
 * @return the statement, or {@code null} when this is an equals/IN query and none of the IN
 * values belongs to this split
 */
private Statement prepareStatement() {
    // The query must be composed first: composing it decides whether token bounds are bound.
    String query = composeQuery();
    EqualsInValue equalsInValue = config.getEqualsInValue();

    Object[] values;
    if (equalsInValue != null) {
        List<Pair<String, Serializable>> equalsList = equalsInValue.getEqualsList();
        int equalsCount = equalsList.size();

        values = new Object[equalsCount + 1];
        for (int i = 0; i < equalsCount; i++) {
            values[i] = equalsList.get(i).right;
        }

        List<Serializable> inValuesForSplit = filterSplits(equalsInValue);
        values[equalsCount] = inValuesForSplit;
        if (inValuesForSplit == null) {
            return null;
        }
    } else {
        List<Object> bindValues = preparedQueryBindValues();
        assert bindValues != null;
        values = bindValues.toArray(new Object[bindValues.size()]);
    }

    LOG.debug("query: " + query + "; values: " + Arrays.toString(values));

    Statement stmt = new SimpleStatement(query, values);
    stmt.setFetchSize(pageSize);
    return stmt;
}
/**
 * Keeps only the IN values whose partition key token is included in this reader's split.
 *
 * @param equalsInValue the equals/IN definition (equals pairs plus candidate IN values)
 * @return the IN values belonging to this split, or {@code null} when there are none
 */
private List<Serializable> filterSplits(EqualsInValue equalsInValue) {
    List<Serializable> valuesInSplit = new ArrayList<>();

    for (Serializable candidate : equalsInValue.getInValues()) {
        ByteBuffer partitionKey = getPartitionKey(equalsInValue.getEqualsList(), candidate);
        Token<Comparable> token = partitioner.getToken(partitionKey);
        if (isTokenIncludedInRange(split, token)) {
            valuesInSplit.add(candidate);
        }
    }

    return valuesInSplit.isEmpty() ? null : valuesInSplit;
}
/**
 * Looks up the name of a column that carries a custom index (used as the lucene index column).
 * When several columns have a custom index, the last one in the table's column order wins.
 *
 * @return the name of the custom-indexed column, or the empty string when there is none
 */
private String getLuceneIndex() {
    String customIndexColumn = "";

    for (ColumnMetadata column : config.fetchTableMetadata().getColumns()) {
        boolean hasCustomIndex = column.getIndex() != null && column.getIndex().isCustomIndex();
        if (hasCustomIndex) {
            customIndexColumn = column.getName();
        }
    }

    return customIndexColumn;
}
/**
 * Removes the partition and clustering key columns from a comma-separated column list and
 * quotes every remaining (trimmed) column name.
 *
 * @param columnString the comma-separated column names
 * @return the quoted non-key columns joined with commas, or {@code null} when none remain
 */
private String withoutKeyColumns(String columnString) {
    Set<String> keyNames = new HashSet<>();
    for (BoundColumn partitionColumn : partitionBoundColumns) {
        keyNames.add(partitionColumn.name);
    }
    for (BoundColumn clusterColumn : clusterColumns) {
        keyNames.add(clusterColumn.name);
    }

    StringBuilder joined = new StringBuilder();
    boolean anyKept = false;
    for (String rawColumn : columnString.split(",")) {
        String candidate = rawColumn.trim();
        if (!keyNames.contains(candidate)) {
            if (anyKept) {
                joined.append(",");
            }
            joined.append(quote(candidate));
            anyKept = true;
        }
    }

    return anyKept ? joined.toString() : null;
}
/**
 * Builds the WHERE clause of the regular (non equals/IN) query and records in
 * {@code filterByKey} whether the configured filters already restrict the partition key.
 * <p>
 * Without a key filter the clause restricts the partition key token to the split's range and
 * appends the additional filters. With a key filter the token restriction is omitted and the
 * first four characters of the generated filter text are dropped.
 * NOTE(review): presumably those four characters are a leading " AND" - confirm against
 * CassandraUtils.additionalFilterGenerator.
 * </p>
 *
 * @return the WHERE clause, starting with {@code " WHERE "}
 */
private String whereClause() {
    if (partitionKeyString == null) {
        partitionKeyString = keyString(partitionBoundColumns);
    }
    if (partitionKeyMarkers == null) {
        partitionKeyMarkers = partitionKeyMarkers();
    }

    filterByKey = isFilterdByKey(config.getFilters(), partitionKeyString);

    String extraFilters = CassandraUtils.additionalFilterGenerator(config.getAdditionalFilters(),
            config.getFilters(), getLuceneIndex());

    StringBuilder clause = new StringBuilder(" WHERE ");
    if (filterByKey) {
        clause.append(extraFilters.substring(4));
    } else {
        // query token(k) > start_token and token(k) <= end_token
        clause.append(String.format(" token(%s) > ? AND token(%s) <= ?", partitionKeyString,
                partitionKeyString));
        clause.append(extraFilters);
    }
    return clause.toString();
}
/**
 * Generates the special equals/IN WHERE clause: one {@code field = ?} term per equals pair,
 * each followed by {@code AND}, and finally {@code inField IN ?}.
 *
 * @param equalsInValue the equals/IN definition
 * @return the clause, starting with {@code "WHERE "}
 */
private String equalsInWhereClause(EqualsInValue equalsInValue) {
    StringBuilder clause = new StringBuilder("WHERE ");

    for (Pair<String, Serializable> equalsPair : equalsInValue.getEqualsList()) {
        clause.append(equalsPair.left);
        clause.append(" = ? AND ");
    }

    clause.append(equalsInValue.getInField());
    clause.append(" IN ?");
    return clause.toString();
}
/**
 * Serializes column names as a comma-separated list of quoted identifiers, in the format
 * {@code "key1","key2","key3"}.
 *
 * @param columns the columns
 * @return the joined quoted names; the empty string when {@code columns} is empty
 */
private String keyString(List<BoundColumn> columns) {
    StringBuilder joined = new StringBuilder();
    for (BoundColumn column : columns) {
        // A quoted name is never empty, so a non-empty buffer means a name was already added.
        if (joined.length() > 0) {
            joined.append(",");
        }
        joined.append(quote(column.name));
    }
    return joined.toString();
}
/**
 * Serializes one question mark per partition key column, in the format {@code ?,?,?}.
 *
 * @return the markers, or {@code null} when there are no partition key columns
 */
private String partitionKeyMarkers() {
    int keyCount = partitionBoundColumns.size();
    if (keyCount == 0) {
        return null;
    }

    StringBuilder markers = new StringBuilder("?");
    for (int i = 1; i < keyCount; i++) {
        markers.append(",?");
    }
    return markers.toString();
}
/**
 * Collects the bind values of the regular range query: the split's start and end tokens, in
 * that order. When the query is filtered by partition key ({@code filterByKey}) the token
 * restriction is not part of the query, so no values are bound.
 *
 * @return the bind values; empty when {@code filterByKey} is set
 */
private List<Object> preparedQueryBindValues() {
    List<Object> bindValues = new LinkedList<>();
    if (filterByKey) {
        return bindValues;
    }

    Object rangeStart = split.getStartToken();
    Object rangeEnd = split.getEndToken();
    bindValues.add(rangeStart);
    bindValues.add(rangeEnd);
    return bindValues;
}
/**
 * Wraps an identifier in double quotes (so that case is preserved), doubling any double quote
 * it already contains.
 *
 * @param identifier the identifier
 * @return the quoted identifier
 */
private String quote(String identifier) {
    String escaped = identifier.replace("\"", "\"\"");
    return '"' + escaped + '"';
}
/**
 * Prepares and executes the query, storing an iterator over the resulting rows in {@code rows}.
 * <p>
 * Nothing is executed (and {@code rows} is left untouched) when {@code prepareStatement()}
 * returns {@code null}. Execution is attempted up to three times when the driver reports
 * {@link NoHostAvailableException}, pausing briefly between attempts; any other exception
 * fails immediately.
 * </p>
 *
 * @throws DeepIOException if execution fails with an exception other than
 *                         {@link NoHostAvailableException}, if all attempts fail, or if the
 *                         thread is interrupted while waiting to retry (the interrupt flag is
 *                         restored in that case).
 */
private void executeQuery() {
    Statement stmt = prepareStatement();
    if (stmt == null) {
        return;
    }

    final int maxAttempts = 3;
    final long retryDelayMillis = 100L;

    rows = null;
    Exception lastFailure = null;

    // only retry on NoHostAvailableException
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            ResultSet resultSet = session.execute(stmt);
            if (resultSet != null) {
                rows = resultSet.iterator();
            }
            return;
        } catch (NoHostAvailableException e) {
            LOG.error("Could not connect to any host (attempt {} of {})", attempt, maxAttempts, e);
            lastFailure = e;
            try {
                Thread.sleep(retryDelayMillis);
            } catch (InterruptedException interrupted) {
                // Restore the interrupt flag for the caller and stop retrying: the thread has
                // been asked to stop, so further attempts would only delay that.
                Thread.currentThread().interrupt();
                LOG.error("sleep exception", interrupted);
                break;
            }
        } catch (Exception e) {
            throw new DeepIOException(e);
        }
    }

    // Reached only after at least one NoHostAvailableException.
    throw new DeepIOException(lastFailure);
}
}
/**
 * Loads the partition and clustering key columns from the table metadata supplied by the
 * configuration, and derives the key validator: the single partition key type, or a
 * {@link CompositeType} of all partition key types when there is more than one.
 *
 * @throws DeepGenericException if the table metadata reports no partition key columns
 */
//TODO check this
private void retrieveKeys() {
    TableMetadata tableMetadata = config.fetchTableMetadata();
    List<ColumnMetadata> partitionKeys = tableMetadata.getPartitionKey();
    List<ColumnMetadata> clusteringKeys = tableMetadata.getClusteringColumns();

    List<AbstractType<?>> partitionKeyTypes = new ArrayList<>();
    for (ColumnMetadata partitionKey : partitionKeys) {
        BoundColumn bound = new BoundColumn(partitionKey.getName());
        bound.validator = CellValidator.cellValidator(partitionKey.getType()).getAbstractType();
        partitionBoundColumns.add(bound);
        partitionKeyTypes.add(bound.validator);
    }

    for (ColumnMetadata clusteringKey : clusteringKeys) {
        BoundColumn bound = new BoundColumn(clusteringKey.getName());
        bound.validator = CellValidator.cellValidator(clusteringKey.getType()).getAbstractType();
        clusterColumns.add(bound);
    }

    switch (partitionKeyTypes.size()) {
        case 0:
            throw new DeepGenericException("Cannot determine if keyvalidator is composed or not, " +
                    "partitionKeys: " + partitionKeys);
        case 1:
            keyValidator = partitionKeyTypes.get(0);
            break;
        default:
            keyValidator = CompositeType.getInstance(partitionKeyTypes);
            break;
    }
}
/**
 * Checks whether the current row key sits exactly at the end of the split, by comparing the
 * string form of the row key's token with the string form of the split's end token.
 * <p>
 * NOTE(review): nothing in the visible source assigns {@code BoundColumn.value} or calls this
 * method - confirm whether it is still needed.
 * </p>
 *
 * @return {@code true} when the current row key's token equals the split's end token
 */
private boolean reachEndRange() {
    // current row key
    ByteBuffer rowKey;
    if (!(keyValidator instanceof CompositeType)) {
        rowKey = partitionBoundColumns.get(0).value;
    } else {
        int keyCount = partitionBoundColumns.size();
        ByteBuffer[] components = new ByteBuffer[keyCount];
        for (int i = 0; i < partitionBoundColumns.size(); i++) {
            components[i] = partitionBoundColumns.get(i).value.duplicate();
        }
        rowKey = CompositeType.build(components);
    }

    String endToken = String.valueOf(split.getEndToken());
    String currentToken = partitioner.getToken(rowKey).toString();
    return endToken.equals(currentToken);
}
/**
* Holder for a key column: its name, its Cassandra type and, optionally, a raw value.
*/
private static class BoundColumn implements Serializable {
/**
* Column name as reported by the table metadata (unquoted).
*/
private final String name;
/**
* Raw column value. NOTE(review): never assigned in the visible source; only read by
* reachEndRange() - confirm.
*/
private ByteBuffer value;
/**
* Cassandra type of the column; assigned by retrieveKeys().
*/
private AbstractType<?> validator;
/**
* Instantiates a new Bound column with the given name; value and validator start as null.
*
* @param name the name
*/
public BoundColumn(String name) {
this.name = name;
}
}
/**
* Returns a boolean indicating if the underlying rowIterator has a new element or not. DOES NOT advance the
* iterator to the next element: the element is still returned by the following call to {@code next()}.
*
* @return {@code true} if the underlying rowIterator has a new element, {@code false} otherwise.
*/
@Override
public boolean hasNext() {
return rowIterator.hasNext();
}
/**
 * Returns the next element of the underlying rowIterator as a (key columns, value columns) pair.
 *
 * @return the next element in the underlying rowIterator.
 * @throws DeepIllegalAccessException if there are no more elements.
 */
@Override
public Pair<Map<String, ByteBuffer>, Map<String, ByteBuffer>> next() {
    if (this.hasNext()) {
        return rowIterator.next();
    }
    throw new DeepIllegalAccessException("DeepRecordReader exhausted");
}
/**
 * Builds the partition key in {@link ByteBuffer} format for the given values: every equals
 * value is serialized with the key validator component at the same position, the IN value with
 * the last component, and the parts are combined with {@link CompositeType#build}.
 * <p>
 * When assertions are enabled, the key validator is expected to be a {@link CompositeType}
 * with exactly {@code equalsList.size() + 1} components.
 * </p>
 *
 * @param equalsList List of equals field and value pairs.
 * @param inValue    Value for the operator in.
 * @return with the partition key.
 */
private ByteBuffer getPartitionKey(List<Pair<String, Serializable>> equalsList, Serializable inValue) {
    int componentCount = equalsList.size() + 1;
    assert componentCount == ((CompositeType) keyValidator).componentsCount();

    ByteBuffer[] serialized = new ByteBuffer[componentCount];
    int lastIndex = componentCount - 1;

    for (int i = 0; i < lastIndex; i++) {
        AbstractType componentType = (AbstractType) keyValidator.getComponents().get(i);
        serialized[i] = componentType.decompose(equalsList.get(i).right);
    }

    AbstractType inComponentType = (AbstractType) keyValidator.getComponents().get(lastIndex);
    serialized[lastIndex] = inComponentType.decompose(inValue);

    return CompositeType.build(serialized);
}
}