/*
* Licensed to STRATIO (C) under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership. The STRATIO (C) licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.stratio.cassandra.lucene.key;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.stratio.cassandra.lucene.IndexException;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.PartitionPosition;
import org.apache.cassandra.db.marshal.LongType;
import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LongField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import java.nio.ByteBuffer;
import java.util.Optional;
/**
* Class for several token mappings between Cassandra and Lucene.
*
* @author Andres de la Pena {@literal <adelapena@stratio.com>}
*/
public final class TokenMapper {
/** The Lucene field name */
static final String FIELD_NAME = "_token";
/** The Lucene field type */
static final FieldType FIELD_TYPE = new FieldType();
static {
FIELD_TYPE.setTokenized(true);
FIELD_TYPE.setOmitNorms(true);
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE.setNumericType(FieldType.NumericType.LONG);
FIELD_TYPE.setDocValuesType(DocValuesType.NUMERIC);
FIELD_TYPE.freeze();
}
/** A query cache of token range queries */
private final Cache<CacheKey, CachingWrapperQuery> cache;
/**
* Constructor taking the cache size.
*
* @param cacheSize the max token cache size
*/
public TokenMapper(int cacheSize) {
if (!(DatabaseDescriptor.getPartitioner() instanceof Murmur3Partitioner)) {
throw new IndexException("Only Murmur3 partitioner is supported");
}
cache = CacheBuilder.newBuilder().maximumSize(cacheSize).build();
}
/**
* Adds to the specified {@link Document} the {@link Field}s associated to the token of the specified row key.
*
* @param document a {@link Document}
* @param key the raw partition key to be added
*/
public void addFields(Document document, DecoratedKey key) {
Token token = key.getToken();
Long value = value(token);
Field field = new LongField(FIELD_NAME, value, FIELD_TYPE);
document.add(field);
}
/**
* Returns the {code Long} value of the specified Murmur3 partitioning {@link Token}.
*
* @param token a Murmur3 token
* @return the {@code token}'s {code Long} value
*/
public static Long value(Token token) {
return (Long) token.getTokenValue();
}
/**
* Returns the {code ByteBuffer} value of the specified Murmur3 partitioning {@link Token}.
*
* @param token a Murmur3 token
* @return the {@code token}'s {code ByteBuffer} value
*/
public static ByteBuffer byteBuffer(Token token) {
return LongType.instance.decompose(value(token));
}
/**
* Returns the {@link BytesRef} indexing value of the specified Murmur3 partitioning {@link Token}.
*
* @param token a Murmur3 token
* @return the {@code token}'s indexing value
*/
private static BytesRef bytesRef(Token token) {
Long value = value(token);
BytesRefBuilder bytesRef = new BytesRefBuilder();
NumericUtils.longToPrefixCoded(value, 0, bytesRef);
return bytesRef.get();
}
/**
* Returns a Lucene {@link SortField} for sorting documents/rows according to the partitioner's order.
*
* @return a sort field for sorting by token
*/
public SortField sortField() {
return new SortField(FIELD_NAME, SortField.Type.LONG);
}
/**
* Returns if the specified lower partition position must be included in a filtered range.
*
* @param position a {@link PartitionPosition}
* @return {@code true} if {@code position} must be included, {@code false} otherwise
*/
public boolean includeStart(PartitionPosition position) {
return position.kind() == PartitionPosition.Kind.MIN_BOUND;
}
/**
* Returns if the specified upper partition position must be included in a filtered range.
*
* @param position a {@link PartitionPosition}
* @return {@code true} if {@code position} must be included, {@code false} otherwise
*/
public boolean includeStop(PartitionPosition position) {
return position.kind() == PartitionPosition.Kind.MAX_BOUND;
}
/**
* Returns a Lucene {@link Query} to find the {@link Document}s containing a {@link Token} inside the specified
* token range.
*
* @param lower the lower token
* @param upper the upper token
* @param includeLower if the lower token should be included
* @param includeUpper if the upper token should be included
* @return the query to find the documents containing a token inside the range
*/
public Optional<Query> query(Token lower, Token upper, boolean includeLower, boolean includeUpper) {
// Skip if it's full data range
if (lower.isMinimum() && upper.isMinimum()) {
return Optional.empty();
}
// Get token values
Long start = lower.isMinimum() ? null : value(lower);
Long stop = upper.isMinimum() ? null : value(upper);
// Do with cache
CacheKey cacheKey = new CacheKey(start, stop, includeLower, includeUpper);
CachingWrapperQuery cachedQuery = cache.getIfPresent(cacheKey);
if (cachedQuery == null) {
Query query = DocValuesRangeQuery.newLongRange(FIELD_NAME, start, stop, includeLower, includeUpper);
cachedQuery = new CachingWrapperQuery(query);
cache.put(cacheKey, cachedQuery);
}
return Optional.of(cachedQuery);
}
/**
* Returns a Lucene {@link Query} to find the {@link Document}s containing a {@link Token} inside the specified
* {@link PartitionPosition}s.
*
* @param start the start position
* @param stop the stop position
* @return the query to find the documents containing a token inside the range
*/
public Optional<Query> query(PartitionPosition start, PartitionPosition stop) {
return query(start.getToken(), stop.getToken(), includeStart(start), includeStop(stop));
}
/**
* Returns a Lucene {@link Query} to find the {@link Document}s containing the specified {@link Token}.
*
* @param token the token
* @return the query to find the documents containing {@code token}
*/
public Query query(Token token) {
return new TermQuery(new Term(FIELD_NAME, bytesRef(token)));
}
private static final class CacheKey {
private final Long lower;
private final Long upper;
private final boolean includeLower;
private final boolean includeUpper;
CacheKey(Long lower, Long upper, boolean includeLower, boolean includeUpper) {
this.lower = lower;
this.upper = upper;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
CacheKey that = (CacheKey) o;
if (includeLower != that.includeLower) {
return false;
}
if (includeUpper != that.includeUpper) {
return false;
}
if (lower != null ? !lower.equals(that.lower) : that.lower != null) {
return false;
}
return upper != null ? upper.equals(that.upper) : that.upper == null;
}
@Override
public int hashCode() {
int result = lower != null ? lower.hashCode() : 0;
result = 31 * result + (upper != null ? upper.hashCode() : 0);
result = 31 * result + (includeLower ? 1 : 0);
result = 31 * result + (includeUpper ? 1 : 0);
return result;
}
}
}