Index.java example

Explorer
cassandra-lucene-index-master
/*
 * Licensed to STRATIO (C) under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership.  The STRATIO (C) licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.stratio.cassandra.lucene;

import com.stratio.cassandra.lucene.search.Search;
import com.stratio.cassandra.lucene.search.SearchBuilder;
import org.apache.cassandra.config.CFMetaData;
import org.apache.cassandra.config.ColumnDefinition;
import org.apache.cassandra.cql3.Operator;
import org.apache.cassandra.db.*;
import org.apache.cassandra.db.filter.RowFilter;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.db.marshal.UTF8Type;
import org.apache.cassandra.db.partitions.PartitionIterator;
import org.apache.cassandra.db.partitions.PartitionUpdate;
import org.apache.cassandra.exceptions.ConfigurationException;
import org.apache.cassandra.exceptions.InvalidRequestException;
import org.apache.cassandra.index.IndexRegistry;
import org.apache.cassandra.index.transactions.IndexTransaction;
import org.apache.cassandra.schema.IndexMetadata;
import org.apache.cassandra.service.ClientState;
import org.apache.cassandra.utils.concurrent.OpOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.Collections;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.function.BiFunction;

/**
 * {@link org.apache.cassandra.index.Index} that uses Apache Lucene as backend. It allows, among others, multi-column
 * and full-text search.
 *
 * @author Andres de la Pena {@literal <adelapena@stratio.com>}
 */
public class Index implements org.apache.cassandra.index.Index {

    private static final Logger logger = LoggerFactory.getLogger(Index.class);

    // Setup CQL query handler.
    // The handler held by ClientState is a static final field, so it is replaced through reflection after stripping
    // the FINAL modifier. Any failure is logged and class loading continues without the Lucene query handler.
    static {
        try {
            Field field = ClientState.class.getDeclaredField("cqlQueryHandler");
            field.setAccessible(true);

            Field modifiersField = Field.class.getDeclaredField("modifiers");
            modifiersField.setAccessible(true);
            modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);

            field.set(null, new IndexQueryHandler());
        } catch (Exception e) {
            logger.error("Unable to set Lucene CQL query handler", e);
        }
    }

    /** The indexed table. */
    private final ColumnFamilyStore table;

    /** The metadata this index was created with. */
    private final IndexMetadata indexMetadata;

    /** The service that all indexing and searching operations are delegated to. */
    private final IndexService service;

    /** The qualified name of the index as reported by the service; only used in log messages. */
    private final String name;

    /**
     * Builds a new Lucene index for the specified {@link ColumnFamilyStore} using the specified {@link IndexMetadata}.
     *
     * @param table the indexed {@link ColumnFamilyStore}
     * @param indexMetadata the index's metadata
     * @throws IndexException if the underlying {@link IndexService} can't be built
     */
    public Index(ColumnFamilyStore table, IndexMetadata indexMetadata) {
        logger.debug("Building Lucene index {} {}", table.metadata, indexMetadata);
        this.table = table;
        this.indexMetadata = indexMetadata;
        try {
            service = IndexService.build(table, indexMetadata);
        } catch (Exception e) {
            throw new IndexException(e);
        }
        name = service.qualifiedName;
    }

    /**
     * Validates the specified index options.
     *
     * @param options the options to be validated
     * @param metadata the metadata of the table to be indexed
     * @return an empty map when the options are valid
     * @throws ConfigurationException if the options are not valid; the original {@link IndexException} is kept as its
     * cause
     */
    public static Map<String, String> validateOptions(Map<String, String> options, CFMetaData metadata) {
        logger.debug("Validating Lucene index options");
        try {
            IndexOptions.validateOptions(options, metadata);
        } catch (IndexException e) {
            logger.error("Lucene index options are invalid", e);
            // Keep the original exception as cause so the full stack trace is not lost
            throw new ConfigurationException(e.getMessage(), e);
        }
        logger.debug("Lucene index options are valid");
        return Collections.emptyMap();
    }

    /*
     * Management functions
     */

    /**
     * Return a task to perform any initialization work when a new index instance is created. This may involve costly
     * operations such as (re)building the index, and is performed asynchronously by SecondaryIndexManager.
     *
     * This implementation returns {@code null} when the table is empty or the index is already marked as built.
     * Otherwise the returned task flushes the table, truncates the index and rebuilds it in a blocking fashion.
     *
     * @return a task to perform any necessary initialization work, or {@code null} if there is nothing to do
     */
    @Override
    public Callable<?> getInitializationTask() {
        logger.info("Getting initialization task of {}", name);
        if (table.isEmpty() || SystemKeyspace.isIndexBuilt(table.keyspace.getName(), indexMetadata.name)) {
            logger.info("Index {} doesn't need (re)building", name);
            return null;
        } else {
            logger.info("Index {} needs (re)building", name);
            return () -> {
                table.forceBlockingFlush();
                service.truncate();
                table.indexManager.buildIndexBlocking(this);
                return null;
            };
        }
    }

    /**
     * Returns the IndexMetadata which configures and defines the index instance. This should be the same object passed
     * as the argument to setIndexMetadata.
     *
     * @return the index's metadata
     */
    @Override
    public IndexMetadata getIndexMetadata() {
        return indexMetadata;
    }

    /**
     * Return a task to reload the internal metadata of an index. Called when the base table metadata is modified or
     * when the configuration of the Index is updated. Implementations should return a task which performs any
     * necessary work to be done due to updating the configuration(s) such as (re)building etc. This task is performed
     * asynchronously by SecondaryIndexManager.
     *
     * This implementation only logs the received metadata; nothing is reloaded or rebuilt.
     *
     * @param indexMetadata the new index metadata
     * @return task to be executed by the index manager during a reload
     */
    @Override
    public Callable<?> getMetadataReloadTask(IndexMetadata indexMetadata) { // TODO: Check rebuild
        return () -> {
            logger.debug("Reloading Lucene index {} metadata: {}", name, indexMetadata);
            return null;
        };
    }

    /**
     * An index must be registered in order to be able to either subscribe to update events on the base table and/or to
     * provide IndexSearcher functionality for reads. The double dispatch involved here, where the Index actually
     * performs its own registration by calling back to the supplied IndexRegistry's own registerIndex method, is to
     * make the decision as to whether or not to register an index belong to the implementation, not the manager.
     *
     * @param registry the index registry to register the instance with
     */
    @Override
    public void register(IndexRegistry registry) {
        registry.registerIndex(this);
    }

    /**
     * If the index implementation uses a local table to store its index data this method should return a handle to it.
     * If not, an empty Optional should be returned. Typically, this is useful for the built-in Index implementations.
     *
     * This implementation always returns an empty {@link Optional}.
     *
     * @return an Optional referencing the Index's backing storage table if it has one, or Optional.empty() if not
     */
    @Override
    public Optional<ColumnFamilyStore> getBackingTable() {
        return Optional.empty();
    }

    /**
     * Return a task which performs a blocking flush of the index's data to persistent storage.
     *
     * @return task to be executed by the index manager to perform the flush
     */
    @Override
    public Callable<?> getBlockingFlushTask() {
        return () -> {
            logger.info("Flushing Lucene index {}", name);
            service.commit();
            return null;
        };
    }

    /**
     * Return a task which invalidates the index, indicating it should no longer be considered usable. This should
     * include any clean up and releasing of resources required when dropping an index.
     *
     * @return task to be executed by the index manager to invalidate the index
     */
    @Override
    public Callable<?> getInvalidateTask() {
        return () -> {
            service.delete();
            return null;
        };
    }

    /**
     * Return a task to truncate the index with the specified truncation timestamp. Called when the base table is
     * truncated.
     *
     * @param truncatedAt timestamp of the truncation operation. This will be the same timestamp used in the truncation
     * of the base table. It is not used by this implementation.
     * @return task to be executed by the index manager when the base table is truncated.
     */
    @Override
    public Callable<?> getTruncateTask(long truncatedAt) {
        logger.trace("Getting truncate task");
        return () -> {
            logger.info("Truncating Lucene index {}", name);
            service.truncate();
            logger.info("Truncated Lucene index {}", name);
            return null;
        };
    }

    /**
     * Return true if this index can be built or rebuilt when the index manager determines it is necessary. Returning
     * false enables the index implementation (or some other component) to control if and when SSTable data is
     * incorporated into the index.
     *
     * This is called by SecondaryIndexManager in buildIndexBlocking, buildAllIndexesBlocking and rebuildIndexesBlocking
     * where a return value of false causes the index to be excluded from the set of those which will process the
     * SSTable data.
     *
     * This implementation always returns {@code true}.
     *
     * @return true if the index should be included in the set which processes SSTable data, false otherwise.
     */
    @Override
    public boolean shouldBuildBlocking() {
        logger.trace("Asking if it should build blocking");
        return true;
    }

    /*
     * Index selection
     */

    /**
     * Called to determine whether this index targets a specific column. Used during schema operations such as when
     * dropping or renaming a column, to check if the index will be affected by the change. Typically, if an index
     * answers that it does depend upon a column, then schema operations on that column are not permitted until the
     * index is dropped or altered.
     *
     * This implementation delegates the decision to the index schema.
     *
     * @param column the column definition to check
     * @return true if the index depends on the supplied column being present; false if the column may be safely dropped
     * or modified without adversely affecting the index
     */
    @Override
    public boolean dependsOn(ColumnDefinition column) { // TODO: Could return true only for key and/or mapped columns
        logger.trace("Asking if it depends on column {}", column);
        return service.schema.maps(column);
    }

    /**
     * Called to determine whether this index can provide a searcher to execute a query on the supplied column using the
     * specified operator. This forms part of the query validation done before a CQL select statement is executed.
     *
     * This implementation always returns {@code false}.
     *
     * @param column the target column of a search query predicate
     * @param operator the operator of a search query predicate
     * @return true if this index is capable of supporting such expressions, false otherwise
     */
    @Override
    public boolean supportsExpression(ColumnDefinition column, Operator operator) {
        logger.trace("Asking if it supports the expression {} {}", column, operator);
        return false;
    }

    /**
     * If the index supports custom search expressions using the
     * {@code SELECT * FROM table WHERE expr(index_name, expression)} syntax, this method should return the expected
     * type of the expression argument. For example, if the index supports custom expressions as Strings, calls to this
     * method should return {@code UTF8Type.instance}. If the index implementation does not support custom expressions,
     * then it should return null.
     *
     * This implementation always returns {@code UTF8Type.instance}.
     *
     * @return the type of custom index expressions supported by this index, or null if custom expressions are not
     * supported.
     */
    @Override
    public AbstractType<?> customExpressionValueType() {
        logger.trace("Requesting the custom expressions value type");
        return UTF8Type.instance;
    }

    /**
     * Transform an initial RowFilter into the filter that will still need to be applied to a set of Rows after the
     * index has performed its initial scan. Used in ReadCommand#executeLocal to reduce the amount of filtering
     * performed on the results of the index query.
     *
     * This implementation returns the received filter unchanged.
     *
     * @param filter the initial filter belonging to a ReadCommand
     * @return the (hopefully) reduced filter that would still need to be applied after the index was used to narrow the
     * initial result set
     */
    @Override
    public RowFilter getPostIndexQueryFilter(RowFilter filter) {
        logger.trace("Getting the post index query filter for {}", filter);
        return filter;
    }

    /**
     * Return an estimate of the number of results this index is expected to return for any given query that it can be
     * used to answer. Used in conjunction with indexes() and supportsExpression() to determine the most selective index
     * for a given ReadCommand. Additionally, this is also used by StorageProxy.estimateResultsPerRange to calculate the
     * initial concurrency factor for range requests.
     *
     * This implementation always returns the constant {@code 1}.
     *
     * @return the estimated average number of results an IndexSearcher may return for any given query
     */
    @Override
    public long getEstimatedResultRows() {
        logger.trace("Getting the estimated result rows");
        return 1;
    }

    /*
     * Input validation
     */

    /**
     * Called at write time to ensure that values present in the update are valid according to the rules of all
     * registered indexes which will process it. The partition key as well as the clustering and cell values for each
     * row in the update may be checked by index implementations.
     *
     * @param update PartitionUpdate containing the values to be validated by registered Index implementations.
     * @throws InvalidRequestException If the update doesn't pass through the validation.
     */
    @Override
    public void validate(PartitionUpdate update) {
        logger.trace("Validating {}", update);
        try {
            service.validate(update);
        } catch (Exception e) {
            // Only the message travels in the rethrown exception, so keep the stack trace available in the log
            logger.debug("Partition update validation failed", e);
            throw new InvalidRequestException(e.getMessage());
        }
    }

    /*
     * Update processing
     */

    /**
     * Creates a new {@code IndexWriter} object for updates to a given partition.
     *
     * @param key key of the partition being modified
     * @param columns the regular and static columns the created indexer will have to deal with. This can be empty as an
     * update might only contain partition, range and row deletions, but the indexer is guaranteed to not get any cells
     * for a column that is not part of {@code columns}. It is not used by this implementation.
     * @param nowInSec current time of the update operation
     * @param opGroup operation group spanning the update operation
     * @param transactionType indicates what kind of update is being performed on the base data i.e. a write time
     * insert/update/delete or the result of compaction
     * @return the newly created indexer or {@code null} if the index is not interested by the update (this could be
     * because the index doesn't care about that particular partition, doesn't care about that type of transaction,
     * ...).
     */
    @Override
    public Indexer indexerFor(DecoratedKey key,
                              PartitionColumns columns,
                              int nowInSec,
                              OpOrder.Group opGroup,
                              IndexTransaction.Type transactionType) {
        return service.indexWriter(key, nowInSec, opGroup, transactionType);
    }

    /*
     * Querying
     */

    /**
     * Return a function which performs post processing on the results of a partition range read command. In future,
     * this may be used as a generalized mechanism for transforming results on the coordinator prior to returning them
     * to the caller.
     *
     * This is used on the coordinator during execution of a range command to perform post processing of merged results
     * obtained from the necessary replicas. This is the only way in which results are transformed in this way but this
     * may change over time as usage is generalized. See CASSANDRA-8717 for further discussion.
     *
     * The function takes a PartitionIterator of the results from the replicas which has already been collated and
     * reconciled, along with the command being executed. It returns another PartitionIterator containing the results of
     * the transformation (which may be the same as the input if the transformation is a no-op).
     *
     * @param command the read command being executed. It is not used here: the returned function works with the
     * command it receives as its second argument.
     * @return a function that delegates the post processing to the index service
     */
    @Override
    public BiFunction<PartitionIterator, ReadCommand, PartitionIterator> postProcessorFor(ReadCommand command) {
        return (partitions, readCommand) -> service.postProcess(partitions, readCommand);
    }

    /**
     * Factory method for query time search helper. Custom index implementations should perform any validation of query
     * expressions here and throw a meaningful InvalidRequestException when any expression is invalid.
     *
     * @param command the read command being executed
     * @return an IndexSearcher with which to perform the supplied command
     * @throws InvalidRequestException if the command's expressions are invalid according to the specific syntax
     * supported by the index implementation.
     */
    @Override
    public Searcher searcherFor(ReadCommand command) {
        logger.trace("Getting searcher for {}", command);
        try {
            return service.searcher(command);
        } catch (Exception e) {
            logger.error("Error while searching", e);
            throw new InvalidRequestException(e.getMessage());
        }
    }

    /**
     * Validates the specified {@link RowFilter.CustomExpression}. The expression value is read as a UTF-8 JSON string,
     * parsed into a {@link Search} and its query is built against the index schema.
     *
     * @param expression the expression to be validated
     * @return the valid search represented by {@code expression}
     * @throws InvalidRequestException if the expression is not valid
     */
    public Search validate(RowFilter.CustomExpression expression) {
        try {
            String json = UTF8Type.instance.compose(expression.getValue());
            Search search = SearchBuilder.fromJson(json).build();
            // Building the query is done only for its validation side effect; the result is discarded
            search.query(service.schema);
            return search;
        } catch (Exception e) {
            // Only the message travels in the rethrown exception, so keep the stack trace available in the log
            logger.debug("Custom expression validation failed", e);
            throw new InvalidRequestException(e.getMessage());
        }
    }
}