/* * Copyright (C) 2012-2016 DuyHai DOAN * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package info.archinnov.achilles.annotations; import java.lang.annotation.*; import java.util.Locale; /** * Annotation for SASI index * <br/> * <br/> * The following combinations are allowed for index options: * <br/> * <table> * <thead> * <tr> * <th>Data type</th> * <th>Index Mode</th> * <th>Analyzer Class</th> * <th>Possible option values</th> * </tr> * </thead> * <tbody> * <tr> * <td>Text or Ascii</td> * <td>PREFIX or CONTAINS</td> * <td>NoOpAnalyzer</td> * <td> * <ul> * <li>analyzed = <strong>false</strong> (DEFAULT)</li> * <li>normalization = <strong>NONE</strong> (DEFAULT)</li> * <li>locale is <strong>ignored</strong></li> * <li>maxCompactionFlushMemoryInMb (OPTIONAL)</li> * <li>enableStemming = <strong>false</strong> (DEFAULT)</li> * <li>skipStopWords = <strong>false</strong> (DEFAULT)</li> * </ul> * </td> * </tr> * <tr> * <td>Text or Ascii</td> * <td>PREFIX or CONTAINS</td> * <td>NonTokenizingAnalyzer</td> * <td> * <ul> * <li>analyzed = <strong>true</strong> (MANDATORY)</li> * <li>normalization (OPTIONAL)</li> * <li>locale (OPTIONAL)</li> * <li>maxCompactionFlushMemoryInMb (OPTIONAL)</li> * <li>enableStemming = <strong>false</strong> (DEFAULT)</li> * <li>skipStopWords = <strong>false</strong> (DEFAULT)</li> * </ul> * </td> * </tr> * <tr> * <td>Text or Ascii</td> * <td>PREFIX or CONTAINS</td> * <td>StandardAnalyzer</td> * <td> * <ul> * <li>analyzed = <strong>true</strong> (MANDATORY)</li> * <li>normalization (OPTIONAL)</li> * <li>locale (OPTIONAL)</li> * <li>maxCompactionFlushMemoryInMb (OPTIONAL)</li> * <li>enableStemming (OPTIONAL)</li> * <li>skipStopWords (OPTIONAL)</li> * </ul> * </td> * </tr> * <tr> * <td>Non Text</td> * <td>PREFIX OR SPARSE</td> * <td>NoOpAnalyzer</td> * <td> * <ul> * <li>analyzed = <strong>false</strong> (DEFAULT)</li> * <li>normalization = <strong>NONE</strong> (DEFAULT)</li> * <li>locale is <strong>ignored</strong></li> * <li>maxCompactionFlushMemoryInMb (OPTIONAL)</li> * <li>enableStemming = <strong>false</strong> (DEFAULT)</li> * <li>skipStopWords = <strong>false</strong> (DEFAULT)</li> * </ul> * </td> * </tr> * </tbody> * </table> *<br/> */ @Retention(RetentionPolicy.RUNTIME) @Target({ElementType.FIELD}) @Documented public @interface SASI { /** * <strong>Optional</strong>. * Define the name of the SASI index. If not set, defaults to <strong>table name_field name_index</strong> * <pre class="code"><code class="java"> * {@literal @}Table * public class User { * ... * {@literal @}SASI(<strong>name = "country_code_index"</strong>) * {@literal @}Column * private String countryCode; * ... * } * </code></pre> * </p> * If the index name was not set above, it would default to <string>user_countrycode_index</string> */ String name() default ""; /** * SASI index mode. Allowed values are: * <ul> * <li><strong>PREFIX</strong> (DEFAULT): allows search on prefix for text/ascii data types. * Default and only valid index mode for non-text data types</li> * <li><strong>CONTAINS</strong>: allows search on prefix, suffix and substring for text/ascii data types. * Invalid for non-text data types</li> * <li><strong>SPARSE</strong>: only valid for non-text data types. * SPARSE mode is optimized for low-cardinality e.g. for indexed values having * <strong>5 or less</strong> corresponding rows. If there are more than 5 CQL rows having * this index value, SASI will complain by throwing an exception</li> * </ul> * */ IndexMode indexMode() default IndexMode.PREFIX; /** * Indicates whether the data should be analyzed or not. * <br/> * <br/> * Setting 'analyzed' = true is only valid for text/ascii data types. * <br/> * <br/> * Setting 'analyzed' = true is <strong>mandatory</strong> if 'analyzerClass' is set to: * <ul> * <li><strong>NON_TOKENIZING_ANALYZER</strong></li> * <li><strong>STANDARD_ANALYZER</strong></li> * </ul> */ boolean analyzed() default false; /** * Defines the analyzer class. Available values are: * <ul> * <li><strong>NO_OP_ANALYSER</strong> (DEFAULT): do not analyze the input</li> * <li><strong>NON_TOKENIZING_ANALYZER</strong>: only valid for text/ascii data types. * Do not tokenize the input. * Normalization by lowercase/uppercase is allowed</li> * <li><strong>STANDARD_ANALYZER</strong>: only valid for text/ascii data types. * Split the input text into tokens, using the locale defined by attribute 'locale' * Normalization by lowercase/uppercase is allowed</li> * </ul> * <br/> * <br/> * <strong> * Please note that setting 'analyzerClass' to NON_TOKENIZING_ANALYZER or STANDARD_ANALYZER * also requires setting 'analyzed' to true * </strong> */ Analyzer analyzerClass() default Analyzer.NO_OP_ANALYZER; /** * Maximum size of SASI data to keep in memory during compaction process. * <br/> * <br/> * Default = 1024 e.g. 1Gb * <br/> * <br/> * If there are more than 'maxCompactionFlushMemoryInMb' worth of index data, SASI * will flush them on temporary files on disk before merging all the temp files into * a single one. Of course it will add up to compaction duration. No free lunch, sorry */ int maxCompactionFlushMemoryInMb() default 1024; /** * Defines the normalization to be applied to the input. Available values are: * <ul> * <li><strong>NONE</strong> (DEFAULT): no normalization</li> * <li><strong>LOWERCASE</strong>: normalize input text and search term to lower case</li> * <li><strong>UPPERCASE</strong>: normalize input text and search term to upper case</li> * </ul> */ Normalization normalization() default Normalization.NONE; /** * Defines the locale for tokenization. This attribute is only used when * 'analyzerClass' == STANDARD_ANALYZER otherwise it is ignored */ String locale() default "en"; /** * Enable stemming of input text. This attribute is only used when * 'analyzerClass' == STANDARD_ANALYZER */ boolean enableStemming() default false; /** * Enable stemming of input text. This attribute is only used when * 'analyzerClass' == STANDARD_ANALYZER */ boolean skipStopWords() default false; enum IndexMode { PREFIX, CONTAINS, SPARSE } enum Analyzer { NO_OP_ANALYZER("org.apache.cassandra.index.sasi.analyzer.NoOpAnalyzer"), NON_TOKENIZING_ANALYZER("org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer"), STANDARD_ANALYZER("org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer"); private String analyzerClass; Analyzer(String analyzerClass) { this.analyzerClass = analyzerClass; } public String analyzerClass() { return analyzerClass; } } enum Normalization { LOWERCASE { @Override public String forStandardAnalyzer() { return "tokenization_normalize_lowercase"; } @Override public String forNonTokenizingAnalyzer() { return "normalize_lowercase"; } }, UPPERCASE { @Override public String forStandardAnalyzer() { return "tokenization_normalize_uppercase"; } @Override public String forNonTokenizingAnalyzer() { return "normalize_uppercase"; } }, NONE { @Override public String forStandardAnalyzer() { return ""; } @Override public String forNonTokenizingAnalyzer() { return ""; } }; abstract public String forStandardAnalyzer(); abstract public String forNonTokenizingAnalyzer(); } }