package org.elasticsearch.index.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.elasticsearch.common.io.FastStringReader;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
@Test
public class HashSplitterSearchTokenizerTests {
private HashSplitterSearchTokenizer tokenizer;
private FastStringReader reader;
private CharTermAttribute termAttr;
private OffsetAttribute offAttr;
private String input;
@BeforeMethod
public void init() {
tokenizer = null;
reader = null;
termAttr = null;
offAttr = null;
input = null;
}
protected void analyze(String _input) throws Exception {
input = _input;
reader = new FastStringReader(input);
tokenizer.reset(reader);
termAttr = tokenizer.getAttribute(CharTermAttribute.class);
offAttr = tokenizer.getAttribute(OffsetAttribute.class);
}
protected void closeAnalysis() throws Exception {
tokenizer.end();
tokenizer.close();
termAttr = null;
offAttr = null;
input = null;
}
@Test
public void testDefaultAnalysis() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null);
analyze("0123456789");
for (int i = 0 ; i < input.length() ; ++i) {
assertThat("at i = " + i, tokenizer.incrementToken(), equalTo(true));
assertThat("at i = " + i, termAttr.toString(), equalTo(HashSplitterSearchAnalyzer.DEFAULT_PREFIXES.substring(i,i+1) + input.substring(i,i+1)));
assertThat("at i = " + i, offAttr.startOffset(), equalTo(i));
assertThat("at i = " + i, offAttr.endOffset(), equalTo(i+1));
}
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testChunkLength() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 2, HashSplitterSearchTokenizer.DEFAULT_PREFIXES, HashSplitterSearchTokenizer.DEFAULT_WILDCARD_ONE, HashSplitterSearchTokenizer.DEFAULT_WILDCARD_ANY, true, -1);
analyze("0123456789");
for (int i = 0 ; i < input.length() ; i += 2) {
assertThat("at i = " + i, tokenizer.incrementToken(), equalTo(true));
assertThat("at i = " + i, termAttr.toString(), equalTo(HashSplitterSearchAnalyzer.DEFAULT_PREFIXES.substring(i/2,i/2+1) + input.substring(i,i+2)));
assertThat("at i = " + i, offAttr.startOffset(), equalTo(i));
assertThat("at i = " + i, offAttr.endOffset(), equalTo(i+2));
}
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testPrefixes() throws Exception {
String prefixes = "⁰¹²³⁴⁵⁶⁷⁸⁹";
assertThat(prefixes.length(), equalTo(10));
tokenizer = new HashSplitterSearchTokenizer(null, HashSplitterSearchTokenizer.DEFAULT_CHUNK_LENGTH, prefixes, HashSplitterSearchTokenizer.DEFAULT_WILDCARD_ONE, HashSplitterSearchTokenizer.DEFAULT_WILDCARD_ANY, true, -1);
analyze("0123456789");
for (int i = 0 ; i < input.length() ; ++i) {
assertThat("at i = " + i, tokenizer.incrementToken(), equalTo(true));
assertThat("at i = " + i, termAttr.toString(), equalTo(prefixes.substring(i,i+1) + input.substring(i,i+1)));
assertThat("at i = " + i, offAttr.startOffset(), equalTo(i));
assertThat("at i = " + i, offAttr.endOffset(), equalTo(i+1));
}
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testSearchWildcardOne() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 4, "abcd", '?', '*', false, 12);
analyze("00001??12222");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0000"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b1??1"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 2", termAttr.toString(), equalTo("c2222"));
assertThat("at i = 2", offAttr.startOffset(), equalTo(8));
assertThat("at i = 2", offAttr.endOffset(), equalTo(12));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
analyze("?????11?????");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b?11?"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testSearchWildcardAnyPrefixVariableSize() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 4, "abcd", '?', '*', true, -1);
analyze("00001*");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0000"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b1???"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
analyze("0000111*");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0000"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b111?"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testSearchWildcardAnyPrefixFixedSize() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 4, "abcd", '?', '*', false, 12);
analyze("00001*");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0000"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b1???"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
analyze("0000111*");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0000"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b111?"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
@Test
public void testSearchWildcardAnySuffixFixedSize() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 4, "abcd", '?', '*', false, 12);
analyze("*12222");
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b???1"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 2", termAttr.toString(), equalTo("c2222"));
assertThat("at i = 2", offAttr.startOffset(), equalTo(8));
assertThat("at i = 2", offAttr.endOffset(), equalTo(12));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
analyze("*1112222");
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b?111"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 2", termAttr.toString(), equalTo("c2222"));
assertThat("at i = 2", offAttr.startOffset(), equalTo(8));
assertThat("at i = 2", offAttr.endOffset(), equalTo(12));
closeAnalysis();
}
@Test
public void testSearchWildcardAnyPrefixAndSuffixFixedSize() throws Exception {
tokenizer = new HashSplitterSearchTokenizer(null, 4, "abcd", '?', '*', false, 12);
analyze("0*12222");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0???"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 1", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 1", termAttr.toString(), equalTo("b???1"));
assertThat("at i = 1", offAttr.startOffset(), equalTo(4));
assertThat("at i = 1", offAttr.endOffset(), equalTo(8));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 2", termAttr.toString(), equalTo("c2222"));
assertThat("at i = 2", offAttr.startOffset(), equalTo(8));
assertThat("at i = 2", offAttr.endOffset(), equalTo(12));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
analyze("0*2");
assertThat("at i = 0", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 0", termAttr.toString(), equalTo("a0???"));
assertThat("at i = 0", offAttr.startOffset(), equalTo(0));
assertThat("at i = 0", offAttr.endOffset(), equalTo(4));
assertThat("at i = 2", tokenizer.incrementToken(), equalTo(true));
assertThat("at i = 2", termAttr.toString(), equalTo("c???2"));
assertThat("at i = 2", offAttr.startOffset(), equalTo(8));
assertThat("at i = 2", offAttr.endOffset(), equalTo(12));
assertThat(tokenizer.incrementToken(), equalTo(false));
closeAnalysis();
}
}