package org.apache.lucene.queryparser.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
* Low level tests of the lexer.
*/
public class TestSpanQueryParserLexer extends LuceneTestCase {
private final SpanQueryLexer lexer = new SpanQueryLexer();
public void testSimple() throws ParseException {
testParseException("the [quick (brown fox)~23 jumped]");
testParseException("the [quick (brown fox)~ jumped]");
testParseException("the \"quick (brown fox)~23 jumped\"");
testParseException("the \"quick (brown fox)~ jumped\"");
}
/*
public void testSingleDebug() throws Exception {
String s = "[\\* TO '*']";
List<SQPToken> tokens = lexer.getTokens(s);
for (SQPToken t : tokens) {
System.out.println(t.getClass() + " : " + t);
if (t instanceof SQPBoostableToken) {
System.out.println("BOOST: " + ((SQPBoostableToken)t).getBoost());
}
}
}*/
/*
public void testOneOffs() throws ParseException {
String s = "the \"quick brown\"";
List<SQPToken> tokens = lexer.getTokens(s);
for (SQPToken t : tokens) {
if (t != null) {
System.out.println(t.toString());
} else {
System.out.println("NULL");
}
}
//now test crazy apparent mods on first dquote
SQPTerm tTerm = new SQPTerm("~2", false);
executeSingleTokenTest(
"the \"~2 quick brown\"",
2,
tTerm
);
}
*/
public void testLargeNumberOfORs() throws Exception {
//Thanks to Modassar Ather for finding this!
StringBuilder sb = new StringBuilder();
sb.append("(");
for (int i = 0; i < 10000; i++) {
if (i > 0) {
sb.append(" OR ");
}
sb.append("TERM_" + i);
}
sb.append(")");
lexer.getTokens(sb.toString());
}
public void testDoubleVsSingleQuotesAroundSingleTerm() throws Exception {
//Thanks to Modassar Ather for finding this!
//if a term is in double-quotes, treat it as a regular single term within a phrase
//which pretty much means that the phrasal part is not calculated.
//before this bug fix, the lexer was dropping the "*"
SQPPrefixTerm truth = new SQPPrefixTerm("term");
executeSingleTokenTest(
"\"term*\"",
1,
truth
);
//use single quotes for literal 'fox*'
SQPTerm truthTerm = new SQPTerm("fox*", true);
executeSingleTokenTest(
"'fox*'",
0,
truthTerm
);
}
public void testSingleQuoteExceptions() throws ParseException {
testParseException("the'quick");
testParseException("quick'");
testParseException("'quick");
testParseException("'");
testParseException(" '");
testParseException("' ");
//need to have something between single quotes
testParseException("the '' quick");
}
public void testFuzzy() throws Exception {
SQPFuzzyTerm truth = new SQPFuzzyTerm("fox");
executeSingleTokenTest(
"fox~",
0,
truth
);
truth.setMaxEdits(1);
executeSingleTokenTest(
"fox~1",
0,
truth
);
truth.setMaxEdits(30);
executeSingleTokenTest(
"fox~30",
0,
truth
);
truth.setMaxEdits(1);
truth.setTranspositions(false);
executeSingleTokenTest(
"fox~>1",
0,
truth
);
truth.setTranspositions(true);
executeSingleTokenTest(
"fox~1",
0,
truth
);
truth.setPrefixLength(2);
executeSingleTokenTest(
"fox~1,2 and some other",
0,
truth
);
truth.setPrefixLength(null);
executeSingleTokenTest(
"fox~1abc and some other",
0,
truth
);
//classic queryparser swallows all alphanumerics
//after ~\d. This parser treats ~\d as a break
//and reads "abc" as a token
SQPTerm abc = new SQPTerm("abc", false);
executeSingleTokenTest(
"fox~1abc and some other",
1,
abc
);
truth = new SQPFuzzyTerm("f*x");
truth.setMaxEdits(1);
executeSingleTokenTest(
"f\\*x~1 and some other",
0,
truth
);
//classic query parser allows this but silently
//drops the fuzzy; SQP throws parse exception
testParseException("f*x~2");
testParseException("fox~0.11");
testParseException("fox~2.2");
testParseException("fox~-0.12");
testParseException("fox~-1");
testParseException("fox~+1.0");
testParseException("fox~+1");
}
public void testAllDocs() throws ParseException {
SQPAllDocsTerm truth = new SQPAllDocsTerm();
executeSingleTokenTest(
"*:*",
0,
truth
);
truth.setBoost(2.3f);
executeSingleTokenTest(
"*:*^2.3",
0,
truth
);
SQPWildcardTerm wildcardTerm = new SQPWildcardTerm("*foobar");
executeSingleTokenTest(
"*:*foobar",
1,
wildcardTerm
);
}
public void testWildcard() throws ParseException {
SQPWildcardTerm truth = new SQPWildcardTerm("f*x");
executeSingleTokenTest(
"f*x and some other",
0,
truth
);
truth = new SQPWildcardTerm("*");
executeSingleTokenTest(
"* and some other",
0,
truth
);
}
public void testWildcardEscapes() throws ParseException {
//if not wildcard, strip escapes
executeSingleTokenTest(
"f\\ox",
0,
new SQPTerm("fox", false)
);
executeSingleTokenTest(
"f\\o?x",
0,
new SQPWildcardTerm("f\\o?x")
);
}
public void testSingleQuotes() throws ParseException {
executeSingleTokenTest(
" 'the''quick' fox",
0,
new SQPTerm("the'quick", true)
);
executeSingleTokenTest(
" 'the quick' fox",
0,
new SQPTerm("the quick", true)
);
executeSingleTokenTest(
" 'the quick' fox 'brown fox' ",
0,
new SQPTerm("the quick", true)
);
executeSingleTokenTest(
" 'the quick' fox 'brown fox' ran ",
2,
new SQPTerm("brown fox", true)
);
executeSingleTokenTest(
" 'the quick' fox 'brown fox' ran ",
3,
new SQPTerm("ran", false)
);
executeSingleTokenTest(
" abc '/some/pa''th/or/other.txt' fox 'brown fox' ran ",
1,
new SQPTerm("/some/pa'th/or/other.txt", true)
);
//apostrophes
executeSingleTokenTest(
" john\\'s tiger ",
0,
new SQPTerm("john's", false)
);
}
public void testEscapedUnicodeChars() throws ParseException {
//copied from QueryParserTestBase
//TODO: get rid of this once new lexer is built
executeSingleTokenTest(
"\\\\\\u0028\\u0062\\\"",
0,
new SQPTerm("\\(b\"", false)
);
executeSingleTokenTest(
"\\\\\\u0028\\u0062\\\"",
0,
new SQPTerm("\\(b\"", false)
);
//test escape beyond bmp
String stagDouble = "\\uD800\\uDC82";
String stagSimple = new StringBuilder().appendCodePoint(0x10082).toString();
executeSingleTokenTest(
stagDouble,
0,
new SQPTerm(stagSimple, false)
);
//too short
testParseException("\\u002");
//not hex
testParseException("\\u002k");
}
public void testRegexes() throws ParseException {
executeSingleTokenTest(
"the quick /rabb.?t/ /f?x/",
2,
new SQPRegexTerm("rabb.?t")
);
executeSingleTokenTest(
"the quick /rab//b.?t/ /f?x/",
2,
new SQPRegexTerm("rab/b.?t")
);
//this is really nasty!
// the
// quick
// rabb/
// b.*?
// / /
// f?x
executeSingleTokenTest(
"the quick /rabb///b.?/ /f?x",
2,
new SQPRegexTerm("rabb/")
);
executeSingleTokenTest(
"the quick /rabb///b.?/ /f?x",
3,
new SQPWildcardTerm("b.?")
);
executeSingleTokenTest(
"the quick /rabb///b.?/ /f?x",
4,
new SQPRegexTerm(" ")
);
executeSingleTokenTest(
"the quick [brown (/rabb.?t/ /f?x/)]",
5,
new SQPRegexTerm("rabb.?t")
);
executeSingleTokenTest(
"the quick [brown (ab/rabb.?t/cd /f?x/)]",
6,
new SQPRegexTerm("rabb.?t")
);
//test regex unescape
executeSingleTokenTest(
"the quick [brown (/ra\\wb\\db//t/ /f?x/)]",
5,
new SQPRegexTerm("ra\\wb\\db/t")
);
//test operators within regex
executeSingleTokenTest(
"the quick [brown (/(?i)a(b)+[c-e]*(f|g){0,3}/ /f?x/)]",
5,
new SQPRegexTerm("(?i)a(b)+[c-e]*(f|g){0,3}")
);
//test non-regex
executeSingleTokenTest(
"'/quick/'",
0,
new SQPTerm("/quick/", true)
);
}
public void testFields() throws ParseException {
executeSingleTokenTest(
"the quick f1: brown fox",
2,
new SQPField("f1")
);
//no space
executeSingleTokenTest(
"the quick f1:brown fox",
2,
new SQPField("f1")
);
//non-escaped colon
testParseException("the quick f1:f2:brown fox");
//escaped colon
executeSingleTokenTest(
"the quick f1\\:f2:brown fox",
2,
new SQPField("f1:f2")
);
//escaped colon
executeSingleTokenTest(
"the quick f1\\:f2:brown fox",
3,
new SQPTerm("brown", false)
);
executeSingleTokenTest(
"the quick f1\\ f2: brown fox",
2,
new SQPField("f1 f2")
);
//fields should not be tokenized within a regex
executeSingleTokenTest(
"the quick /f1: brown/ fox",
2,
new SQPRegexTerm("f1: brown")
);
//fields are tokenized within parens
executeSingleTokenTest(
"the quick (f1: brown fox)",
3,
new SQPField("f1")
);
//can't have field definitions within near or range
testParseException("the quick \"f1: brown fox\"");
testParseException("the quick [f1: brown fox]");
testParseException("the quick [f1: brown TO fox]");
testParseException("the quick [f1: TO fox]");
}
public void testOr() throws ParseException {
SQPOrClause truth = new SQPOrClause(2, 5);
executeSingleTokenTest(
"the quick (brown fox) jumped",
2,
truth
);
truth.setMinimumNumberShouldMatch(23);
executeSingleTokenTest(
"the quick (brown fox)~23 jumped",
2,
truth
);
truth.setMinimumNumberShouldMatch(2);
executeSingleTokenTest(
"the quick (brown fox)~ jumped",
2,
truth
);
//can't specify min number of ORs within a spannear phrase
testParseException("the [quick (brown fox)~23 jumped]");
testParseException("the [quick (brown fox)~ jumped]");
testParseException("the \"quick (brown fox)~23 jumped\"");
testParseException("the \"quick (brown fox)~ jumped\"");
}
public void testNear() throws ParseException {
SQPNearClause truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, null, null);
executeSingleTokenTest(
"the quick \"brown fox\" jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, false, null);
executeSingleTokenTest(
"the quick \"brown fox\"~ jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, true, null);
executeSingleTokenTest(
"the quick \"brown fox\"~> jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, false, 3);
executeSingleTokenTest(
"the quick \"brown fox\"~3 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, true, 3);
executeSingleTokenTest(
"the quick \"brown fox\"~>3 jumped",
2,
truth
);
//now try with boosts
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, null, null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick \"brown fox\"^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, false, null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick \"brown fox\"~^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, true, null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick \"brown fox\"~>^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE,
false,
3);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick \"brown fox\"~3^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.QUOTE, true, 3);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick \"brown fox\"~>3^2.5 jumped",
2,
truth
);
//now test brackets
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, null, null);
executeSingleTokenTest(
"the quick [brown fox] jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, false, null);
executeSingleTokenTest(
"the quick [brown fox]~ jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, true,null);
executeSingleTokenTest(
"the quick [brown fox]~> jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, false,3);
executeSingleTokenTest(
"the quick [brown fox]~3 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, true,3);
executeSingleTokenTest(
"the quick [brown fox]~>3 jumped",
2,
truth
);
//now brackets with boosts
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, null,null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
SQPTerm fox = new SQPTerm("fox", false);
executeSingleTokenTest(
"the quick [brown fox]^2.5 jumped",
2,
truth
);
executeSingleTokenTest(
"the quick [brown fox]^2.5 jumped",
4,
fox
);
fox.setBoost(10f);
executeSingleTokenTest(
"the quick [brown fox^10] jumped",
4,
fox
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, false, null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick [brown fox]~^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, true, null);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick [brown fox]~>^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, false, 3);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick [brown fox]~3^2.5 jumped",
2,
truth
);
truth = new SQPNearClause(3, 5, SQPClause.TYPE.BRACKET, true,3);
((SQPBoostableOrPositionRangeToken) truth).setBoost(2.5f);
executeSingleTokenTest(
"the quick [brown fox]~>3^2.5 jumped",
2,
truth
);
//now test crazy apparent mods on first dquote
SQPTerm tTerm = new SQPTerm("~2", false);
executeSingleTokenTest(
"the \"~2 quick brown\"",
2,
tTerm
);
SQPFuzzyTerm fTerm = new SQPFuzzyTerm("!");
fTerm.setMaxEdits(2);
executeSingleTokenTest(
"the \"!~2 quick brown\"",
2,
fTerm
);
}
public void testBoosts() throws Exception {
SQPToken truth = new SQPTerm("apache", false);
((SQPBoostableOrPositionRangeToken) truth).setBoost(4.0f);
executeSingleTokenTest(
"apache^4",
0,
truth
);
truth = new SQPRegexTerm("apache");
((SQPBoostableOrPositionRangeToken) truth).setBoost(4.0f);
executeSingleTokenTest(
"/apache/^4",
0,
truth
);
truth = new SQPRangeTerm("abc", "efg", true, true);
((SQPBoostableOrPositionRangeToken) truth).setBoost(4.0f);
executeSingleTokenTest(
"the [abc TO efg]^4 cat",
1,
truth
);
truth = new SQPTerm("apache", false);
((SQPBoostableOrPositionRangeToken) truth).setBoost(0.4f);
executeSingleTokenTest(
"apache^.4",
0,
truth
);
executeSingleTokenTest(
"apache^0.4",
0,
truth
);
testParseException("apache^-0.4");
testParseException("apache^-.4");
testParseException("apache^-4");
testParseException("/apache/^-2");
testParseException("apache~2^-2");
testParseException("ap?che^-2");
testParseException("apach*^-2");
testParseException("fox^.");
testParseException("the [abc TO efg]^-4 cat");
}
public void testNotNear() throws ParseException {
SQPNotNearClause truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.QUOTE,null, null);
executeSingleTokenTest(
"the quick \"brown fox\"!~ jumped",
2,
truth
);
truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.QUOTE,
3, 3);
executeSingleTokenTest(
"the quick \"brown fox\"!~3 jumped",
2,
truth
);
truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.QUOTE,
3, 4);
executeSingleTokenTest(
"the quick \"brown fox\"!~3,4 jumped",
2,
truth
);
truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.BRACKET,null, null);
executeSingleTokenTest(
"the quick [brown fox]!~ jumped",
2,
truth
);
truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.BRACKET,
3,
3);
executeSingleTokenTest(
"the quick [brown fox]!~3 jumped",
2,
truth
);
truth = new SQPNotNearClause(3, 5, SQPClause.TYPE.BRACKET,
3,
4);
executeSingleTokenTest(
"the quick [brown fox]!~3,4 jumped",
2,
truth
);
}
public void testUnescapes() throws ParseException {
//lexer should unescape field names
//and boolean operators but nothing else
//the parser may need the escapes for determining type of multiterm
//and a few other things
executeSingleTokenTest(
"the qu\\(ck",
1,
new SQPTerm("qu(ck", false)
);
executeSingleTokenTest(
"the qu\\[ck",
1,
new SQPTerm("qu[ck", false)
);
executeSingleTokenTest(
"the qu\\+ck",
1,
new SQPTerm("qu+ck", false)
);
executeSingleTokenTest(
"the qu\\-ck",
1,
new SQPTerm("qu-ck", false)
);
executeSingleTokenTest(
"the qu\\\\ck",
1,
new SQPTerm("qu\\ck", false)
);
executeSingleTokenTest(
"the qu\\ ck",
1,
new SQPTerm("qu ck", false)
);
executeSingleTokenTest(
"the field\\: quick",
1,
new SQPTerm("field:", false)
);
executeSingleTokenTest(
"the \\+ (quick -nimble)",
1,
new SQPTerm("+", false)
);
}
public void testBoolean() throws Exception {
executeSingleTokenTest(
"the quick AND nimble",
2,
new SQPBooleanOpToken(SpanQueryParserBase.CONJ_AND)
);
executeSingleTokenTest(
"the quick AND nimble",
3,
new SQPTerm("nimble", false)
);
executeSingleTokenTest(
"the quick NOT nimble",
2,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_NOT)
);
executeSingleTokenTest(
"the (quick NOT nimble) fox",
3,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_NOT)
);
//not sure this is the right behavior
//lexer knows when it is in a near clause and doesn't parse
//boolean operators
executeSingleTokenTest(
"the [quick NOT nimble] fox",
3,
new SQPTerm("NOT", false)
);
executeSingleTokenTest(
"the +quick +nimble",
1,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_REQ)
);
executeSingleTokenTest(
"the +quick -nimble",
3,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_NOT)
);
executeSingleTokenTest(
"the +(quick -nimble)",
1,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_REQ)
);
executeSingleTokenTest(
"the +(quick -nimble)",
4,
new SQPBooleanOpToken(SpanQueryParserBase.MOD_NOT)
);
//test non-operators
executeSingleTokenTest(
"the 10-22+02",
1,
new SQPTerm("10-22+02", false)
);
}
public void testRange() throws ParseException {
executeSingleTokenTest(
"the [abc TO def] cat",
1,
new SQPRangeTerm("abc", "def", true, true)
);
executeSingleTokenTest(
"the [quick brown ([abc TO def] fox)] cat",
5,
new SQPRangeTerm("abc", "def", true, true)
);
executeSingleTokenTest(
"the [* TO def] cat",
1,
new SQPRangeTerm(null, "def", true, true)
);
executeSingleTokenTest(
"the [def TO *] cat",
1,
new SQPRangeTerm("def", null, true, true)
);
executeSingleTokenTest(
"the [def TO '*'] cat",
1,
new SQPRangeTerm("def", "*", true, true)
);
SQPNearClause nearClause = new SQPNearClause(2, 5,
SQPClause.TYPE.BRACKET, null, null);
executeSingleTokenTest(
"the [abc to def] cat",
1,
nearClause
);
nearClause = new SQPNearClause(1, 4,
SQPClause.TYPE.BRACKET, null, null);
executeSingleTokenTest(
"[abc to def]",
0,
nearClause
);
//not ranges
nearClause = new SQPNearClause(2, 5,
SQPClause.TYPE.BRACKET, false, 3);
executeSingleTokenTest(
"the [abc to def]~3 cat",
1,
nearClause
);
executeSingleTokenTest(
"the [abc TO def]~3 cat",
1,
nearClause
);
SQPNotNearClause notNear = new SQPNotNearClause(2,
5, SQPClause.TYPE.BRACKET,
1,
2);
executeSingleTokenTest(
"the [abc TO def]!~1,2 cat",
1,
notNear
);
//Curly brackets in non-range queries
testParseException("some stuff [abc def ghi} some other");
testParseException("some stuff {abc def ghi] some other");
testParseException("some stuff {abc def ghi} some other");
testParseException("some stuff [abc} some other");
//can't have modifiers on range queries
testParseException("some stuff [abc TO ghi}~2 some other");
testParseException("some stuff {abc TO ghi]~2 some other");
//can't have multiterm looking terms in range queries
testParseException("the [abc~2 TO def] cat");
testParseException("the [a?c TO def] cat");
testParseException("the [abc TO def~2] cat");
testParseException("the [abc TO de*] cat");
testParseException("the [/abc/ TO def] cat");
}
public void testBeyondBMP() throws Exception {
//this blew out regex during development
String bigChar = new String(new int[]{100000}, 0, 1);
String s = "ab" + bigChar + "cd";
executeSingleTokenTest(
s,
0,
new SQPTerm(s, false)
);
}
public void testEscapedOperators() throws Exception {
executeSingleTokenTest("foo \\AND bar",
1,
new SQPTerm("AND", false)
);
executeSingleTokenTest("foo \\AND",
1,
new SQPTerm("AND", false)
);
executeSingleTokenTest("foo \\OR bar",
1,
new SQPTerm("OR", false)
);
}
public void testSpanPositionRangeOnTerms() throws Exception {
SQPTerm expected = new SQPTerm("foo", false);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("foo@2..10", 0, expected);
expected.setBoost(2.5f);
executeSingleTokenTest("foo@2..10^2.5", 0, expected);
executeSingleTokenTest("foo^2.5@2..10", 0, expected);
//allow flipped ranges
executeSingleTokenTest("foo@10..2^2.5", 0, expected);
executeSingleTokenTest("foo^2.5@10..2", 0, expected);
SQPPrefixTerm prefixTerm = new SQPPrefixTerm("foo");
prefixTerm.setStartPosition(2);
prefixTerm.setEndPosition(10);
executeSingleTokenTest("foo*@2..10", 0, prefixTerm);
prefixTerm.setBoost(2.5f);
executeSingleTokenTest("foo*^2.5@2..10", 0, prefixTerm);
executeSingleTokenTest("foo*@2..10^2.5", 0, prefixTerm);
SQPFuzzyTerm fuzzyTerm = new SQPFuzzyTerm("foo");
fuzzyTerm.setMaxEdits(2);
fuzzyTerm.setPrefixLength(1);
fuzzyTerm.setTranspositions(false);
fuzzyTerm.setStartPosition(2);
fuzzyTerm.setEndPosition(10);
executeSingleTokenTest("foo~>2,1@2..10", 0, fuzzyTerm);
fuzzyTerm.setBoost(2.5f);
executeSingleTokenTest("foo~>2,1@2..10^2.5", 0, fuzzyTerm);
executeSingleTokenTest("foo~>2,1^2.5@2..10", 0, fuzzyTerm);
SQPWildcardTerm wildcardTerm = new SQPWildcardTerm("fo?o");
wildcardTerm.setStartPosition(2);
wildcardTerm.setEndPosition(10);
executeSingleTokenTest("fo?o@2..10", 0, wildcardTerm);
//test @ not interpreted as range position elsewhere
expected = new SQPTerm("@yahoo", false);
executeSingleTokenTest("@yahoo", 0, expected);
expected = new SQPTerm("y@hoo", false);
executeSingleTokenTest("y@hoo", 0, expected);
expected = new SQPTerm("y@.hoo", false);
executeSingleTokenTest("y@.hoo", 0, expected);
expected = new SQPTerm("y@..hoo", false);
executeSingleTokenTest("y@..hoo", 0, expected);
expected = new SQPTerm("y@10.hoo", false);
executeSingleTokenTest("y@10.hoo", 0, expected);
//try single quotes
expected = new SQPTerm("yahoo@2..10", true);
executeSingleTokenTest("'yahoo@2..10'", 0, expected);
//need to escape ranges in middle of terms -- throw exception
//if a valid range is found in middle of term
testParseException("y@10..hoo");
testParseException("y@10..20hoo");
testParseException("y@..20hoo");
}
public void testSpanPositionRangeOnNear() throws Exception {
SQPNearClause expected = new SQPNearClause(1, 3, SQPClause.TYPE.BRACKET, null, null);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("[foo bar]@2..10", 0, expected);
expected.setBoost(2.5f);
executeSingleTokenTest("[foo bar]@2..10^2.5", 0, expected);
executeSingleTokenTest("[foo bar]^2.5@2..10", 0, expected);
expected = new SQPNearClause(1, 3, SQPClause.TYPE.BRACKET, true, null);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("[foo bar]~>@2..10", 0, expected);
expected = new SQPNearClause(1, 3, SQPClause.TYPE.BRACKET, true, 3);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("[foo bar]~>3@2..10", 0, expected);
expected = new SQPNearClause(1, 3, SQPClause.TYPE.QUOTE, null, null);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("\"foo bar\"@2..10", 0, expected);
expected.setBoost(2.5f);
executeSingleTokenTest("\"foo bar\"@2..10^2.5", 0, expected);
executeSingleTokenTest("\"foo bar\"^2.5@2..10", 0, expected);
expected = new SQPNearClause(1, 3, SQPClause.TYPE.QUOTE, true, null);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("\"foo bar\"~>@2..10", 0, expected);
expected = new SQPNearClause(1, 3, SQPClause.TYPE.QUOTE, true, 3);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest("\"foo bar\"~>3@2..10", 0, expected);
testParseException("[foo bar]@");
testParseException("[foo bar]@2");
testParseException("[foo bar]@2.");
testParseException("[foo bar]@..");
testParseException("\"foo bar\"@");
testParseException("\"foo bar\"@2");
testParseException("\"foo bar\"@2.");
testParseException("\"foo bar\"@..");
}
@Test
public void testOrSpanPositionRange() throws Exception {
SQPOrClause expected = new SQPOrClause(2, 5);
expected.setStartPosition(2);
expected.setEndPosition(10);
executeSingleTokenTest(
"the quick (brown fox)@2..10 jumped",
2,
expected
);
expected.setEndPosition(null);
executeSingleTokenTest(
"the quick (brown fox)@2.. jumped",
2,
expected
);
expected.setStartPosition(null);
expected.setEndPosition(10);
executeSingleTokenTest(
"the quick (brown fox)@..10 jumped",
2,
expected
);
}
/*
@Test(timeout = 1000)
public void testNonMatchingSingleQuote() throws Exception {
//test there isn't a permanent hang triggered by the non matching '
//Thanks to Modassar Ather for finding this!
String s = "SEARCH TOOL'S SOLUTION PROVIDER TECHNOLOGY CO., LTD";
executeSingleTokenTest(
s,
0,
new SQPTerm("SEARCH", false)
);
}
public void testQueryEndsInEscape() throws ParseException {
//Again, thanks to Modassar Ather for finding this!
String bad = "the quick \\";
testParseException(bad);
String ok = "the quick \\\\";
executeSingleTokenTest(
ok,
0,
new SQPTerm("the", false)
);
}
*/
private void executeSingleTokenTest(String q, int targetOffset, SQPToken truth)
throws ParseException {
List<SQPToken> tokens = lexer.getTokens(q);
SQPToken target = tokens.get(targetOffset);
if (truth instanceof SQPBoostableOrPositionRangeToken && target instanceof SQPBoostableOrPositionRangeToken) {
Float truthBoost = ((SQPBoostableOrPositionRangeToken) truth).getBoost();
Float targetBoost = ((SQPBoostableOrPositionRangeToken)target).getBoost();
if (truthBoost == null || targetBoost == null) {
assertEquals(truthBoost, targetBoost);
} else {
assertEquals(truthBoost, targetBoost, 0.0001f);
}
Integer truthStartPosition = ((SQPBoostableOrPositionRangeToken)truth).getStartPosition();
Integer targetStartPosition = ((SQPBoostableOrPositionRangeToken)target).getStartPosition();
assertEquals("start position range", truthStartPosition, targetStartPosition);
Integer truthEndPosition = ((SQPBoostableOrPositionRangeToken)truth).getEndPosition();
Integer targetEndPosition = ((SQPBoostableOrPositionRangeToken)target).getEndPosition();
assertEquals("end position range", truthEndPosition, targetEndPosition);
}
assertEquals(truth, target);
}
private void testParseException(String qString) {
boolean ex = false;
try {
executeSingleTokenTest(
qString,
0,
new SQPTerm("anything", false)
);
} catch (ParseException e) {
ex = true;
}
assertTrue("ParseException: " + qString, ex);
}
@Test
public void isolateTest() throws Exception {
debug("y@.hoo");
SQPTerm fox = new SQPTerm("fox", false);
fox.setBoost(10f);
executeSingleTokenTest(
"the quick [brown fox^10] jumped",
4,
fox
);
}
private void debug(String qString) throws ParseException {
List<SQPToken> tokens = lexer.getTokens(qString);
for (SQPToken t : tokens) {
System.out.println(t);
}
}
}