/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/
package io.crate.integrationtests;
import io.crate.action.sql.SQLActionException;
import io.crate.testing.UseJdbc;
import org.junit.Test;
import static org.hamcrest.core.Is.is;
/**
 * Integration tests for the {@code regexp_matches} and {@code regexp_replace}
 * scalar functions and for the regular expression match operators
 * ({@code ~}, {@code ~*} and their negated forms) against user tables and
 * {@code sys.shards}.
 */
@UseJdbc
public class RegexpIntegrationTest extends SQLTransportIntegrationTest {

    private final Setup setup = new Setup(sqlExecutor);

    /**
     * Creates the {@code regex_test} table and fills it with six rows: four
     * strings containing "is", the plain string "foo" and one {@code null}.
     */
    private void createRegexTestTable() throws Exception {
        execute("create table regex_test (i integer, s string) with (number_of_replicas=0)");
        ensureYellow();
        execute("insert into regex_test(i, s) values (?, ?)", new Object[][]{
            new Object[]{1, "foo is first"},
            new Object[]{2, "bar is second"},
            new Object[]{3, "foobar is great"},
            new Object[]{4, "crate is greater"},
            new Object[]{5, "foo"},
            new Object[]{6, null}
        });
        refresh();
    }

    /**
     * Asserts that the first column of the leading rows of the current
     * {@code response} equals the given values, in order. Rows beyond
     * {@code expected.length} are not inspected.
     *
     * @param expected the expected first-column values of rows 0..n-1
     */
    private void assertLeadingNames(String... expected) {
        Object[][] rows = response.rows();
        for (int i = 0; i < expected.length; i++) {
            assertThat((String) rows[i][0], is(expected[i]));
        }
    }

    /**
     * {@code regexp_matches} yields a non-null result only for the four rows
     * whose text contains "is"; "foo" and the null row yield null.
     */
    @Test
    public void testRegexpMatchesIsNull() throws Exception {
        createRegexTestTable();

        execute("select i from regex_test where regexp_matches(s, 'is') is not null");
        assertThat(response.rowCount(), is(4L));

        execute("select i from regex_test where regexp_matches(s, 'is') is null");
        assertThat(response.rowCount(), is(2L));
    }

    /**
     * {@code regexp_replace} yields a non-null result for every non-null
     * input (five rows), and null only for the null row.
     */
    @Test
    public void testRegexpReplaceIsNull() throws Exception {
        createRegexTestTable();

        execute("select i from regex_test where regexp_replace(s, 'is', 'was') is not null");
        assertThat(response.rowCount(), is(5L));

        execute("select i from regex_test where regexp_replace(s, 'is', 'was') is null");
        assertThat(response.rowCount(), is(1L));
    }

    /**
     * A pattern starting with a dangling {@code +} must be rejected with an
     * {@link SQLActionException} carrying the pattern syntax error message.
     */
    @Test
    public void testInvalidPatternSyntax() throws Exception {
        execute("create table phone (phone string) with (number_of_replicas=0)");
        ensureYellow();
        execute("insert into phone (phone) values (?)", new Object[][]{
            new Object[]{"+1234567890"}
        });
        refresh();

        // Arm the expectation only now so that it applies to the query below
        // and not to the table set-up above.
        expectedException.expect(SQLActionException.class);
        expectedException.expectMessage(String.format("Dangling meta character '+' near index 0%n" +
                                                      "+1234567890%n" +
                                                      "^"));
        // Nothing may follow this statement: it is expected to throw.
        execute("select * from phone where phone ~* '+1234567890'");
    }

    /**
     * Test querying using regular expressions based on RegexpQuery,
     * which in turn is based on the fast finite-state automata
     * regular expression engine implementation `dk.brics.automaton`.
     * <p>
     * This engine is the default when using the regexp tilde operator `~`.
     *
     * @see org.apache.lucene.search.RegexpQuery
     * @see org.apache.lucene.util.automaton.RegExp
     * @see <a href="http://www.brics.dk/automaton/">http://www.brics.dk/automaton/</a>
     * @see <a href="http://tusker.org/regex/regex_benchmark.html">http://tusker.org/regex/regex_benchmark.html</a>
     */
    @Test
    public void testRegexpMatchQueryOperatorFast() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        execute("select distinct name from locations where name ~ '[A-Z][a-z0-9]+' order by name");
        assertThat(response.rowCount(), is(5L));
        assertLeadingNames("Aldebaran", "Algol", "Altair", "Argabuthon", "Bartledan");

        execute("select name from locations where name !~ '[A-Z][a-z0-9]+' order by name");
        assertThat(response.rowCount(), is(8L));
        assertLeadingNames(
            "",
            "Allosimanius Syneca",
            "Alpha Centauri",
            "Arkintoofle Minor",
            "End of the Galaxy",
            "Galactic Sector QQ7 Active J Gamma",
            "North West Ripple",
            "Outer Eastern Rim");
    }

    /**
     * Test querying using regular expressions based on RegexQuery,
     * which in turn uses the regular expression engine of the
     * Java standard library.
     * <p>
     * This engine is active when using the case-insensitive regexp tilde operator `~*`.
     *
     * @see org.apache.lucene.sandbox.queries.regex.RegexQuery
     * @see java.util.regex
     */
    @Test
    public void testRegexpMatchQueryOperatorWithCaseInsensitivity() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        execute("select distinct name from locations where name ~* 'aldebaran'");
        assertThat(response.rowCount(), is(1L));
        assertLeadingNames("Aldebaran");

        execute("select distinct name from locations where name !~* 'aldebaran|algol|altair' and name != '' order by name");
        assertThat(response.rowCount(), is(9L));
        // Only the first two of the nine rows are checked by value.
        assertLeadingNames("Allosimanius Syneca", "Alpha Centauri");
    }

    /**
     * Test querying using regular expressions based on RegexQuery,
     * which in turn uses the regular expression engine of the
     * Java standard library.
     * <p>
     * This engine is active when using the regular regexp tilde operator `~`,
     * but the pattern used contains PCRE features, which the fast regex
     * implementation {@link org.apache.lucene.util.automaton.RegExp}
     * isn't capable of.
     *
     * @see org.apache.lucene.sandbox.queries.regex.RegexQuery
     * @see java.util.regex
     */
    @Test
    public void testRegexpMatchQueryOperatorWithPcre() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        // character class shortcut aliases
        execute("select distinct name from locations where name ~ 'Alpha\\sCentauri'");
        assertThat(response.rowCount(), is(1L));
        assertLeadingNames("Alpha Centauri");

        // word boundaries: positive
        execute("select distinct name from locations where name ~ '.*\\bCentauri\\b.*'");
        assertThat(response.rowCount(), is(1L));
        assertLeadingNames("Alpha Centauri");

        // word boundaries: negative
        execute("select distinct name from locations where name ~ '.*\\bauri\\b.*'");
        assertThat(response.rowCount(), is(0L));

        // embedded flag expressions
        execute("select distinct name from locations where name ~ '(?i).*centauri.*'");
        assertThat(response.rowCount(), is(1L));
        assertLeadingNames("Alpha Centauri");

        // same pattern, aggregated: a single result row holding the count 1
        execute("select count(name) from locations where name ~ '(?i).*centauri.*'");
        assertThat(response.rowCount(), is(1L));
        assertThat((Long) response.rows()[0][0], is(1L));
    }

    /**
     * Same PCRE pattern as in {@link #testRegexpMatchQueryOperatorWithPcre()},
     * except that the code path is different as a countOperation is used for
     * count(*) queries.
     *
     * @see org.elasticsearch.index.query.RegexpQueryParser
     * @see org.elasticsearch.index.mapper.core.AbstractFieldMapper#regexpQuery
     */
    @Test
    public void testRegexpMatchQueryOperatorWithPcreViaElasticSearchForCount() throws Exception {
        this.setup.setUpLocations();
        // NOTE(review): the sibling tests wait for green here; confirm that
        // yellow is intentional for this test.
        ensureYellow();
        refresh();

        execute("select count(*) from locations where name ~ '(?i).*centauri.*'");
        assertThat(response.rowCount(), is(1L));
        assertThat((Long) response.rows()[0][0], is(1L));
    }

    /**
     * Same PCRE pattern as in
     * {@link #testRegexpMatchQueryOperatorWithPcreViaElasticSearchForCount()},
     * running through the same code path for DELETE expressions.
     *
     * @see org.elasticsearch.index.query.RegexpQueryParser
     * @see org.elasticsearch.index.mapper.core.AbstractFieldMapper#regexpQuery
     */
    @Test
    public void testRegexpMatchQueryOperatorWithPcreViaElasticSearchForDelete() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        execute("delete from locations where name ~ '(?i).*centauri.*'");
        assertThat(response.rowCount(), is(1L));
    }

    /**
     * Test the `~` operator with a PCRE feature (embedded case-insensitivity
     * flag), but on the system table {@code sys.shards}.
     */
    @Test
    public void testRegexpMatchQueryOperatorOnSysShards() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        execute("select table_name, * from sys.shards where table_name ~ '(?i)LOCATIONS' order by table_name");
        assertThat(response.rowCount(), is(2L));
        assertLeadingNames("locations");
    }

    /**
     * Test the case-insensitive `~*` operator on the system table
     * {@code sys.shards}.
     */
    @Test
    public void testRegexpMatchQueryOperatorWithCaseInsensitivityOnSysShards() throws Exception {
        this.setup.setUpLocations();
        ensureGreen();
        refresh();

        execute("select table_name, * from sys.shards where table_name ~* 'LOCATIONS' order by table_name");
        assertThat(response.rowCount(), is(2L));
        assertLeadingNames("locations");
    }
}