/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.expr.fn.impl;
import static org.junit.Assert.assertTrue;
import mockit.Mock;
import mockit.MockUp;
import mockit.integration.junit4.JMockit;
import org.apache.calcite.util.ConversionUtil;
import org.apache.calcite.util.Util;
import org.apache.commons.io.FileUtils;
import org.apache.drill.BaseTestQuery;
import org.apache.drill.exec.util.Text;
import org.junit.Ignore;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
import org.junit.runner.RunWith;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.nio.charset.Charset;
@RunWith(JMockit.class)
public class TestStringFunctions extends BaseTestQuery {
@Test
public void testStrPosMultiByte() throws Exception {
testBuilder()
.sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues(1L)
.go();
testBuilder()
.sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues(1L)
.go();
}
@Test
public void testSplitPart() throws Exception {
testBuilder()
.sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 1) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("abc")
.go();
testBuilder()
.sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 2) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("def")
.go();
// invalid index
boolean expectedErrorEncountered;
try {
testBuilder()
.sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 0) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("abc")
.go();
expectedErrorEncountered = false;
} catch (Exception ex) {
assertTrue(ex.getMessage().contains("Index in split_part must be positive, value provided was 0"));
expectedErrorEncountered = true;
}
if (!expectedErrorEncountered) {
throw new RuntimeException("Missing expected error on invalid index for split_part function");
}
// with a multi-byte splitter
testBuilder()
.sqlQuery("select split_part('abc\\u1111drill\\u1111ghi', '\\u1111', 2) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("drill")
.go();
// going beyond the last available index, returns empty string
testBuilder()
.sqlQuery("select split_part('a,b,c', ',', 4) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("")
.go();
// if the delimiter does not appear in the string, 1 returns the whole string
testBuilder()
.sqlQuery("select split_part('a,b,c', ' ', 1) res1 from (values(1))")
.ordered()
.baselineColumns("res1")
.baselineValues("a,b,c")
.go();
}
@Test
public void testRegexpMatches() throws Exception {
testBuilder()
.sqlQuery("select regexp_matches(a, '^a.*') res1, regexp_matches(b, '^a.*') res2 " +
"from (values('abc', 'bcd'), ('bcd', 'abc')) as t(a,b)")
.unOrdered()
.baselineColumns("res1", "res2")
.baselineValues(true, false)
.baselineValues(false, true)
.build()
.run();
}
@Test
public void testRegexpMatchesNonAscii() throws Exception {
testBuilder()
.sqlQuery("select regexp_matches(a, 'München') res1, regexp_matches(b, 'AMünchenA') res2 " +
"from (values('München', 'MünchenA'), ('MünchenA', 'AMünchenA')) as t(a,b)")
.unOrdered()
.baselineColumns("res1", "res2")
.baselineValues(true, false)
.baselineValues(false, true)
.build()
.run();
}
@Test
public void testRegexpReplace() throws Exception {
testBuilder()
.sqlQuery("select regexp_replace(a, 'a|c', 'x') res1, regexp_replace(b, 'd', 'zzz') res2 " +
"from (values('abc', 'bcd'), ('bcd', 'abc')) as t(a,b)")
.unOrdered()
.baselineColumns("res1", "res2")
.baselineValues("xbx", "bczzz")
.baselineValues("bxd", "abc")
.build()
.run();
}
@Test
public void testILike() throws Exception {
testBuilder()
.sqlQuery("select n_name from cp.`tpch/nation.parquet` where ilike(n_name, '%united%') = true")
.unOrdered()
.baselineColumns("n_name")
.baselineValues("UNITED STATES")
.baselineValues("UNITED KINGDOM")
.build()
.run();
}
@Test
public void testILikeEscape() throws Exception {
testBuilder()
.sqlQuery("select a from (select concat(r_name , '_region') a from cp.`tpch/region.parquet`) where ilike(a, 'asia#_region', '#') = true")
.unOrdered()
.baselineColumns("a")
.baselineValues("ASIA_region")
.build()
.run();
}
@Test
public void testSubstr() throws Exception {
testBuilder()
.sqlQuery("select substr(n_name, 'UN.TE.') a from cp.`tpch/nation.parquet` where ilike(n_name, 'united%') = true")
.unOrdered()
.baselineColumns("a")
.baselineValues("UNITED")
.baselineValues("UNITED")
.build()
.run();
}
@Test
public void testLpadTwoArgConvergeToLpad() throws Exception {
final String query_1 = "SELECT lpad(r_name, 25) \n" +
"FROM cp.`tpch/region.parquet`";
final String query_2 = "SELECT lpad(r_name, 25, ' ') \n" +
"FROM cp.`tpch/region.parquet`";
testBuilder()
.sqlQuery(query_1)
.unOrdered()
.sqlBaselineQuery(query_2)
.build()
.run();
}
@Test
public void testRpadTwoArgConvergeToRpad() throws Exception {
final String query_1 = "SELECT rpad(r_name, 25) \n" +
"FROM cp.`tpch/region.parquet`";
final String query_2 = "SELECT rpad(r_name, 25, ' ') \n" +
"FROM cp.`tpch/region.parquet`";
testBuilder()
.sqlQuery(query_1)
.unOrdered()
.sqlBaselineQuery(query_2)
.build()
.run();
}
@Test
public void testLtrimOneArgConvergeToLtrim() throws Exception {
final String query_1 = "SELECT ltrim(concat(' ', r_name, ' ')) \n" +
"FROM cp.`tpch/region.parquet`";
final String query_2 = "SELECT ltrim(concat(' ', r_name, ' '), ' ') \n" +
"FROM cp.`tpch/region.parquet`";
testBuilder()
.sqlQuery(query_1)
.unOrdered()
.sqlBaselineQuery(query_2)
.build()
.run();
}
@Test
public void testRtrimOneArgConvergeToRtrim() throws Exception {
final String query_1 = "SELECT rtrim(concat(' ', r_name, ' ')) \n" +
"FROM cp.`tpch/region.parquet`";
final String query_2 = "SELECT rtrim(concat(' ', r_name, ' '), ' ') \n" +
"FROM cp.`tpch/region.parquet`";
testBuilder()
.sqlQuery(query_1)
.unOrdered()
.sqlBaselineQuery(query_2)
.build()
.run();
}
@Test
public void testBtrimOneArgConvergeToBtrim() throws Exception {
final String query_1 = "SELECT btrim(concat(' ', r_name, ' ')) \n" +
"FROM cp.`tpch/region.parquet`";
final String query_2 = "SELECT btrim(concat(' ', r_name, ' '), ' ') \n" +
"FROM cp.`tpch/region.parquet`";
testBuilder()
.sqlQuery(query_1)
.unOrdered()
.sqlBaselineQuery(query_2)
.build()
.run();
}
@Test
public void testSplit() throws Exception {
testBuilder()
.sqlQuery("select split(n_name, ' ') words from cp.`tpch/nation.parquet` where n_nationkey = 24")
.unOrdered()
.baselineColumns("words")
.baselineValues(ImmutableList.of(new Text("UNITED"), new Text("STATES")))
.build()
.run();
}
@Test
public void testReverse() throws Exception {
testBuilder()
.sqlQuery("select reverse('qwerty') words from (values(1))")
.unOrdered()
.baselineColumns("words")
.baselineValues("ytrewq")
.build()
.run();
}
@Test // DRILL-5424
public void testReverseLongVarChars() throws Exception {
File path = new File(BaseTestQuery.getTempDir("input"));
try {
path.mkdirs();
String pathString = path.toPath().toString();
try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(path, "table_with_long_varchars.json")))) {
for (int i = 0; i < 10; i++) {
writer.write("{ \"a\": \"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz\"}");
}
}
test("select reverse(a) from dfs_test.`%s/table_with_long_varchars.json` t", pathString);
} finally {
FileUtils.deleteQuietly(path);
}
}
@Test
public void testLower() throws Exception {
testBuilder()
.sqlQuery("select\n" +
"lower('ABC') col_upper,\n" +
"lower('abc') col_lower,\n" +
"lower('AbC aBc') col_space,\n" +
"lower('123ABC$!abc123.') as col_special,\n" +
"lower('') as col_empty,\n" +
"lower(cast(null as varchar(10))) as col_null\n" +
"from (values(1))")
.unOrdered()
.baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
.baselineValues("abc", "abc", "abc abc", "123abc$!abc123.", "", null)
.build()
.run();
}
@Test
public void testUpper() throws Exception {
testBuilder()
.sqlQuery("select\n" +
"upper('ABC')as col_upper,\n" +
"upper('abc') as col_lower,\n" +
"upper('AbC aBc') as col_space,\n" +
"upper('123ABC$!abc123.') as col_special,\n" +
"upper('') as col_empty,\n" +
"upper(cast(null as varchar(10))) as col_null\n" +
"from (values(1))")
.unOrdered()
.baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
.baselineValues("ABC", "ABC", "ABC ABC", "123ABC$!ABC123.", "", null)
.build()
.run();
}
@Test
public void testInitcap() throws Exception {
testBuilder()
.sqlQuery("select\n" +
"initcap('ABC')as col_upper,\n" +
"initcap('abc') as col_lower,\n" +
"initcap('AbC aBc') as col_space,\n" +
"initcap('123ABC$!abc123.') as col_special,\n" +
"initcap('') as col_empty,\n" +
"initcap(cast(null as varchar(10))) as col_null\n" +
"from (values(1))")
.unOrdered()
.baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
.baselineValues("Abc", "Abc", "Abc Abc", "123abc$!Abc123.", "", null)
.build()
.run();
}
@Ignore("DRILL-5477")
@Test
public void testMultiByteEncoding() throws Exception {
// mock calcite util method to return utf charset
// instead of setting saffron.default.charset at system level
new MockUp<Util>()
{
@Mock
Charset getDefaultCharset() {
return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
}
};
testBuilder()
.sqlQuery("select\n" +
"upper('привет')as col_upper,\n" +
"lower('ПРИВЕТ') as col_lower,\n" +
"initcap('приВЕТ') as col_initcap\n" +
"from (values(1))")
.unOrdered()
.baselineColumns("col_upper", "col_lower", "col_initcap")
.baselineValues("ПРИВЕТ", "привет", "Привет")
.build()
.run();
}
}