/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pig.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;

import java.io.IOException;

import org.apache.pig.EvalFunc;
import org.apache.pig.builtin.INDEXOF;
import org.apache.pig.builtin.LAST_INDEX_OF;
import org.apache.pig.builtin.REPLACE;
import org.apache.pig.builtin.STARTSWITH;
import org.apache.pig.builtin.ENDSWITH;
import org.apache.pig.builtin.STRSPLIT;
import org.apache.pig.builtin.STRSPLITTOBAG;
import org.apache.pig.builtin.SUBSTRING;
import org.apache.pig.builtin.TRIM;
import org.apache.pig.builtin.LTRIM;
import org.apache.pig.builtin.RTRIM;
import org.apache.pig.builtin.EqualsIgnoreCase;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.junit.Test;

/**
 * Unit tests for the string-oriented builtin UDFs imported above
 * (SUBSTRING, INDEXOF, LAST_INDEX_OF, REPLACE, TRIM/LTRIM/RTRIM, STRSPLIT,
 * STRSPLITTOBAG, STARTSWITH, ENDSWITH and EqualsIgnoreCase).
 *
 * <p>Each test builds its input with {@code Util.buildTuple(...)} (or an empty
 * tuple from {@link TupleFactory}) and checks the value returned by the UDF's
 * {@code exec} method.
 */
public class TestStringUDFs {

    /** SUBSTRING instance shared by all {@code testStringSubstr*} tests. */
    private static final EvalFunc<String> stringSubstr_ = new SUBSTRING();

    /** Checks SUBSTRING on a null source, an empty source, a plain range and an oversized end index. */
    @Test
    public void testStringSubstr() throws IOException {
        Tuple testTuple = Util.buildTuple(null, 0, 2);
        assertNull("null is null", stringSubstr_.exec(testTuple));

        testTuple = Util.buildTuple("", 0, 2);
        assertEquals("empty string", "", stringSubstr_.exec(testTuple));

        testTuple = Util.buildTuple("abcde", 1, 3);
        assertEquals("lowercase string", "bc", stringSubstr_.exec(testTuple));

        // The end index (15) is past the end of "abc"; the whole string is expected back.
        testTuple = Util.buildTuple("abc", 0, 15);
        assertEquals("end index beyond string length", "abc", stringSubstr_.exec(testTuple));
    }

    /** Expects the empty string when begin index and end index are equal. */
    @Test
    public void testStringSubstr_BI_EQ_EI() throws IOException {
        Tuple testTuple = Util.buildTuple("abc", 0, 0);
        assertEquals("Testing SUBSTIRNG, beginindex == endindex", "", stringSubstr_.exec(testTuple));
    }

    /** Expects null for a negative begin index (-2) that is smaller than the end index. */
    @Test
    public void testStringSubstr_BI_LT_EI() throws IOException {
        Tuple testTuple = Util.buildTuple("abc", -2, 2);
        assertNull("Testing SUBSTIRNG, beginindex < endindex", stringSubstr_.exec(testTuple));
    }

    /** Expects null when the begin index is negative. */
    @Test
    public void testStringSubstr_BI_LT_ZERO() throws IOException {
        Tuple testTuple = Util.buildTuple("abc", -1, 2);
        assertNull("Testing SUBSTIRNG, beginindex < 0", stringSubstr_.exec(testTuple));
    }

    /** Expects null when the begin index is greater than the end index. */
    @Test
    public void testStringSubstr_BI_GT_EI() throws IOException {
        Tuple testTuple = Util.buildTuple("abc", 10, 2);
        assertNull("Testing SUBSTIRNG, beginindex > endindex", stringSubstr_.exec(testTuple));
    }

    /** Expects null when the end index is negative. */
    @Test
    public void testStringSubstr_EI_LT_ZERO() throws IOException {
        Tuple testTuple = Util.buildTuple("abc", 0, -2);
        assertNull("Testing SUBSTIRNG, endindex < 0", stringSubstr_.exec(testTuple));
    }

    /** Compares INDEXOF against {@link String#indexOf(String)}; null inputs must yield null. */
    @Test
    public void testIndexOf() throws IOException {
        INDEXOF indexOf = new INDEXOF();

        // Expected values are boxed so that assertEquals(Object, Object) is selected.
        Tuple testTuple = Util.buildTuple("xyz", "");
        assertEquals(Integer.valueOf("xyz".indexOf("")), indexOf.exec(testTuple));

        testTuple = Util.buildTuple(null, null);
        assertNull(indexOf.exec(testTuple));

        testTuple = Util.buildTuple("xyz", "y");
        assertEquals(Integer.valueOf("xyz".indexOf("y")), indexOf.exec(testTuple));

        testTuple = Util.buildTuple("xyz", "abc");
        assertEquals(Integer.valueOf("xyz".indexOf("abc")), indexOf.exec(testTuple));
    }

    /** Compares LAST_INDEX_OF against {@link String#lastIndexOf(String)}; null inputs must yield null. */
    @Test
    public void testLastIndexOf() throws IOException {
        LAST_INDEX_OF lastIndexOf = new LAST_INDEX_OF();

        // Expected values are boxed so that assertEquals(Object, Object) is selected.
        Tuple testTuple = Util.buildTuple("xyz", "");
        assertEquals(Integer.valueOf("xyz".lastIndexOf("")), lastIndexOf.exec(testTuple));

        testTuple = Util.buildTuple(null, null);
        assertNull(lastIndexOf.exec(testTuple));

        testTuple = Util.buildTuple("xyzyy", "y");
        assertEquals(Integer.valueOf("xyzyy".lastIndexOf("y")), lastIndexOf.exec(testTuple));

        testTuple = Util.buildTuple("xyz", "abc");
        assertEquals(Integer.valueOf("xyz".lastIndexOf("abc")), lastIndexOf.exec(testTuple));
    }

    /** Compares REPLACE against {@link String#replace(CharSequence, CharSequence)}, including a repeated call. */
    @Test
    public void testReplace() throws IOException {
        REPLACE replace = new REPLACE();

        Tuple testTuple = Util.buildTuple("foobar", "z", "x");
        assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));

        // Use cached version of pattern in REPLACE
        assertEquals("foobar".replace("z", "x"), replace.exec(testTuple));

        testTuple = Util.buildTuple("foobar", "oo", "aa");
        assertEquals("foobar".replace("oo", "aa"), replace.exec(testTuple));
    }

    /** TRIM must strip whitespace on both sides and return null for an empty tuple. */
    @Test
    public void testTrim() throws IOException {
        TRIM trim = new TRIM();

        Tuple testTuple = Util.buildTuple("nospaces");
        assertEquals("nospaces".trim(), trim.exec(testTuple));

        testTuple = Util.buildTuple("spaces right ");
        assertEquals("spaces right", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces left");
        assertEquals("spaces left", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces both ");
        assertEquals("spaces both", trim.exec(testTuple));

        testTuple = TupleFactory.getInstance().newTuple();
        assertNull(trim.exec(testTuple));
    }

    /** LTRIM must strip leading whitespace only and return null for an empty tuple. */
    @Test
    public void testLtrim() throws IOException {
        LTRIM trim = new LTRIM();

        Tuple testTuple = Util.buildTuple("nospaces");
        assertEquals("nospaces", trim.exec(testTuple));

        testTuple = Util.buildTuple("spaces right ");
        assertEquals("spaces right ", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces left");
        assertEquals("spaces left", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces both ");
        assertEquals("spaces both ", trim.exec(testTuple));

        testTuple = TupleFactory.getInstance().newTuple();
        assertNull(trim.exec(testTuple));
    }

    /** RTRIM must strip trailing whitespace only and return null for an empty tuple. */
    @Test
    public void testRtrim() throws IOException {
        RTRIM trim = new RTRIM();

        Tuple testTuple = Util.buildTuple("nospaces");
        assertEquals("nospaces", trim.exec(testTuple));

        testTuple = Util.buildTuple("spaces right ");
        assertEquals("spaces right", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces left");
        assertEquals(" spaces left", trim.exec(testTuple));

        testTuple = Util.buildTuple(" spaces both ");
        assertEquals(" spaces both", trim.exec(testTuple));

        testTuple = TupleFactory.getInstance().newTuple();
        assertNull(trim.exec(testTuple));
    }

    /**
     * Exercises STRSPLIT: no delimiter match, the default delimiter, dropping of
     * trailing empty pieces, keeping them when a limit is given, and capping the
     * number of pieces with a limit.
     */
    @Test
    public void testSplit() throws IOException {
        STRSPLIT splitter = new STRSPLIT();

        // test no delims
        Tuple testTuple = Util.buildTuple("foo", ":");
        Tuple splits = splitter.exec(testTuple);
        assertEquals("no matches should return tuple with original string", 1, splits.size());
        assertEquals("no matches should return tuple with original string", "foo", splits.get(0));

        // test default delimiter
        testTuple = Util.buildTuple("f ooo bar");
        splits = splitter.exec(testTuple);
        assertEquals("split on default value ", 3, splits.size());
        assertEquals("f", splits.get(0));
        assertEquals("ooo", splits.get(1));
        assertEquals("bar", splits.get(2));

        // test trimming of whitespace
        testTuple = Util.buildTuple("foo bar ");
        splits = splitter.exec(testTuple);
        assertEquals("whitespace trimmed if no length arg", 2, splits.size());

        // test forcing null matches with length param.
        // The input ends with exactly THREE spaces: with a positive limit each
        // trailing delimiter contributes one empty piece, so "foo", "bar" and three
        // empty strings add up to the expected 5 (same shape as "1:2:::" in
        // testSplitToBag).
        testTuple = Util.buildTuple("foo bar   ", "\\s", 10);
        splits = splitter.exec(testTuple);
        assertEquals("length forces empty string matches on end", 5, splits.size());

        // test limiting results with limit
        testTuple = Util.buildTuple("foo:bar:baz", ":", 2);
        splits = splitter.exec(testTuple);
        assertEquals(2, splits.size());
        assertEquals("foo", splits.get(0));
        assertEquals("bar:baz", splits.get(1));
    }

    /**
     * Exercises STRSPLITTOBAG with the same scenarios as {@link #testSplit()},
     * plus a tuple of non-string fields for which null is expected.
     */
    @Test
    public void testSplitToBag() throws IOException {
        STRSPLITTOBAG bagSplit = new STRSPLITTOBAG();

        //test no delims in input
        Tuple testTuple = Util.buildTuple("1 2 3", "4");
        DataBag outputBag = bagSplit.exec(testTuple);
        assertEquals("No of records split should be 1", 1, outputBag.size());
        assertEquals("Split string should match the input string", "(1 2 3)",
                outputBag.iterator().next().toString());

        //test default delimiter
        String[] oneTwoThree = {"1", "2", "3"};
        outputBag = bagSplit.exec(Util.buildTuple("1 2 3"));
        assertBagContents("Assertion tests on split strings", oneTwoThree, outputBag);

        //test split on specified delimiter
        outputBag = bagSplit.exec(Util.buildTuple("1:2:3", ":"));
        assertBagContents("Assertion tests on split strings", oneTwoThree, outputBag);

        // test limiting results with limit
        outputBag = bagSplit.exec(Util.buildTuple("1:2:3", ":", 2));
        assertBagContents("Matched records in split results with limit",
                new String[]{"1", "2:3"}, outputBag);

        // test trimming of whitespace
        outputBag = bagSplit.exec(Util.buildTuple("1 2 "));
        assertBagContents("Matched records in split results with trimming of whitespaces",
                new String[]{"1", "2"}, outputBag);

        // test forcing null matches with length param
        outputBag = bagSplit.exec(Util.buildTuple("1:2:::", ":", 10));
        assertBagContents("Matched records in split results with forcing null matched with limit",
                new String[]{"1", "2", "", "", ""}, outputBag);

        //test wrong schemas
        outputBag = bagSplit.exec(Util.buildTuple(1, 2, 3));
        assertNull("Wrong Schema checks", outputBag);
    }

    /**
     * Asserts that {@code actual} holds exactly {@code expected.length} tuples and
     * that, in iteration order, the i-th tuple prints as {@code "(" + expected[i] + ")"}.
     *
     * @param elementMessage failure message used for each per-tuple comparison
     * @param expected       expected field values, in order
     * @param actual         bag returned by the UDF under test
     */
    private static void assertBagContents(String elementMessage, String[] expected, DataBag actual) {
        // Size is checked first so the indexed lookup below cannot run past the array.
        assertEquals("No of record split should be " + expected.length, expected.length, actual.size());
        int index = 0;
        for (Tuple t : actual) {
            assertEquals(elementMessage, "(" + expected[index] + ")", t.toString());
            index++;
        }
    }

    /** STARTSWITH must be false for a non-prefix and true for a real prefix. */
    @Test
    public void testStartsWith() throws IOException {
        STARTSWITH startsWith = new STARTSWITH();

        Tuple testTuple1 = Util.buildTuple("foo", "bar");
        assertFalse("String prefix should not match", startsWith.exec(testTuple1));

        Tuple testTuple2 = Util.buildTuple("foobaz", "foo");
        assertTrue("String prefix should match", startsWith.exec(testTuple2));
    }

    /** ENDSWITH must be false for non-suffixes, true for a real suffix and null for a null source. */
    @Test
    public void testEndsWith() throws IOException {
        ENDSWITH endsWith = new ENDSWITH();

        Tuple testTuple1 = Util.buildTuple("foo", "bar");
        assertFalse("String suffix should not match", endsWith.exec(testTuple1));

        // "foo" is a prefix of "foobaz", not a suffix.
        Tuple testTuple2 = Util.buildTuple("foobaz", "foo");
        assertFalse("String suffix should not match", endsWith.exec(testTuple2));

        Tuple testTuple3 = Util.buildTuple("foobaz", "baz");
        assertTrue("String suffix should match", endsWith.exec(testTuple3));

        Tuple testTuple4 = Util.buildTuple(null, "bar");
        assertNull("Should return null", endsWith.exec(testTuple4));
    }

    /** Compares EqualsIgnoreCase against {@link String#equalsIgnoreCase(String)} for matching and non-matching pairs. */
    @Test
    public void testEqualsIgnoreCase() throws IOException {
        EqualsIgnoreCase equalsIgnoreCase = new EqualsIgnoreCase();

        Tuple testTuple = Util.buildTuple("ABC", "abc");
        assertEquals("Strings are NOT equalsIgnoreCase",
                "ABC".equalsIgnoreCase("abc"), equalsIgnoreCase.exec(testTuple));

        testTuple = Util.buildTuple("ABC", "aBC");
        assertEquals("strings are NOT equalsIgnoreCase",
                "ABC".equalsIgnoreCase("aBC"), equalsIgnoreCase.exec(testTuple));

        testTuple = Util.buildTuple("abc", "abc");
        assertEquals("strings are NOT equalsIgnoreCase",
                "abc".equalsIgnoreCase("abc"), equalsIgnoreCase.exec(testTuple));

        // Different lengths: the reference result here is false.
        testTuple = Util.buildTuple("abcd", "abc");
        assertEquals("strings are NOT equalsIgnoreCase",
                "abcd".equalsIgnoreCase("abc"), equalsIgnoreCase.exec(testTuple));
    }
}