package semanticMarkup.ling.learn; import static org.junit.Assert.*; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import org.junit.Test; import semanticMarkup.ling.learn.knowledge.Constant; import semanticMarkup.ling.learn.utility.StringUtility; public class StringUtilityTest { @Test public void testStrip() { // Method strip assertEquals("strip", "word1 word2", StringUtility.strip("word1 <abc> word2")); assertEquals("strip", "word1 word2", StringUtility.strip("word1 <?abc?> word2")); assertEquals("strip", "word1 word2", StringUtility.strip("word1   word2")); } @Test public void testRemovePunctuation() { // Method removePunctuation assertEquals("removePunctuation", "word word word wo-rd cant Id end", StringUtility.removePunctuation("word word, word&$% wo-rd can't I'd end.","-")); } @Test public void testTrimString() { // Method trimString assertEquals("trimString head", "word", StringUtility.trimString(" word")); assertEquals("trimString tail", "word", StringUtility.trimString("word ")); assertEquals("trimString head and tail", "word", StringUtility.trimString(" word ")); } @Test public void testProcessWord() { // Method processWord String word = "<word>word <\\iword>word word</word2>"; assertEquals("processWord", "word word word", StringUtility.processWord(word)); assertEquals("processWord", "word word word", StringUtility.processWord(" word word word")); assertEquals("processWord", "word word word", StringUtility.processWord("word word word ")); } @Test public void testRemoveAll() { Constant myConstant = new Constant(); // Method removeAll assertEquals("removeAll - begin", "word word ", StringUtility.removeAll(" word word ", "^\\s+")); assertEquals("removeAll - end", "word|word", StringUtility.removeAll("word|word|", "\\|+$")); assertEquals("removeAll - all", "wordword", StringUtility.removeAll("|word|word|", "\\|")); assertEquals("removeAll - remove beginning", "word", StringUtility.removeAll("above word","^("+myConstant.STOP+"|"+myConstant.FORBIDDEN+")\\b\\s*")); assertEquals("removeAll - remove ending 1", "word1 word2", StringUtility.removeAll("word1 word2 or","\\s*\\b("+myConstant.STOP+"|"+myConstant.FORBIDDEN+"|\\w+ly)$")); assertEquals("removeAll - remove ending 2", "word1 word2", StringUtility.removeAll("word1 word2 usually","\\s*\\b("+myConstant.STOP+"|"+myConstant.FORBIDDEN+"|\\w+ly)$")); assertEquals("removeAll - remove middle pronouns", "word1 word2", StringUtility.removeAll("word1 each word2","\\b("+myConstant.PRONOUN+")\\b")); assertEquals("removeAll - remove beginning and ending", "word", StringUtility.removeAll(" word ","(^\\s*|\\s*$)")); } @Test public void testIsWord() { // Method isWord assertEquals("isWord - Length not > 1", false, StringUtility.isWord("a")); assertEquals("isWord - not all word characters", false, StringUtility.isWord("%^")); assertEquals("isWord - all word characters", true, StringUtility.isWord("ab")); assertEquals("isWord - STOP word", false, StringUtility.isWord("state")); assertEquals("isWord - STOP word", false, StringUtility.isWord("page")); assertEquals("isWord - STOP word", false, StringUtility.isWord("fig")); } @Test public void testIsMatchedWords() { Constant myConstant = new Constant(); assertEquals("isMatchedWords", true, StringUtility.isMatchedWords("and", myConstant.FORBIDDEN)); assertEquals("isMatchedWords", false, StringUtility.isMatchedWords("kahgds", myConstant.FORBIDDEN)); } @Test public void testRemoveFromWordList() { // assertEquals( // "removeFromWordList", // "ab|ad|bi|deca|dis|di|dodeca|endo|end|e|hemi|hetero|hexa|homo|infra|inter|ir|macro|mega|meso|micro|mid|mono|multi|ob|octo|over|penta|poly|postero|post|ptero|pseudo|quadri|quinque|semi|sub|sur|syn|tetra|tri|uni|un|xero|[a-z0-9]+_", // StringUtility // .removeFromWordList( // "de", // "ab|ad|bi|deca|de|dis|di|dodeca|endo|end|e|hemi|hetero|hexa|homo|infra|inter|ir|macro|mega|meso|micro|mid|mono|multi|ob|octo|over|penta|poly|postero|post|ptero|pseudo|quadri|quinque|semi|sub|sur|syn|tetra|tri|uni|un|xero|[a-z0-9]+_")); assertEquals( "removeFromWordList", "ad|bi|deca|de|dis|di|dodeca|endo|end|e|hemi|hetero|hexa|homo|infra|inter|ir|macro|mega|meso|micro|mid|mono|multi|ob|octo|over|penta|poly|postero|post|ptero|pseudo|quadri|quinque|semi|sub|sur|syn|tetra|tri|uni|un|xero|[a-z0-9]+_", StringUtility .removeFromWordList( "ab", "ab|ad|bi|deca|de|dis|di|dodeca|endo|end|e|hemi|hetero|hexa|homo|infra|inter|ir|macro|mega|meso|micro|mid|mono|multi|ob|octo|over|penta|poly|postero|post|ptero|pseudo|quadri|quinque|semi|sub|sur|syn|tetra|tri|uni|un|xero|[a-z0-9]+_")); assertEquals( "removeFromWordList", "above|across|after|along|around|as|at|before|below|beneath|between|beyond|by|during|for|from|in|into|near|of|off|on|onto|out|outside|over|than|through|throughout|toward|towards|up|upward|with", StringUtility .removeFromWordList( "without", "above|across|after|along|around|as|at|before|below|beneath|between|beyond|by|during|for|from|in|into|near|of|off|on|onto|out|outside|over|than|through|throughout|toward|towards|up|upward|with|without")); } /** while($modifier =~ /^($stop|$FORBIDDEN)\b/){ $modifier =~ s#^($stop|$FORBIDDEN)\b\s*##g; } while($tag =~ /^($stop|$FORBIDDEN)\b/){ $tag =~ s#^($stop|$FORBIDDEN)\b\s*##g; } #from ending while($modifier =~ /\b($stop|$FORBIDDEN|\w+ly)$/){ $modifier =~ s#\s*\b($stop|$FORBIDDEN|\w+ly)$##g; } while($tag =~ /\b($stop|$FORBIDDEN|\w+ly)$/){ $tag =~ s#\s*\b($stop|$FORBIDDEN|\w+ly)$##g; } */ @Test public void testRemoveAllRecursive() { assertEquals("removeAllRecursive - beginning", "word", StringUtility.removeAllRecursive("stop stop word", "^(stop)\\b\\s*")); assertEquals("removeAllRecursive - ending", "word", StringUtility.removeAllRecursive("word word1ly word2ly", "\\s*\\b\\w+ly$")); } // @Test // public void testEqualsWithNull(){ // assertEquals("equalsWithNull - null : null", true, StringUtility.equalsWithNull(null, null)); // assertEquals("equalsWithNull - null : not null", false, StringUtility.equalsWithNull(null, "s2")); // assertEquals("equalsWithNull - not null : null", false, StringUtility.equalsWithNull("s1", null)); // assertEquals("equalsWithNull - not null : not null - equal", true, StringUtility.equalsWithNull("abc", "abc")); // assertEquals("equalsWithNull - not null : not null - not equal", false, StringUtility.equalsWithNull("s1", "s2")); // } @Test public void testStringArray2String() { assertEquals("stringArray2String", "teeth unicuspid with", StringUtility.stringArray2String(("teeth unicuspid with".split(" ")))); } @Test public void testStringArraySplice() { List<String> target1 = new ArrayList<String>(); target1.addAll(Arrays.asList("hyohyoidei muscle".split(" "))); assertEquals("stringArraySplice", target1, StringUtility.stringArraySplice(Arrays.asList("hyohyoidei muscle".split(" ")), 0, 2)); } @Test public void testJoinList(){ List<String> input = new ArrayList<String>(); input.addAll(Arrays.asList("word1 word2 word3".split(" "))); String sep = "+++"; assertEquals("stringArraySplice", "word1+++word2+++word3", StringUtility.joinList(sep, input)); } @Test public void testIsEntireMatchedNullSafe(){ assertEquals("not match - pattern null", false, StringUtility.isEntireMatchedNullSafe("[abc]", null)); assertEquals("not match - text null", false, StringUtility.isEntireMatchedNullSafe(null, "^\\[.*$")); assertEquals("not match - pattern empty", false, StringUtility.isEntireMatchedNullSafe("[abc]", "")); assertEquals("not match - text empty", false, StringUtility.isEntireMatchedNullSafe("", "^\\[.*$")); assertEquals("not match", false, StringUtility.isEntireMatchedNullSafe("abc", "^\\z.*$")); assertEquals("match", true, StringUtility.isEntireMatchedNullSafe("[abc]", "^\\[.*$")); } @Test public void testReplaceAllBackreference(){ assertEquals("addHeadTailSpace", "word1 , word2 . word3 ! word4 . ", StringUtility.replaceAllBackreference("word1, word2. word3! word4.", "(\\W)", " $1 ")); } @Test public void testSetSub(){ Set<String> a = new HashSet<String>(); a.add("word1"); a.add("word2"); a.add("word3"); Set<String> b = new HashSet<String>(); b.add("word2"); b.add("word3"); b.add("word4"); Set<String> c = new HashSet<String>(); c.add("word1"); assertEquals("setSub", c, StringUtility.setSubtraction(a, b)); assertEquals("setSub", a, StringUtility.setSubtraction(a, null)); assertEquals("setSub", null, StringUtility.setSubtraction(null, b)); } @Test public void testEscapePerlRegex() { assertEquals("escapePerlRegex - null case", null, StringUtility.escapePerlRegex(null)); assertEquals("escapePerlRegex - + case", "word \\\\+ word", StringUtility.escapePerlRegex("word \\+ word")); assertEquals("escapePerlRegex - ? case", "word \\\\? word", StringUtility.escapePerlRegex("word \\? word")); assertEquals("escapePerlRegex - * case", "word \\\\* word", StringUtility.escapePerlRegex("word \\* word")); assertEquals("escapePerlRegex - ( case", "word \\\\( word", StringUtility.escapePerlRegex("word \\( word")); assertEquals("escapePerlRegex - | case", "word \\\\| word", StringUtility.escapePerlRegex("word \\| word")); } }