// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.converters;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
/**
 * Tests for class {@link DuplicateCharEraser}.
 * <p>
 * The cases below exercise three ways of building the eraser: the no-argument constructor (used here against
 * repeated whitespace and line-break characters), the single {@code char} constructor and the {@code String}
 * constructor (used here against repeated multi-character sequences).
 *
 * @author qiongli
 * @version 2017.03.30
 */
public class DuplicateCharEraserTest {

    /** Consecutive carriage returns are reduced to a single one by the default eraser. */
    @Test
    public void testRemoveDuplicateCR() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "a\rbccccdeaa\r\r\ry"; //$NON-NLS-1$
        assertEquals("a\rbccccdeaa\ry", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** Consecutive line feeds are reduced to a single one by the default eraser. */
    @Test
    public void testRemoveDuplicateLF() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "a\nbccccdeaa\n\n\ny"; //$NON-NLS-1$
        assertEquals("a\nbccccdeaa\ny", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** A repeated CR+LF pair is treated as one unit and reduced to a single pair by the default eraser. */
    @Test
    public void testRemoveDuplicateCRLF() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "a\r\nbccccdeaa\r\n\r\n\r\ny"; //$NON-NLS-1$
        assertEquals("a\r\nbccccdeaa\r\ny", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** Consecutive tabs are reduced to a single one by the default eraser. */
    @Test
    public void testRemoveDuplicateTAB() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "a\tbccccdeaa\t\t\t\t\t\ty"; //$NON-NLS-1$
        assertEquals("a\tbccccdeaa\ty", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /**
     * Only adjacent repeats of the configured letter are collapsed; the match is case-sensitive and other
     * repeated letters are left alone.
     */
    @Test
    public void testRemoveDuplicateLetter() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser('c');
        String input = "atbccccdeaaCCtcy"; //$NON-NLS-1$
        assertEquals("atbcdeaaCCtcy", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('a');
        input = "aaatbccccdeaaCCtcy"; //$NON-NLS-1$
        assertEquals("atbccccdeaCCtcy", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        // This input used to be assigned and then overwritten without being checked.
        // Every 'a' in it is separated from the next one by another character, so there is no
        // adjacent repeat to collapse and the string must come back unchanged.
        input = "acacacactbccccdeaCCtaccy"; //$NON-NLS-1$
        assertEquals(input, duplicateCharEraser.removeRepeatedChar(input));

        input = "abcdef"; //$NON-NLS-1$
        assertEquals("abcdef", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** Adjacent repeats of a configured digit are collapsed to a single digit. */
    @Test
    public void testRemoveDuplicateNumber() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser('1');
        String input = "011111123"; //$NON-NLS-1$
        assertEquals("0123", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('3');
        input = "apple 12333"; //$NON-NLS-1$
        assertEquals("apple 123", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** With a configured character, a null input yields null and an empty input yields an empty string. */
    @Test
    public void testRemoveDuplicateNull1() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser('c');
        String input = null;
        assertNull(duplicateCharEraser.removeRepeatedChar(input));

        input = ""; //$NON-NLS-1$
        assertEquals("", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /**
     * An input that contains none of the characters targeted by the eraser (default eraser, or an eraser
     * configured with a blank) is returned unchanged, even though it contains a repeated letter.
     */
    @Test
    public void testRemoveDuplicateNull2() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "aaabc"; //$NON-NLS-1$
        assertEquals(input, duplicateCharEraser.removeRepeatedChar(input));

        duplicateCharEraser = new DuplicateCharEraser(' ');
        assertEquals(input, duplicateCharEraser.removeRepeatedChar(input));

        // A freshly created default eraser gives the same result as the first one.
        duplicateCharEraser = new DuplicateCharEraser();
        assertEquals(input, duplicateCharEraser.removeRepeatedChar(input));
    }

    /**
     * The default eraser collapses runs of the same whitespace character: ASCII tab, line feed, carriage
     * return and form feed, CR+LF pairs, and a range of Unicode space and separator code points
     * (U+0085, U+00A0, U+1680, U+180E, U+2000 to U+200A, U+2028, U+2029, U+202F, U+205F, U+3000).
     * Repeated letters in the same input are not touched.
     */
    @Test
    public void testRemoveWhiteSpace() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();

        String input = "a b\t\t\tc\n\n\nd\r\re\f\ff"; //$NON-NLS-1$
        String cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("a b\tc\nd\re\ff", cleanStr); //$NON-NLS-1$

        input = "aaab\r\n\r\n\r\nx"; //$NON-NLS-1$
        cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("aaab\r\nx", cleanStr); //$NON-NLS-1$

        input = "a\u0085\u0085\u0085b\u00A0\u00A0c\u1680\u1680d\u180E\u180Ee\u2000\u2000f\u2001\u2001g\u2002\u2002h\u2003\u2003i\u2004\u2004"; //$NON-NLS-1$
        cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("a\u0085b\u00A0c\u1680d\u180Ee\u2000f\u2001g\u2002h\u2003i\u2004", cleanStr); //$NON-NLS-1$

        input = "a\u2005\u2005\u2005b\u2006\u2006c\u2007\u2007d\u2008\u2008e\u2009\u2009f\u200A\u200Ag\u2028\u2028h\u2029\u2029i\u202F\u202Fj\u205F\u205Fk\u3000\u3000l"; //$NON-NLS-1$
        cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("a\u2005b\u2006c\u2007d\u2008e\u2009f\u200Ag\u2028h\u2029i\u202Fj\u205Fk\u3000l", cleanStr); //$NON-NLS-1$
    }

    /** With the default eraser, an empty input yields an empty string and a null input yields null. */
    @Test
    public void testRemoveWhiteSpaceNull() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = ""; //$NON-NLS-1$
        String cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("", cleanStr); //$NON-NLS-1$

        input = null;
        cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertNull(cleanStr);
    }

    /** With the default eraser, an input without any whitespace is returned unchanged. */
    @Test
    public void testRemoveWhiteSpacWithoutSpace() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser();
        String input = "abccdef"; //$NON-NLS-1$
        String cleanStr = duplicateCharEraser.removeRepeatedChar(input);
        assertEquals("abccdef", cleanStr); //$NON-NLS-1$
    }

    /**
     * Characters that have a special meaning in regular expressions (closing parenthesis, closing bracket,
     * plus sign, backslash, caret, opening bracket, opening parenthesis and pipe) can each be configured as
     * the character to de-duplicate; only the configured one is collapsed in the shared input.
     */
    @Test
    public void testRemoveSpecialCharacter() {
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser(')');
        String input = "Gooooalllll))))]]]]]]++++++[[[^^^\\\\(((|||"; //$NON-NLS-1$
        assertEquals("Gooooalllll)]]]]]]++++++[[[^^^\\\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser(']');
        assertEquals("Gooooalllll))))]++++++[[[^^^\\\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('+');
        assertEquals("Gooooalllll))))]]]]]]+[[[^^^\\\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('\\');
        assertEquals("Gooooalllll))))]]]]]]++++++[[[^^^\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('^');
        assertEquals("Gooooalllll))))]]]]]]++++++[[[^\\\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('[');
        assertEquals("Gooooalllll))))]]]]]]++++++[^^^\\\\(((|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('(');
        assertEquals("Gooooalllll))))]]]]]]++++++[[[^^^\\\\(|||", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$

        duplicateCharEraser = new DuplicateCharEraser('|');
        assertEquals("Gooooalllll))))]]]]]]++++++[[[^^^\\\\(((|", duplicateCharEraser.removeRepeatedChar(input)); //$NON-NLS-1$
    }

    /** A multi-character sequence passed as a String is collapsed when it repeats back to back. */
    @Test
    public void testSeveralCharsShouldBeDeduplicated() {
        // given
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser("abc"); //$NON-NLS-1$
        String input = "abcabcabc"; //$NON-NLS-1$

        // when
        String cleanStr = duplicateCharEraser.removeRepeatedChar(input);

        // then
        assertEquals("abc", cleanStr); //$NON-NLS-1$
    }

    /** A multi-character sequence containing regex metacharacters is matched literally and collapsed. */
    @Test
    public void testMustAcceptRegexSpecialChars() {
        // given
        DuplicateCharEraser duplicateCharEraser = new DuplicateCharEraser("a*({]."); //$NON-NLS-1$
        String input = "a*({].a*({].a*({].a*({]."; //$NON-NLS-1$

        // when
        String cleanStr = duplicateCharEraser.removeRepeatedChar(input);

        // then
        assertEquals("a*({].", cleanStr); //$NON-NLS-1$
    }
}