// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.statistics.frequency.recognition; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Set; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class LatinExtendedCharPatternRecognizerTest { @Before public void setUp() throws Exception { } @After public void tearDown() throws Exception { } @Test public void testRecognize() { LatinExtendedCharPatternRecognizer recognizer = new LatinExtendedCharPatternRecognizer(); // Assert empty RecognitionResult result = recognizer.recognize(""); Assert.assertFalse(result.isComplete()); Assert.assertEquals(Collections.singleton(""), result.getPatternStringSet()); // Assert blank and compare the result instance RecognitionResult result2 = recognizer.recognize(" "); Assert.assertFalse(result2.isComplete()); Assert.assertEquals(Collections.singleton(" "), result2.getPatternStringSet()); // Assert null RecognitionResult result3 = recognizer.recognize(null); Assert.assertFalse(result3.isComplete()); Assert.assertEquals(Collections.singleton(null), result3.getPatternStringSet()); // Assert correctness of Ascii character replacement. String chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞß0123456789"; //$NON-NLS-1$ String replChars = "aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA9999999999"; //$NON-NLS-1$ RecognitionResult result4 = recognizer.recognize(chars); Assert.assertTrue(result4.isComplete()); Assert.assertEquals(Collections.singleton(replChars), result4.getPatternStringSet()); // Assert incomplete when the chars including a none-ascii character "ィ". String charsWithNoneAscii = "abcィd"; //$NON-NLS-1$ String replCharsWithNoneAscii = "aaaィa"; //$NON-NLS-1$ RecognitionResult result5 = recognizer.recognize(charsWithNoneAscii); Assert.assertFalse(result5.isComplete()); Assert.assertEquals(Collections.singleton(replCharsWithNoneAscii), result5.getPatternStringSet()); // Assert incomplete when the chars including a none-ascii character "-". String charsWithDash = "abc-d"; //$NON-NLS-1$ String replCharsWithDash = "aaa-a"; //$NON-NLS-1$ RecognitionResult result6 = recognizer.recognize(charsWithDash); Assert.assertFalse(result6.isComplete()); Assert.assertEquals(Collections.singleton(replCharsWithDash), result6.getPatternStringSet()); // Assert more patterns Map<String, String> str2Pattern = new HashMap<>(); str2Pattern.put("*-!", "*-!"); str2Pattern.put("1-3", "9-9"); str2Pattern.put("2001-9-10 - 2009-09-08", "9999-9-99 - 9999-99-99"); testRecognition(str2Pattern); str2Pattern.clear(); str2Pattern.put("2001-8-20", "yyyy-M-d"); testDateRecognition(str2Pattern); } private void testRecognition(Map<String, String> str2Pattern) { LatinExtendedCharPatternRecognizer recognizer = new LatinExtendedCharPatternRecognizer(); Iterator<String> strIterator = str2Pattern.keySet().iterator(); while (strIterator.hasNext()) { String str = strIterator.next(); Set<String> pattern = recognizer.recognize(str).getPatternStringSet(); Assert.assertEquals(Collections.singleton(str2Pattern.get(str)), pattern); } } private void testDateRecognition(Map<String, String> str2Pattern) { DateTimePatternRecognizer recognizer = new DateTimePatternRecognizer(); Iterator<String> strIterator = str2Pattern.keySet().iterator(); while (strIterator.hasNext()) { String str = strIterator.next(); Set<String> pattern = recognizer.recognize(str).getPatternStringSet(); Assert.assertEquals(Collections.singleton(str2Pattern.get(str)), pattern); } } }