// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.frequency.recognition;
import static org.junit.Assert.assertEquals;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.talend.dataquality.statistics.frequency.pattern.CompositePatternFrequencyAnalyzer;
import org.talend.dataquality.statistics.frequency.pattern.PatternFrequencyStatistics;
/**
 * Exercises {@link CompositePatternFrequencyAnalyzer} with a recognizer chain whose
 * {@link DateTimePatternRecognizer} has been given one additional, custom date/time pattern
 * ({@code =d/M/yy=}), and checks the pattern counts reported for a fixed set of sample values.
 */
public class CustomDateTimePatternRecognizerTest {

    /** Custom date/time pattern registered on the recognizer and expected back in the results. */
    private static final String CUSTOM_PATTERN = "=d/M/yy=";

    /** Number of most frequent patterns requested from the statistics. */
    private static final int TOP_K = 10;

    /** No fixture is required; kept as the JUnit lifecycle hook. */
    @Before
    public void setUp() throws Exception {
    }

    /** Nothing to release; kept as the JUnit lifecycle hook. */
    @After
    public void tearDown() throws Exception {
    }

    /**
     * Feeds every sample value to the analyzer, then compares the top {@value #TOP_K} pattern
     * counts of the first statistics entry with the expected counts.
     */
    @Test
    public void testRecognize() {
        CompositePatternFrequencyAnalyzer analyzer = new CompositePatternFrequencyAnalyzer(buildRecognizers());

        for (String value : sampleValues()) {
            analyzer.analyze(value);
        }

        List<PatternFrequencyStatistics> results = analyzer.getResult();
        Map<String, Long> observedCounts = results.get(0).getTopK(TOP_K);
        Map<String, Long> expectedCounts = expectedPatternCounts();

        // Same size + every observed entry matching its expected count means both maps are equal.
        assertEquals(expectedCounts.size(), observedCounts.size());
        for (Map.Entry<String, Long> observed : observedCounts.entrySet()) {
            String pattern = observed.getKey();
            assertEquals("Unexpected pattern count on pattern <" + pattern + ">", expectedCounts.get(pattern),
                    observed.getValue());
        }
    }

    /**
     * Builds the recognizer chain in the order the analyzer receives it: empty-value recognizer,
     * date/time recognizer (with the custom pattern added), Latin-extended recognizer and
     * East-Asian recognizer.
     *
     * @return the ordered recognizers
     */
    private static ArrayList<AbstractPatternRecognizer> buildRecognizers() {
        DateTimePatternRecognizer dateTimeRecognizer = new DateTimePatternRecognizer();
        dateTimeRecognizer.addCustomDateTimePattern(CUSTOM_PATTERN);

        ArrayList<AbstractPatternRecognizer> chain = new ArrayList<AbstractPatternRecognizer>();
        chain.add(new EmptyPatternRecognizer());
        chain.add(dateTimeRecognizer);
        chain.add(new LatinExtendedCharPatternRecognizer());
        chain.add(new EastAsianCharPatternRecognizer());
        return chain;
    }

    /**
     * Lists the values analyzed by the test, in analysis order. The list mixes blank strings, a
     * {@code null}, plain text and several date-like strings.
     *
     * @return the sample values
     */
    private static List<String> sampleValues() {
        List<String> values = new ArrayList<String>();
        values.add(" ");
        values.add(" ");
        values.add(null);
        values.add("abc");
        values.add("19 rue Pagès");
        values.add("拓蓝科技");
        values.add("2001-9-10");
        values.add("2011-9-20");
        values.add("2011-2-20");
        values.add("2013-1-20");
        values.add("=14/5/18="); // [custom: =d/M/yy=]
        values.add("4/15/18");
        values.add("4/5/2014");
        values.add("02/03/2014");
        values.add("22/03/2014");
        return values;
    }

    /**
     * Provides the pattern counts the analyzer is expected to report.
     * NOTE(review): 15 values are analyzed but only 10 patterns are requested and listed here, so
     * presumably some pattern is cut off by the top-K limit - confirm against the analyzer.
     *
     * @return expected count per pattern
     */
    private static Map<String, Long> expectedPatternCounts() {
        Map<String, Long> counts = new HashMap<String, Long>();
        counts.put("yyyy-M-d", 4L);
        counts.put("", 2L);
        counts.put("dd/MM/yyyy", 2L);
        counts.put("d/M/yyyy", 1L);
        counts.put("M/d/yyyy", 1L);
        counts.put("aaa", 1L);
        counts.put("99 aaa Aaaaa", 1L);
        counts.put("M/d/yy", 1L);
        counts.put("MM/dd/yyyy", 1L);
        counts.put(CUSTOM_PATTERN, 1L);
        return counts;
    }
}