// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.dataquality.statistics.numeric.summary; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.talend.dataquality.statistics.type.DataTypeEnum; public class MinMaxValueAnalyzerTest { private static final Logger LOGGER = LoggerFactory.getLogger(MinMaxValueAnalyzerTest.class); @Before public void setUp() throws Exception { } @After public void tearDown() throws Exception { } @Test public void testAnalyzeDoubleIntegerMixed() { String[][] test2Cols_Double_Int = new String[][] { { "20", "20" }, { "1.0", "1.0" }, { "3", "3" }, { "4.5", "4.5" }, { "8.0", "8" } }; SummaryAnalyzer analyzer = new SummaryAnalyzer(new DataTypeEnum[] { DataTypeEnum.DOUBLE, DataTypeEnum.INTEGER }); for (String[] values : test2Cols_Double_Int) { analyzer.analyze(values); } // for the double column: "20", "1.0", "3", "4.5", "8.0" assertEquals(1.0, analyzer.getResult().get(0).getMin(), 0); assertEquals(20, analyzer.getResult().get(0).getMax(), 0);// "20" is also valid as a double // for the integer column: "20", "1.0", "3", "4.5", "8" assertEquals(3, analyzer.getResult().get(1).getMin(), 0);// "1.0" is not valid in a integer column assertEquals(20, analyzer.getResult().get(1).getMax(), 0); } @Test public void testAnalyzeStr() { String[][] test2Cols_Double_Str = new String[][] { { "a str", "a" }, { "1.0", "b" }, { "3", "c" }, { "4.5", "4.5" }, { "8.0", "8.0" } }; SummaryAnalyzer analyzer = new SummaryAnalyzer(new DataTypeEnum[] { DataTypeEnum.DOUBLE, DataTypeEnum.STRING }); for (String[] values : test2Cols_Double_Str) { analyzer.analyze(values); } // for the double type column with one string value: "a str", "1.0", "3", "4.5", "8.0" assertEquals(1.0, analyzer.getResult().get(0).getMin(), 0); assertEquals(8, analyzer.getResult().get(0).getMax(), 0); // for the string type column with double values: "a", "b", "c", "4.5", "8.0" assertTrue(Double.isNaN(analyzer.getResult().get(1).getMin())); assertTrue(Double.isNaN(analyzer.getResult().get(1).getMax())); } @Test public void testAnalyzeEmpty() { String[][] test2Cols_Empty_StrSpace = new String[][] { { "", "" }, { "", "" } }; SummaryAnalyzer analyzer = new SummaryAnalyzer(new DataTypeEnum[] { DataTypeEnum.EMPTY, DataTypeEnum.STRING }); for (String[] values : test2Cols_Empty_StrSpace) { analyzer.analyze(values); } // for the EMPTY type column: "", "" assertTrue(Double.isNaN(analyzer.getResult().get(0).getMin())); assertTrue(Double.isNaN(analyzer.getResult().get(0).getMax())); // for the STRING type column: "", "" assertTrue(Double.isNaN(analyzer.getResult().get(1).getMin())); assertTrue(Double.isNaN(analyzer.getResult().get(1).getMax())); // for issue: https://jira.talendforge.org/browse/TDQ-10863 String[] testMixedCol = new String[] { "22", "21", "18", "", "23", "25", "26", "26.5" }; analyzer = new SummaryAnalyzer(new DataTypeEnum[] { DataTypeEnum.INTEGER }); for (String value : testMixedCol) { analyzer.analyze(value); } assertEquals(18, analyzer.getResult().get(0).getMin(), 0); assertEquals(26, analyzer.getResult().get(0).getMax(), 0); } @Test public void testMixedNumberFormats() { final String[][] testers = new String[][] { // { "1,1", "3.333,33", "5,555", "1E308" }, // { "2.2", "4,444.44", "6.666", "1E309" },// }; SummaryAnalyzer analyzer = new SummaryAnalyzer( new DataTypeEnum[] { DataTypeEnum.DOUBLE, DataTypeEnum.DOUBLE, DataTypeEnum.DOUBLE, DataTypeEnum.DOUBLE, }); for (String[] values : testers) { analyzer.analyze(values); } SummaryStatistics summary1 = analyzer.getResult().get(0); LOGGER.debug("Min: " + summary1.getMin() + " Max: " + summary1.getMax()); SummaryStatistics summary2 = analyzer.getResult().get(1); LOGGER.debug("Min: " + summary2.getMin() + " Max: " + summary2.getMax()); SummaryStatistics summary3 = analyzer.getResult().get(2); LOGGER.debug("Min: " + summary3.getMin() + " Max: " + summary3.getMax()); SummaryStatistics summary4 = analyzer.getResult().get(3); LOGGER.debug("Min: " + summary4.getMin() + " Max: " + summary4.getMax()); } }