// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.numeric.quantile;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.talend.dataquality.common.inference.Analyzer;
import org.talend.dataquality.statistics.numeric.summary.SummaryAnalyzer;
import org.talend.dataquality.statistics.quality.ValueQualityAnalyzerTest;
import org.talend.dataquality.statistics.type.DataTypeEnum;
/**
 * Tests the median, quartile and arbitrary-quantile results reported by {@link QuantileAnalyzer} and
 * {@link TDigestAnalyzer}, first on small in-memory data sets and then on a column of a CSV test resource.
 */
public class QuantileAnalyzerTest {

    /** Classpath-relative location of the CSV file used by {@link #testQuantileOfFile()}. */
    private static final String T_SHIRT_CSV = "../../data/t-shirt_100.csv";

    @Before
    public void setUp() throws Exception {
        // No shared fixture: every test builds its own analyzers.
    }

    @After
    public void tearDown() throws Exception {
        // Nothing to release.
    }

    /**
     * Feeds several integer data sets to one {@link QuantileAnalyzer} and one {@link TDigestAnalyzer} and checks
     * the median, lower/upper quartile and 0.3 quantile after each data set.
     * <p>
     * The same two analyzer instances are reused for all data sets and are re-initialised with {@code init()}
     * before each one; the expected values are those of each data set taken on its own.
     */
    @Test
    public void testAnalyzeStringArray() {
        QuantileAnalyzer analyzer = new QuantileAnalyzer(new DataTypeEnum[] { DataTypeEnum.INTEGER });
        TDigestAnalyzer tanalyzer = new TDigestAnalyzer(new DataTypeEnum[] { DataTypeEnum.INTEGER });

        // 1..9 in ascending order
        analyzeAll(analyzer, tanalyzer, "1", "2", "3", "4", "5", "6", "7", "8", "9");
        assertExactQuantiles(analyzer, 5, 2.5, 7.5, 3, 0);
        assertTDigestQuantiles(tanalyzer, 4.5, 2.75, 6.25, 2.3, 0.001);

        // 1..21 in ascending order
        analyzeAll(analyzer, tanalyzer, "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
                "16", "17", "18", "19", "20", "21");
        assertExactQuantiles(analyzer, 11, 5.5, 16.5, 6.6, 0.001);
        assertTDigestQuantiles(tanalyzer, 10.5, 5.75, 15.25, 6.7, 0.001);

        // 1..9 shuffled: same expectations as the ordered 1..9 data set
        analyzeAll(analyzer, tanalyzer, "1", "9", "3", "4", "6", "7", "5", "8", "2");
        assertExactQuantiles(analyzer, 5, 2.5, 7.5, 3, 0);
        assertTDigestQuantiles(tanalyzer, 4.5, 2.75, 6.25, 2.3, 0.001);

        // constant data
        analyzeAll(analyzer, tanalyzer, "0", "0", "0", "0", "0", "0", "0", "0", "0");
        assertExactQuantiles(analyzer, 0, 0, 0, 0, 0);
        assertTDigestQuantiles(tanalyzer, 0, 0, 0, 0, 0);

        // negative values and duplicates
        analyzeAll(analyzer, tanalyzer, "-1", "-1", "1", "2", "3", "4", "5", "6", "7");
        assertExactQuantiles(analyzer, 3, 0, 5.5, 1, 0);
        assertTDigestQuantiles(tanalyzer, 2.5, 0.5, 4.25, -0.4, 0.001);

        // No input at all: only the QuantileAnalyzer is exercised here, and it must report no result.
        analyzer.init();
        analyzer.end();
        Assert.assertEquals(0, analyzer.getResult().size());
    }

    /**
     * Test for double data (TDQ-10789, TDP-394): analyzes the field at index 7 of every record of the CSV
     * resource with a {@link TDigestAnalyzer}, a {@link QuantileAnalyzer} and a {@link SummaryAnalyzer}, then
     * checks the quartiles and the min/max.
     *
     * @throws IOException if reading or closing the CSV resource fails
     */
    @Test
    public void testQuantileOfFile() throws IOException {
        final List<String[]> records;
        // Close the resource stream ourselves instead of relying on the record reader to do it.
        try (InputStream csv = this.getClass().getResourceAsStream(T_SHIRT_CSV)) {
            Assert.assertNotNull("Test resource not found: " + T_SHIRT_CSV, csv);
            records = ValueQualityAnalyzerTest.getRecords(csv, ",");
        }

        QuantileAnalyzer analyzer = new QuantileAnalyzer(new DataTypeEnum[] { DataTypeEnum.DOUBLE });
        TDigestAnalyzer tanalyzer = new TDigestAnalyzer(new DataTypeEnum[] { DataTypeEnum.DOUBLE });
        SummaryAnalyzer summaryAnalyzer = new SummaryAnalyzer(new DataTypeEnum[] { DataTypeEnum.DOUBLE });
        tanalyzer.init();
        analyzer.init();
        summaryAnalyzer.init();

        List<Analyzer<?>> analyzers = new ArrayList<>();
        analyzers.add(summaryAnalyzer);
        analyzers.add(tanalyzer);
        analyzers.add(analyzer);
        for (String[] record : records) {
            analyzeWithEach(analyzers, record[7]);
        }

        tanalyzer.end();
        analyzer.end();
        summaryAnalyzer.end();

        Assert.assertEquals(23.9, tanalyzer.getResult().get(0).getUpperQuartile(), 0);
        Assert.assertEquals(16.7, tanalyzer.getResult().get(0).getLowerQuartile(), 0);
        Assert.assertEquals(23.9, analyzer.getResult().get(0).getUpperQuartile(), 0);
        Assert.assertEquals(16.7, analyzer.getResult().get(0).getLowerQuartile(), 0);
        Assert.assertEquals(16.7, summaryAnalyzer.getResult().get(0).getMin(), 0);
        Assert.assertEquals(32, summaryAnalyzer.getResult().get(0).getMax(), 0);
    }

    /**
     * Runs one full init / analyze / end cycle on both analyzers, handing every value to the
     * {@link QuantileAnalyzer} first and to the {@link TDigestAnalyzer} second.
     *
     * @param analyzer the exact quantile analyzer to (re-)initialise and feed
     * @param tanalyzer the t-digest analyzer to (re-)initialise and feed
     * @param data the values to analyze, in order
     */
    private static void analyzeAll(QuantileAnalyzer analyzer, TDigestAnalyzer tanalyzer, String... data) {
        analyzer.init();
        tanalyzer.init();
        for (String value : data) {
            analyzer.analyze(value);
            tanalyzer.analyze(value);
        }
        analyzer.end();
        tanalyzer.end();
    }

    /**
     * Asserts the first result of a {@link QuantileAnalyzer}. Median and quartiles are compared with a delta of
     * zero; only the 0.3 quantile takes a caller-supplied tolerance.
     *
     * @param analyzer the analyzer whose first result is inspected
     * @param median expected median
     * @param lowerQuartile expected lower quartile
     * @param upperQuartile expected upper quartile
     * @param quantile30 expected value of {@code getQuantile(0.3)}
     * @param quantile30Delta tolerance used for the 0.3 quantile comparison
     */
    private static void assertExactQuantiles(QuantileAnalyzer analyzer, double median, double lowerQuartile,
            double upperQuartile, double quantile30, double quantile30Delta) {
        Assert.assertEquals(median, analyzer.getResult().get(0).getMedian(), 0);
        Assert.assertEquals(lowerQuartile, analyzer.getResult().get(0).getLowerQuartile(), 0);
        Assert.assertEquals(upperQuartile, analyzer.getResult().get(0).getUpperQuartile(), 0);
        Assert.assertEquals(quantile30, analyzer.getResult().get(0).getQuantile(0.3), quantile30Delta);
    }

    /**
     * Asserts the first result of a {@link TDigestAnalyzer}. Median and quartiles are compared with a delta of
     * zero; only the 0.3 quantile takes a caller-supplied tolerance.
     *
     * @param tanalyzer the analyzer whose first result is inspected
     * @param median expected median
     * @param lowerQuartile expected lower quartile
     * @param upperQuartile expected upper quartile
     * @param quantile30 expected value of {@code getQuantile(0.3)}
     * @param quantile30Delta tolerance used for the 0.3 quantile comparison
     */
    private static void assertTDigestQuantiles(TDigestAnalyzer tanalyzer, double median, double lowerQuartile,
            double upperQuartile, double quantile30, double quantile30Delta) {
        Assert.assertEquals(median, tanalyzer.getResult().get(0).getMedian(), 0);
        Assert.assertEquals(lowerQuartile, tanalyzer.getResult().get(0).getLowerQuartile(), 0);
        Assert.assertEquals(upperQuartile, tanalyzer.getResult().get(0).getUpperQuartile(), 0);
        Assert.assertEquals(quantile30, tanalyzer.getResult().get(0).getQuantile(0.3), quantile30Delta);
    }

    /**
     * Hands a single value to every analyzer of the list, in list order.
     *
     * @param analyzers the analyzers to feed
     * @param value the value to analyze
     */
    private static void analyzeWithEach(List<Analyzer<?>> analyzers, String value) {
        for (Analyzer<?> each : analyzers) {
            each.analyze(value);
        }
    }
}