// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.text;
import java.util.ArrayList;
import java.util.List;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.talend.dataquality.common.inference.Analyzer;
/**
 * Unit tests for {@link TextLengthAnalyzer}.
 * <p>
 * The tests feed small sets of strings to one or more analyzers and check the minimum, maximum and average text
 * length reported by {@link TextLengthStatistics}, both with and without blank values taken into account. The
 * expected values below treat a {@code null} record as contributing to none of the statistics.
 */
public class TextLengthAnalyzerTest {

    /** Analyzer used by the single-threaded tests; initialised before and ended after every test. */
    TextLengthAnalyzer analyzer = new TextLengthAnalyzer();

    @Before
    public void setUp() throws Exception {
        analyzer.init();
    }

    @After
    public void tearDown() throws Exception {
        analyzer.end();
    }

    /**
     * Lengths 6, 3, 1 (blank) and 0 (empty): the overall average is 2.5, and 4.5 once the blank and empty values are
     * left out.
     */
    @Test
    public void testAnalyze() {
        String[] data = new String[] { "Brayan", "Ava", " ", "" };
        for (String value : data) {
            analyzer.analyze(value);
        }
        TextLengthStatistics stats = analyzer.getResult().get(0);
        // Min
        Assert.assertEquals(0, stats.getMinTextLength(), 0);
        Assert.assertEquals(3, stats.getMinTextLengthIgnoreBlank(), 0);
        // Max
        Assert.assertEquals(6, stats.getMaxTextLength(), 0);
        Assert.assertEquals(6, stats.getMaxTextLengthIgnoreBlank(), 0);
        // Avg
        Assert.assertEquals(2.5, stats.getAvgTextLength(), 0);
        Assert.assertEquals(4.5, stats.getAvgTextLengthIgnoreBlank(), 0);
    }

    /**
     * Lengths 10 (blank), 6, 3 and 1 (blank) plus a {@code null}: the expected average of 5 only holds when the
     * {@code null} record is not counted.
     */
    @Test
    public void testAnalyzeWithNullValue() {
        // The blank values are built explicitly so that their lengths (10 and 1) are visible and cannot be lost to
        // whitespace normalisation of this file.
        String[] data = new String[] { blanks(10), "Brayan", "Ava", " ", null };
        for (String value : data) {
            analyzer.analyze(value);
        }
        TextLengthStatistics stats = analyzer.getResult().get(0);
        // Min
        Assert.assertEquals(1, stats.getMinTextLength(), 0);
        Assert.assertEquals(3, stats.getMinTextLengthIgnoreBlank(), 0);
        // Max
        Assert.assertEquals(10, stats.getMaxTextLength(), 0);
        Assert.assertEquals(6, stats.getMaxTextLengthIgnoreBlank(), 0);
        // Avg
        Assert.assertEquals(5, stats.getAvgTextLength(), 0);
        Assert.assertEquals(4.5, stats.getAvgTextLengthIgnoreBlank(), 0);
    }

    /**
     * Runs two analyzers in parallel over identical data and checks that merging them yields the same statistics as
     * a single analyzer over that data.
     *
     * @throws Exception if a worker is interrupted or an analyzer fails to close
     */
    @Test
    public void testMerge() throws Exception {
        String[] data = new String[] { blanks(10), "Brayan", "Ava", " ", null };
        String[] data2 = new String[] { blanks(10), "Brayan", "Ava", " ", null };
        Analyzer<TextLengthStatistics> analyzer1 = new TextLengthAnalyzer();
        Analyzer<TextLengthStatistics> analyzer2 = new TextLengthAnalyzer();
        try {
            runInParallel(analysisTask(analyzer1, data), analysisTask(analyzer2, data2));

            Analyzer<TextLengthStatistics> mergedAnalyzer = analyzer1.merge(analyzer2);
            TextLengthStatistics stats = mergedAnalyzer.getResult().get(0);
            // Min
            Assert.assertEquals(1, stats.getMinTextLength(), 0);
            Assert.assertEquals(3, stats.getMinTextLengthIgnoreBlank(), 0);
            // Max
            Assert.assertEquals(10, stats.getMaxTextLength(), 0);
            Assert.assertEquals(6, stats.getMaxTextLengthIgnoreBlank(), 0);
            // Avg
            Assert.assertEquals(5, stats.getAvgTextLength(), 0);
            Assert.assertEquals(4.5, stats.getAvgTextLengthIgnoreBlank(), 0);
        } finally {
            // Close only once the analyzers are no longer needed (they used to be closed before the workers ran).
            closeAll(analyzer1, analyzer2);
        }
    }

    /**
     * Runs three analyzers in parallel over different data sets and checks the statistics of the chained merge.
     *
     * @throws Exception if a worker is interrupted or an analyzer fails to close
     */
    @Test
    public void testMoreAnalyzersMerge() throws Exception {
        Analyzer<TextLengthStatistics> analyzer1 = new TextLengthAnalyzer();
        Analyzer<TextLengthStatistics> analyzer2 = new TextLengthAnalyzer();
        Analyzer<TextLengthStatistics> analyzer3 = new TextLengthAnalyzer();
        // Data set 1 with length 6
        String[] data1 = new String[] { " ", "France", "Asia", "A long string", "", null };
        // Data set 2 with length 3
        String[] data2 = new String[] { "A", "AB", "ABC" };
        // Data set 3 with length 4
        String[] data3 = new String[] { "computer", "machine", "PC", "laptop" };
        try {
            // Running the analyzers in parallel.
            runInParallel(analysisTask(analyzer1, data1), analysisTask(analyzer2, data2),
                    analysisTask(analyzer3, data3));

            // Merge the analyzer and assert the result.
            Analyzer<TextLengthStatistics> mergedAnalyzer = analyzer1.merge(analyzer2).merge(analyzer3);
            TextLengthStatistics stats = mergedAnalyzer.getResult().get(0);
            // Min
            Assert.assertEquals(0, stats.getMinTextLength(), 0);
            Assert.assertEquals(1, stats.getMinTextLengthIgnoreBlank(), 0);
            // Max
            Assert.assertEquals(13, stats.getMaxTextLength(), 0);
            Assert.assertEquals(13, stats.getMaxTextLengthIgnoreBlank(), 0);
            // Avg: 53 characters over 12 non-null values, 52 characters over 10 non-blank values.
            Assert.assertEquals(4.416667, stats.getAvgTextLength(), 0.00001);
            Assert.assertEquals(5.2, stats.getAvgTextLengthIgnoreBlank(), 0);
        } finally {
            closeAll(analyzer1, analyzer2, analyzer3);
        }
    }

    /**
     * A value with leading blanks (length 8) and a purely blank value (length 2): the average is 5 overall and 8 when
     * the blank value is ignored.
     */
    @Test
    public void testEmpties() {
        String[] data = new String[] { blanks(2) + "gmail.", blanks(2) };
        for (String value : data) {
            analyzer.analyze(value);
        }
        TextLengthStatistics stats = analyzer.getResult().get(0);
        Assert.assertEquals(5, stats.getAvgTextLength(), 0);
        Assert.assertEquals(8, stats.getAvgTextLengthIgnoreBlank(), 0);
    }

    /**
     * Builds a string made of {@code count} space characters.
     *
     * @param count number of spaces, expected to be non-negative
     * @return a string of exactly {@code count} spaces
     */
    private static String blanks(int count) {
        StringBuilder padding = new StringBuilder(count);
        for (int i = 0; i < count; i++) {
            padding.append(' ');
        }
        return padding.toString();
    }

    /**
     * Creates a task that initialises {@code target}, analyzes every record in order and then ends the analyzer.
     *
     * @param target analyzer the task operates on
     * @param records values to analyze; elements may be {@code null}
     * @return the task, to be executed on a worker thread
     */
    private static Runnable analysisTask(Analyzer<TextLengthStatistics> target, String[] records) {
        return () -> {
            target.init();
            for (String record : records) {
                target.analyze(record);
            }
            target.end();
        };
    }

    /**
     * Starts one thread per task and waits for all of them to finish.
     *
     * @param tasks tasks to execute concurrently
     * @throws InterruptedException if the calling thread is interrupted while waiting; propagated so that the test
     * fails instead of asserting on partially computed results
     */
    private static void runInParallel(Runnable... tasks) throws InterruptedException {
        List<Thread> workers = new ArrayList<>();
        for (Runnable task : tasks) {
            workers.add(new Thread(task));
        }
        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
    }

    /**
     * Closes every given analyzer, attempting all of them even when one fails.
     *
     * @param analyzers analyzers to close
     * @throws Exception the first failure raised by a {@code close()} call, with later failures attached as
     * suppressed exceptions
     */
    private static void closeAll(Analyzer<?>... analyzers) throws Exception {
        Exception failure = null;
        for (Analyzer<?> each : analyzers) {
            try {
                each.close();
            } catch (Exception e) {
                if (failure == null) {
                    failure = e;
                } else {
                    failure.addSuppressed(e);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }
}