/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.valuedist;
import java.util.Collection;
import java.util.Set;
import junit.framework.TestCase;
import org.eobjects.analyzer.data.MetaModelInputColumn;
import org.eobjects.analyzer.data.MockInputColumn;
import org.eobjects.analyzer.data.MockInputRow;
import org.eobjects.analyzer.descriptors.AnalyzerBeanDescriptor;
import org.eobjects.analyzer.descriptors.ConfiguredPropertyDescriptor;
import org.eobjects.analyzer.descriptors.Descriptors;
import org.eobjects.analyzer.descriptors.MetricDescriptor;
import org.eobjects.analyzer.descriptors.MetricParameters;
import org.eobjects.analyzer.result.GroupedValueCountingAnalyzerResult;
import org.eobjects.analyzer.result.ValueCountList;
import org.eobjects.analyzer.result.ValueCountingAnalyzerResult;
import org.apache.metamodel.schema.MutableColumn;
/**
 * Unit tests for {@link ValueDistributionAnalyzer}: descriptor metadata,
 * running counts (unique / distinct / null / total), the "Value count" metric
 * and the textual rendering of both single and grouped results.
 */
public class ValueDistributionAnalyzerTest extends TestCase {

    /**
     * Creates an analyzer over a single column named "col".
     *
     * NOTE(review): the meaning of the trailing constructor arguments
     * ({@code true, null, null}) is not visible from this test - confirm
     * against the {@link ValueDistributionAnalyzer} constructor.
     */
    private static ValueDistributionAnalyzer newSingleColumnAnalyzer() {
        return new ValueDistributionAnalyzer(new MetaModelInputColumn(new MutableColumn("col")), true, null, null);
    }

    /**
     * Pins the descriptor metadata discovered for the analyzer class: number
     * of initialize methods, configured properties, provided properties and
     * the display name.
     */
    public void testDescriptor() throws Exception {
        final AnalyzerBeanDescriptor<?> desc = Descriptors.ofAnalyzer(ValueDistributionAnalyzer.class);

        assertEquals(0, desc.getInitializeMethods().size());
        assertEquals(6, desc.getConfiguredProperties().size());
        assertEquals(2, desc.getProvidedProperties().size());
        assertEquals("Value distribution", desc.getDisplayName());
    }

    /**
     * Feeds values one at a time and checks the unique, distinct, null and
     * total counts of the result after every step. The last argument of
     * {@code runInternal} is the number of occurrences being registered, as
     * reflected by the expected totals.
     */
    public void testGetCounts() throws Exception {
        final ValueDistributionAnalyzer vd = newSingleColumnAnalyzer();

        // nothing registered yet
        assertEquals(0, vd.getResult().getUniqueCount().intValue());
        assertEquals(0, vd.getResult().getNullCount());
        assertEquals(0, vd.getResult().getDistinctCount().intValue());
        assertEquals(0, vd.getResult().getTotalCount());

        vd.runInternal(new MockInputRow(), "hello", 1);
        assertEquals(1, vd.getResult().getUniqueCount().intValue());
        assertEquals(1, vd.getResult().getDistinctCount().intValue());
        assertEquals(1, vd.getResult().getTotalCount());

        vd.runInternal(new MockInputRow(), "world", 1);
        assertEquals(2, vd.getResult().getUniqueCount().intValue());
        assertEquals(2, vd.getResult().getDistinctCount().intValue());
        assertEquals(2, vd.getResult().getTotalCount());

        // registered with a count of 2: a new distinct value, but not a unique one
        vd.runInternal(new MockInputRow(), "foobar", 2);
        assertEquals(2, vd.getResult().getUniqueCount().intValue());
        assertEquals(3, vd.getResult().getDistinctCount().intValue());
        assertEquals(4, vd.getResult().getTotalCount());

        // second occurrence of "world": it is no longer unique
        vd.runInternal(new MockInputRow(), "world", 1);
        assertEquals(1, vd.getResult().getUniqueCount().intValue());
        assertEquals(3, vd.getResult().getDistinctCount().intValue());
        assertEquals(5, vd.getResult().getTotalCount());

        // "hello" repeated as well: no unique values remain
        vd.runInternal(new MockInputRow(), "hello", 3);
        assertEquals(0, vd.getResult().getUniqueCount().intValue());
        assertEquals(3, vd.getResult().getDistinctCount().intValue());
        assertEquals(8, vd.getResult().getTotalCount());

        // the first null raises the expected distinct count from 3 to 4
        vd.runInternal(new MockInputRow(), null, 1);
        assertEquals(0, vd.getResult().getUniqueCount().intValue());
        assertEquals(1, vd.getResult().getNullCount());
        assertEquals(4, vd.getResult().getDistinctCount().intValue());
        assertEquals(9, vd.getResult().getTotalCount());

        vd.runInternal(new MockInputRow(), null, 3);
        assertEquals(0, vd.getResult().getUniqueCount().intValue());
        assertEquals(4, vd.getResult().getNullCount());
        assertEquals(4, vd.getResult().getDistinctCount().intValue());
        assertEquals(12, vd.getResult().getTotalCount());
    }

    /**
     * Checks the "Value count" result metric: its parameter suggestions and
     * its value for a single value as well as for "IN [...]" and
     * "NOT IN [...]" parameter expressions.
     */
    public void testGetValueCountMetric() throws Exception {
        final ValueDistributionAnalyzer vd = newSingleColumnAnalyzer();

        // resulting counts: hello=4, world=2, foobar=2, null=4 (12 in total)
        vd.runInternal(new MockInputRow(), "hello", 1);
        vd.runInternal(new MockInputRow(), "world", 1);
        vd.runInternal(new MockInputRow(), "foobar", 2);
        vd.runInternal(new MockInputRow(), "world", 1);
        vd.runInternal(new MockInputRow(), "hello", 3);
        vd.runInternal(new MockInputRow(), null, 1);
        vd.runInternal(new MockInputRow(), null, 3);

        final ValueCountingAnalyzerResult result = vd.getResult();
        final AnalyzerBeanDescriptor<?> desc = Descriptors.ofAnalyzer(ValueDistributionAnalyzer.class);
        final MetricDescriptor metric = desc.getResultMetric("Value count");

        final Collection<String> suggestions = metric.getMetricParameterSuggestions(result);
        assertEquals("[hello, foobar, world]", suggestions.toString());

        assertEquals(4, metric.getValue(result, new MetricParameters("hello")));
        assertEquals(2, metric.getValue(result, new MetricParameters("world")));
        assertEquals(6, metric.getValue(result, new MetricParameters("IN [hello,world]")));
        // 12 in total minus foobar (2) and world (2)
        assertEquals(8, metric.getValue(result, new MetricParameters("NOT IN [foobar,world]")));
    }

    /**
     * Checks the top values list and the {@code toString()} rendering of a
     * result produced for a single (ungrouped) column.
     */
    public void testGetValueDistribution() throws Exception {
        final ValueDistributionAnalyzer vd = newSingleColumnAnalyzer();

        vd.runInternal(new MockInputRow(), "hello", 1);
        vd.runInternal(new MockInputRow(), "hello", 1);
        vd.runInternal(new MockInputRow(), "world", 3);

        final ValueCountingAnalyzerResult result = vd.getResult();

        // fail with a readable assertion instead of a ClassCastException
        assertTrue("Unexpected result type: " + result.getClass().getName(),
                result instanceof SingleValueDistributionResult);
        final ValueCountList topValues = ((SingleValueDistributionResult) result).getTopValues();

        assertEquals(2, topValues.getActualSize());
        assertEquals("[world->3]", topValues.getValueCounts().get(0).toString());
        assertEquals("[hello->2]", topValues.getValueCounts().get(1).toString());

        assertEquals(0, result.getNullCount());
        assertEquals(0, result.getUniqueCount().intValue());

        final String[] resultLines = result.toString().split("\n");
        assertEquals(3, resultLines.length);
        assertEquals("Value distribution for: col", resultLines[0]);
        assertEquals(" - world: 3", resultLines[1]);
        assertEquals(" - hello: 2", resultLines[2]);
    }

    /**
     * Runs the analyzer with a second input column and checks that the result
     * is a {@link GroupedValueCountingAnalyzerResult} whose {@code toString()}
     * lists the values per group. In this mode {@code runInternal} takes the
     * value, then the group name, then the count, as the expected output shows.
     */
    public void testGroupedRun() throws Exception {
        final ValueDistributionAnalyzer vd = new ValueDistributionAnalyzer(
                new MockInputColumn<String>("foo", String.class),
                new MockInputColumn<String>("bar", String.class), true, null, null);

        vd.runInternal(new MockInputRow(), "Copenhagen N", "2200", 3);
        vd.runInternal(new MockInputRow(), "Copenhagen E", "2100", 2);
        vd.runInternal(new MockInputRow(), "Copenhagen", "1732", 4);
        vd.runInternal(new MockInputRow(), "Coppenhagen", "1732", 3);

        final ValueCountingAnalyzerResult result = vd.getResult();
        assertTrue(result instanceof GroupedValueCountingAnalyzerResult);

        final String[] expectedLines = {
                "Value distribution for column: foo",
                "",
                "Group: 1732",
                " - Copenhagen: 4",
                " - Coppenhagen: 3",
                "",
                "Group: 2100",
                " - Copenhagen E: 2",
                "",
                "Group: 2200",
                " - Copenhagen N: 3"
        };

        final String[] resultLines = result.toString().split("\n");
        assertEquals("Number of result lines", expectedLines.length, resultLines.length);
        for (int i = 0; i < expectedLines.length; i++) {
            assertEquals("Result line " + i, expectedLines[i], resultLines[i]);
        }
    }
}