/**
* AnalyzerBeans
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.eobjects.analyzer.beans.transform;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import junit.framework.TestCase;
import org.eobjects.analyzer.beans.convert.ConvertToNumberTransformer;
import org.eobjects.analyzer.beans.valuedist.ValueDistributionAnalyzer;
import org.eobjects.analyzer.beans.valuedist.ValueDistributionAnalyzerResult;
import org.eobjects.analyzer.configuration.AnalyzerBeansConfigurationImpl;
import org.eobjects.analyzer.connection.CsvDatastore;
import org.eobjects.analyzer.connection.Datastore;
import org.eobjects.analyzer.data.MutableInputColumn;
import org.eobjects.analyzer.job.AnalysisJob;
import org.eobjects.analyzer.job.builder.AnalysisJobBuilder;
import org.eobjects.analyzer.job.builder.AnalyzerJobBuilder;
import org.eobjects.analyzer.job.builder.TransformerJobBuilder;
import org.eobjects.analyzer.job.runner.AnalysisResultFuture;
import org.eobjects.analyzer.job.runner.AnalysisRunnerImpl;
import org.eobjects.analyzer.reference.Dictionary;
import org.eobjects.analyzer.reference.ReferenceDataCatalogImpl;
import org.eobjects.analyzer.reference.SimpleDictionary;
import org.eobjects.analyzer.reference.SimpleSynonym;
import org.eobjects.analyzer.reference.SimpleSynonymCatalog;
import org.eobjects.analyzer.reference.StringPattern;
import org.eobjects.analyzer.reference.SynonymCatalog;
import org.eobjects.analyzer.result.AnalyzerResult;
public class DictionaryMatcherTransformerTest extends TestCase {
public void testParseAndAssignDictionaries() throws Throwable {
Collection<Dictionary> dictionaries = new ArrayList<Dictionary>();
dictionaries.add(new SimpleDictionary("eobjects.org products", "MetaModel", "DataCleaner", "AnalyzerBeans"));
dictionaries.add(new SimpleDictionary("apache products", "commons-lang", "commons-math", "commons-codec",
"commons-logging"));
dictionaries.add(new SimpleDictionary("logging products", "commons-logging", "log4j", "slf4j", "java.util.Logging"));
Collection<SynonymCatalog> synonymCatalogs = new ArrayList<SynonymCatalog>();
synonymCatalogs.add(new SimpleSynonymCatalog("translated terms", new SimpleSynonym("hello", "howdy", "hi", "yo",
"hey"), new SimpleSynonym("goodbye", "bye", "see you", "hey")));
Collection<StringPattern> stringPatterns = new ArrayList<StringPattern>();
ReferenceDataCatalogImpl ref = new ReferenceDataCatalogImpl(dictionaries, synonymCatalogs, stringPatterns);
Datastore datastore = new CsvDatastore("my database", "src/test/resources/projects.csv");
AnalyzerBeansConfigurationImpl conf = new AnalyzerBeansConfigurationImpl();
AnalysisJobBuilder job = new AnalysisJobBuilder(conf);
job.setDatastore(datastore);
job.addSourceColumns("product", "version");
TransformerJobBuilder<DictionaryMatcherTransformer> tjb1 = job.addTransformer(DictionaryMatcherTransformer.class);
tjb1.setConfiguredProperty(
"Dictionaries",
new Dictionary[] { ref.getDictionary("eobjects.org products"), ref.getDictionary("apache products"),
ref.getDictionary("logging products") });
tjb1.addInputColumn(job.getSourceColumnByName("product"));
List<MutableInputColumn<?>> outputColumns = tjb1.getOutputColumns();
assertEquals(3, outputColumns.size());
outputColumns.get(0).setName("eobjects match");
outputColumns.get(1).setName("apache match");
outputColumns.get(2).setName("logging match");
TransformerJobBuilder<ConvertToNumberTransformer> tjb2 = job.addTransformer(ConvertToNumberTransformer.class);
tjb2.addInputColumn(outputColumns.get(2));
tjb2.getOutputColumns().get(0).setName("logging match -> number");
AnalyzerJobBuilder<ValueDistributionAnalyzer> ajb = job
.addAnalyzer(ValueDistributionAnalyzer.class);
ajb.addInputColumns(tjb1.getOutputColumns());
ajb.addInputColumns(tjb2.getOutputColumns());
assertTrue(job.isConfigured());
AnalysisJob analysisJob = job.toAnalysisJob();
AnalysisResultFuture resultFuture = new AnalysisRunnerImpl(conf).run(analysisJob);
if (!resultFuture.isSuccessful()) {
job.close();
throw resultFuture.getErrors().get(0);
}
List<AnalyzerResult> results = resultFuture.getResults();
assertEquals(4, results.size());
ValueDistributionAnalyzerResult res = (ValueDistributionAnalyzerResult) results.get(0);
assertEquals("eobjects match", res.getName());
assertEquals(8, res.getCount("true").intValue());
assertEquals(4, res.getCount("false").intValue());
res = (ValueDistributionAnalyzerResult) results.get(1);
assertEquals("apache match", res.getName());
assertEquals(2, res.getCount("true").intValue());
assertEquals(10, res.getCount("false").intValue());
res = (ValueDistributionAnalyzerResult) results.get(2);
assertEquals("logging match", res.getName());
assertEquals(3, res.getCount("true").intValue());
assertEquals(9, res.getCount("false").intValue());
res = (ValueDistributionAnalyzerResult) results.get(3);
assertEquals("logging match -> number", res.getName());
assertEquals(3, res.getCount("1").intValue());
assertEquals(9, res.getCount("0").intValue());
job.close();
}
public void testTransform() throws Exception {
Dictionary[] dictionaries = new Dictionary[] {
new SimpleDictionary("danish male names", "kasper", "kim", "asbjørn"),
new SimpleDictionary("danish female names", "trine", "kim", "lene") };
DictionaryMatcherTransformer transformer = new DictionaryMatcherTransformer(null, dictionaries);
assertEquals("[true, false]", Arrays.toString(transformer.transform("kasper")));
assertEquals("[false, false]", Arrays.toString(transformer.transform("foobar")));
assertEquals("[false, true]", Arrays.toString(transformer.transform("trine")));
assertEquals("[true, true]", Arrays.toString(transformer.transform("kim")));
transformer._outputType = MatchOutputType.INPUT_OR_NULL;
assertEquals("[kim, kim]", Arrays.toString(transformer.transform("kim")));
assertEquals("[null, trine]", Arrays.toString(transformer.transform("trine")));
}
}