// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.semantic.statistics;
import static org.junit.Assert.assertEquals;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import org.talend.dataquality.common.inference.Analyzer;
import org.talend.dataquality.common.inference.Analyzers;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;
import org.talend.dataquality.semantic.recognizer.CategoryRecognizerBuilder;
public class SemanticCompoundAnalyzerTest {
private CategoryRecognizerBuilder builder;
private static final String PHONE = "PHONE";
final List<String[]> TEST_RECORDS_PHONE = new ArrayList<String[]>() {
private static final long serialVersionUID = 1L;
{
add(new String[] { "0145689856", "02045689856", "15207777777", "15207777777", "0145689856" });
add(new String[] { "0145689856", "02045689856", "15207777777", "02045689856", "0145689856" });
add(new String[] { "0145689856", "02045689856", "15207777777", "15207777777", "15207777777" });
add(new String[] { "0145689856", "02045689856", "15207777777", "15207777777", "02045689856" });
}
};
final List<String> EXPECTED_CATEGORY_PHONE = Arrays.asList(new String[] { SemanticCategoryEnum.FR_PHONE.name(),
SemanticCategoryEnum.DE_PHONE.name(), SemanticCategoryEnum.US_PHONE.name(), PHONE, PHONE });
@Before
public void setUp() throws Exception {
final URI ddPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_DD_PATH).toURI();
final URI kwPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_KW_PATH).toURI();
final URI rePath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_RE_PATH).toURI();
builder = CategoryRecognizerBuilder.newBuilder() //
.ddPath(ddPath) //
.kwPath(kwPath).regexPath(rePath) //
.lucene();
}
@Test
public void testPhone() {
SemanticAnalyzer semanticAnalyzer = new SemanticAnalyzer(builder);
Analyzer<Analyzers.Result> analyzer = Analyzers.with(semanticAnalyzer);
analyzer.init();
for (String[] record : TEST_RECORDS_PHONE) {
analyzer.analyze(record);
}
analyzer.end();
for (int i = 0; i < EXPECTED_CATEGORY_PHONE.size(); i++) {
Analyzers.Result result = analyzer.getResult().get(i);
if (result.exist(SemanticType.class)) {
final SemanticType semanticType = result.get(SemanticType.class);
final String suggestedCategory = semanticType.getSuggestedCategory();
assertEquals("Unexpected Category for i = " + i, EXPECTED_CATEGORY_PHONE.get(i), suggestedCategory);
}
}
}
}