// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.semantic;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.talend.dataquality.common.inference.Analyzer;
import org.talend.dataquality.common.inference.AnalyzerSupplier;
import org.talend.dataquality.common.inference.ConcurrentAnalyzer;
import org.talend.dataquality.semantic.recognizer.CategoryRecognizerBuilder;
import org.talend.dataquality.semantic.statistics.SemanticAnalyzer;
import org.talend.dataquality.semantic.statistics.SemanticType;
public class ConcurrentAnalyzerTest extends SemanticStatisticsTestBase {
private static Logger log = LoggerFactory.getLogger(ConcurrentAnalyzerTest.class);
private AtomicBoolean errorOccurred = new AtomicBoolean();
@Before
public void setUp() throws Exception {
errorOccurred.set(false);
}
@Test
public void testThreadSafeConcurrentAccess() {
try {
URI ddPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_DD_PATH).toURI();
URI kwPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_KW_PATH).toURI();
assertNotNull("Keyword dictionary not loaded", kwPath);
assertNotNull("data dictionary not loaded", ddPath);
final CategoryRecognizerBuilder builder = CategoryRecognizerBuilder.newBuilder() //
.ddPath(ddPath) //
.kwPath(kwPath) //
.lucene();
AnalyzerSupplier<Analyzer<SemanticType>> supplier = new AnalyzerSupplier<Analyzer<SemanticType>>() {
@Override
public Analyzer<SemanticType> get() {
return new SemanticAnalyzer(builder);
}
};
final Analyzer<SemanticType> analyzer = ConcurrentAnalyzer.make(supplier, 2);
Runnable r = new Runnable() {
@Override
public void run() {
doConcurrentAccess(analyzer, true);
}
};
List<Thread> workers = new ArrayList<>();
for (int i = 0; i < 20; i++) {
workers.add(new Thread(r));
}
for (Thread worker : workers) {
worker.start();
}
for (Thread worker : workers) {
worker.join();
}
assertEquals("ConcurrentAccess not failed", false, errorOccurred.get());
} catch (URISyntaxException e) {
e.printStackTrace();
fail("Problem while loading dictionaries");
} catch (InterruptedException e) {
e.printStackTrace();
fail("Thread has been interrupted");
}
}
@Test
public void testThreadUnsafeConcurrentAccess() throws Exception {
final URI ddPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_DD_PATH).toURI();
final URI kwPath = this.getClass().getResource(CategoryRecognizerBuilder.DEFAULT_KW_PATH).toURI();
final CategoryRecognizerBuilder builder = CategoryRecognizerBuilder.newBuilder() //
.ddPath(ddPath) //
.kwPath(kwPath) //
.lucene();
try (Analyzer<SemanticType> analyzer = new SemanticAnalyzer(builder)) {
Runnable r = new Runnable() {
@Override
public void run() {
doConcurrentAccess(analyzer, false);
}
};
List<Thread> workers = new ArrayList<>();
for (int i = 0; i < 20; i++) {
workers.add(new Thread(r));
}
for (Thread worker : workers) {
worker.start();
}
for (Thread worker : workers) {
worker.join();
}
assertEquals("ConcurrentAccess failed", true, errorOccurred.get());
}
}
private void doConcurrentAccess(Analyzer<SemanticType> semanticAnalyzer, boolean isLogEnabled) {
semanticAnalyzer.init();
int datasetID = (int) Math.floor(Math.random() * 4);
try {
for (String[] data : INPUT_RECORDS.get(datasetID)) {
try {
semanticAnalyzer.analyze(data);
} catch (Throwable e) {
errorOccurred.set(true);
if (isLogEnabled) {
log.error(e.getMessage(), e);
}
}
}
semanticAnalyzer.end();
List<SemanticType> result = semanticAnalyzer.getResult();
int columnIndex = 0;
if (result.isEmpty()) {
errorOccurred.set(true);
if (isLogEnabled) {
log.error("result is empty");
}
}
for (SemanticType columnSemanticType : result) {
if (!EXPECTED_CATEGORIES.get(datasetID)[columnIndex].equals(columnSemanticType.getSuggestedCategory())) {
errorOccurred.set(true);
if (isLogEnabled) {
log.error("assertion fails on column[" + columnIndex + "] of dataset[" + datasetID + "}. expected: "
+ EXPECTED_CATEGORIES.get(datasetID)[columnIndex] + " actual: "
+ columnSemanticType.getSuggestedCategory());
}
}
columnIndex++;
}
} catch (Exception e) {
errorOccurred.set(true);
if (isLogEnabled) {
log.error(e.getMessage(), e);
}
} finally {
try {
semanticAnalyzer.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}