// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.semantic;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.talend.dataquality.semantic.classifier.SemanticCategoryEnum;
/**
* created by talend on 2015-07-28 Detailled comment.
*
*/
class SemanticStatisticsTestBase {
protected final List<List<String[]>> INPUT_RECORDS = new ArrayList<List<String[]>>() {
private static final long serialVersionUID = 1L;
{
add(getRecords(SemanticStatisticsTestBase.class.getResourceAsStream("customers_100_bug_TDQ10380.csv")));
add(getRecords(SemanticStatisticsTestBase.class.getResourceAsStream("avengers.csv")));
add(getRecords(SemanticStatisticsTestBase.class.getResourceAsStream("gender.csv")));
add(getRecords(SemanticStatisticsTestBase.class.getResourceAsStream("dataset_with_invalid_records.csv")));
}
};
protected final List<String[]> EXPECTED_CATEGORIES = new ArrayList<String[]>() {
private static final long serialVersionUID = 1L;
{
add(new String[] { // dataset[0]
"", //
SemanticCategoryEnum.FIRST_NAME.getId(), //
SemanticCategoryEnum.CITY.getId(), //
SemanticCategoryEnum.US_STATE_CODE.getId(), //
"", //
SemanticCategoryEnum.CITY.getId(), //
"", //
"", //
"" //
});
add(new String[] { // dataset[1]
"", //
SemanticCategoryEnum.FIRST_NAME.getId(), //
SemanticCategoryEnum.LAST_NAME.getId(), //
"", //
SemanticCategoryEnum.CITY.getId() //
});
add(new String[] { // dataset[2]
SemanticCategoryEnum.FIRST_NAME.getId(), //
"", //
SemanticCategoryEnum.GENDER.getId() //
});
add(new String[] { // dataset[3]
SemanticCategoryEnum.FIRST_NAME.getId(), //
""//
});
}
};
protected static List<String[]> getRecords(InputStream inputStream) {
return getRecords(inputStream, ";");
}
protected static List<String[]> getRecords(InputStream inputStream, String lineSeparator) {
if (inputStream == null) {
throw new IllegalArgumentException("Input stream cannot be null.");
}
try {
List<String[]> records = new ArrayList<String[]>();
final List<String> lines = IOUtils.readLines(inputStream);
for (String line : lines) {
String[] record = StringUtils.splitByWholeSeparatorPreserveAllTokens(line, lineSeparator);
records.add(record);
}
return records;
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
try {
inputStream.close();
} catch (IOException e) {
// Silent ignore
e.printStackTrace();
}
}
}
}