/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.extension.output;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.List;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.util.FileResource;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.UpdateableDatastoreConnection;
import org.datacleaner.data.MockInputColumn;
import org.datacleaner.data.MockInputRow;
import org.junit.After;
import org.junit.Test;
/**
 * Tests for {@link CreateCsvFileAnalyzer}: validation of the target file, sorting of the written
 * rows on a chosen column, and custom column headers.
 *
 * <p>Every test stores its analyzer in the {@link #analyzer} field (never in a local variable) so
 * that {@link #tearDown()} can delete the CSV file the test produced.</p>
 */
public class CreateCsvFileAnalyzerTest {

    /** Id values of the shared 13-row fixture, deliberately out of order so sorting is observable. */
    private static final int[] UNSORTED_IDS = { 7, 9, 2, 3, 4, 12, 6, 0, 8, 1, 10, 11, 5 };

    /** Analyzer under test; assigned by each test and used by {@link #tearDown()} for cleanup. */
    private CreateCsvFileAnalyzer analyzer;

    /**
     * Deletes the file referenced by the analyzer of the test that just ran, if there is one.
     */
    @After
    public void tearDown() {
        if ((analyzer != null) && (analyzer.file != null)) {
            ((FileResource) analyzer.file).getFile().delete();
        }
    }

    /**
     * Checks the suggested label and the behaviour of {@code validate()} for a missing and an
     * existing target file, with {@code overwriteFileIfExists} both off and on.
     */
    @Test
    public void test() throws Exception {
        prepareAnalyzer("target/csvtest.csv");

        // Case 1 - file does not exist
        assertFalse(analyzer.file.isExists());
        assertEquals("csvtest.csv", analyzer.getSuggestedLabel());

        analyzer.overwriteFileIfExists = false;
        analyzer.validate();

        analyzer.overwriteFileIfExists = true;
        analyzer.validate();

        // Case 2 - file exists
        final boolean createNewFile = ((FileResource) analyzer.file).getFile().createNewFile();
        assertTrue(createNewFile);

        try {
            assertTrue(analyzer.file.isExists());
            analyzer.overwriteFileIfExists = false;
            analyzer.validate();
            fail("Exception expected");
        } catch (final Exception e) {
            assertEquals("The file already exists. Please configure the job to overwrite the existing file.",
                    e.getMessage());
        }

        assertTrue(analyzer.file.isExists());
        analyzer.overwriteFileIfExists = true;
        analyzer.validate();

        ((FileResource) analyzer.file).getFile().delete();
        assertFalse(analyzer.file.isExists());
    }

    /**
     * Sorting on an {@code Integer} column must yield the ids in numerical order.
     */
    @Test
    public void testSortNumerical() throws Exception {
        final InputColumn<String> testColumn = new MockInputColumn<>("TestColumn");
        final InputColumn<Integer> idColumn = new MockInputColumn<>("IdToSort", Integer.class);

        prepareAnalyzer("target/csvtest-sortnumerical.csv");
        final String targetFilename = analyzer.file.getName();

        analyzer.columns = new InputColumn<?>[] { testColumn, idColumn };
        analyzer.columnToBeSortedOn = idColumn;
        analyzer.init();

        writeRows(createNumberedRows(testColumn, idColumn));

        final List<Integer> resultIds = new ArrayList<>(UNSORTED_IDS.length);
        for (final String value : readColumn("csvtest-sortnumerical", targetFilename, 1)) {
            resultIds.add(Integer.parseInt(value));
        }

        assertEquals("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]", resultIds.toString());
    }

    /**
     * Sorting on a {@code String} column must yield the ids in lexicographic order
     * (so "10" comes before "2").
     */
    @Test
    public void testSortLexicographic() throws Exception {
        final InputColumn<String> testColumn = new MockInputColumn<>("TestColumn");
        final InputColumn<String> idColumn = new MockInputColumn<>("IdToSort", String.class);

        // Uses its own output file; it previously shared the path of testSortNumerical.
        prepareAnalyzer("target/csvtest-sortlexicographic.csv");
        final String targetFilename = analyzer.file.getName();

        analyzer.columns = new InputColumn<?>[] { testColumn, idColumn };
        analyzer.columnToBeSortedOn = idColumn;
        analyzer.init();

        writeRows(createNumberedRows(testColumn, idColumn));

        final List<String> resultIds = readColumn("csvtest-sortlexicographic", targetFilename, 1);

        assertEquals("[0, 1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]", resultIds.toString());
    }

    /**
     * Sorting on a {@code String} column must place values that differ only in case next to
     * each other, capitalized variant first.
     */
    @Test
    public void testSortLexicographicCaseSensitivity() throws Exception {
        final InputColumn<String> sortColumn = new MockInputColumn<>("SortColumn");
        final InputColumn<String> someColumn = new MockInputColumn<>("SomeColumn", String.class);

        prepareAnalyzer("target/csvtest-sortlexicographiccasesensitivity.csv");
        final String targetFilename = analyzer.file.getName();

        analyzer.columns = new InputColumn<?>[] { sortColumn, someColumn };
        analyzer.columnToBeSortedOn = sortColumn;
        analyzer.init();

        final String[] names =
                { "Claudia", "Dennis", "Kasper", "Tomasz", "claudia", "dennis", "kasper", "tomasz" };
        final InputRow[] rows = new InputRow[names.length];
        for (int i = 0; i < names.length; i++) {
            // The second column simply numbers the rows 1..8 in insertion order.
            rows[i] = new MockInputRow().put(sortColumn, names[i]).put(someColumn, i + 1);
        }
        writeRows(rows);

        final List<String> resultIds =
                readColumn("csvtest-sortlexicographiccasesensitivity", targetFilename, 0);

        assertEquals("[Claudia, claudia, Dennis, dennis, Kasper, kasper, Tomasz, tomasz]", resultIds.toString());
    }

    /**
     * Names configured through {@code fields} must be used as the column headers of the written
     * file instead of the input column names.
     */
    @Test
    public void testCustomColumnHeaders() throws Exception {
        final InputColumn<String> stringColumn = new MockInputColumn<>("StringColumn");
        final InputColumn<Integer> integerColumn = new MockInputColumn<>("IntegerColumn");

        prepareAnalyzer("target/csvtest-customcolumnheaders.csv");
        final String targetFilename = analyzer.file.getName();

        analyzer.columns = new InputColumn<?>[] { stringColumn, integerColumn };
        analyzer.fields = new String[] { "CustomNameForStringColumn", "CustomNameForIntegerColumn" };
        analyzer.init();

        writeRows(createNumberedRows(stringColumn, integerColumn));

        final CsvDatastore outputDatastore = new CsvDatastore("csvtest-customcolumnheaders", analyzer.file);
        try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
            final String[] columnNames =
                    outputDatastoreConnection.getSchemaNavigator().getDefaultSchema().getTableByName(targetFilename)
                            .getColumnNames();

            assertEquals(2, columnNames.length);
            assertEquals("CustomNameForStringColumn", columnNames[0]);
            assertEquals("CustomNameForIntegerColumn", columnNames[1]);
        }
    }

    /**
     * Creates the analyzer under test, points it at {@code path}, calls {@code initTempFile()}
     * and stores it in {@link #analyzer} so that {@link #tearDown()} cleans up the file.
     *
     * @param path location of the CSV file the analyzer should write to
     */
    private void prepareAnalyzer(final String path) throws Exception {
        analyzer = new CreateCsvFileAnalyzer();
        analyzer.file = new FileResource(path);
        analyzer.initTempFile();
        assertNotNull(analyzer.file);
    }

    /**
     * Builds the shared 13-row fixture: row {@code i} carries the label {@code "rowNN"} (two-digit
     * index) in {@code labelColumn} and {@code UNSORTED_IDS[i]} in {@code idColumn}.
     *
     * @param labelColumn column receiving the row label
     * @param idColumn column receiving the (unsorted) integer id
     * @return the rows in insertion order
     */
    private static InputRow[] createNumberedRows(final InputColumn<?> labelColumn, final InputColumn<?> idColumn) {
        final InputRow[] rows = new InputRow[UNSORTED_IDS.length];
        for (int i = 0; i < rows.length; i++) {
            final String label = (i < 10 ? "row0" : "row") + i;
            rows[i] = new MockInputRow().put(labelColumn, label).put(idColumn, UNSORTED_IDS[i]);
        }
        return rows;
    }

    /**
     * Passes every row to the analyzer, using the row's array index as the second argument of
     * {@code run}, and then calls {@code getResult()}.
     *
     * @param rows rows to hand to the analyzer, in order
     */
    private void writeRows(final InputRow[] rows) throws Exception {
        for (int i = 0; i < rows.length; i++) {
            analyzer.run(rows[i], i);
        }
        analyzer.getResult();
    }

    /**
     * Opens the analyzer's file as a {@link CsvDatastore} and collects the values of one column,
     * in the order the rows are returned by the query.
     *
     * @param datastoreName name given to the temporary datastore
     * @param tableName name of the table to query
     * @param columnIndex zero-based index of the column to read
     * @return the column values as strings
     */
    private List<String> readColumn(final String datastoreName, final String tableName, final int columnIndex)
            throws Exception {
        final List<String> values = new ArrayList<>();
        final CsvDatastore outputDatastore = new CsvDatastore(datastoreName, analyzer.file);
        try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
            final DataContext dataContext = outputDatastoreConnection.getDataContext();
            try (DataSet dataSet = dataContext.query().from(tableName).selectAll().execute()) {
                while (dataSet.next()) {
                    final Row row = dataSet.getRow();
                    values.add((String) row.getValue(columnIndex));
                }
            }
        }
        return values;
    }
}