/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.extension.output;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.util.FileResource;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.configuration.DataCleanerConfigurationImpl;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.ExcelDatastore;
import org.datacleaner.connection.UpdateableDatastoreConnection;
import org.datacleaner.data.MockInputColumn;
import org.datacleaner.data.MockInputRow;
import org.datacleaner.descriptors.FilterDescriptor;
import org.datacleaner.job.builder.AnalysisJobBuilder;
import org.easymock.EasyMock;
import org.junit.Test;
import junit.framework.TestCase;
public class CreateExcelSpreadsheetAnalyzerTest extends TestCase {
private File generatedFile;
@Override
protected void tearDown() throws Exception {
if (generatedFile != null) {
generatedFile.delete();
}
super.tearDown();
}
@Test
public void testValidateSheetName() throws Exception {
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
analyzer.sheetName = "foo";
analyzer.validate();
analyzer.sheetName = "foo.bar";
try {
analyzer.validate();
fail("Exception expected");
} catch (final Exception e) {
assertEquals("Sheet name cannot contain '.'", e.getMessage());
}
}
@Test
public void testFixSheetName() throws Exception {
final Datastore datastore = EasyMock.createMock(Datastore.class);
final FilterDescriptor<?, ?> filterDescriptor = EasyMock.createMock(FilterDescriptor.class);
EasyMock.expect(datastore.openConnection()).andReturn(null);
EasyMock.expect(datastore.getName()).andReturn("data:store");
EasyMock.expect(filterDescriptor.getDisplayName()).andReturn("my fil-ter");
EasyMock.replay(datastore, filterDescriptor);
final AnalysisJobBuilder ajb = new AnalysisJobBuilder(new DataCleanerConfigurationImpl());
ajb.setDatastore(datastore);
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
analyzer.configureForFilterOutcome(ajb, filterDescriptor, "OUT.COME");
assertEquals("data-store-my fil-ter-OUT-COME", analyzer.sheetName);
EasyMock.verify(datastore, filterDescriptor);
}
@Test
public void testValidateOverwriteFile() throws Exception {
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
analyzer.sheetName = "foo";
analyzer.overwriteSheetIfExists = false;
assertNotNull(analyzer.file);
assertFalse(analyzer.file.exists());
analyzer.validate();
analyzer.overwriteSheetIfExists = true;
analyzer.validate();
assertFalse(analyzer.file.exists());
analyzer.file = new File("src/test/resources/multiple_Sheets.xlsx");
assertTrue(analyzer.file.exists());
analyzer.validate();
try {
analyzer.overwriteSheetIfExists = false;
assertFalse(analyzer.overwriteSheetIfExists);
analyzer.sheetName = "Sheet1";
analyzer.validate();
fail("Exception expected");
} catch (final Exception e) {
assertEquals("The sheet 'Sheet1' already exists. Please select another sheet name.", e.getMessage());
}
analyzer.overwriteSheetIfExists = true;
assertTrue(analyzer.overwriteSheetIfExists);
analyzer.sheetName = "Sheet1";
analyzer.validate();
analyzer.sheetName = "Bar";
assertTrue(analyzer.overwriteSheetIfExists);
analyzer.validate();
analyzer.overwriteSheetIfExists = false;
assertFalse(analyzer.overwriteSheetIfExists);
analyzer.validate();
}
@Test
public void testSortNumerical() throws Exception {
final String filename = "target/exceltest-sortnumerical.xlsx";
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
final InputColumn<String> testColumn = new MockInputColumn<>("TestColumn");
// Point of focus: MockInputColumn is of type Input, so it should be sorted as numbers
final InputColumn<Integer> idColumn = new MockInputColumn<>("IdToSort", Integer.class);
generatedFile = new File(filename);
analyzer.file = generatedFile;
assertNotNull(analyzer.file);
analyzer.sheetName = "foo";
analyzer.columns = new InputColumn<?>[2];
analyzer.columns[0] = testColumn;
analyzer.columns[1] = idColumn;
analyzer.columnToBeSortedOn = idColumn;
analyzer.validate();
analyzer.init();
final InputRow[] rows = new InputRow[13];
rows[0] = new MockInputRow().put(testColumn, "row00").put(idColumn, 7);
rows[1] = new MockInputRow().put(testColumn, "row01").put(idColumn, 9);
rows[2] = new MockInputRow().put(testColumn, "row02").put(idColumn, 2);
rows[3] = new MockInputRow().put(testColumn, "row03").put(idColumn, 3);
rows[4] = new MockInputRow().put(testColumn, "row04").put(idColumn, 4);
rows[5] = new MockInputRow().put(testColumn, "row05").put(idColumn, 12);
rows[6] = new MockInputRow().put(testColumn, "row06").put(idColumn, 6);
rows[7] = new MockInputRow().put(testColumn, "row07").put(idColumn, 0);
rows[8] = new MockInputRow().put(testColumn, "row08").put(idColumn, 8);
rows[9] = new MockInputRow().put(testColumn, "row09").put(idColumn, 1);
rows[10] = new MockInputRow().put(testColumn, "row10").put(idColumn, 10);
rows[11] = new MockInputRow().put(testColumn, "row11").put(idColumn, 11);
rows[12] = new MockInputRow().put(testColumn, "row12").put(idColumn, 5);
for (int i = 0; i < rows.length; i++) {
analyzer.run(rows[i], i);
}
analyzer.getResult();
final List<Integer> resultIds = new ArrayList<>(13);
final ExcelDatastore outputDatastore =
new ExcelDatastore(filename, new FileResource(analyzer.file), analyzer.file.getAbsolutePath());
try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
final DataContext dataContext = outputDatastoreConnection.getDataContext();
try (DataSet dataSet = dataContext.query().from("foo").selectAll().execute()) {
while (dataSet.next()) {
final Row row = dataSet.getRow();
final Integer idValue = Integer.parseInt((String) row.getValue(1));
resultIds.add(idValue);
}
}
}
assertEquals("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]", resultIds.toString());
}
@Test
public void testSortLexicographic() throws Exception {
final String filename = "target/exceltest-sortlexicographic.xlsx";
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
final InputColumn<String> testColumn = new MockInputColumn<>("TestColumn");
// Point of focus: MockInputColumn is of type String, so it should be sorted alphabetically
final InputColumn<String> idColumn = new MockInputColumn<>("IdToSort", String.class);
generatedFile = new File(filename);
analyzer.file = generatedFile;
assertNotNull(analyzer.file);
analyzer.sheetName = "foo";
analyzer.columns = new InputColumn<?>[2];
analyzer.columns[0] = testColumn;
analyzer.columns[1] = idColumn;
analyzer.columnToBeSortedOn = idColumn;
analyzer.validate();
analyzer.init();
final InputRow[] rows = new InputRow[13];
rows[0] = new MockInputRow().put(testColumn, "row00").put(idColumn, 7);
rows[1] = new MockInputRow().put(testColumn, "row01").put(idColumn, 9);
rows[2] = new MockInputRow().put(testColumn, "row02").put(idColumn, 2);
rows[3] = new MockInputRow().put(testColumn, "row03").put(idColumn, 3);
rows[4] = new MockInputRow().put(testColumn, "row04").put(idColumn, 4);
rows[5] = new MockInputRow().put(testColumn, "row05").put(idColumn, 12);
rows[6] = new MockInputRow().put(testColumn, "row06").put(idColumn, 6);
rows[7] = new MockInputRow().put(testColumn, "row07").put(idColumn, 0);
rows[8] = new MockInputRow().put(testColumn, "row08").put(idColumn, 8);
rows[9] = new MockInputRow().put(testColumn, "row09").put(idColumn, 1);
rows[10] = new MockInputRow().put(testColumn, "row10").put(idColumn, 10);
rows[11] = new MockInputRow().put(testColumn, "row11").put(idColumn, 11);
rows[12] = new MockInputRow().put(testColumn, "row12").put(idColumn, 5);
for (int i = 0; i < rows.length; i++) {
analyzer.run(rows[i], i);
}
analyzer.getResult();
final List<Integer> resultIds = new ArrayList<>(13);
final ExcelDatastore outputDatastore =
new ExcelDatastore(filename, new FileResource(analyzer.file), analyzer.file.getAbsolutePath());
try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
final DataContext dataContext = outputDatastoreConnection.getDataContext();
try (DataSet dataSet = dataContext.query().from("foo").selectAll().execute()) {
while (dataSet.next()) {
final Row row = dataSet.getRow();
final Integer idValue = Integer.parseInt((String) row.getValue(1));
resultIds.add(idValue);
}
}
}
assertEquals("[0, 1, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9]", resultIds.toString());
}
@Test
public void testSortLexicographicCaseSensitivity() throws Exception {
final String filename = "target/exceltest-sortlexicographiccasesensitivity.xlsx";
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
final InputColumn<String> sortColumn = new MockInputColumn<>("SortColumn");
final InputColumn<String> someColumn = new MockInputColumn<>("SomeColumn", String.class);
generatedFile = new File(filename);
analyzer.file = generatedFile;
assertNotNull(analyzer.file);
analyzer.sheetName = "foo";
assertNotNull(analyzer.file);
analyzer.columns = new InputColumn<?>[2];
analyzer.columns[0] = sortColumn;
analyzer.columns[1] = someColumn;
analyzer.columnToBeSortedOn = sortColumn;
analyzer.validate();
analyzer.init();
final InputRow[] rows = new InputRow[8];
rows[0] = new MockInputRow().put(sortColumn, "Claudia").put(someColumn, 1);
rows[1] = new MockInputRow().put(sortColumn, "Dennis").put(someColumn, 2);
rows[2] = new MockInputRow().put(sortColumn, "Kasper").put(someColumn, 3);
rows[3] = new MockInputRow().put(sortColumn, "Tomasz").put(someColumn, 4);
rows[4] = new MockInputRow().put(sortColumn, "claudia").put(someColumn, 5);
rows[5] = new MockInputRow().put(sortColumn, "dennis").put(someColumn, 6);
rows[6] = new MockInputRow().put(sortColumn, "kasper").put(someColumn, 7);
rows[7] = new MockInputRow().put(sortColumn, "tomasz").put(someColumn, 8);
for (int i = 0; i < rows.length; i++) {
analyzer.run(rows[i], i);
}
analyzer.getResult();
final List<String> resultIds = new ArrayList<>(13);
final ExcelDatastore outputDatastore =
new ExcelDatastore(filename, new FileResource(analyzer.file), analyzer.file.getAbsolutePath());
try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
final DataContext dataContext = outputDatastoreConnection.getDataContext();
try (DataSet dataSet = dataContext.query().from("foo").selectAll().execute()) {
while (dataSet.next()) {
final Row row = dataSet.getRow();
final String value = (String) row.getValue(0);
resultIds.add(value);
}
}
}
assertEquals("[Claudia, claudia, Dennis, dennis, Kasper, kasper, Tomasz, tomasz]", resultIds.toString());
}
@Test
public void testCustomColumnHeaders() throws Exception {
final String filename = "target/exceltest-customcolumnheaders.xlsx";
final CreateExcelSpreadsheetAnalyzer analyzer = new CreateExcelSpreadsheetAnalyzer();
final InputColumn<String> stringColumn = new MockInputColumn<>("StringColumn");
final InputColumn<Integer> integerColumn = new MockInputColumn<>("IntegerColumn");
generatedFile = new File(filename);
analyzer.file = generatedFile;
analyzer.initTempFile();
assertNotNull(analyzer.file);
analyzer.sheetName = "foo";
analyzer.columns = new InputColumn<?>[2];
analyzer.columns[0] = stringColumn;
analyzer.columns[1] = integerColumn;
analyzer.fields = new String[2];
analyzer.fields[0] = "CustomNameForStringColumn";
analyzer.fields[1] = "CustomNameForIntegerColumn";
analyzer.init();
final InputRow[] rows = new InputRow[13];
rows[0] = new MockInputRow().put(stringColumn, "row00").put(integerColumn, 7);
rows[1] = new MockInputRow().put(stringColumn, "row01").put(integerColumn, 9);
rows[2] = new MockInputRow().put(stringColumn, "row02").put(integerColumn, 2);
rows[3] = new MockInputRow().put(stringColumn, "row03").put(integerColumn, 3);
rows[4] = new MockInputRow().put(stringColumn, "row04").put(integerColumn, 4);
rows[5] = new MockInputRow().put(stringColumn, "row05").put(integerColumn, 12);
rows[6] = new MockInputRow().put(stringColumn, "row06").put(integerColumn, 6);
rows[7] = new MockInputRow().put(stringColumn, "row07").put(integerColumn, 0);
rows[8] = new MockInputRow().put(stringColumn, "row08").put(integerColumn, 8);
rows[9] = new MockInputRow().put(stringColumn, "row09").put(integerColumn, 1);
rows[10] = new MockInputRow().put(stringColumn, "row10").put(integerColumn, 10);
rows[11] = new MockInputRow().put(stringColumn, "row11").put(integerColumn, 11);
rows[12] = new MockInputRow().put(stringColumn, "row12").put(integerColumn, 5);
for (int i = 0; i < rows.length; i++) {
analyzer.run(rows[i], i);
}
analyzer.getResult();
final ExcelDatastore outputDatastore =
new ExcelDatastore(filename, new FileResource(analyzer.file), analyzer.file.getAbsolutePath());
try (UpdateableDatastoreConnection outputDatastoreConnection = outputDatastore.openConnection()) {
final String[] columnNames =
outputDatastoreConnection.getSchemaNavigator().getDefaultSchema().getTableByName(analyzer.sheetName)
.getColumnNames();
assertEquals(2, columnNames.length);
assertEquals("CustomNameForStringColumn", columnNames[0]);
assertEquals("CustomNameForIntegerColumn", columnNames[1]);
}
}
}