/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.configuration;
import static org.junit.Assert.*;
import java.io.File;
import java.net.URI;
import java.util.Arrays;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.metamodel.schema.TableType;
import org.apache.metamodel.util.ClasspathResource;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.FileResource;
import org.apache.metamodel.util.Resource;
import org.datacleaner.connection.CouchDbDatastore;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.DataHubDatastore;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.ExcelDatastore;
import org.datacleaner.connection.FixedWidthDatastore;
import org.datacleaner.connection.JdbcDatastore;
import org.datacleaner.connection.JsonDatastore;
import org.datacleaner.connection.MongoDbDatastore;
import org.datacleaner.connection.SalesforceDatastore;
import org.datacleaner.metamodel.datahub.DataHubSecurityMode;
import org.datacleaner.reference.DatastoreDictionary;
import org.datacleaner.reference.DatastoreSynonymCatalog;
import org.datacleaner.reference.RegexStringPattern;
import org.datacleaner.reference.SimpleDictionary;
import org.datacleaner.reference.SimpleStringPattern;
import org.datacleaner.reference.TextFileDictionary;
import org.datacleaner.reference.TextFileSynonymCatalog;
import org.datacleaner.server.DirectConnectionHadoopClusterInformation;
import org.datacleaner.server.DirectoryBasedHadoopClusterInformation;
import org.datacleaner.server.EnvironmentBasedHadoopClusterInformation;
import org.datacleaner.test.MockHadoopConfigHelper;
import org.datacleaner.util.HadoopResource;
import org.datacleaner.util.xml.XmlUtils;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.junit.rules.TestName;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class DomConfigurationWriterTest {
/**
 * Value the tests expect inside the {@code <password>} element when a datastore is constructed with the
 * plain-text password {@code "password"}.
 */
private static final String PASSWORD_ENCODED = "enc:00em6E9KEO9FG42CH0yrVQ==";
/** Temporary folder rule; handed to {@code MockHadoopConfigHelper} by the Hadoop resource test. */
@Rule
public TemporaryFolder _temporaryFolder = new TemporaryFolder();
/** Exposes the running test's method name, which the round-trip tests use to name their output file. */
@Rule
public TestName testName = new TestName();
/** Writer under test; assigned a new instance in {@code setUp()}. */
private DomConfigurationWriter configurationWriter;
/**
 * Gives each test its own {@link DomConfigurationWriter} instance.
 */
@Before
public void setUp() throws Exception {
    this.configurationWriter = new DomConfigurationWriter();
}
/**
 * Converts a CSV datastore to an element with an explicit filename and checks the resulting XML:
 * the filename passed to {@code toElement} ("baz.txt") is the one that appears in the output.
 */
@Test
public void testExternalizeCsvDatastore() throws Exception {
    final CsvDatastore csvDatastore = new CsvDatastore("foo", "foo.txt");
    csvDatastore.setDescription("bar");

    final String actualXml = transform(configurationWriter.toElement(csvDatastore, "baz.txt"));

    final String expectedXml = "<csv-datastore description=\"bar\" name=\"foo\">\n"
            + " <filename>baz.txt</filename>\n"
            + " <quote-char>\"</quote-char>\n"
            + " <separator-char>,</separator-char>\n"
            + " <escape-char>\\</escape-char>\n"
            + " <encoding>UTF-8</encoding>\n"
            + " <fail-on-inconsistencies>true</fail-on-inconsistencies>\n"
            + " <multiline-values>true</multiline-values>\n"
            + " <header-line-number>1</header-line-number>\n"
            + "</csv-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Converts an Excel datastore to an element with an explicit filename and checks that only the
 * description, name and that filename end up in the XML.
 */
@Test
public void testExternalizeExcelDatastore() throws Exception {
    final ExcelDatastore excelDatastore = new ExcelDatastore("foo", new FileResource("foo.txt"), "foo.txt");
    excelDatastore.setDescription("bar");

    final String actualXml = transform(configurationWriter.toElement(excelDatastore, "baz.txt"));

    final String expectedXml = "<excel-datastore description=\"bar\" name=\"foo\">\n"
            + " <filename>baz.txt</filename>\n"
            + "</excel-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Checks both sides of {@code isExternalizable}: a classpath-backed CSV datastore is rejected (and
 * {@code externalize} throws {@link UnsupportedOperationException} for it), while a file-backed CSV
 * datastore is accepted and written with its path using the platform's file separator.
 */
@Test
public void testIsExternalizableAndExternalize() throws Exception {
    // Classpath resources are reported as not externalizable and must be refused.
    final Resource classpathResource = new ClasspathResource("foo.txt");
    final CsvDatastore classpathDatastore = new CsvDatastore("foo", classpathResource);
    assertFalse(configurationWriter.isExternalizable(classpathDatastore));
    try {
        configurationWriter.externalize(classpathDatastore);
        fail("Exception expected");
    } catch (final UnsupportedOperationException expected) {
        assertEquals("Unsupported resource type: ClasspathResource[foo.txt]", expected.getMessage());
    }

    // A plain file path is externalizable.
    final CsvDatastore fileDatastore = new CsvDatastore("foo", "src/test/resources/example-dates.csv");
    assertTrue(configurationWriter.isExternalizable(fileDatastore));
    final String actualXml = transform(configurationWriter.externalize(fileDatastore));

    // The expected filename uses the platform-specific separator between path segments.
    final String expectedPath = String.join(File.separator, "src", "test", "resources", "example-dates.csv");
    final String expectedXml = "<csv-datastore name=\"foo\">\n"
            + " <filename>" + expectedPath + "</filename>\n"
            + " <quote-char>\"</quote-char>\n"
            + " <separator-char>,</separator-char>\n"
            + " <escape-char>\\</escape-char>\n"
            + " <encoding>UTF-8</encoding>\n"
            + " <fail-on-inconsistencies>true</fail-on-inconsistencies>\n"
            + " <multiline-values>true</multiline-values>\n"
            + " <header-line-number>1</header-line-number>\n"
            + "</csv-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Externalizes two JDBC datastores (one URL/driver based, one JNDI based with table types and a
 * catalog), verifies their XML, and then checks that {@code removeDatastore} only removes an element
 * when the name matches exactly.
 */
@Test
public void testExternalizeJdbcDatastore() throws Exception {
    // URL + driver variant
    final JdbcDatastore urlDatastore = new JdbcDatastore("foo ds 1", "jdbc:foo//bar", "foo.bar.Baz");
    assertTrue(configurationWriter.isExternalizable(urlDatastore));
    final String urlXml = transform(configurationWriter.externalize(urlDatastore));
    final String expectedUrlXml = "<jdbc-datastore name=\"foo ds 1\">\n"
            + " <url>jdbc:foo//bar</url>\n"
            + " <driver>foo.bar.Baz</driver>\n"
            + " <multiple-connections>true</multiple-connections>\n"
            + "</jdbc-datastore>\n";
    assertEquals(expectedUrlXml, urlXml);

    // JNDI variant with table types and catalog name
    final TableType[] tableTypes = new TableType[] { TableType.VIEW, TableType.ALIAS };
    final JdbcDatastore jndiDatastore = new JdbcDatastore("foo ds 2", "JNDI_URL", tableTypes, "mycatalog");
    assertTrue(configurationWriter.isExternalizable(jndiDatastore));
    final String jndiXml = transform(configurationWriter.externalize(jndiDatastore));
    final String expectedJndiXml = "<jdbc-datastore name=\"foo ds 2\">\n"
            + " <datasource-jndi-url>JNDI_URL</datasource-jndi-url>\n"
            + " <table-types>\n"
            + " <table-type>VIEW</table-type>\n"
            + " <table-type>ALIAS</table-type>\n"
            + " </table-types>\n"
            + " <catalog-name>mycatalog</catalog-name>\n"
            + "</jdbc-datastore>\n";
    assertEquals(expectedJndiXml, jndiXml);

    // Both datastores are now present in the writer's document.
    final Element root = configurationWriter.getDocument().getDocumentElement();
    final NodeList beforeRemoval = root.getElementsByTagName("jdbc-datastore");
    assertEquals(2, beforeRemoval.getLength());

    // A name that matches no datastore exactly removes nothing; the exact name does.
    assertFalse(configurationWriter.removeDatastore("foo ds"));
    assertTrue(configurationWriter.removeDatastore("foo ds 1"));

    final NodeList afterRemoval = root.getElementsByTagName("jdbc-datastore");
    assertEquals(1, afterRemoval.getLength());
}
/**
 * Externalizes a JDBC datastore that carries credentials and checks that the password is written in
 * its encoded form ({@code PASSWORD_ENCODED}) rather than as plain text.
 */
@Test
public void testExternalizeJdbcDatastoreWithPassword() throws Exception {
    final TableType[] tableTypes = new TableType[] { TableType.ALIAS };
    final Datastore datastore = new JdbcDatastore("name", "jdbcUrl", "driverClass", "username", "password", true,
            tableTypes, "catalogName");

    final String actualXml = transform(configurationWriter.externalize(datastore));

    final String expectedXml = "<jdbc-datastore name=\"name\">\n"
            + " <url>jdbcUrl</url>\n"
            + " <driver>driverClass</driver>\n"
            + " <username>username</username>\n"
            + " <password>" + PASSWORD_ENCODED + "</password>\n"
            + " <multiple-connections>true</multiple-connections>\n"
            + " <table-types>\n"
            + " <table-type>ALIAS</table-type>\n"
            + " </table-types>\n"
            + " <catalog-name>catalogName</catalog-name>\n"
            + "</jdbc-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Externalizes a MongoDB datastore with credentials and checks the XML, including the encoded password.
 */
@Test
public void testExternalizeMongoDbDatastoreWithPassword() throws Exception {
    final Datastore datastore = new MongoDbDatastore("name", "hostname", 1234, "database", "user", "password");

    final String actualXml = transform(configurationWriter.externalize(datastore));

    final String expectedXml = "<mongodb-datastore name=\"name\">\n"
            + " <hostname>hostname</hostname>\n"
            + " <port>1234</port>\n"
            + " <database-name>database</database-name>\n"
            + " <username>user</username>\n"
            + " <password>" + PASSWORD_ENCODED + "</password>\n"
            + "</mongodb-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Externalizes a CouchDB datastore with credentials and SSL enabled and checks the XML, including
 * the encoded password.
 */
@Test
public void testExternalizeCouchDbDatastoreWithPassword() throws Exception {
    final Datastore datastore = new CouchDbDatastore("name", "hostname", 1234, "user", "password", true, null);

    final String actualXml = transform(configurationWriter.externalize(datastore));

    final String expectedXml = "<couchdb-datastore name=\"name\">\n"
            + " <hostname>hostname</hostname>\n"
            + " <port>1234</port>\n"
            + " <username>user</username>\n"
            + " <password>" + PASSWORD_ENCODED + "</password>\n"
            + " <ssl>true</ssl>\n"
            + "</couchdb-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Externalizes a Salesforce datastore and checks the XML: the password is written encoded, while the
 * security token appears as given.
 */
@Test
public void testExternalizeSalesforceDatastoreWithPassword() throws Exception {
    final Datastore datastore = new SalesforceDatastore("name", "username", "password", "securityToken");

    final String actualXml = transform(configurationWriter.externalize(datastore));

    final String expectedXml = "<salesforce-datastore name=\"name\">\n"
            + " <username>username</username>\n"
            + " <password>" + PASSWORD_ENCODED + "</password>\n"
            + " <security-token>securityToken</security-token>\n"
            + "</salesforce-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Externalizes a DataHub datastore (HTTPS off, unverified SSL peers not accepted, default security
 * mode) and checks the XML, including the encoded password.
 */
@Test
public void testExternalizeDataHubDatastoreWithPassword() throws Exception {
    final Datastore dataHubDatastore = new DataHubDatastore("name", "hostname", 1234, "user", "password", false,
            false, DataHubSecurityMode.DEFAULT);

    final String actualXml = transform(configurationWriter.externalize(dataHubDatastore));

    // @formatter:off
    final String expectedXml =
            "<datahub-datastore name=\"name\">\n"
            + " <host>hostname</host>\n"
            + " <port>1234</port>\n"
            + " <username>user</username>\n"
            + " <password>" + PASSWORD_ENCODED + "</password>\n"
            + " <https>false</https>\n"
            + " <acceptunverifiedsslpeers>false</acceptunverifiedsslpeers>\n"
            + " <datahubsecuritymode>DEFAULT</datahubsecuritymode>\n"
            + "</datahub-datastore>\n";
    // @formatter:on
    assertEquals(expectedXml, actualXml);
}
/**
 * Converts a JSON datastore to an element with an explicit filename and checks that the XML carries
 * that filename ("json.json") rather than the path the resource was created with.
 */
@Test
public void testExternalizeJsonDatastore() throws Exception {
    final FileResource jsonResource = new FileResource("c:/test/json.json");
    final JsonDatastore datastore = new JsonDatastore("my Json", jsonResource);
    datastore.setDescription("My Json datastore");

    final String actualXml = transform(configurationWriter.toElement(datastore, "json.json"));

    final String expectedXml = "<json-datastore description=\"My Json datastore\" name=\"my Json\">\n"
            + " <filename>json.json</filename>\n"
            + "</json-datastore>\n";
    assertEquals(expectedXml, actualXml);
}
/**
 * Round-trips the three dictionary types: externalizes a simple, a text-file and a datastore
 * dictionary, writes the writer's document to a file under {@code target/}, reads it back with
 * {@link JaxbConfigurationReader} and checks that each dictionary's properties survived.
 */
@Test
public void testWriteAndReadAllDictionaries() throws Exception {
    configurationWriter.externalize(new SimpleDictionary("simple dict", false, "foo", "bar", "baz"));
    configurationWriter.externalize(new TextFileDictionary("textfile dict", "/foo/bar.txt", "UTF8", false));
    configurationWriter.externalize(new DatastoreDictionary("ds dict", "orderdb", "products.productname", false));

    // Persist the document so it can be parsed by the regular configuration reader.
    final String str = transform(configurationWriter.getDocument());
    final File file = new File("target/" + getClass().getSimpleName() + "-" + testName.getMethodName() + ".xml");
    FileHelper.writeStringAsFile(file, str);

    final DataCleanerConfiguration configuration = new JaxbConfigurationReader().create(file);
    assertEquals("[ds dict, simple dict, textfile dict]",
            Arrays.toString(configuration.getReferenceDataCatalog().getDictionaryNames()));

    final SimpleDictionary simpleDictionary =
            (SimpleDictionary) configuration.getReferenceDataCatalog().getDictionary("simple dict");
    assertFalse(simpleDictionary.isCaseSensitive());
    // Values are sorted before comparison so the assertion does not depend on the value set's ordering.
    assertEquals("[bar, baz, foo]",
            simpleDictionary.getValueSet().stream().sorted().collect(Collectors.toList()).toString());

    final TextFileDictionary textFileDictionary =
            (TextFileDictionary) configuration.getReferenceDataCatalog().getDictionary("textfile dict");
    assertFalse(textFileDictionary.isCaseSensitive());
    assertEquals("UTF8", textFileDictionary.getEncoding());
    // Only the file name suffix is checked, not the full path.
    assertTrue(textFileDictionary.getFilename().endsWith("bar.txt"));

    final DatastoreDictionary datastoreDictionary =
            (DatastoreDictionary) configuration.getReferenceDataCatalog().getDictionary("ds dict");
    assertFalse(datastoreDictionary.isLoadIntoMemory());
    assertEquals("orderdb", datastoreDictionary.getDatastoreName());
    assertEquals("products.productname", datastoreDictionary.getQualifiedColumnName());
}
/**
 * Round-trips the synonym catalog types: externalizes a text-file and a datastore synonym catalog,
 * writes the writer's document to a file under {@code target/}, reads it back with
 * {@link JaxbConfigurationReader} and checks that each catalog's properties survived.
 */
@Test
public void testWriteAndReadAllSynonymCatalogs() throws Exception {
    configurationWriter.externalize(new TextFileSynonymCatalog("textfile sc", "/foo/bar.txt", false, "UTF8"));
    configurationWriter.externalize(new DatastoreSynonymCatalog("ds sc", "orderdb", "products.productname",
            new String[] { "products.productline", "product.producttype" }, false));

    // Persist the document so it can be parsed by the regular configuration reader.
    final String str = transform(configurationWriter.getDocument());
    final File file = new File("target/" + getClass().getSimpleName() + "-" + testName.getMethodName() + ".xml");
    FileHelper.writeStringAsFile(file, str);

    final DataCleanerConfiguration configuration = new JaxbConfigurationReader().create(file);
    assertEquals("[ds sc, textfile sc]",
            Arrays.toString(configuration.getReferenceDataCatalog().getSynonymCatalogNames()));

    final TextFileSynonymCatalog textFileSynonymCatalog =
            (TextFileSynonymCatalog) configuration.getReferenceDataCatalog().getSynonymCatalog("textfile sc");
    assertEquals("UTF8", textFileSynonymCatalog.getEncoding());
    // Only the file name suffix is checked, not the full path.
    assertTrue(textFileSynonymCatalog.getFilename().endsWith("bar.txt"));

    final DatastoreSynonymCatalog datastoreSynonymCatalog =
            (DatastoreSynonymCatalog) configuration.getReferenceDataCatalog().getSynonymCatalog("ds sc");
    assertFalse(datastoreSynonymCatalog.isLoadIntoMemory());
    assertEquals("orderdb", datastoreSynonymCatalog.getDatastoreName());
    assertEquals("products.productname", datastoreSynonymCatalog.getMasterTermColumnPath());
    assertEquals("[products.productline, product.producttype]",
            Arrays.toString(datastoreSynonymCatalog.getSynonymColumnPaths()));
}
/**
 * Round-trips the three Hadoop cluster information types (environment, directory and direct
 * connection based): externalizes them, writes the document to a file under {@code target/}, reads
 * it back and verifies the server catalog. Finally removes the "namenode" server and checks that
 * the name no longer occurs in the document.
 */
@Test
public void testWriteAndReadAllServers() throws Exception {
    final URI nameNodeUri = URI.create("hdfs://localhost:8020/");
    final String[] directories = new String[] { "C:\\Users\\Test", "file:///C:/Users/Test2" };

    configurationWriter
            .externalize(new EnvironmentBasedHadoopClusterInformation("environment", "Environment-based cluster"));
    configurationWriter.externalize(
            new DirectoryBasedHadoopClusterInformation("directory", "Directory-based cluster", "C:\\Users\\Test",
                    "file:///C:/Users/Test2"));
    configurationWriter.externalize(
            new DirectConnectionHadoopClusterInformation("namenode", "Namenode-based cluster", nameNodeUri));

    final String writtenXml = transform(configurationWriter.getDocument());
    // "Default" hadoop cluster reference should never be written
    assertFalse(writtenXml.contains(HadoopResource.DEFAULT_CLUSTERREFERENCE));

    final String outputName = getClass().getSimpleName() + "-" + testName.getMethodName() + ".xml";
    final File outputFile = new File("target/" + outputName);
    FileHelper.writeStringAsFile(outputFile, writtenXml);

    final DataCleanerConfiguration reloaded = new JaxbConfigurationReader().create(outputFile);
    final ServerInformationCatalog servers = reloaded.getServerInformationCatalog();

    // Besides the three written servers, the reloaded catalog lists one entry the test never externalized.
    assertEquals("[directory, environment, namenode, org.datacleaner.hadoop.environment]",
            Arrays.toString(servers.getServerNames()));
    assertNotNull(servers.getServer("environment"));

    final DirectoryBasedHadoopClusterInformation directoryServer =
            (DirectoryBasedHadoopClusterInformation) servers.getServer("directory");
    assertArrayEquals(directories, directoryServer.getDirectories());

    final DirectConnectionHadoopClusterInformation nameNodeServer =
            (DirectConnectionHadoopClusterInformation) servers.getServer("namenode");
    assertEquals(nameNodeUri, nameNodeServer.getNameNodeUri());

    // After removal the server name must be gone from the document altogether.
    configurationWriter.removeHadoopClusterServerInformation("namenode");
    final String xmlAfterRemoval = transform(configurationWriter.getDocument());
    assertFalse(xmlAfterRemoval, xmlAfterRemoval.contains("namenode"));
}
/**
 * Round-trips a CSV datastore backed by a {@link HadoopResource} that uses the default cluster
 * reference: externalizes it, writes the document to a file under {@code target/}, reads it back
 * and checks the resource's file path and the {@code fs.defaultFS} setting of its Hadoop
 * configuration.
 *
 * <p>The test points the {@code HADOOP_CONF_DIR} system property at a generated configuration
 * folder. Any value the property had before the test is restored afterwards, so the test does not
 * leak state into (or wipe state needed by) other tests running in the same JVM.
 */
@Test
public void testWriteAndReadHadoopResourceDatastore() throws Exception {
    final MockHadoopConfigHelper helper = new MockHadoopConfigHelper(_temporaryFolder);
    helper.generateCoreFile();

    // Remember the pre-existing value (if any) so it can be restored in the finally block.
    final String confDirProperty = EnvironmentBasedHadoopClusterInformation.HADOOP_CONF_DIR;
    final String previousConfDir = System.getProperty(confDirProperty);

    // Prepare "environment"
    try {
        System.setProperty(confDirProperty, helper.getConfFolder().getAbsolutePath());

        final HadoopResource hadoopResource =
                new HadoopResource(URI.create("example-dates.csv"), new Configuration(),
                        HadoopResource.DEFAULT_CLUSTERREFERENCE);
        configurationWriter.externalize(new CsvDatastore("csvDatastore", hadoopResource));

        final String str = transform(configurationWriter.getDocument());
        final File file =
                new File("target/" + getClass().getSimpleName() + "-" + testName.getMethodName() + ".xml");
        FileHelper.writeStringAsFile(file, str);

        final DataCleanerConfiguration configuration = new JaxbConfigurationReader().create(file);
        final CsvDatastore csvDatastore =
                (CsvDatastore) configuration.getDatastoreCatalog().getDatastore("csvDatastore");
        final HadoopResource resource = (HadoopResource) csvDatastore.getResource();
        assertNotNull(resource);
        assertEquals("example-dates.csv", resource.getFilepath());
        assertEquals(helper.getPath(), resource.getHadoopConfiguration().get("fs.defaultFS"));
    } finally {
        if (previousConfDir == null) {
            System.clearProperty(confDirProperty);
        } else {
            System.setProperty(confDirProperty, previousConfDir);
        }
    }
}
/**
 * Round-trips the string pattern types: externalizes a simple and a regex string pattern, writes
 * the writer's document to a file under {@code target/}, reads it back with
 * {@link JaxbConfigurationReader} and checks that each pattern's properties survived.
 */
@Test
public void testWriteAndReadAllStringPatterns() throws Exception {
    configurationWriter.externalize(new SimpleStringPattern("simple sp", "aaaa@aaaa.aaa"));
    configurationWriter.externalize(new RegexStringPattern("regex pattern", ".*", false));

    // Persist the document so it can be parsed by the regular configuration reader.
    final String str = transform(configurationWriter.getDocument());
    final File file = new File("target/" + getClass().getSimpleName() + "-" + testName.getMethodName() + ".xml");
    FileHelper.writeStringAsFile(file, str);

    final DataCleanerConfiguration configuration = new JaxbConfigurationReader().create(file);
    assertEquals("[regex pattern, simple sp]",
            Arrays.toString(configuration.getReferenceDataCatalog().getStringPatternNames()));

    final SimpleStringPattern simpleStringPattern =
            (SimpleStringPattern) configuration.getReferenceDataCatalog().getStringPattern("simple sp");
    assertEquals("aaaa@aaaa.aaa", simpleStringPattern.getExpression());

    final RegexStringPattern regexStringPattern =
            (RegexStringPattern) configuration.getReferenceDataCatalog().getStringPattern("regex pattern");
    assertEquals(".*", regexStringPattern.getExpression());
    assertFalse(regexStringPattern.isMatchEntireString());
}
/**
 * Externalizes two fixed-width datastores over the same file resource and checks their XML: the
 * first uses a single fixed value width (20), the second an explicit list of value widths (19, 22).
 */
@Test
public void testWriteAndReadFixedWidthDatastore() throws Exception {
    final FileResource csvResource = new FileResource("test.csv");

    // Variant 1: one fixed width for every value
    final FixedWidthDatastore uniformWidthDatastore =
            new FixedWidthDatastore("my fixed width ds", csvResource, csvResource.getName(), "UTF-8", 20, false,
                    true, true, 1);
    uniformWidthDatastore.setDescription("bar");
    final String uniformWidthXml = transform(configurationWriter.externalize(uniformWidthDatastore));
    final String expectedUniformWidthXml =
            "<fixed-width-datastore description=\"bar\" name=\"my fixed width ds\">\n"
            + " <filename>test.csv</filename>\n"
            + " <encoding>UTF-8</encoding>\n"
            + " <width-specification>\n"
            + " <fixed-value-width>20</fixed-value-width>\n"
            + " </width-specification>\n"
            + " <header-line-number>1</header-line-number>\n"
            + " <fail-on-inconsistencies>false</fail-on-inconsistencies>\n"
            + " <skip-ebcdic-header>true</skip-ebcdic-header>\n"
            + " <eol-present>true</eol-present>\n"
            + "</fixed-width-datastore>\n";
    assertEquals(expectedUniformWidthXml, uniformWidthXml);

    // Variant 2: individual widths per value
    final FixedWidthDatastore perValueWidthDatastore =
            new FixedWidthDatastore("my fixed width ds 2", csvResource, csvResource.getName(), "UTF-8",
                    new int[] { 19, 22 }, false, false, true, 1, null);
    perValueWidthDatastore.setDescription("bar");
    final String perValueWidthXml = transform(configurationWriter.externalize(perValueWidthDatastore));
    final String expectedPerValueWidthXml =
            "<fixed-width-datastore description=\"bar\" name=\"my fixed width ds 2\">\n"
            + " <filename>test.csv</filename>\n"
            + " <encoding>UTF-8</encoding>\n"
            + " <width-specification>\n"
            + " <value-width>19</value-width>\n"
            + " <value-width>22</value-width>\n"
            + " </width-specification>\n"
            + " <header-line-number>1</header-line-number>\n"
            + " <fail-on-inconsistencies>false</fail-on-inconsistencies>\n"
            + " <skip-ebcdic-header>false</skip-ebcdic-header>\n"
            + " <eol-present>true</eol-present>\n"
            + "</fixed-width-datastore>\n";
    assertEquals(expectedPerValueWidthXml, perValueWidthXml);
}
/**
 * Renders a DOM node as a string via {@code XmlUtils.writeDocumentToString} and removes every
 * carriage return, so expected values in the tests can be written with {@code \n} line breaks only.
 *
 * @param elem the node (element or document) to render
 * @return the node's XML text without any {@code \r} characters
 * @throws Exception if rendering fails
 */
private String transform(final Node elem) throws Exception {
    final String xml = XmlUtils.writeDocumentToString(elem, false);
    return xml.replace("\r", "");
}
}