/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.datacleaner.spark;

import org.apache.metamodel.csv.CsvConfiguration;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.FileResource;
import org.apache.metamodel.util.HdfsResource;
import org.datacleaner.connection.CsvDatastore;
import org.datacleaner.connection.ExcelDatastore;
import org.datacleaner.connection.FixedWidthDatastore;
import org.datacleaner.connection.JsonDatastore;
import org.datacleaner.connection.Neo4jDatastore;
import org.datacleaner.spark.utils.HadoopJobExecutionUtils;
import org.junit.Test;

import junit.framework.TestCase;

public class HadoopConfigurationUtilsTest extends TestCase {

    @Test
    public void testCsvMultilines() {
        final HdfsResource hdfsResource = new HdfsResource("hdfs://datacleaner/files/test.csv");
        final CsvDatastore csvDatastore = new CsvDatastore("MyCsv", hdfsResource);

        // the default CsvConfiguration allows multi-line values, so this
        // datastore is rejected as a Hadoop job source
        assertTrue(csvDatastore.getCsvConfiguration().isMultilineValues());
        assertEquals(FileHelper.UTF_8_ENCODING, csvDatastore.getEncoding());
        assertFalse(HadoopJobExecutionUtils.isValidSourceDatastore(csvDatastore));
    }

    @Test
    public void testCsvSingleLine() {
        final HdfsResource hdfsResource = new HdfsResource("hdfs://datacleaner/files/test.csv");
        final CsvConfiguration csvConfiguration = new CsvConfiguration(0, true, false);
        final CsvDatastore csvDatastore = new CsvDatastore("MyCsv", hdfsResource, csvConfiguration);

        // single-line values with UTF-8 encoding make the CSV a valid source
        assertFalse(csvDatastore.getCsvConfiguration().isMultilineValues());
        assertEquals(FileHelper.UTF_8_ENCODING, csvDatastore.getEncoding());
        assertTrue(HadoopJobExecutionUtils.isValidSourceDatastore(csvDatastore));
    }

    @Test
    public void testJson() {
        final HdfsResource hdfsResource = new HdfsResource("hdfs://datacleaner/files/test.csv");
        final JsonDatastore jsonDatastore = new JsonDatastore("test", hdfsResource);
        assertTrue(HadoopJobExecutionUtils.isValidSourceDatastore(jsonDatastore));
    }

    @Test
    public void testInvalidDatastore() {
        // an Excel datastore backed by a local file resource is not a valid source
        final ExcelDatastore excelDatastore = new ExcelDatastore("MyTest", new FileResource("C://test"), "Test");
        assertFalse(HadoopJobExecutionUtils.isValidSourceDatastore(excelDatastore));

        // neither is a non-file datastore such as Neo4j
        final Neo4jDatastore neo4jDatastore = new Neo4jDatastore("neo", "localhost", "me", "password");
        assertFalse(HadoopJobExecutionUtils.isValidSourceDatastore(neo4jDatastore));
    }

    @Test
    public void testFixedWidthDatastore() {
        final HdfsResource hdfsResource = new HdfsResource("hdfs://datacleaner/employees-fixed-width.txt");
        final int[] widths = new int[] { 19, 22 };
        final FixedWidthDatastore datastore = new FixedWidthDatastore("My datastore", hdfsResource,
                hdfsResource.getFilepath(), "UTF-8", widths, false, false, false, 0, null);
        assertTrue(HadoopJobExecutionUtils.isValidSourceDatastore(datastore));
    }
}