/*
* Copyright 2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.xd.integration.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Collection;
import java.util.Iterator;
import java.util.UUID;
import org.apache.hadoop.fs.FileStatus;
import org.junit.After;
import org.junit.Before;
import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;
import org.springframework.util.StringUtils;
import org.springframework.xd.test.fixtures.IncrementalJdbcHdfsJob;
import org.springframework.xd.test.fixtures.JdbcHdfsJob;
import org.springframework.xd.test.fixtures.JdbcSink;
import org.springframework.xd.test.fixtures.PartitionedJdbcHdfsJob;

/**
* Verifies that the jdbchdfs family of jobs reads the specified source table and writes the
* results to the specified directory and file on HDFS.
*
* @author Glenn Renfro
*/
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class JdbcHdfsTest extends AbstractJobTest {
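// Every test builds the same basic topology, sketched here for orientation (the exact DSL
// strings come from the fixture classes, so treat this as an approximation):
//
// stream: http | jdbc --tableName=jdbchdfstest
// job: jdbchdfs (reads the table back out and writes CSV files to HDFS)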
private final static String DEFAULT_TABLE_NAME = "jdbchdfstest";
private JdbcSink jdbcSink;
private String tableName;
private String jobName;

/**
* Prepares each test: generates a unique job name, creates the JDBC sink fixture, removes the
* table left over from a previous test, and deletes the result HDFS directory.
*/
@Before
public void initialize() {
jobName = "jdbchdfs" + UUID.randomUUID().toString();
jdbcSink = sinks.jdbc();
tableName = DEFAULT_TABLE_NAME;
jdbcSink.tableName(tableName);
cleanup();
if (hadoopUtil.fileExists(JdbcHdfsJob.DEFAULT_DIRECTORY)) {
hadoopUtil.fileRemove(JdbcHdfsJob.DEFAULT_DIRECTORY);
}
}

/**
* Asserts that the jdbchdfs job, using its default SQL, writes the test data from the JDBC
* source table to the HDFS file system.
*/
@Test
public void testJdbcHdfsJobWithSql() {
// Deploy stream and job.
String data = UUID.randomUUID().toString();
jdbcSink.getJdbcTemplate().getDataSource(); // return value unused; presumably retained to touch the DataSource before the stream runs
JdbcHdfsJob job = jobs.jdbcHdfsJob();
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
sources.httpSource("dataSender").postData(data);
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
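// The job names its output <directory>/<fileName>-0.csv; the trailing -0 appears to be a
// file sequence number (inferred from the fixture defaults used here).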
String path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-0.csv";
assertPathsExists(path);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
Iterator<FileStatus> statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", data.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", data + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
}

/**
* Asserts that the jdbchdfs job, configured with explicit column and table names instead of
* SQL, writes the test data from the JDBC source table to the HDFS file system.
*/
@Test
public void testJdbcHdfsJobWithColumnsAndTable() {
// Deploy stream and job.
String data = UUID.randomUUID().toString();
jdbcSink.getJdbcTemplate().getDataSource();
// Build the job with explicit column and table names rather than SQL; going by this test's
// name, the null argument is the omitted SQL parameter.
JdbcHdfsJob job = new JdbcHdfsJob(JdbcHdfsJob.DEFAULT_DIRECTORY, JdbcHdfsJob.DEFAULT_FILE_NAME,
null, "payload", DEFAULT_TABLE_NAME);
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
sources.httpSource("dataSender").postData(data);
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
String path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-0.csv";
assertPathsExists(path);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
Iterator<FileStatus> statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", data.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", data + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
}

/**
* Asserts that the incremental jdbchdfs job exports only the rows added since the previous
* run, writing each run's rows to a new HDFS file.
*/
@Test
public void testIncrementalJdbcHdfsJobWithColumnsAndTable() {
// Deploy stream and job.
jdbcSink.getJdbcTemplate().getDataSource();
IncrementalJdbcHdfsJob job = jobs.incrementalJdbcHdfsJob();
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
String file1Contents = StringUtils.arrayToDelimitedString(new String[] {"1", "2", "3"}, "\n");
sources.httpSource("dataSender").postData("1");
sources.httpSource("dataSender").postData("2");
sources.httpSource("dataSender").postData("3");
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
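// Incremental runs add a partition index to the name: <fileName>-p<partition>-<run>.csv,
// so the first run against partition 0 lands in -p0-0.csv (our reading of the naming scheme).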
String path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-0.csv";
assertPathsExists(path);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
Iterator<FileStatus> statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", file1Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file1Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
String file2Contents = StringUtils.arrayToDelimitedString(new String[] {"4", "5", "6"}, "\n");
sources.httpSource("dataSender").postData("4");
sources.httpSource("dataSender").postData("5");
sources.httpSource("dataSender").postData("6");
jobLaunch(jobName);
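// The second argument to waitForJobToComplete appears to be the expected execution count:
// block until the second launch of this job finishes.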
waitForJobToComplete(jobName, 2);
// Evaluate the results of the test.
path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-1.csv";
assertPathsExists(path);
fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", file2Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file2Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
}

/**
* Asserts that relaunching the incremental jdbchdfs job writes no new HDFS file when no rows
* have been added since the previous run.
*/
@Test
public void testIncrementalJdbcHdfsJobWithColumnsAndTableNothingNew() {
// Deploy stream and job.
jdbcSink.getJdbcTemplate().getDataSource();
IncrementalJdbcHdfsJob job = jobs.incrementalJdbcHdfsJob();
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
String file1Contents = StringUtils.arrayToDelimitedString(new String[] {"1", "2", "3"}, "\n");
sources.httpSource("dataSender").postData("1");
sources.httpSource("dataSender").postData("2");
sources.httpSource("dataSender").postData("3");
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
String path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-0.csv";
assertPathsExists(path);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
Iterator<FileStatus> statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", file1Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file1Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
jobLaunch(jobName);
waitForJobToComplete(jobName, 2);
// Evaluate the results of the test.
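// With no new rows past the recorded check column value, the second run should write no
// -p0-1.csv file: the directory still holds only itself and the file from the first run.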
String dir = JdbcHdfsJob.DEFAULT_DIRECTORY + "/";
path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-0.csv";
assertPathsExists(path);
fileStatuses = hadoopUtil.listDir(dir);
assertEquals("The number of files in list result should only be 2. The directory and file itself. ", 2,
fileStatuses.size());
statuses = fileStatuses.iterator();
statuses.next();
assertEquals("File size should match the data size +1 for the //n", file1Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file1Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
}

/**
* Asserts that the incremental jdbchdfs job handles a partition column that differs from the
* check column, writing one HDFS file per partition on each run.
*/
@Test
public void testIncrementalJdbcHdfsJobWithColumnsAndTablePartitionDifferentFromCheck() {
// Deploy stream and job.
jdbcSink.getJdbcTemplate().getDataSource();
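// Reading the constructor arguments against the fixture's parameter order, this appears to
// export the payload and checkColumn columns, partition on payload across three partitions,
// track incremental progress via checkColumn, and start with no override value (-1).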
IncrementalJdbcHdfsJob job = new IncrementalJdbcHdfsJob(IncrementalJdbcHdfsJob.DEFAULT_DIRECTORY,
IncrementalJdbcHdfsJob.DEFAULT_FILE_NAME, IncrementalJdbcHdfsJob.DEFAULT_TABLE,
"payload,checkColumn", "payload", 3, "checkColumn", -1);
// Use a trigger to send data to JDBC
jdbcSink.columns("payload,checkColumn");
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
sources.httpSource("dataSender").postData("{\"payload\": 1, \"checkColumn\": 1}");
sources.httpSource("dataSender").postData("{\"payload\": 2, \"checkColumn\": 1}");
sources.httpSource("dataSender").postData("{\"payload\": 3, \"checkColumn\": 1}");
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
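// First run: one file per partition (p0..p2), each at sequence 0.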
String dir = JdbcHdfsJob.DEFAULT_DIRECTORY + "/";
String path0 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0" + "-0.csv";
String path1 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p1" + "-0.csv";
String path2 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p2" + "-0.csv";
assertPathsExists(path0, path1, path2);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(dir);
assertEquals("The number of files in list result should only be 4. The directory itself and 3 files. ", 4,
fileStatuses.size());
for (FileStatus fileStatus : fileStatuses) {
if (!fileStatus.isDirectory()) {
assertTrue("The file should be of reasonable size", fileStatus.getLen() > 2 && fileStatus.getLen() < 10);
}
}
sources.httpSource("dataSender").postData("{\"payload\": 4, \"checkColumn\": 2}");
sources.httpSource("dataSender").postData("{\"payload\": 5, \"checkColumn\": 2}");
sources.httpSource("dataSender").postData("{\"payload\": 6, \"checkColumn\": 2}");
jobLaunch(jobName);
waitForJobToComplete(jobName, 2);
// Evaluate the results of the test.
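// Second run: each partition gains a sequence-1 file for the newly added rows.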
String path3 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0" + "-1.csv";
String path4 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p1" + "-1.csv";
String path5 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p2" + "-1.csv";
assertPathsExists(path3, path4, path5);
fileStatuses = hadoopUtil.listDir(dir);
assertEquals("The number of files in list result should only be 7. The directory itself and 5 files. ", 7,
fileStatuses.size());
for (FileStatus fileStatus : fileStatuses) {
if (!fileStatus.isDirectory()) {
assertTrue("The file should be of reasonable size", fileStatus.getLen() > 2 && fileStatus.getLen() < 10);
}
}
}

/**
* Asserts that supplying an overrideCheckColumnValue on relaunch makes the incremental
* jdbchdfs job re-export rows already written by the previous run.
*/
@Test
public void testIncrementalJdbcHdfsJobWithOverride() {
// Deploy stream and job.
jdbcSink.getJdbcTemplate().getDataSource();
IncrementalJdbcHdfsJob job = jobs.incrementalJdbcHdfsJob();
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
String file1Contents = StringUtils.arrayToDelimitedString(new String[] {"1", "2", "3"}, "\n");
sources.httpSource("dataSender").postData("1");
sources.httpSource("dataSender").postData("2");
sources.httpSource("dataSender").postData("3");
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
String path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-0.csv";
assertPathsExists(path);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
Iterator<FileStatus> statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", file1Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file1Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
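// Relaunching with overrideCheckColumnValue 2 should make the job re-read every row whose
// check column value exceeds 2, so "3" from the first batch is exported again along with
// 4 through 6 (inferred from the expected file contents below).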
String file2Contents = StringUtils.arrayToDelimitedString(new String[] {"3", "4", "5", "6"}, "\n");
sources.httpSource("dataSender").postData("4");
sources.httpSource("dataSender").postData("5");
sources.httpSource("dataSender").postData("6");
launchJob(jobName, "{\"overrideCheckColumnValue\" : 2}");
waitForJobToComplete(jobName, 2);
// Evaluate the results of the test.
path = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0-1.csv";
assertPathsExists(path);
fileStatuses = hadoopUtil.listDir(path);
assertEquals("The number of files in list result should only be 1. The file itself. ", 1,
fileStatuses.size());
statuses = fileStatuses.iterator();
assertEquals("File size should match the data size +1 for the //n", file2Contents.length() + 1, statuses.next().getLen());
assertEquals("The data returned from hadoop was different than was sent. ", file2Contents + "\n",
hadoopUtil.getFileContentsFromHdfs(path));
}

/**
* Asserts that the partitioned jdbchdfs job writes the test data from the JDBC source table
* to the HDFS file system, one file per partition.
*/
@Test
public void testPartitionedJdbcHdfsJobWithColumnsTable() {
// Deploy stream and job.
jdbcSink.columns(PartitionedJdbcHdfsJob.DEFAULT_COLUMN_NAMES);
String data0 = "{\"id\":1,\"name\":\"Sven\"}";
String data1 = "{\"id\":2,\"name\":\"Anna\"}";
String data2 = "{\"id\":3,\"name\":\"Nisse\"}";
jdbcSink.getJdbcTemplate().getDataSource();
PartitionedJdbcHdfsJob job = jobs.partitionedJdbcHdfsJob();
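// The partitioned job splits the export across writers, yielding one CSV per partition;
// with the fixture defaults and three distinct ids we expect three single-row files.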
// Use a trigger to send data to JDBC
stream("dataSender", sources.http() + XD_DELIMITER
+ jdbcSink);
sources.httpSource("dataSender").postData(data0);
sources.httpSource("dataSender").postData(data1);
sources.httpSource("dataSender").postData(data2);
job(jobName, job.toDSL(), true);
jobLaunch(jobName);
waitForJobToComplete(jobName);
// Evaluate the results of the test.
String dir = JdbcHdfsJob.DEFAULT_DIRECTORY + "/";
String path0 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p0" + "-0.csv";
String path1 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p1" + "-0.csv";
String path2 = JdbcHdfsJob.DEFAULT_DIRECTORY + "/" + JdbcHdfsJob.DEFAULT_FILE_NAME + "-p2" + "-0.csv";
assertPathsExists(path0, path1, path2);
Collection<FileStatus> fileStatuses = hadoopUtil.listDir(dir);
assertEquals("The number of files should be 4. The directory and 3 files. ", 4, fileStatuses.size());
for (FileStatus fileStatus : fileStatuses) {
if (!fileStatus.isDirectory()) {
assertTrue("The file should be of reasonable size", fileStatus.getLen() > 5 && fileStatus.getLen() < 10);
}
}
}

/**
* Asserts that each of the given paths exists on HDFS, waiting up to WAIT_TIME for it to appear.
*/
private void assertPathsExists(String... paths) {
for (String path : paths) {
assertTrue(path + " is missing from hdfs",
hadoopUtil.waitForPath(WAIT_TIME, path)); // wait up to WAIT_TIME for the file to be closed
}
}

/**
* Being a good steward of the database, removes the result table once the test completes.
*/
@After
public void cleanup() {
if (jdbcSink != null) {
jdbcSink.dropTable(tableName);
}
}
}