package org.embulk.standards.guess;

import com.google.common.collect.ImmutableList;
import org.embulk.config.ConfigDiff;
import org.embulk.config.ConfigSource;
import org.embulk.config.DataSource;
import org.embulk.test.TestingEmbulk;
import org.junit.Rule;
import org.junit.Test;
import java.io.IOException;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

/**
 * Resource-driven tests for the CSV guess.
 *
 * <p>Each test case names three classpath resources under {@code RESOURCE_NAME_PREFIX}:
 * a seed parser configuration (YAML), a CSV sample, and the expected guess result (YAML).
 * The guess is run on the sample with the seed configuration and its output is compared
 * with the expected result.
 */
public class TestCsvGuessPlugin {
    private static final String RESOURCE_NAME_PREFIX = "org/embulk/standards/guess/csv/test/";

    @Rule
    public TestingEmbulk embulk = TestingEmbulk.builder()
            .build();

    @Test
    public void testSimple() throws Exception {
        assertGuessByResource(
                embulk,
                "test_simple_seed.yml",
                "test_simple.csv",
                "test_simple_guessed.yml");
    }

    @Test
    public void testFor1Rows() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_rows_seed.yml",
                "test_1_rows.csv",
                "test_1_rows_guessed.yml");
    }

    @Test
    public void testFor1RowsWithTrimNeeded() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_rows_with_trim_needed_seed.yml",
                "test_1_rows_with_trim_needed.csv",
                "test_1_rows_with_trim_needed_guessed.yml");
    }

    @Test
    public void testFor1RowsAndHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_rows_and_header_seed.yml",
                "test_1_rows_and_header.csv",
                "test_1_rows_and_header_guessed.yml");
    }

    @Test
    public void testFor1RowsAndHeaderWithTrimNeeded() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_rows_and_header_with_trim_needed_seed.yml",
                "test_1_rows_and_header_with_trim_needed.csv",
                "test_1_rows_and_header_with_trim_needed_guessed.yml");
    }

    @Test
    public void testFor2Rows() throws Exception {
        assertGuessByResource(
                embulk,
                "test_2_rows_seed.yml",
                "test_2_rows.csv",
                "test_2_rows_guessed.yml");
    }

    @Test
    public void testFor2RowsAndHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_2_rows_and_header_seed.yml",
                "test_2_rows_and_header.csv",
                "test_2_rows_and_header_guessed.yml");
    }

    @Test
    public void testFor1IntSingleColumnRow() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_int_single_column_row_seed.yml",
                "test_1_int_single_column_row.csv",
                "test_1_int_single_column_row_guessed.yml");
    }

    @Test
    public void testFor1StringSingleColumnRow() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_string_single_column_row_seed.yml",
                "test_1_string_single_column_row.csv",
                "test_1_string_single_column_row_guessed.yml");
    }

    @Test
    public void testFor2StringSingleColumnRows() throws Exception {
        assertGuessByResource(
                embulk,
                "test_2_string_single_column_rows_seed.yml",
                "test_2_string_single_column_rows.csv",
                "test_2_string_single_column_rows_guessed.yml");
    }

    @Test
    public void testFor1StringSingleColumnAndHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_string_single_column_row_and_header_seed.yml",
                "test_1_string_single_column_row_and_header.csv",
                "test_1_string_single_column_row_and_header_guessed.yml");
    }

    @Test
    public void testFor2IntSingleColumnRows() throws Exception {
        assertGuessByResource(
                embulk,
                "test_2_int_single_column_rows_seed.yml",
                "test_2_int_single_column_rows.csv",
                "test_2_int_single_column_rows_guessed.yml");
    }

    @Test
    public void testFor1IntSingleColumnAndHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_1_int_single_column_row_and_header_seed.yml",
                "test_1_int_single_column_row_and_header.csv",
                "test_1_int_single_column_row_and_header_guessed.yml");
    }

    @Test
    public void testIntSingleColumnWithHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_int_single_column_with_header_seed.yml",
                "test_int_single_column_with_header.csv",
                "test_int_single_column_with_header_guessed.yml");
    }

    @Test
    public void testIntSingleColumn() throws Exception {
        assertGuessByResource(
                embulk,
                "test_int_single_column_seed.yml",
                "test_int_single_column.csv",
                "test_int_single_column_guessed.yml");
    }

    @Test
    public void testDoubleSingleColumn() throws Exception {
        assertGuessByResource(
                embulk,
                "test_double_single_column_seed.yml",
                "test_double_single_column.csv",
                "test_double_single_column_guessed.yml");
    }

    @Test
    public void testStringSingleColumnWithHeader() throws Exception {
        assertGuessByResource(
                embulk,
                "test_string_single_column_with_header_seed.yml",
                "test_string_single_column_with_header.csv",
                "test_string_single_column_with_header_guessed.yml");
    }

    @Test
    public void testStringSingleColumn() throws Exception {
        assertGuessByResource(
                embulk,
                "test_string_single_column_seed.yml",
                "test_string_single_column.csv",
                "test_string_single_column_guessed.yml");
    }

    @Test
    public void suggestTabAsDelimiter() throws Exception {
        assertGuessByResource(
                embulk,
                "test_tab_delimiter_seed.yml",
                "test_tab_delimiter.csv",
                "test_tab_delimiter_guessed.yml");
    }

    @Test
    public void suggestSemicolonAsDelimiter() throws Exception {
        assertGuessByResource(
                embulk,
                "test_semicolon_delimiter_seed.yml",
                "test_semicolon_delimiter.csv",
                "test_semicolon_delimiter_guessed.yml");
    }

    @Test
    public void suggestSingleQuoteAsQuote() throws Exception {
        assertGuessByResource(
                embulk,
                "test_single_quote_seed.yml",
                "test_single_quote.csv",
                "test_single_quote_guessed.yml");
    }

    @Test
    public void suggestBackslashAsEscape() throws Exception {
        assertGuessByResource(
                embulk,
                "test_backslash_escape_seed.yml",
                "test_backslash_escape.csv",
                "test_backslash_escape_guessed.yml");
    }

    @Test
    public void skipSuggestIfEmptySampleRecords() throws Exception {
        // Checks that the CSV guess does not suggest anything for an invalidly formatted CSV file.
        assertGuessByResource(
                embulk,
                "test_skip_suggest_if_empty_sample_records_seed.yml",
                "test_skip_suggest_if_empty_sample_records.csv",
                "test_skip_suggest_if_empty_sample_records_guessed.yml");
    }

    /**
     * Runs a guess on a CSV resource and asserts that the result equals an expected YAML resource.
     *
     * <p>All resource names are relative to {@code RESOURCE_NAME_PREFIX}. The guess is executed
     * with the exec option {@code exclude_guess_plugins} set to {@code ["json"]}.
     *
     * @param embulk the testing harness used to load resources and run the guess
     * @param seedYamlResourceName the YAML resource holding the seed parser configuration
     * @param sourceCsvResourceName the CSV resource used as guess input
     * @param resultResourceName the YAML resource holding the expected guess result
     * @throws IOException declared by the harness calls used here
     */
    static void assertGuessByResource(
            TestingEmbulk embulk,
            String seedYamlResourceName,
            String sourceCsvResourceName,
            String resultResourceName)
            throws IOException {
        final ConfigSource seedConfig = embulk.loadYamlResource(resource(seedYamlResourceName));

        final ConfigDiff actual =
                embulk.parserBuilder()
                        .parser(seedConfig)
                        .exec(embulk.newConfig().set("exclude_guess_plugins", ImmutableList.of("json")))
                        .inputResource(resource(sourceCsvResourceName))
                        .guess();

        // Loaded after the guess has run, and typed as DataSource so the matcher compares at that type.
        final DataSource expected = embulk.loadYamlResource(resource(resultResourceName));
        assertThat(actual, is(expected));
    }

    /** Returns the given resource name prefixed with {@code RESOURCE_NAME_PREFIX}. */
    private static String resource(String name) {
        return RESOURCE_NAME_PREFIX + name;
    }
}