/* * Licensed to CRATE Technology GmbH ("Crate") under one or more contributor * license agreements. See the NOTICE file distributed with this work for * additional information regarding copyright ownership. Crate licenses * this file to you under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * * However, if you have executed another commercial license agreement * with Crate these terms will supersede the license and you may use the * software solely pursuant to the terms of the relevant commercial agreement. */ package io.crate.integrationtests; import com.carrotsearch.randomizedtesting.LifecycleScope; import com.google.common.collect.ImmutableList; import io.crate.action.sql.SQLActionException; import io.crate.testing.SQLResponse; import io.crate.testing.TestingHelpers; import org.elasticsearch.test.ESIntegTestCase; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Locale; import static com.carrotsearch.randomizedtesting.RandomizedTest.newTempDir; import static org.hamcrest.Matchers.*; import static org.hamcrest.core.Is.is; @ESIntegTestCase.ClusterScope(numDataNodes = 2, randomDynamicTemplates = false) public class CopyIntegrationTest extends SQLHttpIntegrationTest { private final String copyFilePath = Paths.get(getClass().getResource("/essetup/data/copy").toURI()).toUri().toString(); private final String nestedArrayCopyFilePath = Paths.get(getClass().getResource("/essetup/data/nested_array").toURI()).toUri().toString(); private Setup setup = new Setup(sqlExecutor); @Rule public TemporaryFolder folder = new TemporaryFolder(); public CopyIntegrationTest() throws URISyntaxException { } @Test public void testCopyFromFile() throws Exception { execute("create table quotes (id int primary key, " + "quote string index using fulltext) with (number_of_replicas = 0)"); ensureYellow(); execute("copy quotes from ?", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(3L, response.rowCount()); refresh(); execute("select * from quotes"); assertEquals(3L, response.rowCount()); assertThat(response.rows()[0].length, is(2)); execute("select quote from quotes where id = 1"); assertThat(response.rows()[0][0], is("Don't pa\u00f1ic.")); } @Test public void testCopyFromFileWithUmlautsWhitespacesAndGlobs() throws Exception { execute("create table t (id int primary key, name string) clustered into 1 shards with (number_of_replicas = 0)"); File tmpFolder = folder.newFolder("äwesöme földer"); File file = new File(tmpFolder, "süpär.json"); List<String> lines = Collections.singletonList("{\"id\": 1, \"name\": \"Arthur\"}"); Files.write(file.toPath(), lines, StandardCharsets.UTF_8); execute("copy t from ?", new Object[]{Paths.get(tmpFolder.toURI()).toUri().toString() + "s*.json"}); assertThat(response.rowCount(), is(1L)); } @Test public void testCopyFromWithOverwriteDuplicates() throws Exception { execute("create table t (id int primary key) with (number_of_replicas = 0)"); ensureYellow(); execute("insert into t (id) values (?)", new Object[][]{ new Object[]{1}, new Object[]{2}, new Object[]{3}, new Object[]{4} }); execute("refresh table t"); File tmpExport = folder.newFolder("tmpExport"); String uriTemplate = Paths.get(tmpExport.toURI()).toUri().toString(); execute("copy t to directory ?", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(4L)); execute("copy t from ?", new Object[]{uriTemplate + "*"}); assertThat(response.rowCount(), is(0L)); execute("copy t from ? with (overwrite_duplicates = true, shared=true)", new Object[]{uriTemplate + "*"}); assertThat(response.rowCount(), is(4L)); execute("refresh table t"); execute("select count(*) from t"); assertThat(((Long) response.rows()[0][0]), is(4L)); } @Test public void testCopyFromFileWithoutPK() throws Exception { execute("create table quotes (id int, " + "quote string index using fulltext) with (number_of_replicas=0)"); ensureYellow(); execute("copy quotes from ?", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(6L, response.rowCount()); refresh(); execute("select * from quotes"); assertEquals(6L, response.rowCount()); assertThat(response.rows()[0].length, is(2)); } @Test public void testCopyFromDirectory() throws Exception { execute("create table quotes (id int primary key, " + "quote string index using fulltext) with (number_of_replicas=0)"); ensureYellow(); execute("copy quotes from ? with (shared=true)", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(3L, response.rowCount()); refresh(); execute("select * from quotes"); assertEquals(3L, response.rowCount()); } @Test public void testCopyFromFilePattern() throws Exception { execute("create table quotes (id int primary key, " + "quote string index using fulltext) with (number_of_replicas=0)"); ensureYellow(); execute("copy quotes from ?", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(3L, response.rowCount()); refresh(); execute("select * from quotes"); assertEquals(3L, response.rowCount()); } @Test public void testCopyFromFileWithEmptyLine() throws Exception { execute("create table foo (id integer primary key) clustered into 1 shards with (number_of_replicas=0)"); ensureYellow(); File newFile = folder.newFile(); try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(newFile), StandardCharsets.UTF_8)) { writer.write("{\"id\":1}\n"); writer.write("\n"); writer.write("{\"id\":2}\n"); } execute("copy foo from ?", new Object[]{Paths.get(newFile.toURI()).toUri().toString()}); assertEquals(2L, response.rowCount()); refresh(); execute("select * from foo order by id"); assertThat(response.rows()[0][0], is(1)); assertThat(response.rows()[1][0], is(2)); } @Test public void testCopyFromInvalidJson() throws Exception { execute("create table foo (id integer primary key) clustered into 1 shards with (number_of_replicas=0)"); ensureYellow(); File newFile = folder.newFile(); try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(newFile), StandardCharsets.UTF_8)) { writer.write("{|}"); } expectedException.expect(SQLActionException.class); expectedException.expectMessage("Failed to parse JSON in line: 1 in file:"); execute("copy foo from ?", new Object[]{Paths.get(newFile.toURI()).toUri().toString()}); } @Test public void testCopyFromFileWithPartition() throws Exception { execute("create table quotes (id int, " + "quote string) partitioned by (id)"); ensureGreen(); execute("copy quotes partition (id = 1) from ? with (shared=true)", new Object[]{ copyFilePath + "test_copy_from.json"}); refresh(); execute("select * from quotes"); assertEquals(3L, response.rowCount()); } @Test public void testCopyFromFileWithCompression() throws Exception { execute("create table quotes (id int, " + "quote string)"); ensureGreen(); execute("copy quotes from ? with (compression='gzip')", new Object[]{copyFilePath + "test_copy_from.gz"}); refresh(); execute("select * from quotes"); assertEquals(6L, response.rowCount()); } @Test public void testCopyFromWithGeneratedColumn() throws Exception { execute("create table quotes (" + " id int," + " quote string," + " gen_quote as concat(quote, ' This is awesome!')" + ")"); ensureYellow(); execute("copy quotes from ? with (shared=true)", new Object[]{copyFilePath + "test_copy_from.json"}); refresh(); execute("select gen_quote from quotes limit 1"); assertThat((String) response.rows()[0][0], endsWith("This is awesome!")); } @Test public void testCopyFromWithInvalidGivenGeneratedColumn() throws Exception { execute("create table quotes (" + " id int," + " quote as cast(id as string)" + ")"); ensureYellow(); execute("copy quotes from ? with (shared=true)", new Object[]{copyFilePath + "test_copy_from.json"}); assertThat(response.rowCount(), is(3L)); refresh(); // quote is not generated through expression but read from source without validation execute("select quote from quotes order by id limit 1"); assertThat((String) response.rows()[0][0], is("Don't pañic.")); } @Test public void testCopyFromToPartitionedTableWithGeneratedColumn() throws Exception { execute("create table quotes (" + " id int," + " quote string," + " gen_quote as concat(quote, ' Partitioned by awesomeness!')" + ") partitioned by (gen_quote)"); ensureYellow(); execute("copy quotes from ? with (shared=true)", new Object[]{copyFilePath + "test_copy_from.json"}); refresh(); execute("select gen_quote from quotes limit 1"); assertThat((String) response.rows()[0][0], endsWith("Partitioned by awesomeness!")); } @Test public void testCopyFromToPartitionedTableWithNullValue() throws Exception { execute("CREATE TABLE times (" + " time timestamp" + ") partitioned by (time)"); ensureYellow(); execute("copy times from ? with (shared=true)", new Object[]{copyFilePath + "test_copy_from_null_value.json"}); refresh(); execute("select time from times"); assertThat(response.rowCount(), is(1L)); assertNull(response.rows()[0][0]); } @Test public void testCopyFromIntoPartitionWithInvalidGivenGeneratedColumnAsPartitionKey() throws Exception { // test that rows are imported into defined partition even that the partition value does not match the // generated column expression value execute("create table quotes (" + " id int," + " quote string," + " id_str as cast(id+1 as string)" + ") partitioned by (id_str)"); ensureYellow(); execute("copy quotes partition (id_str = 1) from ? with (shared=true)", new Object[]{ copyFilePath + "test_copy_from.json"}); assertThat(response.rowCount(), is(3L)); refresh(); execute("select * from quotes where id_str = 1"); assertThat(response.rowCount(), is(3L)); } @Test public void testCopyToFile() throws Exception { expectedException.expect(SQLActionException.class); expectedException.expectMessage(containsString("Using COPY TO without specifying a DIRECTORY is not supported")); execute("create table singleshard (name string) clustered into 1 shards with (number_of_replicas = 0)"); ensureYellow(); execute("copy singleshard to '/tmp/file.json'"); } @Test public void testCopyToDirectory() throws Exception { this.setup.groupBySetup(); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy characters to DIRECTORY ?", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(7L)); String[] list = folder.getRoot().list(); assertThat(list, is(notNullValue())); assertThat(list.length, greaterThanOrEqualTo(1)); for (String file : list) { assertThat(file, startsWith("characters_")); } List<String> lines = new ArrayList<>(7); DirectoryStream<Path> stream = Files.newDirectoryStream(Paths.get(folder.getRoot().toURI()), "*.json"); for (Path path : stream) { lines.addAll(Files.readAllLines(path, StandardCharsets.UTF_8)); } assertThat(lines.size(), is(7)); for (String line : lines) { assertThat(line, startsWith("{")); assertThat(line, endsWith("}")); } } @Test public void testCopyToWithCompression() throws Exception { execute("create table singleshard (name string) clustered into 1 shards with (number_of_replicas = 0)"); ensureYellow(); execute("insert into singleshard (name) values ('foo')"); execute("refresh table singleshard"); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy singleshard to DIRECTORY ? with (compression='gzip')", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(1L)); String[] list = folder.getRoot().list(); assertThat(list, is(notNullValue())); assertThat(list.length, is(1)); String file = list[0]; assertThat(file, both(startsWith("singleshard_")).and(endsWith(".json.gz"))); long size = Files.size(Paths.get(folder.getRoot().toURI().resolve(file))); assertThat(size, is(35L)); } @Test public void testCopyColumnsToDirectory() throws Exception { this.setup.groupBySetup(); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy characters (name, details['job']) to DIRECTORY ?", new Object[]{uriTemplate}); assertThat(response.cols().length, is(0)); assertThat(response.rowCount(), is(7L)); List<String> lines = new ArrayList<>(7); DirectoryStream<Path> stream = Files.newDirectoryStream(Paths.get(folder.getRoot().toURI()), "*.json"); for (Path entry : stream) { lines.addAll(Files.readAllLines(entry, StandardCharsets.UTF_8)); } Path path = Paths.get(folder.getRoot().toURI().resolve("characters_0_.json")); assertTrue(path.toFile().exists()); assertThat(lines.size(), is(7)); boolean foundJob = false; boolean foundName = false; for (String line : lines) { foundName = foundName || line.contains("Arthur Dent"); foundJob = foundJob || line.contains("Sandwitch Maker"); assertThat(line.split(",").length, is(2)); assertThat(line.trim(), startsWith("[")); assertThat(line.trim(), endsWith("]")); } assertTrue(foundJob); assertTrue(foundName); } @Test public void testCopyToFileColumnsJsonObjectOutput() throws Exception { execute("create table singleshard (name string, test object as (foo string)) clustered into 1 shards with (number_of_replicas = 0)"); ensureYellow(); execute("insert into singleshard (name, test) values ('foobar', {foo='bar'})"); execute("refresh table singleshard"); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy singleshard (name, test['foo']) to DIRECTORY ? with (format='json_object')", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(1L)); String[] list = folder.getRoot().list(); assertThat(list, is(notNullValue())); assertThat(list.length, is(1)); List<String> lines = Files.readAllLines( Paths.get(folder.getRoot().toURI().resolve(list[0])), StandardCharsets.UTF_8); assertThat(lines.size(), is(1)); for (String line : lines) { assertThat(line, startsWith("{")); assertThat(line, endsWith("}")); } } @Test public void testCopyToWithWhere() throws Exception { this.setup.groupBySetup(); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy characters where gender = 'female' to DIRECTORY ?", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(2L)); } @Test public void testCopyToWithWhereNoMatch() throws Exception { this.setup.groupBySetup(); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy characters where gender = 'foo' to DIRECTORY ?", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(0L)); } @Test public void testCopyFromNestedArrayRow() throws Exception { // assert that rows with nested arrays aren't imported execute("create table users (id int, " + "name string) with (number_of_replicas=0)"); ensureYellow(); execute("copy users from ? with (shared=true)", new Object[]{ nestedArrayCopyFilePath + "nested_array_copy_from.json"}); assertEquals(1L, response.rowCount()); // only 1 document got inserted refresh(); execute("select * from users"); assertThat(response.rowCount(), is(1L)); assertThat(TestingHelpers.printedTable(response.rows()), is("2| Trillian\n")); } @Test public void testCopyToWithGeneratedColumn() throws Exception { execute("CREATE TABLE foo (\n" + "day TIMESTAMP GENERATED ALWAYS AS date_trunc('day', timestamp),\n" + "timestamp TIMESTAMP\n" + ")\n" + "PARTITIONED BY (day)"); ensureYellow(); execute("insert into foo ( timestamp) values (1454454000377)"); refresh(); String uriTemplate = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy foo to DIRECTORY ?", new Object[]{uriTemplate}); assertThat(response.rowCount(), is(1L)); } @Test public void testCopyFromWithRoutingInPK() throws Exception { execute("create table t (i int primary key, c string primary key, a int)" + " clustered by (c) with (number_of_replicas=0)"); ensureGreen(); execute("insert into t (i, c) values (1, 'clusteredbyvalue'), (2, 'clusteredbyvalue')"); refresh(); String uri = Paths.get(folder.getRoot().toURI()).toUri().toString(); SQLResponse response = execute("copy t to directory ?", new Object[]{uri}); assertThat(response.rowCount(), is(2L)); execute("delete from t"); refresh(); execute("copy t from ? with (shared=true)", new Object[]{uri + "t_*"}); refresh(); // only one shard should have all imported rows, since we have the same routing for both rows response = execute("select count(*) from sys.shards where num_docs>0 and table_name='t'"); assertThat(response.rows()[0][0], is(1L)); } @Test public void testCopyFromTwoHttpUrls() throws Exception { execute("create blob table blobs with (number_of_replicas = 0)"); execute("create table names (id int primary key, name string) with (number_of_replicas = 0)"); ensureYellow(); String r1 = "{\"id\": 1, \"name\":\"Marvin\"}"; String r2 = "{\"id\": 2, \"name\":\"Slartibartfast\"}"; String[] urls = {upload("blobs", r1), upload("blobs", r2)}; execute("copy names from ?", new Object[]{urls}); assertThat(response.rowCount(), is(2L)); execute("refresh table names"); execute("select name from names order by id"); assertThat(TestingHelpers.printedTable(response.rows()), is("Marvin\nSlartibartfast\n")); } @Test public void testCopyFromTwoUriMixedSchemaAndWildcardUse() throws Exception { execute("create blob table blobs with (number_of_replicas = 0)"); execute("create table names (id int primary key, name string) with (number_of_replicas = 0)"); Path tmpDir = newTempDir(LifecycleScope.TEST); File file = new File(tmpDir.toFile(), "names.json"); String r1 = "{\"id\": 1, \"name\": \"Arthur\"}"; String r2 = "{\"id\": 2, \"name\":\"Slartibartfast\"}"; Files.write(file.toPath(), Collections.singletonList(r1), StandardCharsets.UTF_8); String[] urls = {tmpDir.toUri().toString() + "*.json", upload("blobs", r2)}; execute("copy names from ?", new Object[]{urls}); assertThat(response.rowCount(), is(2L)); execute("refresh table names"); execute("select name from names order by id"); assertThat(TestingHelpers.printedTable(response.rows()), is("Arthur\nSlartibartfast\n")); } @Test public void testCopyFromIntoTableWithClusterBy() throws Exception { execute("create table quotes (id int, quote string) " + "clustered by (id)" + "with (number_of_replicas = 0)"); ensureYellow(); execute("copy quotes from ? with (shared = true)", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(3L, response.rowCount()); refresh(); execute("select quote from quotes where id = 2"); assertThat((String) response.rows()[0][0], containsString("lot of time")); } @Test public void testCopyFromIntoTableWithPkAndClusterBy() throws Exception { execute("create table quotes (id int primary key, quote string) " + "clustered by (id)" + "with (number_of_replicas = 0)"); ensureYellow(); execute("copy quotes from ?", new Object[]{copyFilePath + "test_copy_from.json"}); assertEquals(3L, response.rowCount()); refresh(); execute("select quote from quotes where id = 3"); assertThat((String) response.rows()[0][0], containsString("Time is an illusion.")); } private Path setUpTableAndSymlink(String tableName) throws IOException { execute(String.format(Locale.ENGLISH, "create table %s (a int) with (number_of_replicas = 0)", tableName)); Path tmpDir = newTempDir(LifecycleScope.TEST); Path target = Files.createDirectories(tmpDir.resolve("target")); File file = new File(target.toFile(), "integers.json"); String r1 = "{\"a\": 1}"; String r2 = "{\"a\": 2}"; String r3 = "{\"a\": 3}"; Files.write(file.toPath(), ImmutableList.of(r1, r2, r3), StandardCharsets.UTF_8); return Files.createSymbolicLink(tmpDir.resolve("link"), target); } @Test public void testCopyFromSymlinkFolderWithWildcard() throws Exception { Path link = setUpTableAndSymlink("t"); execute("copy t from ? with (shared=true)", new Object[]{ link.toUri().toString() + "*" }); assertThat(response.rowCount(), is(3L)); } @Test public void testCopyFromSymlinkFolderWithPrefixedWildcard() throws Exception { Path link = setUpTableAndSymlink("t"); execute("copy t from ? with (shared=true)", new Object[]{ link.toUri().toString() + "i*" }); assertThat(response.rowCount(), is(3L)); } @Test public void testCopyFromSymlinkFolderWithSuffixedWildcard() throws Exception { Path link = setUpTableAndSymlink("t"); execute("copy t from ? with (shared=true)", new Object[]{ link.toUri().toString() + "*.json" }); assertThat(response.rowCount(), is(3L)); } @Test public void testCopyFromFileInSymlinkFolder() throws Exception { Path link = setUpTableAndSymlink("t"); execute("copy t from ? with (shared=true)", new Object[]{ link.toUri().toString() + "integers.json" }); assertThat(response.rowCount(), is(3L)); } }