/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.explore.service;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.dataset.DatasetDefinition;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.DatasetSpecification;
import co.cask.cdap.api.dataset.table.Put;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.explore.client.ExploreExecutionResult;
import co.cask.cdap.proto.ColumnDesc;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.QueryHandle;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.QueryStatus;
import co.cask.cdap.test.SlowTests;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionAware;
import com.google.common.collect.Lists;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TemporaryFolder;
import java.util.ArrayList;
import java.util.List;
/**
* Tests exploration of Tables.
*/
@Category(SlowTests.class)
public class HiveExploreTableTestRun extends BaseHiveExploreServiceTest {
@ClassRule
public static TemporaryFolder tmpFolder = new TemporaryFolder();
@BeforeClass
public static void start() throws Exception {
initialize(tmpFolder);
Schema schema = Schema.recordOf(
"record",
Schema.Field.of("bool_field", Schema.nullableOf(Schema.of(Schema.Type.BOOLEAN))),
Schema.Field.of("int_field", Schema.nullableOf(Schema.of(Schema.Type.INT))),
Schema.Field.of("long_field", Schema.nullableOf(Schema.of(Schema.Type.LONG))),
Schema.Field.of("float_field", Schema.nullableOf(Schema.of(Schema.Type.FLOAT))),
Schema.Field.of("double_field", Schema.nullableOf(Schema.of(Schema.Type.DOUBLE))),
Schema.Field.of("bytes_field", Schema.nullableOf(Schema.of(Schema.Type.BYTES))),
Schema.Field.of("string_field", Schema.of(Schema.Type.STRING))
);
datasetFramework.addInstance(Table.class.getName(), MY_TABLE, DatasetProperties.builder()
.add(Table.PROPERTY_SCHEMA, schema.toString())
.add(Table.PROPERTY_SCHEMA_ROW_FIELD, "string_field")
.build());
// Accessing dataset instance to perform data operations
Table table = datasetFramework.getDataset(MY_TABLE, DatasetDefinition.NO_ARGUMENTS, null);
Assert.assertNotNull(table);
TransactionAware txTable = (TransactionAware) table;
Transaction tx1 = transactionManager.startShort(100);
txTable.startTx(tx1);
Put put = new Put(Bytes.toBytes("row1"));
put.add("bool_field", false);
put.add("int_field", Integer.MAX_VALUE);
put.add("long_field", Long.MAX_VALUE);
put.add("float_field", 3.14f);
put.add("double_field", 3.14);
put.add("bytes_field", new byte[]{1, 2, 3});
table.put(put);
Assert.assertTrue(txTable.commitTx());
transactionManager.canCommit(tx1, txTable.getTxChanges());
transactionManager.commit(tx1);
txTable.postTxCommit();
Transaction tx2 = transactionManager.startShort(100);
txTable.startTx(tx2);
}
@AfterClass
public static void stop() throws Exception {
datasetFramework.deleteInstance(MY_TABLE);
}
@Test
public void testNoOpOnMissingSchema() throws Exception {
Id.DatasetInstance datasetId = Id.DatasetInstance.from(NAMESPACE_ID, "noschema");
datasetFramework.addInstance(Table.class.getName(), datasetId, DatasetProperties.EMPTY);
try {
DatasetSpecification spec = datasetFramework.getDatasetSpec(datasetId);
Assert.assertEquals(QueryHandle.NO_OP, exploreTableManager.enableDataset(datasetId, spec));
} finally {
datasetFramework.deleteInstance(datasetId);
}
}
@Test
public void testSchema() throws Exception {
runCommand(NAMESPACE_ID, "describe " + MY_TABLE_NAME,
true,
Lists.newArrayList(
new ColumnDesc("col_name", "STRING", 1, "from deserializer"),
new ColumnDesc("data_type", "STRING", 2, "from deserializer"),
new ColumnDesc("comment", "STRING", 3, "from deserializer")
),
Lists.newArrayList(
new QueryResult(Lists.<Object>newArrayList("bool_field", "boolean", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("int_field", "int", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("long_field", "bigint", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("float_field", "float", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("double_field", "double", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("bytes_field", "binary", "from deserializer")),
new QueryResult(Lists.<Object>newArrayList("string_field", "string", "from deserializer"))
)
);
}
@Test
public void testSelectStar() throws Exception {
List<ColumnDesc> expectedSchema = Lists.newArrayList(
new ColumnDesc(MY_TABLE_NAME + ".bool_field", "BOOLEAN", 1, null),
new ColumnDesc(MY_TABLE_NAME + ".int_field", "INT", 2, null),
new ColumnDesc(MY_TABLE_NAME + ".long_field", "BIGINT", 3, null),
new ColumnDesc(MY_TABLE_NAME + ".float_field", "FLOAT", 4, null),
new ColumnDesc(MY_TABLE_NAME + ".double_field", "DOUBLE", 5, null),
new ColumnDesc(MY_TABLE_NAME + ".bytes_field", "BINARY", 6, null),
new ColumnDesc(MY_TABLE_NAME + ".string_field", "STRING", 7, null)
);
ExploreExecutionResult results = exploreClient.submit(NAMESPACE_ID, "select * from " + MY_TABLE_NAME).get();
// check schema
Assert.assertEquals(expectedSchema, results.getResultSchema());
List<Object> columns = results.next().getColumns();
// check record1
Assert.assertFalse((Boolean) columns.get(0));
Assert.assertEquals(Integer.MAX_VALUE, columns.get(1));
Assert.assertEquals(Long.MAX_VALUE, columns.get(2));
// why does this come back as a double when it's a float???
Assert.assertTrue(Math.abs(3.14f - (Double) columns.get(3)) < 0.000001);
Assert.assertTrue(Math.abs(3.14 - (Double) columns.get(4)) < 0.000001);
Assert.assertArrayEquals(new byte[]{1, 2, 3}, (byte[]) columns.get(5));
Assert.assertEquals("row1", columns.get(6));
// should not be any more
Assert.assertFalse(results.hasNext());
}
@Test
public void testSelect() throws Exception {
String command = String.format("select int_field, double_field from %s where string_field='row1'", MY_TABLE_NAME);
runCommand(NAMESPACE_ID, command,
true,
Lists.newArrayList(new ColumnDesc("int_field", "INT", 1, null),
new ColumnDesc("double_field", "DOUBLE", 2, null)),
Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(Integer.MAX_VALUE, 3.14)))
);
}
@Test
public void testInsert() throws Exception {
Id.DatasetInstance otherTable = Id.DatasetInstance.from(NAMESPACE_ID, "othertable");
Schema schema = Schema.recordOf(
"record",
Schema.Field.of("value", Schema.of(Schema.Type.INT)),
Schema.Field.of("id", Schema.of(Schema.Type.STRING))
);
datasetFramework.addInstance(Table.class.getName(), otherTable, DatasetProperties.builder()
.add(Table.PROPERTY_SCHEMA, schema.toString())
.add(Table.PROPERTY_SCHEMA_ROW_FIELD, "id")
.build());
try {
String command = String.format("insert into %s select int_field, string_field from %s",
getDatasetHiveName(otherTable), MY_TABLE_NAME);
ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
command = String.format("select id, value from %s", getDatasetHiveName(otherTable));
runCommand(NAMESPACE_ID, command,
true,
Lists.newArrayList(new ColumnDesc("id", "STRING", 1, null),
new ColumnDesc("value", "INT", 2, null)),
Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("row1", Integer.MAX_VALUE)))
);
} finally {
datasetFramework.deleteInstance(otherTable);
}
}
@Test
public void testInsertFromJoin() throws Exception {
Id.DatasetInstance userTableID = Id.DatasetInstance.from(NAMESPACE_ID, "users");
Id.DatasetInstance purchaseTableID = Id.DatasetInstance.from(NAMESPACE_ID, "purchases");
Id.DatasetInstance expandedTableID = Id.DatasetInstance.from(NAMESPACE_ID, "expanded");
Schema userSchema = Schema.recordOf(
"user",
Schema.Field.of("id", Schema.of(Schema.Type.STRING)),
Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
Schema.Field.of("email", Schema.of(Schema.Type.STRING))
);
Schema purchaseSchema = Schema.recordOf(
"purchase",
Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE))
);
Schema expandedSchema = Schema.recordOf(
"expandedPurchase",
Schema.Field.of("purchaseid", Schema.of(Schema.Type.LONG)),
Schema.Field.of("itemid", Schema.of(Schema.Type.STRING)),
Schema.Field.of("userid", Schema.of(Schema.Type.STRING)),
Schema.Field.of("ct", Schema.of(Schema.Type.INT)),
Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE)),
Schema.Field.of("username", Schema.of(Schema.Type.STRING)),
Schema.Field.of("email", Schema.of(Schema.Type.STRING))
);
datasetFramework.addInstance(Table.class.getName(), userTableID, DatasetProperties.builder()
.add(Table.PROPERTY_SCHEMA, userSchema.toString())
.add(Table.PROPERTY_SCHEMA_ROW_FIELD, "id")
.build());
datasetFramework.addInstance(Table.class.getName(), purchaseTableID, DatasetProperties.builder()
.add(Table.PROPERTY_SCHEMA, purchaseSchema.toString())
.add(Table.PROPERTY_SCHEMA_ROW_FIELD, "purchaseid")
.build());
datasetFramework.addInstance(Table.class.getName(), expandedTableID, DatasetProperties.builder()
.add(Table.PROPERTY_SCHEMA, expandedSchema.toString())
.add(Table.PROPERTY_SCHEMA_ROW_FIELD, "purchaseid")
.build());
Table userTable = datasetFramework.getDataset(userTableID, DatasetDefinition.NO_ARGUMENTS, null);
Table purchaseTable = datasetFramework.getDataset(purchaseTableID, DatasetDefinition.NO_ARGUMENTS, null);
TransactionAware txUserTable = (TransactionAware) userTable;
TransactionAware txPurchaseTable = (TransactionAware) purchaseTable;
Transaction tx1 = transactionManager.startShort(100);
txUserTable.startTx(tx1);
txPurchaseTable.startTx(tx1);
Put put = new Put(Bytes.toBytes("samuel"));
put.add("name", "Samuel Jackson");
put.add("email", "sjackson@gmail.com");
userTable.put(put);
put = new Put(Bytes.toBytes(1L));
put.add("userid", "samuel");
put.add("itemid", "scotch");
put.add("ct", 1);
put.add("price", 56.99d);
purchaseTable.put(put);
txUserTable.commitTx();
txPurchaseTable.commitTx();
List<byte[]> changes = new ArrayList<>();
changes.addAll(txUserTable.getTxChanges());
changes.addAll(txPurchaseTable.getTxChanges());
transactionManager.canCommit(tx1, changes);
transactionManager.commit(tx1);
txUserTable.postTxCommit();
txPurchaseTable.postTxCommit();
try {
String command = String.format(
"insert into table %s select P.purchaseid, P.itemid, P.userid, P.ct, P.price, U.name, U.email from " +
"%s P join %s U on (P.userid = U.id)",
getDatasetHiveName(expandedTableID), getDatasetHiveName(purchaseTableID), getDatasetHiveName(userTableID));
ExploreExecutionResult result = exploreClient.submit(NAMESPACE_ID, command).get();
Assert.assertEquals(QueryStatus.OpStatus.FINISHED, result.getStatus().getStatus());
command = String.format("select purchaseid, itemid, userid, ct, price, username, email from %s",
getDatasetHiveName(expandedTableID));
runCommand(NAMESPACE_ID, command,
true,
Lists.newArrayList(new ColumnDesc("purchaseid", "BIGINT", 1, null),
new ColumnDesc("itemid", "STRING", 2, null),
new ColumnDesc("userid", "STRING", 3, null),
new ColumnDesc("ct", "INT", 4, null),
new ColumnDesc("price", "DOUBLE", 5, null),
new ColumnDesc("username", "STRING", 6, null),
new ColumnDesc("email", "STRING", 7, null)),
Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList(
1L, "scotch", "samuel", 1, 56.99d, "Samuel Jackson", "sjackson@gmail.com")))
);
} finally {
datasetFramework.deleteInstance(userTableID);
datasetFramework.deleteInstance(purchaseTableID);
datasetFramework.deleteInstance(expandedTableID);
}
}
}