/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.explore.service;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.format.Formats;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.explore.client.ExploreExecutionResult;
import co.cask.cdap.proto.ColumnDesc;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.QueryResult;
import co.cask.cdap.proto.StreamProperties;
import co.cask.cdap.test.SlowTests;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TemporaryFolder;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.reflect.Type;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
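/**
 * Tests querying streams through the Hive explore service: the default stream schema, select, filter and
 * join queries, stream names containing a hyphen, rejected writes, and a stream configured with an Avro format.
 */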
@Category(SlowTests.class)
public class HiveExploreServiceStreamTest extends BaseHiveExploreServiceTest {
// temporary folder handed to initialize(...) in start(); declared as a JUnit class rule
@ClassRule
public static TemporaryFolder tmpFolder = new TemporaryFolder();
// used to parse the headers column, which comes back from queries as a JSON string
private static final Gson GSON = new Gson();
// bodies of the three events written to the shared stream in start(), in the order they are sent
private static final String body1 = "userX,actionA,item123";
private static final String body2 = "userY,actionB,item123";
private static final String body3 = "userZ,actionA,item456";
// name of the stream shared by the tests, and the name of the table used to query it
private static final String streamName = "mystream";
private static final String streamTableName = getTableName(streamName);
// headers attached to every event sent in start(); testSelectStarOnStream expects them back unchanged.
// NOTE(review): this comment used to say headers must be prefixed with the stream name or they get
// filtered out, but these keys carry no such prefix - that statement looks stale; confirm.
private static final Map<String, String> headers = ImmutableMap.of("header1", "val1", "header2", "val2");
// target type for deserializing the JSON headers column with GSON
private static final Type headerType = new TypeToken<Map<String, String>>() { }.getType();
/**
 * One-time setup: calls the base class initialize(...), creates the shared stream and writes the three
 * fixture events (body1, body2, body3, in that order), each carrying the shared headers.
 */
@BeforeClass
public static void start() throws Exception {
  // use leveldb implementations, since stream input format examines the filesystem
  // to determine input splits.
  initialize(CConfiguration.create(), tmpFolder, true);

  Id.Stream sharedStream = Id.Stream.from(NAMESPACE_ID, streamName);
  createStream(sharedStream);
  for (String eventBody : new String[] {body1, body2, body3}) {
    sendStreamEvent(sharedStream, headers, Bytes.toBytes(eventBody));
  }
}
/**
 * One-time teardown: drops the shared stream that start() created.
 */
@AfterClass
public static void finish() throws Exception {
  Id.Stream sharedStream = Id.Stream.from(NAMESPACE_ID, streamName);
  dropStream(sharedStream);
}
/**
 * Runs "describe" on the shared stream's table and expects the three default columns:
 * {@code ts} (bigint), {@code headers} ({@code map<string,string>}) and {@code body} (string).
 */
@Test
public void testStreamDefaultSchema() throws Exception {
  String describeStatement = "describe " + streamTableName;

  // schema of the "describe" output itself
  List<ColumnDesc> describeSchema = Lists.newArrayList(
    new ColumnDesc("col_name", "STRING", 1, "from deserializer"),
    new ColumnDesc("data_type", "STRING", 2, "from deserializer"),
    new ColumnDesc("comment", "STRING", 3, "from deserializer")
  );

  // one output row per column of the stream table
  List<QueryResult> streamColumns = Lists.newArrayList(
    new QueryResult(Lists.<Object>newArrayList("ts", "bigint", "from deserializer")),
    new QueryResult(Lists.<Object>newArrayList("headers", "map<string,string>", "from deserializer")),
    new QueryResult(Lists.<Object>newArrayList("body", "string", "from deserializer"))
  );

  runCommand(NAMESPACE_ID, describeStatement, true, describeSchema, streamColumns);
}
/**
 * Runs "select *" on the shared stream and checks the result schema, then the headers and body of each
 * of the three events in the order they were sent. Timestamps are not checked since they change for
 * each test run.
 */
@Test
public void testSelectStarOnStream() throws Exception {
  // close the result when done, even if an assertion fails part way through
  try (ExploreExecutionResult results =
         exploreClient.submit(NAMESPACE_ID, "select * from " + streamTableName).get()) {
    // check schema
    List<ColumnDesc> expectedSchema = Lists.newArrayList(
      new ColumnDesc(streamTableName + ".ts", "BIGINT", 1, null),
      new ColumnDesc(streamTableName + ".headers", "map<string,string>", 2, null),
      new ColumnDesc(streamTableName + ".body", "STRING", 3, null)
    );
    Assert.assertEquals(expectedSchema, results.getResultSchema());

    // check each result, without checking timestamp since that changes for each test
    for (String expectedBody : Lists.newArrayList(body1, body2, body3)) {
      Assert.assertTrue("expected a row with body " + expectedBody, results.hasNext());
      List<Object> columns = results.next().getColumns();
      // maps are returned as json objects...
      Assert.assertEquals(headers, GSON.fromJson((String) columns.get(1), headerType));
      Assert.assertEquals(expectedBody, columns.get(2));
    }

    // should not be any more
    Assert.assertFalse(results.hasNext());
  }
}
/**
 * Selects individual columns from the shared stream: first the body on its own, then two header values
 * looked up by key in the headers map and aliased as h1 and h2.
 */
@Test
public void testSelectFieldOnStream() throws Exception {
  // body only: one row per event
  String bodyQuery = "select body from " + streamTableName;
  List<ColumnDesc> bodySchema = Lists.newArrayList(new ColumnDesc("body", "STRING", 1, null));
  List<QueryResult> bodyRows = Lists.newArrayList(
    new QueryResult(Lists.<Object>newArrayList(body1)),
    new QueryResult(Lists.<Object>newArrayList(body2)),
    new QueryResult(Lists.<Object>newArrayList(body3)));
  runCommand(NAMESPACE_ID, bodyQuery, true, bodySchema, bodyRows);

  // header lookups: every event was sent with the same headers, so all three rows are identical
  String headerQuery =
    "select headers[\"header1\"] as h1, headers[\"header2\"] as h2 from " + streamTableName;
  List<ColumnDesc> headerSchema = Lists.newArrayList(
    new ColumnDesc("h1", "STRING", 1, null),
    new ColumnDesc("h2", "STRING", 2, null));
  List<QueryResult> headerRows = Lists.newArrayList(
    new QueryResult(Lists.<Object>newArrayList("val1", "val2")),
    new QueryResult(Lists.<Object>newArrayList("val1", "val2")),
    new QueryResult(Lists.<Object>newArrayList("val1", "val2")));
  runCommand(NAMESPACE_ID, headerQuery, true, headerSchema, headerRows);
}
/**
 * Filters the shared stream on a timestamp greater than Long.MAX_VALUE, a condition no event can meet,
 * and expects the body column in the schema with an empty list of rows.
 */
@Test
public void testSelectAndFilterQueryOnStream() throws Exception {
  String unsatisfiableQuery = "select body from " + streamTableName + " where ts > " + Long.MAX_VALUE;
  List<ColumnDesc> bodySchema = Lists.newArrayList(new ColumnDesc("body", "STRING", 1, null));
  List<QueryResult> noRows = Lists.<QueryResult>newArrayList();

  runCommand(NAMESPACE_ID, unsatisfiableQuery, false, bodySchema, noRows);
}
/**
 * Creates a stream whose name contains a hyphen, writes one event to it and queries it through a table
 * name in which the hyphen is replaced by an underscore. The stream is dropped afterwards.
 */
@Test
public void testStreamNameWithHyphen() throws Exception {
  Id.Stream hyphenatedStream = Id.Stream.from(NAMESPACE_ID, "stream-test");
  createStream(hyphenatedStream);
  try {
    sendStreamEvent(hyphenatedStream, Collections.<String, String>emptyMap(), Bytes.toBytes("Dummy"));

    // Streams with '-' are replaced with '_'
    String cleanStreamName = "stream_test";
    String bodyQuery = "select body from " + getTableName(cleanStreamName);
    List<ColumnDesc> bodySchema = Lists.newArrayList(new ColumnDesc("body", "STRING", 1, null));
    List<QueryResult> singleRow = Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("Dummy")));

    runCommand(NAMESPACE_ID, bodyQuery, true, bodySchema, singleRow);
  } finally {
    dropStream(hyphenatedStream);
  }
}
/**
 * Creates two streams, writes two events to each with only the body "ABC" in common, and expects a join
 * on the body column to return that single matching pair. Both streams are dropped afterwards.
 */
@Test
public void testJoinOnStreams() throws Exception {
  Id.Stream streamId1 = Id.Stream.from(NAMESPACE_ID, "jointest1");
  Id.Stream streamId2 = Id.Stream.from(NAMESPACE_ID, "jointest2");
  String table1 = getTableName(streamId1);
  String table2 = getTableName(streamId2);
  Map<String, String> noHeaders = Collections.<String, String>emptyMap();

  createStream(streamId1);
  try {
    createStream(streamId2);
    try {
      sendStreamEvent(streamId1, noHeaders, Bytes.toBytes("ABC"));
      sendStreamEvent(streamId1, noHeaders, Bytes.toBytes("XYZ"));
      sendStreamEvent(streamId2, noHeaders, Bytes.toBytes("ABC"));
      sendStreamEvent(streamId2, noHeaders, Bytes.toBytes("DEF"));

      String joinQuery =
        "select " + table1 + ".body, " + table2 + ".body" +
          " from " + table1 + " join " + table2 +
          " on (" + table1 + ".body = " + table2 + ".body)";
      List<ColumnDesc> joinSchema = Lists.newArrayList(
        new ColumnDesc(table1 + ".body", "STRING", 1, null),
        new ColumnDesc(table2 + ".body", "STRING", 2, null));
      List<QueryResult> matchingPair = Lists.newArrayList(
        new QueryResult(Lists.<Object>newArrayList("ABC", "ABC")));

      runCommand(NAMESPACE_ID, joinQuery, true, joinSchema, matchingPair);
    } finally {
      dropStream(streamId2);
    }
  } finally {
    dropStream(streamId1);
  }
}
/**
 * Submits an "insert into" that targets the shared stream's table and expects waiting on the returned
 * future to fail with an ExecutionException.
 */
@Test(expected = ExecutionException.class)
public void testWriteToStreamFails() throws Exception {
  String insertStatement = "insert into table " + streamTableName + " select * from " + streamTableName;
  exploreClient.submit(NAMESPACE_ID, insertStatement).get();
}
/**
 * Sets an Avro format with a (user, num, price) schema on a new stream, writes five Avro-encoded events
 * to it and checks a query that sums num and price * num per user, ordered by total price descending.
 * The stream is dropped afterwards.
 */
@Test
public void testAvroFormattedStream() throws Exception {
  Id.Stream streamId = Id.Stream.from(NAMESPACE_ID, "avroStream");
  createStream(streamId);
  try {
    Schema schema = Schema.recordOf(
      "purchase",
      Schema.Field.of("user", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("num", Schema.of(Schema.Type.INT)),
      Schema.Field.of("price", Schema.of(Schema.Type.DOUBLE))
    );
    FormatSpecification formatSpecification = new FormatSpecification(
      Formats.AVRO, schema, Collections.<String, String>emptyMap());
    StreamProperties properties = new StreamProperties(Long.MAX_VALUE, formatSpecification, 1000);
    // take the name from the id so it cannot drift from the stream created above
    setStreamProperties(NAMESPACE_ID.getId(), streamId.getId(), properties);

    // our schemas are compatible
    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(schema.toString());
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 5, 3.14));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userX", 10, 2.34));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userY", 1, 1.23));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 50, 45.67));
    sendStreamEvent(streamId, createAvroEvent(avroSchema, "userZ", 100, 98.76));

    // expected sum(price * num) per user
    double xPrice = 5 * 3.14 + 10 * 2.34;
    double yPrice = 1.23;
    double zPrice = 50 * 45.67 + 100 * 98.76;
    // tolerance for comparing the floating point totals
    double delta = 0.0000001;

    // close the result when done, even if an assertion fails part way through
    try (ExploreExecutionResult result = exploreClient.submit(
           NAMESPACE_ID,
           "SELECT user, sum(num) as total_num, sum(price * num) as total_price " +
             "FROM " + getTableName(streamId) + " GROUP BY user ORDER BY total_price DESC").get()) {
      Assert.assertTrue(result.hasNext());
      Assert.assertEquals(
        Lists.newArrayList(new ColumnDesc("user", "STRING", 1, null),
                           new ColumnDesc("total_num", "BIGINT", 2, null),
                           new ColumnDesc("total_price", "DOUBLE", 3, null)),
        result.getResultSchema());

      // should get 3 rows
      // first row should be for userZ
      List<Object> rowColumns = result.next().getColumns();
      // toString b/c avro returns a utf8 object for strings
      Assert.assertEquals("userZ", rowColumns.get(0).toString());
      Assert.assertEquals(150L, rowColumns.get(1));
      Assert.assertEquals(zPrice, (Double) rowColumns.get(2), delta);

      // 2nd row, should be userX
      rowColumns = result.next().getColumns();
      Assert.assertEquals("userX", rowColumns.get(0).toString());
      Assert.assertEquals(15L, rowColumns.get(1));
      Assert.assertEquals(xPrice, (Double) rowColumns.get(2), delta);

      // 3rd row, should be userY
      rowColumns = result.next().getColumns();
      Assert.assertEquals("userY", rowColumns.get(0).toString());
      Assert.assertEquals(1L, rowColumns.get(1));
      Assert.assertEquals(yPrice, (Double) rowColumns.get(2), delta);

      // shouldn't be any more results
      Assert.assertFalse(result.hasNext());
    }
  } finally {
    dropStream(streamId);
  }
}
/**
 * Returns the table name for the given stream id by delegating to {@link #getTableName(String)} with
 * the value of {@code streamId.getId()}.
 */
private static String getTableName(Id.Stream streamId) {
  String name = streamId.getId();
  return getTableName(name);
}
/**
 * Returns the table name used in queries for the given stream name: the name prefixed with "stream_".
 * The name is used as given; no characters are replaced here.
 */
private static String getTableName(String streamName) {
  final String tablePrefix = "stream_";
  return tablePrefix + streamName;
}
/**
 * Builds a record of the given Avro schema from the values and serializes it with Avro's binary encoder.
 *
 * @param schema Avro schema of the record to create
 * @param values field values, in the order of {@code schema.getFields()}; exactly one value per field
 * @return the binary-encoded record
 * @throws IllegalArgumentException if the number of values differs from the number of schema fields
 * @throws IOException if writing or flushing the encoded record fails
 */
private byte[] createAvroEvent(org.apache.avro.Schema schema, Object... values) throws IOException {
  List<org.apache.avro.Schema.Field> fields = schema.getFields();
  // fail with a clear message instead of an index error (too few values) or silently dropped values (too many)
  if (values.length != fields.size()) {
    throw new IllegalArgumentException(
      "Expected " + fields.size() + " values for schema " + schema.getName() + " but got " + values.length);
  }

  GenericRecordBuilder builder = new GenericRecordBuilder(schema);
  for (int i = 0; i < values.length; i++) {
    builder.set(fields.get(i).name(), values[i]);
  }
  GenericRecord record = builder.build();

  ByteArrayOutputStream out = new ByteArrayOutputStream();
  BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
  DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  writer.write(record, encoder);
  // the encoder buffers, so flush before reading the bytes back
  encoder.flush();
  return out.toByteArray();
}
}