/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.druid;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidSerDe;
import org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.junit.Test;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import io.druid.data.input.Row;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.Query;
import io.druid.query.Result;
import io.druid.query.groupby.GroupByQuery;
import io.druid.query.select.SelectQuery;
import io.druid.query.select.SelectResultValue;
import io.druid.query.timeseries.TimeseriesQuery;
import io.druid.query.timeseries.TimeseriesResultValue;
import io.druid.query.topn.TopNQuery;
import io.druid.query.topn.TopNResultValue;
/**
* Basic tests for Druid SerDe. The examples are taken from Druid 0.9.1.1
* documentation.
*/
public class TestDruidSerDe {
// Timeseries query: JSON request passed to the SerDe (as the table's Druid
// query) and to the record reader for the timeseries case of
// testDruidDeserializer. It declares a longSum aggregation (sample_name1), a
// doubleSum aggregation (sample_name2) and an arithmetic post-aggregation
// (sample_divide).
private static final String TIMESERIES_QUERY =
"{ \"queryType\": \"timeseries\", "
+ " \"dataSource\": \"sample_datasource\", "
+ " \"granularity\": \"day\", "
+ " \"descending\": \"true\", "
+ " \"filter\": { "
+ " \"type\": \"and\", "
+ " \"fields\": [ "
+ " { \"type\": \"selector\", \"dimension\": \"sample_dimension1\", \"value\": \"sample_value1\" }, "
+ " { \"type\": \"or\", "
+ " \"fields\": [ "
+ " { \"type\": \"selector\", \"dimension\": \"sample_dimension2\", \"value\": \"sample_value2\" }, "
+ " { \"type\": \"selector\", \"dimension\": \"sample_dimension3\", \"value\": \"sample_value3\" } "
+ " ] "
+ " } "
+ " ] "
+ " }, "
+ " \"aggregations\": [ "
+ " { \"type\": \"longSum\", \"name\": \"sample_name1\", \"fieldName\": \"sample_fieldName1\" }, "
+ " { \"type\": \"doubleSum\", \"name\": \"sample_name2\", \"fieldName\": \"sample_fieldName2\" } "
+ " ], "
+ " \"postAggregations\": [ "
+ " { \"type\": \"arithmetic\", "
+ " \"name\": \"sample_divide\", "
+ " \"fn\": \"/\", "
+ " \"fields\": [ "
+ " { \"type\": \"fieldAccess\", \"name\": \"postAgg__sample_name1\", \"fieldName\": \"sample_name1\" }, "
+ " { \"type\": \"fieldAccess\", \"name\": \"postAgg__sample_name2\", \"fieldName\": \"sample_name2\" } "
+ " ] "
+ " } "
+ " ], "
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]}";
// Timeseries query results: canned JSON response with two entries, each
// holding a timestamp and the three result values named in the query.
private static final String TIMESERIES_QUERY_RESULTS =
"[ "
+ "{ "
+ " \"timestamp\": \"2012-01-01T00:00:00.000Z\", "
+ " \"result\": { \"sample_name1\": 0, \"sample_name2\": 1.0, \"sample_divide\": 2.2222 } "
+ "}, "
+ "{ "
+ " \"timestamp\": \"2012-01-02T00:00:00.000Z\", "
+ " \"result\": { \"sample_name1\": 2, \"sample_name2\": 3.32, \"sample_divide\": 4 } "
+ "}]";
// Timeseries query results as records: the rows the test expects after
// deserialization, one per result entry, in the order timestamp,
// sample_name1, sample_name2, sample_divide. Timestamps are the
// epoch-millisecond form of the instants in TIMESERIES_QUERY_RESULTS; the
// doubleSum aggregation and the post-aggregation are expected as
// FloatWritable.
private static final Object[][] TIMESERIES_QUERY_RESULTS_RECORDS = new Object[][] {
new Object[] { new TimestampWritable(new Timestamp(1325376000000L)), new LongWritable(0),
new FloatWritable(1.0F), new FloatWritable(2.2222F) },
new Object[] { new TimestampWritable(new Timestamp(1325462400000L)), new LongWritable(2),
new FloatWritable(3.32F), new FloatWritable(4F) }
};
// TopN query: JSON request passed to the SerDe and to the record reader for
// the topN case of testDruidDeserializer. It ranks the sample_dim dimension
// by the count metric with a threshold of 5, and declares two aggregations
// (count, some_metric) plus one post-aggregation (sample_divide).
private static final String TOPN_QUERY =
"{ \"queryType\": \"topN\", "
+ " \"dataSource\": \"sample_data\", "
+ " \"dimension\": \"sample_dim\", "
+ " \"threshold\": 5, "
+ " \"metric\": \"count\", "
+ " \"granularity\": \"all\", "
+ " \"filter\": { "
+ " \"type\": \"and\", "
+ " \"fields\": [ "
+ " { "
+ " \"type\": \"selector\", "
+ " \"dimension\": \"dim1\", "
+ " \"value\": \"some_value\" "
+ " }, "
+ " { "
+ " \"type\": \"selector\", "
+ " \"dimension\": \"dim2\", "
+ " \"value\": \"some_other_val\" "
+ " } "
+ " ] "
+ " }, "
+ " \"aggregations\": [ "
+ " { "
+ " \"type\": \"longSum\", "
+ " \"name\": \"count\", "
+ " \"fieldName\": \"count\" "
+ " }, "
+ " { "
+ " \"type\": \"doubleSum\", "
+ " \"name\": \"some_metric\", "
+ " \"fieldName\": \"some_metric\" "
+ " } "
+ " ], "
+ " \"postAggregations\": [ "
+ " { "
+ " \"type\": \"arithmetic\", "
+ " \"name\": \"sample_divide\", "
+ " \"fn\": \"/\", "
+ " \"fields\": [ "
+ " { "
+ " \"type\": \"fieldAccess\", "
+ " \"name\": \"some_metric\", "
+ " \"fieldName\": \"some_metric\" "
+ " }, "
+ " { "
+ " \"type\": \"fieldAccess\", "
+ " \"name\": \"count\", "
+ " \"fieldName\": \"count\" "
+ " } "
+ " ] "
+ " } "
+ " ], "
+ " \"intervals\": [ "
+ " \"2013-08-31T00:00:00.000/2013-09-03T00:00:00.000\" "
+ " ]}";
// TopN query results: canned JSON response with a single timestamped entry
// whose "result" array holds five elements, one per sample_dim value.
private static final String TOPN_QUERY_RESULTS =
"[ "
+ " { "
+ " \"timestamp\": \"2013-08-31T00:00:00.000Z\", "
+ " \"result\": [ "
+ " { "
+ " \"sample_dim\": \"dim1_val\", "
+ " \"count\": 111, "
+ " \"some_metric\": 10669, "
+ " \"sample_divide\": 96.11711711711712 "
+ " }, "
+ " { "
+ " \"sample_dim\": \"another_dim1_val\", "
+ " \"count\": 88, "
+ " \"some_metric\": 28344, "
+ " \"sample_divide\": 322.09090909090907 "
+ " }, "
+ " { "
+ " \"sample_dim\": \"dim1_val3\", "
+ " \"count\": 70, "
+ " \"some_metric\": 871, "
+ " \"sample_divide\": 12.442857142857143 "
+ " }, "
+ " { "
+ " \"sample_dim\": \"dim1_val4\", "
+ " \"count\": 62, "
+ " \"some_metric\": 815, "
+ " \"sample_divide\": 13.14516129032258 "
+ " }, "
+ " { "
+ " \"sample_dim\": \"dim1_val5\", "
+ " \"count\": 60, "
+ " \"some_metric\": 2787, "
+ " \"sample_divide\": 46.45 "
+ " } "
+ " ] "
+ " }]";
// TopN query results as records: the rows the test expects after
// deserialization, in the order timestamp, sample_dim, count, some_metric,
// sample_divide. The single result entry is expected to yield five rows that
// all carry the entry's timestamp (as epoch milliseconds).
private static final Object[][] TOPN_QUERY_RESULTS_RECORDS = new Object[][] {
new Object[] { new TimestampWritable(new Timestamp(1377907200000L)), new Text("dim1_val"),
new LongWritable(111), new FloatWritable(10669F),
new FloatWritable(96.11711711711712F) },
new Object[] { new TimestampWritable(new Timestamp(1377907200000L)),
new Text("another_dim1_val"), new LongWritable(88), new FloatWritable(28344F),
new FloatWritable(322.09090909090907F) },
new Object[] { new TimestampWritable(new Timestamp(1377907200000L)),
new Text("dim1_val3"), new LongWritable(70), new FloatWritable(871F),
new FloatWritable(12.442857142857143F) },
new Object[] { new TimestampWritable(new Timestamp(1377907200000L)),
new Text("dim1_val4"), new LongWritable(62), new FloatWritable(815F),
new FloatWritable(13.14516129032258F) },
new Object[] { new TimestampWritable(new Timestamp(1377907200000L)),
new Text("dim1_val5"), new LongWritable(60), new FloatWritable(2787F),
new FloatWritable(46.45F) }
};
// GroupBy query: JSON request passed to the SerDe and to the record reader
// for the groupBy case of testDruidDeserializer. It groups by the country and
// device dimensions and declares two aggregations (total_usage,
// data_transfer) plus one post-aggregation (avg_usage).
private static final String GROUP_BY_QUERY =
"{ "
+ " \"queryType\": \"groupBy\", "
+ " \"dataSource\": \"sample_datasource\", "
+ " \"granularity\": \"day\", "
+ " \"dimensions\": [\"country\", \"device\"], "
+ " \"limitSpec\": {"
+ " \"type\": \"default\","
+ " \"limit\": 5000,"
+ " \"columns\": [\"country\", \"data_transfer\"] }, "
+ " \"filter\": { "
+ " \"type\": \"and\", "
+ " \"fields\": [ "
+ " { \"type\": \"selector\", \"dimension\": \"carrier\", \"value\": \"AT&T\" }, "
+ " { \"type\": \"or\", "
+ " \"fields\": [ "
+ " { \"type\": \"selector\", \"dimension\": \"make\", \"value\": \"Apple\" }, "
+ " { \"type\": \"selector\", \"dimension\": \"make\", \"value\": \"Samsung\" } "
+ " ] "
+ " } "
+ " ] "
+ " }, "
+ " \"aggregations\": [ "
+ " { \"type\": \"longSum\", \"name\": \"total_usage\", \"fieldName\": \"user_count\" }, "
+ " { \"type\": \"doubleSum\", \"name\": \"data_transfer\", \"fieldName\": \"data_transfer\" } "
+ " ], "
+ " \"postAggregations\": [ "
+ " { \"type\": \"arithmetic\", "
+ " \"name\": \"avg_usage\", "
+ " \"fn\": \"/\", "
+ " \"fields\": [ "
+ " { \"type\": \"fieldAccess\", \"fieldName\": \"data_transfer\" }, "
+ " { \"type\": \"fieldAccess\", \"fieldName\": \"total_usage\" } "
+ " ] "
+ " } "
+ " ], "
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ], "
+ " \"having\": { "
+ " \"type\": \"greaterThan\", "
+ " \"aggregation\": \"total_usage\", "
+ " \"value\": 100 "
+ " }}";
// GroupBy query results: canned JSON response with two rows, each holding a
// version, a timestamp and an "event" object with the dimension and
// aggregation values.
private static final String GROUP_BY_QUERY_RESULTS =
"[ "
+ " { "
+ " \"version\" : \"v1\", "
+ " \"timestamp\" : \"2012-01-01T00:00:00.000Z\", "
+ " \"event\" : { "
+ " \"country\" : \"India\", "
+ " \"device\" : \"phone\", "
+ " \"total_usage\" : 88, "
+ " \"data_transfer\" : 29.91233453, "
+ " \"avg_usage\" : 60.32 "
+ " } "
+ " }, "
+ " { "
+ " \"version\" : \"v1\", "
+ " \"timestamp\" : \"2012-01-01T00:00:12.000Z\", "
+ " \"event\" : { "
+ " \"country\" : \"Spain\", "
+ " \"device\" : \"pc\", "
+ " \"total_usage\" : 16, "
+ " \"data_transfer\" : 172.93494959, "
+ " \"avg_usage\" : 6.333333 "
+ " } "
+ " }]";
// GroupBy query results as records: the rows the test expects after
// deserialization, one per result row, in the order timestamp, country,
// device, total_usage, data_transfer, avg_usage. Timestamps are the
// epoch-millisecond form of the instants in GROUP_BY_QUERY_RESULTS.
private static final Object[][] GROUP_BY_QUERY_RESULTS_RECORDS = new Object[][] {
new Object[] { new TimestampWritable(new Timestamp(1325376000000L)), new Text("India"),
new Text("phone"), new LongWritable(88), new FloatWritable(29.91233453F),
new FloatWritable(60.32F) },
new Object[] { new TimestampWritable(new Timestamp(1325376012000L)), new Text("Spain"),
new Text("pc"), new LongWritable(16), new FloatWritable(172.93494959F),
new FloatWritable(6.333333F) }
};
// Select query: JSON request passed to the SerDe and to the record reader for
// the select case of testDruidDeserializer. It lists eight dimensions and
// five metrics and pages with a threshold of 5.
private static final String SELECT_QUERY =
"{ \"queryType\": \"select\", "
+ " \"dataSource\": \"wikipedia\", \"descending\": \"false\", "
+ " \"dimensions\":[\"robot\",\"namespace\",\"anonymous\",\"unpatrolled\",\"page\",\"language\",\"newpage\",\"user\"], "
+ " \"metrics\":[\"count\",\"added\",\"delta\",\"variation\",\"deleted\"], "
+ " \"granularity\": \"all\", "
+ " \"intervals\": [ \"2013-01-01/2013-01-02\" ], "
+ " \"pagingSpec\":{\"pagingIdentifiers\": {}, \"threshold\":5} }";
// Select query results: canned JSON response with a single timestamped entry
// whose "events" array holds five events (offsets 0 to 4). Each event carries
// its own timestamp plus the dimension and metric values.
private static final String SELECT_QUERY_RESULTS =
"[{ "
+ " \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
+ " \"result\" : { "
+ " \"pagingIdentifiers\" : { "
+ " \"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\" : 4 }, "
+ " \"events\" : [ { "
+ " \"segmentId\" : \"wikipedia_editstream_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\", "
+ " \"offset\" : 0, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
+ " \"robot\" : \"1\", "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
+ " \"page\" : \"11._korpus_(NOVJ)\", "
+ " \"language\" : \"sl\", "
+ " \"newpage\" : \"0\", "
+ " \"user\" : \"EmausBot\", "
+ " \"count\" : 1.0, "
+ " \"added\" : 39.0, "
+ " \"delta\" : 39.0, "
+ " \"variation\" : 39.0, "
+ " \"deleted\" : 0.0 "
+ " } "
+ " }, { "
+ " \"segmentId\" : \"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\", "
+ " \"offset\" : 1, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:00.000Z\", "
+ " \"robot\" : \"0\", "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
+ " \"page\" : \"112_U.S._580\", "
+ " \"language\" : \"en\", "
+ " \"newpage\" : \"1\", "
+ " \"user\" : \"MZMcBride\", "
+ " \"count\" : 1.0, "
+ " \"added\" : 70.0, "
+ " \"delta\" : 70.0, "
+ " \"variation\" : 70.0, "
+ " \"deleted\" : 0.0 "
+ " } "
+ " }, { "
+ " \"segmentId\" : \"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\", "
+ " \"offset\" : 2, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
+ " \"robot\" : \"0\", "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
+ " \"page\" : \"113_U.S._243\", "
+ " \"language\" : \"en\", "
+ " \"newpage\" : \"1\", "
+ " \"user\" : \"MZMcBride\", "
+ " \"count\" : 1.0, "
+ " \"added\" : 77.0, "
+ " \"delta\" : 77.0, "
+ " \"variation\" : 77.0, "
+ " \"deleted\" : 0.0 "
+ " } "
+ " }, { "
+ " \"segmentId\" : \"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\", "
+ " \"offset\" : 3, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
+ " \"robot\" : \"0\", "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
+ " \"page\" : \"113_U.S._73\", "
+ " \"language\" : \"en\", "
+ " \"newpage\" : \"1\", "
+ " \"user\" : \"MZMcBride\", "
+ " \"count\" : 1.0, "
+ " \"added\" : 70.0, "
+ " \"delta\" : 70.0, "
+ " \"variation\" : 70.0, "
+ " \"deleted\" : 0.0 "
+ " } "
+ " }, { "
+ " \"segmentId\" : \"wikipedia_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\", "
+ " \"offset\" : 4, "
+ " \"event\" : { "
+ " \"timestamp\" : \"2013-01-01T00:00:12.000Z\", "
+ " \"robot\" : \"0\", "
+ " \"namespace\" : \"article\", "
+ " \"anonymous\" : \"0\", "
+ " \"unpatrolled\" : \"0\", "
+ " \"page\" : \"113_U.S._756\", "
+ " \"language\" : \"en\", "
+ " \"newpage\" : \"1\", "
+ " \"user\" : \"MZMcBride\", "
+ " \"count\" : 1.0, "
+ " \"added\" : 68.0, "
+ " \"delta\" : 68.0, "
+ " \"variation\" : 68.0, "
+ " \"deleted\" : 0.0 "
+ " } "
+ " } ] }} ]";
// Select query results as records: the rows the test expects after
// deserialization, one per event. Each row holds the event's own timestamp
// (as epoch milliseconds), then the eight dimensions in the order listed in
// SELECT_QUERY as Text, then the five metrics in the order listed in
// SELECT_QUERY as FloatWritable.
private static final Object[][] SELECT_QUERY_RESULTS_RECORDS = new Object[][] {
new Object[] { new TimestampWritable(new Timestamp(1356998400000L)), new Text("1"),
new Text("article"), new Text("0"), new Text("0"),
new Text("11._korpus_(NOVJ)"), new Text("sl"), new Text("0"),
new Text("EmausBot"),
new FloatWritable(1.0F), new FloatWritable(39.0F), new FloatWritable(39.0F),
new FloatWritable(39.0F), new FloatWritable(0.0F) },
new Object[] { new TimestampWritable(new Timestamp(1356998400000L)), new Text("0"),
new Text("article"), new Text("0"), new Text("0"),
new Text("112_U.S._580"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new FloatWritable(1.0F), new FloatWritable(70.0F), new FloatWritable(70.0F),
new FloatWritable(70.0F), new FloatWritable(0.0F) },
new Object[] { new TimestampWritable(new Timestamp(1356998412000L)), new Text("0"),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._243"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new FloatWritable(1.0F), new FloatWritable(77.0F), new FloatWritable(77.0F),
new FloatWritable(77.0F), new FloatWritable(0.0F) },
new Object[] { new TimestampWritable(new Timestamp(1356998412000L)), new Text("0"),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._73"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new FloatWritable(1.0F), new FloatWritable(70.0F), new FloatWritable(70.0F),
new FloatWritable(70.0F), new FloatWritable(0.0F) },
new Object[] { new TimestampWritable(new Timestamp(1356998412000L)), new Text("0"),
new Text("article"), new Text("0"), new Text("0"),
new Text("113_U.S._756"), new Text("en"), new Text("1"), new Text("MZMcBride"),
new FloatWritable(1.0F), new FloatWritable(68.0F), new FloatWritable(68.0F),
new FloatWritable(68.0F), new FloatWritable(0.0F) }
};
/**
 * Checks deserialization of Druid query results for each query type exercised
 * here: timeseries, topN, groupBy and select.
 *
 * <p>A single SerDe instance is re-initialized with the table properties of
 * each query in turn, and the canned JSON results of that query are then
 * compared against the expected records.
 *
 * @throws SerDeException if the SerDe cannot be initialized or a row cannot
 *           be deserialized
 * @throws JsonParseException if a query or result fixture is not valid JSON
 * @throws JsonMappingException if a query or result fixture cannot be mapped
 *           to the Druid classes
 * @throws NoSuchFieldException if a reader field accessed through reflection
 *           does not exist
 * @throws SecurityException if reflective access is denied
 * @throws IllegalArgumentException if a reflective call receives a bad argument
 * @throws IllegalAccessException if a reflective member is not accessible
 * @throws IOException if reading the fixtures or the records fails
 * @throws InterruptedException if the record reader is interrupted
 * @throws NoSuchMethodException if a reader method accessed through reflection
 *           does not exist
 * @throws InvocationTargetException if a reflectively invoked method throws
 */
@Test
public void testDruidDeserializer()
    throws SerDeException, JsonParseException, JsonMappingException,
    NoSuchFieldException, SecurityException, IllegalArgumentException,
    IllegalAccessException, IOException, InterruptedException,
    NoSuchMethodException, InvocationTargetException {
  // One SerDe and one configuration are shared by all four scenarios
  final QTestDruidSerDe serDe = new QTestDruidSerDe();
  final Configuration conf = new Configuration();

  // Timeseries query
  final Properties timeseriesProps =
      createPropertiesQuery("sample_datasource", Query.TIMESERIES, TIMESERIES_QUERY);
  SerDeUtils.initializeSerDe(serDe, conf, timeseriesProps, null);
  deserializeQueryResults(
      serDe, Query.TIMESERIES, TIMESERIES_QUERY,
      TIMESERIES_QUERY_RESULTS, TIMESERIES_QUERY_RESULTS_RECORDS);

  // TopN query
  final Properties topNProps =
      createPropertiesQuery("sample_data", Query.TOPN, TOPN_QUERY);
  SerDeUtils.initializeSerDe(serDe, conf, topNProps, null);
  deserializeQueryResults(
      serDe, Query.TOPN, TOPN_QUERY,
      TOPN_QUERY_RESULTS, TOPN_QUERY_RESULTS_RECORDS);

  // GroupBy query
  final Properties groupByProps =
      createPropertiesQuery("sample_datasource", Query.GROUP_BY, GROUP_BY_QUERY);
  SerDeUtils.initializeSerDe(serDe, conf, groupByProps, null);
  deserializeQueryResults(
      serDe, Query.GROUP_BY, GROUP_BY_QUERY,
      GROUP_BY_QUERY_RESULTS, GROUP_BY_QUERY_RESULTS_RECORDS);

  // Select query
  final Properties selectProps =
      createPropertiesQuery("wikipedia", Query.SELECT, SELECT_QUERY);
  SerDeUtils.initializeSerDe(serDe, conf, selectProps, null);
  deserializeQueryResults(
      serDe, Query.SELECT, SELECT_QUERY,
      SELECT_QUERY_RESULTS, SELECT_QUERY_RESULTS_RECORDS);
}
/**
 * Builds the table properties that describe a Druid query to the SerDe.
 *
 * @param dataSource value stored under {@code Constants.DRUID_DATA_SOURCE}
 * @param queryType value stored under {@code Constants.DRUID_QUERY_TYPE}
 * @param jsonQuery value stored under {@code Constants.DRUID_QUERY_JSON}
 * @return a new {@link Properties} holding exactly these three entries
 */
private static Properties createPropertiesQuery(String dataSource, String queryType,
    String jsonQuery
) {
  final Properties queryProperties = new Properties();
  queryProperties.setProperty(Constants.DRUID_DATA_SOURCE, dataSource);
  queryProperties.setProperty(Constants.DRUID_QUERY_TYPE, queryType);
  queryProperties.setProperty(Constants.DRUID_QUERY_JSON, jsonQuery);
  return queryProperties;
}
/**
 * Replays canned Druid results through the record reader that matches
 * {@code queryType} and checks that the SerDe deserializes every row into the
 * expected field values.
 *
 * <p>The parsed query and the result iterator are injected through reflection
 * into the reader's {@code query} and {@code results} fields. The check is run
 * twice over the same results: once by pulling rows with
 * {@code next(key, value)} and once with {@code nextKeyValue()} /
 * {@code getCurrentValue()}.
 *
 * @param serDe SerDe whose object inspector and {@code deserialize} are under test
 * @param queryType one of {@code Query.TIMESERIES}, {@code Query.TOPN},
 *          {@code Query.GROUP_BY} or {@code Query.SELECT}
 * @param jsonQuery JSON text of the Druid query
 * @param resultString JSON text of the Druid results for that query
 * @param records expected field values, one inner array per expected row
 * @throws IllegalArgumentException if {@code queryType} is not one of the four
 *           supported types
 */
private static void deserializeQueryResults(DruidSerDe serDe, String queryType, String jsonQuery,
    String resultString, Object[][] records
) throws SerDeException, JsonParseException,
    JsonMappingException, IOException, NoSuchFieldException, SecurityException,
    IllegalArgumentException, IllegalAccessException, InterruptedException,
    NoSuchMethodException, InvocationTargetException {
  // Parse the fixtures and pick the reader for this query type
  final Query<?> query;
  final DruidQueryRecordReader<?, ?> reader;
  final List<?> resultsList;
  ObjectMapper mapper = new DefaultObjectMapper();
  switch (queryType) {
  case Query.TIMESERIES:
    query = mapper.readValue(jsonQuery, TimeseriesQuery.class);
    reader = new DruidTimeseriesQueryRecordReader();
    resultsList = mapper.readValue(resultString,
        new TypeReference<List<Result<TimeseriesResultValue>>>() {
        }
    );
    break;
  case Query.TOPN:
    query = mapper.readValue(jsonQuery, TopNQuery.class);
    reader = new DruidTopNQueryRecordReader();
    resultsList = mapper.readValue(resultString,
        new TypeReference<List<Result<TopNResultValue>>>() {
        }
    );
    break;
  case Query.GROUP_BY:
    query = mapper.readValue(jsonQuery, GroupByQuery.class);
    reader = new DruidGroupByQueryRecordReader();
    resultsList = mapper.readValue(resultString,
        new TypeReference<List<Row>>() {
        }
    );
    break;
  case Query.SELECT:
    query = mapper.readValue(jsonQuery, SelectQuery.class);
    reader = new DruidSelectQueryRecordReader();
    resultsList = mapper.readValue(resultString,
        new TypeReference<List<Result<SelectResultValue>>>() {
        }
    );
    break;
  default:
    // Fail with a clear message instead of an NPE from the reflection below
    throw new IllegalArgumentException("Unsupported query type: " + queryType);
  }

  // Inject the query into the reader
  Field queryField = DruidQueryRecordReader.class.getDeclaredField("query");
  queryField.setAccessible(true);
  queryField.set(reader, query);
  if (reader instanceof DruidGroupByQueryRecordReader) {
    // The group-by reader needs initExtractors() invoked once the query is set
    Method initExtractors =
        DruidGroupByQueryRecordReader.class.getDeclaredMethod("initExtractors");
    initExtractors.setAccessible(true);
    initExtractors.invoke(reader);
  }
  Field resultsField = DruidQueryRecordReader.class.getDeclaredField("results");
  resultsField.setAccessible(true);

  // Get the row structure
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

  // Check mapred
  resultsField.set(reader, resultsList.iterator());
  DruidWritable writable = new DruidWritable();
  int pos = 0;
  while (reader.next(NullWritable.get(), writable)) {
    assertRowEquals(records, pos, oi, fieldRefs, serDe.deserialize(writable));
    pos++;
  }
  assertEquals("Number of rows read with next(key, value)", records.length, pos);

  // Check mapreduce: rewind by injecting a fresh iterator over the same results
  resultsField.set(reader, resultsList.iterator());
  pos = 0;
  while (reader.nextKeyValue()) {
    assertRowEquals(records, pos, oi, fieldRefs, serDe.deserialize(reader.getCurrentValue()));
    pos++;
  }
  assertEquals("Number of rows read with nextKeyValue()", records.length, pos);
}

/**
 * Asserts that a deserialized row has the field values expected at position
 * {@code pos} of {@code records}.
 *
 * @param records expected field values, one inner array per expected row
 * @param pos zero-based index of the row being checked
 * @param oi struct inspector used to read the fields of {@code row}
 * @param fieldRefs field references of {@code oi}, in column order
 * @param row deserialized row
 */
private static void assertRowEquals(Object[][] records, int pos, StructObjectInspector oi,
    List<? extends StructField> fieldRefs, Object row) {
  if (pos >= records.length) {
    // Report surplus rows as a test failure rather than an index error
    throw new AssertionError(
        "Reader produced more rows than the " + records.length + " expected");
  }
  Object[] expectedFieldsData = records[pos];
  assertEquals("Number of fields in row " + pos, expectedFieldsData.length, fieldRefs.size());
  for (int i = 0; i < fieldRefs.size(); i++) {
    Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
    assertEquals("Field " + i, expectedFieldsData[i], fieldData);
  }
}
// Column names of the table used by testDruidSerializer: the __time column
// followed by eight value columns (c0 to c7).
private static final String COLUMN_NAMES = "__time,c0,c1,c2,c3,c4,c5,c6,c7";
// Hive types of the columns in COLUMN_NAMES, position by position.
private static final String COLUMN_TYPES = "timestamp,string,double,float,decimal(38,18),bigint,int,smallint,tinyint";
// Input row handed to the SerDe: one writable per column of COLUMN_TYPES,
// plus a trailing timestamp for the granularity column that serializeObject
// appends to the object inspector.
private static final Object[] ROW_OBJECT = new Object[] {
new TimestampWritable(new Timestamp(1377907200000L)),
new Text("dim1_val"),
new DoubleWritable(10669.3D),
new FloatWritable(10669.45F),
new HiveDecimalWritable(HiveDecimal.create(1064.34D)),
new LongWritable(1113939),
new IntWritable(1112123),
new ShortWritable((short) 12),
new ByteWritable((byte) 0),
new TimestampWritable(new Timestamp(1377907200000L)) // granularity
};
// Expected serialization of ROW_OBJECT: column name mapped to a plain Java
// value. Both timestamps are expected as epoch-millisecond longs and the
// decimal as a double. The "__time_granularity" key is presumably the value
// of Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME - TODO confirm.
private static final DruidWritable DRUID_WRITABLE = new DruidWritable(
ImmutableMap.<String, Object>builder()
.put("__time", 1377907200000L)
.put("c0", "dim1_val")
.put("c1", 10669.3D)
.put("c2", 10669.45F)
.put("c3", 1064.34D)
.put("c4", 1113939L)
.put("c5", 1112123)
.put("c6", (short) 12)
.put("c7", (byte) 0)
.put("__time_granularity", 1377907200000L)
.build());
/**
 * Checks serialization of a row that mixes all the column types listed in
 * {@code COLUMN_TYPES}: the SerDe is initialized from the column names and
 * types, {@code ROW_OBJECT} is serialized, and the outcome is compared with
 * {@code DRUID_WRITABLE}.
 *
 * @throws SerDeException if the SerDe cannot be initialized or the row cannot
 *           be serialized
 * @throws JsonParseException declared for symmetry with the deserializer test
 * @throws JsonMappingException declared for symmetry with the deserializer test
 * @throws NoSuchFieldException declared for symmetry with the deserializer test
 * @throws SecurityException declared for symmetry with the deserializer test
 * @throws IllegalArgumentException declared for symmetry with the deserializer test
 * @throws IllegalAccessException declared for symmetry with the deserializer test
 * @throws IOException declared for symmetry with the deserializer test
 * @throws InterruptedException declared for symmetry with the deserializer test
 * @throws NoSuchMethodException declared for symmetry with the deserializer test
 * @throws InvocationTargetException declared for symmetry with the deserializer test
 */
@Test
public void testDruidSerializer()
    throws SerDeException, JsonParseException, JsonMappingException,
    NoSuchFieldException, SecurityException, IllegalArgumentException,
    IllegalAccessException, IOException, InterruptedException,
    NoSuchMethodException, InvocationTargetException {
  final DruidSerDe serDe = new DruidSerDe();
  final Configuration conf = new Configuration();

  // Mixed source (all types)
  final Properties sourceProperties = createPropertiesSource(COLUMN_NAMES, COLUMN_TYPES);
  SerDeUtils.initializeSerDe(serDe, conf, sourceProperties, null);
  serializeObject(sourceProperties, serDe, ROW_OBJECT, DRUID_WRITABLE);
}
/**
 * Builds the table properties that describe a table's columns to the SerDe.
 *
 * @param columnNames value stored under {@code serdeConstants.LIST_COLUMNS}
 * @param columnTypes value stored under {@code serdeConstants.LIST_COLUMN_TYPES}
 * @return a new {@link Properties} holding exactly these two entries
 */
private static Properties createPropertiesSource(String columnNames, String columnTypes) {
  final Properties sourceProperties = new Properties();
  sourceProperties.setProperty(serdeConstants.LIST_COLUMNS, columnNames);
  sourceProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnTypes);
  return sourceProperties;
}
/**
 * Serializes {@code rowObject} with {@code serDe} and checks the outcome
 * against {@code druidWritable}.
 *
 * <p>The object inspector handed to the SerDe is built from the column names
 * and types found in {@code properties}, with one extra timestamp column
 * named {@code Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME} appended at the
 * end. {@code rowObject} must therefore carry one more value than there are
 * table columns.
 *
 * @param properties table properties holding the column names and types
 * @param serDe SerDe under test
 * @param rowObject field values of the row to serialize, in column order
 * @param druidWritable expected result of the serialization
 * @throws SerDeException if the SerDe fails to serialize the row
 */
private static void serializeObject(Properties properties, DruidSerDe serDe,
    Object[] rowObject, DruidWritable druidWritable) throws SerDeException {
  // Build OI with timestamp granularity column
  final List<String> columnNames = new ArrayList<>();
  final List<PrimitiveTypeInfo> columnTypes = new ArrayList<>();
  List<ObjectInspector> inspectors = new ArrayList<>();
  columnNames.addAll(Utilities.getColumnNames(properties));
  columnNames.add(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME);
  columnTypes.addAll(Lists.transform(Utilities.getColumnTypes(properties),
      new Function<String, PrimitiveTypeInfo>() {
        @Override
        public PrimitiveTypeInfo apply(String type) {
          return TypeInfoFactory.getPrimitiveTypeInfo(type);
        }
      }
  ));
  columnTypes.add(TypeInfoFactory.getPrimitiveTypeInfo("timestamp"));
  inspectors.addAll(Lists.transform(columnTypes,
      new Function<PrimitiveTypeInfo, ObjectInspector>() {
        @Override
        public ObjectInspector apply(PrimitiveTypeInfo type) {
          return PrimitiveObjectInspectorFactory
              .getPrimitiveWritableObjectInspector(type);
        }
      }
  ));
  ObjectInspector inspector = ObjectInspectorFactory
      .getStandardStructObjectInspector(columnNames, inspectors);
  // Serialize
  DruidWritable writable = (DruidWritable) serDe.serialize(rowObject, inspector);
  // Check result against the expectation supplied by the caller, not a
  // class-level constant, so the helper honours its druidWritable parameter
  assertEquals("Number of serialized entries",
      druidWritable.getValue().size(), writable.getValue().size());
  for (Entry<String, Object> e : druidWritable.getValue().entrySet()) {
    assertEquals("Value of column " + e.getKey(),
        e.getValue(), writable.getValue().get(e.getKey()));
  }
}
}