/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.druid;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat;
import org.apache.hadoop.hive.druid.io.HiveDruidSplit;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;
import org.junit.Test;

import io.druid.query.Query;

import junit.framework.TestCase;
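
/**
 * Tests for {@link DruidQueryBasedInputFormat}: partitioning of query
 * intervals into a requested number of splits, and serialization of the
 * supported Druid query types into {@link HiveDruidSplit}s.
 */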
public class TestHiveDruidQueryBasedInputFormat extends TestCase {
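
  // The expected split strings below hardcode -08:00/-07:00 offsets and a
  // duration origin of 1969-12-31T16:00:00.000-08:00, i.e. they assume the
  // JVM default time zone is US Pacific. This is a minimal sketch, assuming
  // those strings were generated under America/Los_Angeles, that pins the
  // default zone so the assertions are deterministic across environments.
  static {
    java.util.TimeZone.setDefault(java.util.TimeZone.getTimeZone("America/Los_Angeles"));
    org.joda.time.DateTimeZone.setDefault(org.joda.time.DateTimeZone.forID("America/Los_Angeles"));
  }
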
@SuppressWarnings("unchecked")
@Test
public void testCreateSplitsIntervals() throws Exception {
DruidQueryBasedInputFormat input = new DruidQueryBasedInputFormat();
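    // createSplitsIntervals is not public, so it is invoked via reflection.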
Method method1 = DruidQueryBasedInputFormat.class.getDeclaredMethod("createSplitsIntervals",
List.class, int.class
);
method1.setAccessible(true);
List<Interval> intervals;
List<List<Interval>> resultList;
List<List<Interval>> expectedResultList;
    // Test 1: one input interval, split into 4
intervals = new ArrayList<>();
    // 2010-01-01T00:00:00Z to 2011-01-01T00:00:00Z
    intervals.add(new Interval(1262304000000L, 1293840000000L, ISOChronology.getInstanceUTC()));
resultList = (List<List<Interval>>) method1.invoke(input, intervals, 4);
expectedResultList = new ArrayList<>();
expectedResultList.add(Arrays
.asList(new Interval(1262304000000L, 1270188000000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1270188000000L, 1278072000000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1278072000000L, 1285956000000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1285956000000L, 1293840000000L, ISOChronology.getInstanceUTC())));
assertEquals(expectedResultList, resultList);
    // Test 2: two input intervals, split into 4
intervals = new ArrayList<>();
    // 2010 (2010-01-01/2011-01-01) and 2012 (2012-01-01/2013-01-01), both UTC
    intervals.add(new Interval(1262304000000L, 1293840000000L, ISOChronology.getInstanceUTC()));
    intervals.add(new Interval(1325376000000L, 1356998400000L, ISOChronology.getInstanceUTC()));
resultList = (List<List<Interval>>) method1.invoke(input, intervals, 4);
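    // The total covered time is divided evenly among splits, so a boundary can
    // fall in the gap between intervals: the second split below takes the tail
    // of 2010 plus the first 12 hours of 2012.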
expectedResultList = new ArrayList<>();
expectedResultList.add(Arrays
.asList(new Interval(1262304000000L, 1278093600000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1278093600000L, 1293840000000L, ISOChronology.getInstanceUTC()),
new Interval(1325376000000L, 1325419200000L, ISOChronology.getInstanceUTC())
));
expectedResultList.add(Arrays
.asList(new Interval(1325419200000L, 1341208800000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1341208800000L, 1356998400000L, ISOChronology.getInstanceUTC())));
assertEquals(expectedResultList, resultList);
    // Test 3: the same two intervals, split into 5
intervals = new ArrayList<>();
intervals.add(new Interval(1262304000000L, 1293840000000L, ISOChronology.getInstanceUTC()));
intervals.add(new Interval(1325376000000L, 1356998400000L, ISOChronology.getInstanceUTC()));
resultList = (List<List<Interval>>) method1.invoke(input, intervals, 5);
expectedResultList = new ArrayList<>();
expectedResultList.add(Arrays
.asList(new Interval(1262304000000L, 1274935680000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1274935680000L, 1287567360000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1287567360000L, 1293840000000L, ISOChronology.getInstanceUTC()),
new Interval(1325376000000L, 1331735040000L, ISOChronology.getInstanceUTC())
));
expectedResultList.add(Arrays
.asList(new Interval(1331735040000L, 1344366720000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1344366720000L, 1356998400000L, ISOChronology.getInstanceUTC())));
assertEquals(expectedResultList, resultList);
    // Test 4: three intervals of different lengths, split into 6
intervals = new ArrayList<>();
    intervals.add(new Interval(1199145600000L, 1201824000000L,
        ISOChronology.getInstanceUTC()
    )); // one month: 2008-01-01/2008-02-01
    intervals.add(new Interval(1325376000000L, 1356998400000L,
        ISOChronology.getInstanceUTC()
    )); // one year: 2012-01-01/2013-01-01
    intervals.add(new Interval(1407283200000L, 1407888000000L,
        ISOChronology.getInstanceUTC()
    )); // 7 days: 2014-08-06/2014-08-13
resultList = (List<List<Interval>>) method1.invoke(input, intervals, 6);
expectedResultList = new ArrayList<>();
expectedResultList.add(Arrays
.asList(new Interval(1199145600000L, 1201824000000L, ISOChronology.getInstanceUTC()),
new Interval(1325376000000L, 1328515200000L, ISOChronology.getInstanceUTC())
));
expectedResultList.add(Arrays
.asList(new Interval(1328515200000L, 1334332800000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1334332800000L, 1340150400000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1340150400000L, 1345968000000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1345968000000L, 1351785600000L, ISOChronology.getInstanceUTC())));
expectedResultList.add(Arrays
.asList(new Interval(1351785600000L, 1356998400000L, ISOChronology.getInstanceUTC()),
new Interval(1407283200000L, 1407888000000L, ISOChronology.getInstanceUTC())
));
assertEquals(expectedResultList, resultList);
}
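
  // Sample Druid queries, written as a user would provide them (naive
  // timestamps without an explicit zone), together with the expected
  // serialized splits. The naive interval endpoints are parsed in the JVM
  // default zone, which is why the expected strings carry explicit
  // -08:00 (PST) and -07:00 (PDT) offsets.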
private static final String TIMESERIES_QUERY =
"{ \"queryType\": \"timeseries\", "
+ " \"dataSource\": \"sample_datasource\", "
+ " \"granularity\": \"day\", "
+ " \"descending\": \"true\", "
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]}";
private static final String TIMESERIES_QUERY_SPLIT =
"[HiveDruidSplit{{\"queryType\":\"timeseries\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_datasource\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2012-01-01T00:00:00.000-08:00/2012-01-03T00:00:00.000-08:00\"]},"
+ "\"descending\":true,"
+ "\"filter\":null,"
+ "\"granularity\":{\"type\":\"duration\",\"duration\":86400000,\"origin\":\"1969-12-31T16:00:00.000-08:00\"},"
+ "\"aggregations\":[],"
+ "\"postAggregations\":[],"
+ "\"context\":null}, [localhost:8082]}]";
private static final String TOPN_QUERY =
"{ \"queryType\": \"topN\", "
+ " \"dataSource\": \"sample_data\", "
+ " \"dimension\": \"sample_dim\", "
+ " \"threshold\": 5, "
+ " \"metric\": \"count\", "
+ " \"aggregations\": [ "
+ " { "
+ " \"type\": \"longSum\", "
+ " \"name\": \"count\", "
+ " \"fieldName\": \"count\" "
+ " }, "
+ " { "
+ " \"type\": \"doubleSum\", "
+ " \"name\": \"some_metric\", "
+ " \"fieldName\": \"some_metric\" "
+ " } "
+ " ], "
+ " \"granularity\": \"all\", "
+ " \"intervals\": [ "
+ " \"2013-08-31T00:00:00.000/2013-09-03T00:00:00.000\" "
+ " ]}";
private static final String TOPN_QUERY_SPLIT =
"[HiveDruidSplit{{\"queryType\":\"topN\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_data\"},"
+ "\"dimension\":{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"sample_dim\",\"outputName\":\"sample_dim\"},"
+ "\"metric\":{\"type\":\"LegacyTopNMetricSpec\",\"metric\":\"count\"},"
+ "\"threshold\":5,"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2013-08-31T00:00:00.000-07:00/2013-09-03T00:00:00.000-07:00\"]},"
+ "\"filter\":null,"
+ "\"granularity\":{\"type\":\"all\"},"
+ "\"aggregations\":[{\"type\":\"longSum\",\"name\":\"count\",\"fieldName\":\"count\"},"
+ "{\"type\":\"doubleSum\",\"name\":\"some_metric\",\"fieldName\":\"some_metric\"}],"
+ "\"postAggregations\":[],"
+ "\"context\":null,"
+ "\"descending\":false}, [localhost:8082]}]";
private static final String GROUP_BY_QUERY =
"{ \"queryType\": \"groupBy\", "
+ " \"dataSource\": \"sample_datasource\", "
+ " \"granularity\": \"day\", "
+ " \"dimensions\": [\"country\", \"device\"], "
+ " \"limitSpec\": {"
+ " \"type\": \"default\","
+ " \"limit\": 5000,"
+ " \"columns\": [\"country\", \"data_transfer\"] }, "
+ " \"aggregations\": [ "
+ " { \"type\": \"longSum\", \"name\": \"total_usage\", \"fieldName\": \"user_count\" }, "
+ " { \"type\": \"doubleSum\", \"name\": \"data_transfer\", \"fieldName\": \"data_transfer\" } "
+ " ], "
+ " \"intervals\": [ \"2012-01-01T00:00:00.000/2012-01-03T00:00:00.000\" ]"
+ " }";
private static final String GROUP_BY_QUERY_SPLIT =
"[HiveDruidSplit{{\"queryType\":\"groupBy\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"sample_datasource\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2012-01-01T00:00:00.000-08:00/2012-01-03T00:00:00.000-08:00\"]},"
+ "\"filter\":null,"
+ "\"granularity\":{\"type\":\"duration\",\"duration\":86400000,\"origin\":\"1969-12-31T16:00:00.000-08:00\"},"
+ "\"dimensions\":[{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"country\",\"outputName\":\"country\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"device\",\"outputName\":\"device\"}],"
+ "\"aggregations\":[{\"type\":\"longSum\",\"name\":\"total_usage\",\"fieldName\":\"user_count\"},"
+ "{\"type\":\"doubleSum\",\"name\":\"data_transfer\",\"fieldName\":\"data_transfer\"}],"
+ "\"postAggregations\":[],"
+ "\"having\":null,"
+ "\"limitSpec\":{\"type\":\"default\",\"columns\":[{\"dimension\":\"country\",\"direction\":\"ascending\",\"dimensionOrder\":{\"type\":\"lexicographic\"}},"
+ "{\"dimension\":\"data_transfer\",\"direction\":\"ascending\",\"dimensionOrder\":{\"type\":\"lexicographic\"}}],\"limit\":5000},"
+ "\"context\":null,"
+ "\"descending\":false}, [localhost:8082]}]";
private static final String SELECT_QUERY =
"{ \"queryType\": \"select\", "
+ " \"dataSource\": \"wikipedia\", \"descending\": \"false\", "
+ " \"dimensions\":[\"robot\",\"namespace\",\"anonymous\",\"unpatrolled\",\"page\",\"language\",\"newpage\",\"user\"], "
+ " \"metrics\":[\"count\",\"added\",\"delta\",\"variation\",\"deleted\"], "
+ " \"granularity\": \"all\", "
+ " \"intervals\": [ \"2013-01-01/2013-01-02\" ], "
+ " \"pagingSpec\":{\"pagingIdentifiers\": {}, \"threshold\":5}, "
+ " \"context\":{\"druid.query.fetch\":true}}";
private static final String SELECT_QUERY_SPLIT =
"[HiveDruidSplit{{\"queryType\":\"select\","
+ "\"dataSource\":{\"type\":\"table\",\"name\":\"wikipedia\"},"
+ "\"intervals\":{\"type\":\"LegacySegmentSpec\",\"intervals\":[\"2013-01-01T00:00:00.000-08:00/2013-01-02T00:00:00.000-08:00\"]},"
+ "\"descending\":false,"
+ "\"filter\":null,"
+ "\"granularity\":{\"type\":\"all\"},"
+ "\"dimensions\":[{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"robot\",\"outputName\":\"robot\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"namespace\",\"outputName\":\"namespace\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"anonymous\",\"outputName\":\"anonymous\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"unpatrolled\",\"outputName\":\"unpatrolled\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"page\",\"outputName\":\"page\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"language\",\"outputName\":\"language\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"newpage\",\"outputName\":\"newpage\"},"
+ "{\"type\":\"LegacyDimensionSpec\",\"dimension\":\"user\",\"outputName\":\"user\"}],"
+ "\"metrics\":[\"count\",\"added\",\"delta\",\"variation\",\"deleted\"],"
+ "\"pagingSpec\":{\"pagingIdentifiers\":{},\"threshold\":5,\"fromNext\":false},"
+ "\"context\":{\"druid.query.fetch\":true}}, [localhost:8082]}]";

  @Test
public void testTimeZone() throws Exception {
DruidQueryBasedInputFormat input = new DruidQueryBasedInputFormat();
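    // getInputSplits is not public either; invoke it through reflection.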
Method method1 = DruidQueryBasedInputFormat.class.getDeclaredMethod(
"getInputSplits", Configuration.class);
method1.setAccessible(true);
    // For each query type: build the configuration, compute the splits, and
    // compare their serialized form against the expected strings above.
Configuration conf = createPropertiesQuery("sample_datasource", Query.TIMESERIES, TIMESERIES_QUERY);
HiveDruidSplit[] resultSplits = (HiveDruidSplit[]) method1.invoke(input, conf);
assertEquals(TIMESERIES_QUERY_SPLIT, Arrays.toString(resultSplits));
conf = createPropertiesQuery("sample_datasource", Query.TOPN, TOPN_QUERY);
resultSplits = (HiveDruidSplit[]) method1.invoke(input, conf);
assertEquals(TOPN_QUERY_SPLIT, Arrays.toString(resultSplits));
conf = createPropertiesQuery("sample_datasource", Query.GROUP_BY, GROUP_BY_QUERY);
resultSplits = (HiveDruidSplit[]) method1.invoke(input, conf);
assertEquals(GROUP_BY_QUERY_SPLIT, Arrays.toString(resultSplits));
conf = createPropertiesQuery("sample_datasource", Query.SELECT, SELECT_QUERY);
resultSplits = (HiveDruidSplit[]) method1.invoke(input, conf);
assertEquals(SELECT_QUERY_SPLIT, Arrays.toString(resultSplits));
}

  private static Configuration createPropertiesQuery(String dataSource, String queryType,
      String jsonQuery) {
Configuration conf = new Configuration();
    // Dummy input directory (required by FileInputFormat), the broker address
    // that will be reported as the split location, and the Druid query to plan.
conf.set(FileInputFormat.INPUT_DIR, "/my/dir");
conf.set(HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS.varname, "localhost:8082");
conf.set(Constants.DRUID_DATA_SOURCE, dataSource);
conf.set(Constants.DRUID_QUERY_JSON, jsonQuery);
conf.set(Constants.DRUID_QUERY_TYPE, queryType);
    // Do not distribute select queries, so the test never contacts a broker.
    conf.setBoolean(HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE.varname, false);
return conf;
}
}