/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.druid.io;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.calcite.adapter.druid.DruidDateTimeUtils;
import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
import org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.common.collect.Lists;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.HttpClientConfig;
import com.metamx.http.client.HttpClientInit;
import com.metamx.http.client.Request;

import io.druid.query.BaseQuery;
import io.druid.query.Druids;
import io.druid.query.Druids.SegmentMetadataQueryBuilder;
import io.druid.query.Druids.SelectQueryBuilder;
import io.druid.query.Druids.TimeBoundaryQueryBuilder;
import io.druid.query.LocatedSegmentDescriptor;
import io.druid.query.Query;
import io.druid.query.Result;
import io.druid.query.SegmentDescriptor;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.query.spec.MultipleSpecificSegmentSpec;
import io.druid.query.timeboundary.TimeBoundaryQuery;
import io.druid.query.timeboundary.TimeBoundaryResultValue;

/**
 * Druid query based input format.
 *
 * Given a query and the Druid broker address, it will send it, and retrieve
 * and parse the results.
 */
public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidWritable>
        implements org.apache.hadoop.mapred.InputFormat<NullWritable, DruidWritable> {

  protected static final Logger LOG = LoggerFactory.getLogger(DruidQueryBasedInputFormat.class);

  @Override
  public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
          throws IOException {
    return getInputSplits(job);
  }

  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    return Arrays.<InputSplit>asList(getInputSplits(context.getConfiguration()));
  }

  @SuppressWarnings("deprecation")
  private HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
    String address = HiveConf.getVar(conf,
            HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    if (StringUtils.isEmpty(address)) {
      throw new IOException("Druid broker address not specified in configuration");
    }
    String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
    String druidQueryType;
    if (StringUtils.isEmpty(druidQuery)) {
      // Empty, maybe because CBO did not run; we fall back to
      // full Select query
      if (LOG.isWarnEnabled()) {
        LOG.warn("Druid query is empty; creating Select query");
      }
      String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
      if (dataSource == null) {
        throw new IOException("Druid data source cannot be empty");
      }
      druidQuery = createSelectStarQuery(dataSource);
      druidQueryType = Query.SELECT;
    } else {
      druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
      if (druidQueryType == null) {
        throw new IOException("Druid query type not recognized");
      }
    }

    // hive depends on FileSplits
    Job job = new Job(conf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] paths = FileInputFormat.getInputPaths(jobContext);

    // We need to deserialize and serialize query so intervals are written in the JSON
    // Druid query with user timezone, as this is default Hive time semantics.
    // Then, create splits with the Druid queries.
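    // Timeseries, topN and groupBy queries are answered by the broker and become a single
    // split; Select queries are either distributed across the data nodes serving the
    // segments or split against the broker based on the row-count threshold (see
    // distributeSelectQuery and splitSelectQuery below).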
    switch (druidQueryType) {
      case Query.TIMESERIES:
      case Query.TOPN:
      case Query.GROUP_BY:
        return new HiveDruidSplit[] { new HiveDruidSplit(deserializeSerialize(druidQuery),
                paths[0], new String[] {address}) };
      case Query.SELECT:
        SelectQuery selectQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(
                druidQuery, SelectQuery.class);
        boolean distributed = HiveConf.getBoolVar(conf,
                HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE);
        if (distributed) {
          return distributeSelectQuery(conf, address, selectQuery, paths[0]);
        } else {
          return splitSelectQuery(conf, address, selectQuery, paths[0]);
        }
      default:
        throw new IOException("Druid query type not recognized");
    }
  }

  private static String createSelectStarQuery(String dataSource) throws IOException {
    // Create Select query
    SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
    builder.dataSource(dataSource);
    final List<Interval> intervals = Arrays.asList();
    builder.intervals(intervals);
    builder.pagingSpec(PagingSpec.newSpec(1));
    Map<String, Object> context = new HashMap<>();
    context.put(Constants.DRUID_QUERY_FETCH, false);
    builder.context(context);
    return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(builder.build());
  }

  /* New method that distributes the Select query by creating splits containing
   * information about different Druid nodes that have the data for the given
   * query. */
  private static HiveDruidSplit[] distributeSelectQuery(Configuration conf, String address,
          SelectQuery query, Path dummyPath) throws IOException {
    // If it has a limit, we use it and we do not distribute the query
    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
    if (isFetch) {
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    final String intervals =
            StringUtils.join(query.getIntervals(), ","); // Comma-separated intervals without brackets
    final String request = String.format(
            "http://%s/druid/v2/datasources/%s/candidates?intervals=%s",
            address, query.getDataSource().getNames().get(0), URLEncoder.encode(intervals, "UTF-8"));
    final InputStream response;
    try {
      response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
              new Request(HttpMethod.GET, new URL(request)));
    } catch (Exception e) {
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Retrieve results
    final List<LocatedSegmentDescriptor> segmentDescriptors;
    try {
      segmentDescriptors = DruidStorageHandlerUtils.JSON_MAPPER.readValue(response,
              new TypeReference<List<LocatedSegmentDescriptor>>() {});
    } catch (Exception e) {
      response.close();
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[segmentDescriptors.size()];
    for (int i = 0; i < numSplits; i++) {
      final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
      final String[] hosts = new String[locatedSD.getLocations().size()];
      for (int j = 0; j < locatedSD.getLocations().size(); j++) {
        hosts[j] = locatedSD.getLocations().get(j).getHost();
      }
      // Create partial Select query
      final SegmentDescriptor newSD = new SegmentDescriptor(
              locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
      final SelectQuery partialQuery = query.withQuerySegmentSpec(
              new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
      splits[i] = new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
              dummyPath, hosts);
    }
    return splits;
  }

  /* Method that splits Select query depending on the threshold so read can be
   * parallelized. We will only contact the Druid broker to obtain all results. */
  private static HiveDruidSplit[] splitSelectQuery(Configuration conf, String address,
          SelectQuery query, Path dummyPath) throws IOException {
    final int selectThreshold = HiveConf.getIntVar(
            conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_THRESHOLD);

    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
    if (isFetch) {
      // If it has a limit, we use it and we do not split the query
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    // We do not have the number of rows, thus we need to execute a
    // Segment Metadata query to obtain number of rows
    SegmentMetadataQueryBuilder metadataBuilder = new Druids.SegmentMetadataQueryBuilder();
    metadataBuilder.dataSource(query.getDataSource());
    metadataBuilder.intervals(query.getIntervals());
    metadataBuilder.merge(true);
    metadataBuilder.analysisTypes();
    SegmentMetadataQuery metadataQuery = metadataBuilder.build();

    InputStream response;
    try {
      response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
              DruidStorageHandlerUtils.createRequest(address, metadataQuery));
    } catch (Exception e) {
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Retrieve results
    List<SegmentAnalysis> metadataList;
    try {
      metadataList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
              new TypeReference<List<SegmentAnalysis>>() {});
    } catch (Exception e) {
      response.close();
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    if (metadataList == null) {
      throw new IOException("Connected to Druid but could not retrieve datasource information");
    }
    if (metadataList.isEmpty()) {
      // There are no rows for that time range, we can submit query as it is
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }
    if (metadataList.size() != 1) {
      throw new IOException("Information about segments should have been merged");
    }

    final long numRows = metadataList.get(0).getNumRows();

    query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
    if (numRows <= selectThreshold) {
      // We are not going to split it
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    // If the query does not specify a timestamp, we obtain the total time using
    // a Time Boundary query. Then, we use the information to split the query
    // following the Select threshold configuration property
    final List<Interval> intervals = new ArrayList<>();
    if (query.getIntervals().size() == 1 && query.getIntervals().get(0).withChronology(
            ISOChronology.getInstanceUTC()).equals(DruidTable.DEFAULT_INTERVAL)) {
      // Default max and min, we should execute a time boundary query to get a
      // more precise range
      TimeBoundaryQueryBuilder timeBuilder = new Druids.TimeBoundaryQueryBuilder();
      timeBuilder.dataSource(query.getDataSource());
      TimeBoundaryQuery timeQuery = timeBuilder.build();

      try {
        response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
                DruidStorageHandlerUtils.createRequest(address, timeQuery));
      } catch (Exception e) {
        throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
      }

      // Retrieve results
      List<Result<TimeBoundaryResultValue>> timeList;
      try {
        timeList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
                new TypeReference<List<Result<TimeBoundaryResultValue>>>() {});
      } catch (Exception e) {
        response.close();
        throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
      if (timeList == null || timeList.isEmpty()) {
        throw new IOException(
                "Connected to Druid but could not retrieve time boundary information");
      }
      if (timeList.size() != 1) {
        throw new IOException("We should obtain a single time boundary");
      }

      intervals.add(new Interval(timeList.get(0).getValue().getMinTime().getMillis(),
              timeList.get(0).getValue().getMaxTime().getMillis(),
              ISOChronology.getInstanceUTC()));
    } else {
      intervals.addAll(query.getIntervals());
    }

    // Create (numRows/default threshold) input splits
    int numSplits = (int) Math.ceil((double) numRows / selectThreshold);
    List<List<Interval>> newIntervals = createSplitsIntervals(intervals, numSplits);
    HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
    for (int i = 0; i < numSplits; i++) {
      // Create partial Select query
      final SelectQuery partialQuery = query.withQuerySegmentSpec(
              new MultipleIntervalSegmentSpec(newIntervals.get(i)));
      splits[i] = new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
              dummyPath, new String[] {address});
    }
    return splits;
  }

  private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals,
          int numSplits) {
    long startTime = intervals.get(0).getStartMillis();
    long endTime = startTime;
    long currTime = 0;
    List<List<Interval>> newIntervals = new ArrayList<>();
    long totalTime = 0;
    for (Interval interval : intervals) {
      totalTime += interval.getEndMillis() - interval.getStartMillis();
    }
    for (int i = 0, posIntervals = 0; i < numSplits; i++) {
      final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits) -
              Math.round((double) (totalTime * i) / numSplits);
      // Create the new interval(s)
      List<Interval> currentIntervals = new ArrayList<>();
      while (posIntervals < intervals.size()) {
        final Interval interval = intervals.get(posIntervals);
        final long expectedRange = rangeSize - currTime;
        if (interval.getEndMillis() - startTime >= expectedRange) {
          endTime = startTime + expectedRange;
          currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
          startTime = endTime;
          currTime = 0;
          break;
        }
        endTime = interval.getEndMillis();
        currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
        currTime += (endTime - startTime);
        startTime = intervals.get(++posIntervals).getStartMillis();
      }
      newIntervals.add(currentIntervals);
    }
    assert endTime == intervals.get(intervals.size() - 1).getEndMillis();
    return newIntervals;
  }

  private static String deserializeSerialize(String druidQuery)
          throws JsonParseException, JsonMappingException, IOException {
    BaseQuery<?> deserializedQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(
            druidQuery, BaseQuery.class);
    return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(deserializedQuery);
  }

  @Override
  public org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> getRecordReader(
          org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter)
          throws IOException {
    // We need to provide a different record reader for every type of Druid query.
    // The reason is that Druid results format is different for each type.
    final DruidQueryRecordReader<?, ?> reader;
    final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
      reader = new DruidSelectQueryRecordReader(); // By default
      reader.initialize((HiveDruidSplit) split, job);
      return reader;
    }

    switch (druidQueryType) {
      case Query.TIMESERIES:
        reader = new DruidTimeseriesQueryRecordReader();
        break;
      case Query.TOPN:
        reader = new DruidTopNQueryRecordReader();
        break;
      case Query.GROUP_BY:
        reader = new DruidGroupByQueryRecordReader();
        break;
      case Query.SELECT:
        reader = new DruidSelectQueryRecordReader();
        break;
      default:
        throw new IOException("Druid query type not recognized");
    }
    reader.initialize((HiveDruidSplit) split, job);
    return reader;
  }

  @Override
  public RecordReader<NullWritable, DruidWritable> createRecordReader(InputSplit split,
          TaskAttemptContext context) throws IOException, InterruptedException {
    // We need to provide a different record reader for every type of Druid query.
    // The reason is that Druid results format is different for each type.
    final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
      return new DruidSelectQueryRecordReader(); // By default
    }
    final DruidQueryRecordReader<?, ?> reader;
    switch (druidQueryType) {
      case Query.TIMESERIES:
        reader = new DruidTimeseriesQueryRecordReader();
        break;
      case Query.TOPN:
        reader = new DruidTopNQueryRecordReader();
        break;
      case Query.GROUP_BY:
        reader = new DruidGroupByQueryRecordReader();
        break;
      case Query.SELECT:
        reader = new DruidSelectQueryRecordReader();
        break;
      default:
        throw new IOException("Druid query type not recognized");
    }
    return reader;
  }

}
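
/*
 * Illustrative sketch only (not part of the original class): one way a client could drive
 * this InputFormat directly through the mapred API. The data source name, broker address
 * and dummy input path below are assumptions for the example; in practice Hive populates
 * the JobConf from the table properties and HiveConf.
 *
 *   JobConf job = new JobConf();
 *   job.set(Constants.DRUID_DATA_SOURCE, "wikipedia");  // hypothetical data source
 *   HiveConf.setVar(job, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS, "localhost:8082");
 *   org.apache.hadoop.mapred.FileInputFormat.setInputPaths(job, new Path("/tmp/druid-dummy"));
 *
 *   DruidQueryBasedInputFormat inputFormat = new DruidQueryBasedInputFormat();
 *   // With no serialized Druid query in the conf, getSplits falls back to a full Select query.
 *   org.apache.hadoop.mapred.InputSplit[] splits = inputFormat.getSplits(job, 1);
 *   org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> reader =
 *       inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
 */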