/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.druid.io;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.calcite.adapter.druid.DruidDateTimeUtils;
import org.apache.calcite.adapter.druid.DruidTable;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
import org.apache.hadoop.hive.druid.serde.DruidGroupByQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidSelectQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTimeseriesQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidTopNQueryRecordReader;
import org.apache.hadoop.hive.druid.serde.DruidWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.common.collect.Lists;
import com.metamx.common.lifecycle.Lifecycle;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.HttpClientConfig;
import com.metamx.http.client.HttpClientInit;
import com.metamx.http.client.Request;

import io.druid.query.BaseQuery;
import io.druid.query.Druids;
import io.druid.query.Druids.SegmentMetadataQueryBuilder;
import io.druid.query.Druids.SelectQueryBuilder;
import io.druid.query.Druids.TimeBoundaryQueryBuilder;
import io.druid.query.LocatedSegmentDescriptor;
import io.druid.query.Query;
import io.druid.query.Result;
import io.druid.query.SegmentDescriptor;
import io.druid.query.metadata.metadata.SegmentAnalysis;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.select.PagingSpec;
import io.druid.query.select.SelectQuery;
import io.druid.query.spec.MultipleIntervalSegmentSpec;
import io.druid.query.spec.MultipleSpecificSegmentSpec;
import io.druid.query.timeboundary.TimeBoundaryQuery;
import io.druid.query.timeboundary.TimeBoundaryResultValue;

/**
 * Druid query based input format.
 *
 * Given a query and the Druid broker address, it will send it, and retrieve
 * and parse the results.
 */
public class DruidQueryBasedInputFormat extends InputFormat<NullWritable, DruidWritable>
        implements org.apache.hadoop.mapred.InputFormat<NullWritable, DruidWritable> {

  protected static final Logger LOG = LoggerFactory.getLogger(DruidQueryBasedInputFormat.class);

  @Override
  public org.apache.hadoop.mapred.InputSplit[] getSplits(JobConf job, int numSplits)
          throws IOException {
    return getInputSplits(job);
  }

  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    return Arrays.<InputSplit>asList(getInputSplits(context.getConfiguration()));
  }

  @SuppressWarnings("deprecation")
  private HiveDruidSplit[] getInputSplits(Configuration conf) throws IOException {
    String address = HiveConf.getVar(conf,
            HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
    if (StringUtils.isEmpty(address)) {
      throw new IOException("Druid broker address not specified in configuration");
    }
    String druidQuery = StringEscapeUtils.unescapeJava(conf.get(Constants.DRUID_QUERY_JSON));
    String druidQueryType;
    if (StringUtils.isEmpty(druidQuery)) {
      // Empty, maybe because CBO did not run; we fall back to
      // full Select query
      if (LOG.isWarnEnabled()) {
        LOG.warn("Druid query is empty; creating Select query");
      }
      String dataSource = conf.get(Constants.DRUID_DATA_SOURCE);
      if (dataSource == null) {
        throw new IOException("Druid data source cannot be empty");
      }
      druidQuery = createSelectStarQuery(dataSource);
      druidQueryType = Query.SELECT;
    } else {
      druidQueryType = conf.get(Constants.DRUID_QUERY_TYPE);
      if (druidQueryType == null) {
        throw new IOException("Druid query type not recognized");
      }
    }

    // hive depends on FileSplits
    Job job = new Job(conf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] paths = FileInputFormat.getInputPaths(jobContext);

    // We need to deserialize and serialize query so intervals are written in the JSON
    // Druid query with user timezone, as this is default Hive time semantics.
    // Then, create splits with the Druid queries.
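    // Timeseries, topN and groupBy queries are answered by the broker and become a single
    // split; Select queries are either distributed across the data nodes serving the
    // segments or split against the broker based on the row-count threshold (see
    // distributeSelectQuery and splitSelectQuery below).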
    switch (druidQueryType) {
      case Query.TIMESERIES:
      case Query.TOPN:
      case Query.GROUP_BY:
        return new HiveDruidSplit[] { new HiveDruidSplit(deserializeSerialize(druidQuery),
                paths[0], new String[] {address}) };
      case Query.SELECT:
        SelectQuery selectQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(
                druidQuery, SelectQuery.class);
        boolean distributed = HiveConf.getBoolVar(conf,
                HiveConf.ConfVars.HIVE_DRUID_SELECT_DISTRIBUTE);
        if (distributed) {
          return distributeSelectQuery(conf, address, selectQuery, paths[0]);
        } else {
          return splitSelectQuery(conf, address, selectQuery, paths[0]);
        }
      default:
        throw new IOException("Druid query type not recognized");
    }
  }

  private static String createSelectStarQuery(String dataSource) throws IOException {
    // Create Select query
    SelectQueryBuilder builder = new Druids.SelectQueryBuilder();
    builder.dataSource(dataSource);
    final List<Interval> intervals = Arrays.asList();
    builder.intervals(intervals);
    builder.pagingSpec(PagingSpec.newSpec(1));
    Map<String, Object> context = new HashMap<>();
    context.put(Constants.DRUID_QUERY_FETCH, false);
    builder.context(context);
    return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(builder.build());
  }

  /* New method that distributes the Select query by creating splits containing
   * information about different Druid nodes that have the data for the given
   * query. */
  private static HiveDruidSplit[] distributeSelectQuery(Configuration conf, String address,
          SelectQuery query, Path dummyPath) throws IOException {
    // If it has a limit, we use it and we do not distribute the query
    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
    if (isFetch) {
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    final String intervals =
            StringUtils.join(query.getIntervals(), ","); // Comma-separated intervals without brackets
    final String request = String.format(
            "http://%s/druid/v2/datasources/%s/candidates?intervals=%s",
            address, query.getDataSource().getNames().get(0), URLEncoder.encode(intervals, "UTF-8"));
    final InputStream response;
    try {
      response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
              new Request(HttpMethod.GET, new URL(request)));
    } catch (Exception e) {
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Retrieve results
    final List<LocatedSegmentDescriptor> segmentDescriptors;
    try {
      segmentDescriptors = DruidStorageHandlerUtils.JSON_MAPPER.readValue(response,
              new TypeReference<List<LocatedSegmentDescriptor>>() {});
    } catch (Exception e) {
      response.close();
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Create one input split for each segment
    final int numSplits = segmentDescriptors.size();
    final HiveDruidSplit[] splits = new HiveDruidSplit[segmentDescriptors.size()];
    for (int i = 0; i < numSplits; i++) {
      final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
      final String[] hosts = new String[locatedSD.getLocations().size()];
      for (int j = 0; j < locatedSD.getLocations().size(); j++) {
        hosts[j] = locatedSD.getLocations().get(j).getHost();
      }
      // Create partial Select query
      final SegmentDescriptor newSD = new SegmentDescriptor(
              locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
      final SelectQuery partialQuery = query.withQuerySegmentSpec(
              new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
      splits[i] = new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
              dummyPath, hosts);
    }
    return splits;
  }

  /* Method that splits Select query depending on the threshold so read can be
   * parallelized. We will only contact the Druid broker to obtain all results. */
  private static HiveDruidSplit[] splitSelectQuery(Configuration conf, String address,
          SelectQuery query, Path dummyPath) throws IOException {
    final int selectThreshold = HiveConf.getIntVar(
            conf, HiveConf.ConfVars.HIVE_DRUID_SELECT_THRESHOLD);

    final boolean isFetch = query.getContextBoolean(Constants.DRUID_QUERY_FETCH, false);
    if (isFetch) {
      // If it has a limit, we use it and we do not split the query
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    // We do not have the number of rows, thus we need to execute a
    // Segment Metadata query to obtain number of rows
    SegmentMetadataQueryBuilder metadataBuilder = new Druids.SegmentMetadataQueryBuilder();
    metadataBuilder.dataSource(query.getDataSource());
    metadataBuilder.intervals(query.getIntervals());
    metadataBuilder.merge(true);
    metadataBuilder.analysisTypes();
    SegmentMetadataQuery metadataQuery = metadataBuilder.build();

    InputStream response;
    try {
      response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
              DruidStorageHandlerUtils.createRequest(address, metadataQuery));
    } catch (Exception e) {
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }

    // Retrieve results
    List<SegmentAnalysis> metadataList;
    try {
      metadataList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
              new TypeReference<List<SegmentAnalysis>>() {});
    } catch (Exception e) {
      response.close();
      throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
    }
    if (metadataList == null) {
      throw new IOException("Connected to Druid but could not retrieve datasource information");
    }
    if (metadataList.isEmpty()) {
      // There are no rows for that time range, we can submit query as it is
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }
    if (metadataList.size() != 1) {
      throw new IOException("Information about segments should have been merged");
    }

    final long numRows = metadataList.get(0).getNumRows();

    query = query.withPagingSpec(PagingSpec.newSpec(Integer.MAX_VALUE));
    if (numRows <= selectThreshold) {
      // We are not going to split it
      return new HiveDruidSplit[] { new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
              new String[] {address}) };
    }

    // If the query does not specify a timestamp, we obtain the total time using
    // a Time Boundary query. Then, we use the information to split the query
    // following the Select threshold configuration property
    final List<Interval> intervals = new ArrayList<>();
    if (query.getIntervals().size() == 1 && query.getIntervals().get(0).withChronology(
            ISOChronology.getInstanceUTC()).equals(DruidTable.DEFAULT_INTERVAL)) {
      // Default max and min, we should execute a time boundary query to get a
      // more precise range
      TimeBoundaryQueryBuilder timeBuilder = new Druids.TimeBoundaryQueryBuilder();
      timeBuilder.dataSource(query.getDataSource());
      TimeBoundaryQuery timeQuery = timeBuilder.build();

      try {
        response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
                DruidStorageHandlerUtils.createRequest(address, timeQuery));
      } catch (Exception e) {
        throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
      }

      // Retrieve results
      List<Result<TimeBoundaryResultValue>> timeList;
      try {
        timeList = DruidStorageHandlerUtils.SMILE_MAPPER.readValue(response,
                new TypeReference<List<Result<TimeBoundaryResultValue>>>() {});
      } catch (Exception e) {
        response.close();
        throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
      }
      if (timeList == null || timeList.isEmpty()) {
        throw new IOException(
                "Connected to Druid but could not retrieve time boundary information");
      }
      if (timeList.size() != 1) {
        throw new IOException("We should obtain a single time boundary");
      }

      intervals.add(new Interval(timeList.get(0).getValue().getMinTime().getMillis(),
              timeList.get(0).getValue().getMaxTime().getMillis(),
              ISOChronology.getInstanceUTC()));
    } else {
      intervals.addAll(query.getIntervals());
    }

    // Create (numRows/default threshold) input splits
    int numSplits = (int) Math.ceil((double) numRows / selectThreshold);
    List<List<Interval>> newIntervals = createSplitsIntervals(intervals, numSplits);
    HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
    for (int i = 0; i < numSplits; i++) {
      // Create partial Select query
      final SelectQuery partialQuery = query.withQuerySegmentSpec(
              new MultipleIntervalSegmentSpec(newIntervals.get(i)));
      splits[i] = new HiveDruidSplit(
              DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
              dummyPath, new String[] {address});
    }
    return splits;
  }

  private static List<List<Interval>> createSplitsIntervals(List<Interval> intervals,
          int numSplits) {
    long startTime = intervals.get(0).getStartMillis();
    long endTime = startTime;
    long currTime = 0;
    List<List<Interval>> newIntervals = new ArrayList<>();
    long totalTime = 0;
    for (Interval interval : intervals) {
      totalTime += interval.getEndMillis() - interval.getStartMillis();
    }
    for (int i = 0, posIntervals = 0; i < numSplits; i++) {
      final long rangeSize = Math.round((double) (totalTime * (i + 1)) / numSplits) -
              Math.round((double) (totalTime * i) / numSplits);
      // Create the new interval(s)
      List<Interval> currentIntervals = new ArrayList<>();
      while (posIntervals < intervals.size()) {
        final Interval interval = intervals.get(posIntervals);
        final long expectedRange = rangeSize - currTime;
        if (interval.getEndMillis() - startTime >= expectedRange) {
          endTime = startTime + expectedRange;
          currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
          startTime = endTime;
          currTime = 0;
          break;
        }
        endTime = interval.getEndMillis();
        currentIntervals.add(new Interval(startTime, endTime, ISOChronology.getInstanceUTC()));
        currTime += (endTime - startTime);
        startTime = intervals.get(++posIntervals).getStartMillis();
      }
      newIntervals.add(currentIntervals);
    }
    assert endTime == intervals.get(intervals.size() - 1).getEndMillis();
    return newIntervals;
  }

  private static String deserializeSerialize(String druidQuery)
          throws JsonParseException, JsonMappingException, IOException {
    BaseQuery<?> deserializedQuery = DruidStorageHandlerUtils.JSON_MAPPER.readValue(
            druidQuery, BaseQuery.class);
    return DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(deserializedQuery);
  }

  @Override
  public org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> getRecordReader(
          org.apache.hadoop.mapred.InputSplit split, JobConf job, Reporter reporter)
          throws IOException {
    // We need to provide a different record reader for every type of Druid query.
    // The reason is that Druid results format is different for each type.
    final DruidQueryRecordReader<?, ?> reader;
    final String druidQueryType = job.get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
      reader = new DruidSelectQueryRecordReader(); // By default
      reader.initialize((HiveDruidSplit) split, job);
      return reader;
    }

    switch (druidQueryType) {
      case Query.TIMESERIES:
        reader = new DruidTimeseriesQueryRecordReader();
        break;
      case Query.TOPN:
        reader = new DruidTopNQueryRecordReader();
        break;
      case Query.GROUP_BY:
        reader = new DruidGroupByQueryRecordReader();
        break;
      case Query.SELECT:
        reader = new DruidSelectQueryRecordReader();
        break;
      default:
        throw new IOException("Druid query type not recognized");
    }
    reader.initialize((HiveDruidSplit) split, job);
    return reader;
  }

  @Override
  public RecordReader<NullWritable, DruidWritable> createRecordReader(InputSplit split,
          TaskAttemptContext context) throws IOException, InterruptedException {
    // We need to provide a different record reader for every type of Druid query.
    // The reason is that Druid results format is different for each type.
    final String druidQueryType = context.getConfiguration().get(Constants.DRUID_QUERY_TYPE);
    if (druidQueryType == null) {
      return new DruidSelectQueryRecordReader(); // By default
    }
    final DruidQueryRecordReader<?, ?> reader;
    switch (druidQueryType) {
      case Query.TIMESERIES:
        reader = new DruidTimeseriesQueryRecordReader();
        break;
      case Query.TOPN:
        reader = new DruidTopNQueryRecordReader();
        break;
      case Query.GROUP_BY:
        reader = new DruidGroupByQueryRecordReader();
        break;
      case Query.SELECT:
        reader = new DruidSelectQueryRecordReader();
        break;
      default:
        throw new IOException("Druid query type not recognized");
    }
    return reader;
  }

}
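
/*
 * Illustrative sketch only (not part of the original class): one way a client could drive
 * this InputFormat directly through the mapred API. The data source name, broker address
 * and dummy input path below are assumptions for the example; in practice Hive populates
 * the JobConf from the table properties and HiveConf.
 *
 *   JobConf job = new JobConf();
 *   job.set(Constants.DRUID_DATA_SOURCE, "wikipedia");  // hypothetical data source
 *   HiveConf.setVar(job, HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS, "localhost:8082");
 *   org.apache.hadoop.mapred.FileInputFormat.setInputPaths(job, new Path("/tmp/druid-dummy"));
 *
 *   DruidQueryBasedInputFormat inputFormat = new DruidQueryBasedInputFormat();
 *   // With no serialized Druid query in the conf, getSplits falls back to a full Select query.
 *   org.apache.hadoop.mapred.InputSplit[] splits = inputFormat.getSplits(job, 1);
 *   org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> reader =
 *       inputFormat.getRecordReader(splits[0], job, Reporter.NULL);
 */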