/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hive.druid.serde;

import com.google.common.collect.Iterators;
import io.druid.query.BaseQuery;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.druid.DruidStorageHandler;
import org.apache.hadoop.hive.druid.DruidStorageHandlerUtils;
import org.apache.hadoop.hive.druid.io.HiveDruidSplit;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.List;

/**
 * Base record reader for a given Druid query. This class contains the logic to
 * send the query to the broker and retrieve the results. The transformation of
 * those results into records is left to the classes that extend this reader.
*
* The key for each record will be a NullWritable, while the value will be a
* DruidWritable containing the timestamp as well as all values resulting from
* the query.
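 *
 * A minimal subclass sketch (the Timeseries query and result types below are
 * illustrative, modeled on the concrete readers; they are not part of this class):
 *
 * <pre>{@code
 * public class ExampleTimeseriesRecordReader
 *     extends DruidQueryRecordReader<TimeseriesQuery, Result<TimeseriesResultValue>> {
 *
 *   protected TimeseriesQuery createQuery(String content) throws IOException {
 *     // Parse the JSON query carried by the split
 *     return DruidStorageHandlerUtils.JSON_MAPPER.readValue(content, TimeseriesQuery.class);
 *   }
 *
 *   protected List<Result<TimeseriesResultValue>> createResultsList(InputStream content)
 *       throws IOException {
 *     // Deserialize the broker response into result rows
 *     return DruidStorageHandlerUtils.JSON_MAPPER.readValue(content,
 *         new TypeReference<List<Result<TimeseriesResultValue>>>() {});
 *   }
 *
 *   // next(), nextKeyValue(), getCurrentKey(), getCurrentValue() and getProgress()
 *   // would then drain the inherited results iterator.
 * }
 * }</pre>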
*/
public abstract class DruidQueryRecordReader<T extends BaseQuery<R>, R extends Comparable<R>>
extends RecordReader<NullWritable, DruidWritable>
implements org.apache.hadoop.mapred.RecordReader<NullWritable, DruidWritable> {
private static final Logger LOG = LoggerFactory.getLogger(DruidQueryRecordReader.class);
/**
* Query that Druid executes.
*/
protected T query;
  /**
   * Query results; an empty iterator until {@code initialize} has run.
   */
  protected Iterator<R> results = Iterators.emptyIterator();
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
initialize(split, context.getConfiguration());
}
public void initialize(InputSplit split, Configuration conf) throws IOException {
HiveDruidSplit hiveDruidSplit = (HiveDruidSplit) split;
// Create query
query = createQuery(hiveDruidSplit.getDruidQuery());
// Execute query
    LOG.info("Retrieving from Druid using query:\n {}", query);
InputStream response;
try {
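      // The split location is expected to carry the address of the Druid broker
      // that serves this query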
response = DruidStorageHandlerUtils.submitRequest(DruidStorageHandler.getHttpClient(),
DruidStorageHandlerUtils.createRequest(hiveDruidSplit.getLocations()[0], query));
} catch (Exception e) {
throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
// Retrieve results
List<R> resultsList;
try {
resultsList = createResultsList(response);
} catch (IOException e) {
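      // Release the HTTP connection before propagating the failure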
response.close();
throw e;
}
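    // No rows returned: keep the default empty iterator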
if (resultsList == null || resultsList.isEmpty()) {
return;
}
results = resultsList.iterator();
}
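
  /**
   * Parses the serialized Druid query carried by the input split into the
   * concrete query type handled by this reader.
   */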
protected abstract T createQuery(String content) throws IOException;
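
  /**
   * Deserializes the broker response into a list of result rows. Implementations
   * are assumed to fully consume the stream; Jackson, for instance, closes the
   * source it reads from by default.
   */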
protected abstract List<R> createResultsList(InputStream content) throws IOException;
@Override
public NullWritable createKey() {
return NullWritable.get();
}
@Override
public DruidWritable createValue() {
return new DruidWritable();
}
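
  /**
   * Old {@code mapred} API; the {@code mapreduce} counterpart is {@link #nextKeyValue()}.
   */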
@Override
public abstract boolean next(NullWritable key, DruidWritable value) throws IOException;
@Override
public long getPos() {
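    // Progress through the Druid results is not tracked; always report position 0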
return 0;
}
@Override
public abstract boolean nextKeyValue() throws IOException;
@Override
public abstract NullWritable getCurrentKey() throws IOException, InterruptedException;
  // TODO: we could generate vector row batches so that vectorized execution may get triggered
  @Override
  public abstract DruidWritable getCurrentValue() throws IOException, InterruptedException;
@Override
public abstract float getProgress() throws IOException;
@Override
public void close() {
// Nothing to do
}
}