/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.task.source;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UncheckedIOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import java.nio.file.Files;
import java.nio.file.Path;
import com.addthis.basis.util.LessStrings;
import com.addthis.bundle.channel.DataChannelError;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.list.ListBundle;
import com.addthis.hydra.data.filter.lambda.StringWithValueFilter;
import com.addthis.hydra.data.util.DateUtil;
import com.addthis.hydra.data.util.Tokenizer;
import com.addthis.hydra.task.run.TaskRunConfig;
import com.addthis.hydra.task.source.bundleizer.Bundleizer;
import com.addthis.hydra.task.source.bundleizer.BundleizerFactory;
import com.addthis.hydra.task.source.bundleizer.ColumnBundleizer;
import com.google.common.base.Joiner;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.escape.Escaper;
import com.google.common.io.ByteStreams;
import com.google.common.net.UrlEscapers;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This data source <span class="hydra-summary">submits a hydra query for data</span>.
*
* <p>The user specifies the job identifier, query path, and query operations
* of the query to execute. Queries are submitted to the query master to coordinate
* scheduling. Query operations are not performed on the query master. In other words
* the {@link #ops} parameter is identical to the "remote ops" of a query submitted to
* the query system. You may optionally specify which specific tasks of the job should
* be queried. If you do not specify any tasks then the tasks of the job you are querying
* will be evenly distributed across the tasks of the job using this input source.
*
* <p>The query input source currently forbids partial results from being processed.
* If the query is not finished when the job is stopped then the job will error.
* This is a stop-gap measure until we determine a comprehensive solution for partial
* query results.
*
* <p>The query input source specifies a mark file. If the query runs to completion
* then the mark file will be created. The next time the job runs it will not submit
* another query.
*
* @user-reference
*/
public class DataSourceQuery extends TaskDataSource {
static final int LOG_TRUNCATE_CHARS = 500;
private static final Joiner AMPERSAND_JOINER = Joiner.on('&');
private static final Joiner COMMA_JOINER = Joiner.on(',');
private static final Logger log = LoggerFactory.getLogger(DataSourceQuery.class);
/**
* Request that the query system format output
* in the version of csv output that corrects
* escapes characters.
*/
private static final String QUERY_FORMAT = "csv2";
/**
* Separator in csv format.
*/
private static final String TOKEN_SEPARATOR = ",";
/**
* Group character in csv format.
*/
private static final String[] TOKEN_GROUPS = {"\""};
/**
* URL to submit a query. Typical url is
* "http://[hostname]:2222/query/call"
*/
private static final String QUERY_URL = "http://%s:%d%s";
/**
* Query master host name. This field is required.
*/
@JsonProperty(required = true) private String mqmaster;
/**
* Query master port number. Default value is 2222.
*/
@JsonProperty(required = true) private int port;
/**
* Query master url. Default is "/query/call"
*/
@JsonProperty(required = true) private String urlPath;
/**
* The tree job that is to be queried. Specify no jobId
* for a job to query itself. This field is optional.
*/
@JsonProperty private String jobId;
/**
* Optionally specify the directory
* of the tree for the given job. Default
* is "data".
*/
@JsonProperty(required = true) private String treeDir;
/**
* Path for the query. This field is required.
*/
@JsonProperty(required = true) private StringWithValueFilter path;
/**
* Operations for the query. This field is optional.
*/
@JsonProperty private String ops;
/**
* Which tasks to query. This field is optional.
*/
@JsonProperty private Integer[] tasks;
/** Number of tasks in the query job. This field is required. */
@JsonProperty(required = true) private int taskTotal;
/**
* Specify the field names for the columns
* produced by the query. This field is required.
*/
@JsonProperty(required = true) private String[] fields;
/**
* Path to the mark file. Default is "markfile".
*/
@JsonProperty private Path markFile;
/** Ignore the mark file */
@JsonProperty private boolean ignoreMarkFile;
@JsonProperty private TaskRunConfig config;
private final AtomicBoolean queryCompleted = new AtomicBoolean(false);
private final AtomicReference<Exception> firstError = new AtomicReference<>(null);
private Bundleizer bundleizer;
private Bundle nextBundle;
private InputStream underlyingInputStream;
@Override public void init() {
if (testMarkFile()) {
return;
}
HttpURLConnection conn = null;
tasks = buildTasks(tasks);
try {
URL url = new URL(String.format(QUERY_URL, mqmaster, port, urlPath));
conn = (HttpURLConnection) url.openConnection();
conn.setDoOutput(true);
conn.setRequestMethod("POST");
conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
Map<String, String> parameters = new HashMap<>();
parameters.put("sender", config.jobIdWithNode());
parameters.put("job", jobId);
parameters.put("path", DateUtil.expandDateMacro(path.get()));
parameters.put("dir", treeDir);
// the query master should not perform any ops on these queries
parameters.put("rops", ops);
parameters.put("tasks", COMMA_JOINER.join(tasks));
parameters.put("format", QUERY_FORMAT);
writePostBody(conn, parameters);
underlyingInputStream = conn.getInputStream();
Tokenizer tokenizer = new Tokenizer(TOKEN_SEPARATOR, TOKEN_GROUPS, false);
BundleizerFactory format = new ColumnBundleizer(fields, tokenizer, null);
bundleizer = format.createBundleizer(underlyingInputStream, new ListBundle());
} catch (IOException outer) {
if ((conn != null) && (conn.getErrorStream() != null)) {
try {
log.error("URL connection was unsuccessful. Response is {}",
new String(ByteStreams.toByteArray(conn.getErrorStream())));
} catch (IOException inner) {
log.error("During connection error failure to read error stream: ", inner);
}
}
firstError.compareAndSet(null, outer);
throw Throwables.propagate(outer);
}
}
/**
* Deletes the existing mark file if ignoreMarkFile
* field is set. Tests if the mark file exists.
*
* @return true if the mark file exists
*/
private boolean testMarkFile() {
if (markFile != null) {
if (ignoreMarkFile) {
try {
boolean deleted = Files.deleteIfExists(markFile);
if (deleted) {
log.warn("Deleted mark file : {}", markFile);
}
} catch (IOException ex) {
firstError.compareAndSet(null, ex);
throw Throwables.propagate(ex);
}
}
if (Files.exists(markFile)) {
queryCompleted.set(true);
return true;
}
}
return false;
}
/**
* If a list of tasks is provided as input then perform validation
* on the tasks numbers. If a list of tasks is not provided then
* generate a list of tasks.
*/
private Integer[] buildTasks(Integer[] tasks) {
if (tasks != null) {
List<Integer> outBounds = new ArrayList<>();
for (Integer task : tasks) {
if ((task < 0) || (task >= taskTotal)) {
outBounds.add(task);
}
}
if (outBounds.size() > 0) {
throw new IllegalArgumentException("The following provided task numbers " +
"are out of bounds: " + outBounds);
}
return tasks;
} else {
return config.calcShardList(taskTotal);
}
}
private static void writePostBody(HttpURLConnection conn, Map<String, String> parameters) throws IOException {
try (OutputStream os = conn.getOutputStream()) {
Escaper escaper = UrlEscapers.urlFormParameterEscaper();
// Select non-null (key, value) pairs and join them
List<String> kvpairs = parameters.entrySet().stream()
.filter((e) -> (e.getValue() != null))
.map((e) -> (escaper.escape(e.getKey()) + "=" +
escaper.escape(e.getValue()))).collect(Collectors.toList());
String content = AMPERSAND_JOINER.join(kvpairs);
log.info("First {} characters of POST body are {}", LOG_TRUNCATE_CHARS,
LessStrings.trunc(content, LOG_TRUNCATE_CHARS));
os.write(content.getBytes());
os.flush();
}
}
@Override public Bundle next() throws DataChannelError {
if (nextBundle != null) {
Bundle result = nextBundle;
nextBundle = null;
return result;
} else {
try {
if (bundleizer != null) {
Bundle next = bundleizer.next();
if (next == null) {
queryCompleted.set(true);
if (markFile != null) {
Files.createFile(markFile);
}
}
return next;
} else {
return null;
}
} catch (Exception ex) {
firstError.compareAndSet(null, ex);
throw Throwables.propagate(ex);
}
}
}
@Override public Bundle peek() throws DataChannelError {
if (nextBundle == null) {
try {
if (bundleizer != null) {
nextBundle = bundleizer.next();
}
} catch (Exception ex) {
firstError.compareAndSet(null, ex);
throw Throwables.propagate(ex);
}
}
return nextBundle;
}
@Override public void close() {
try {
if (underlyingInputStream != null) {
underlyingInputStream.close();
}
//noinspection ThrowableResultOfMethodCallIgnored
if (!queryCompleted.get() && (firstError.get() == null)) {
throw new UnsupportedOperationException("Query did not complete before job completed. " +
"Partial query results are currently not " +
"allowed by the query input source");
}
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
}
@Override
public @Nonnull ImmutableList<Path> writableRootPaths() {
return ImmutableList.of(markFile);
}
}