/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.elasticsearch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.completer.CompletionType;
import org.apache.zeppelin.elasticsearch.action.ActionResponse;
import org.apache.zeppelin.elasticsearch.action.AggWrapper;
import org.apache.zeppelin.elasticsearch.action.HitWrapper;
import org.apache.zeppelin.elasticsearch.client.ElasticsearchClient;
import org.apache.zeppelin.elasticsearch.client.HttpBasedClient;
import org.apache.zeppelin.elasticsearch.client.TransportBasedClient;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.aggregations.Aggregations;
import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.InternalSingleBucketAggregation;
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation;
import org.elasticsearch.search.aggregations.metrics.InternalMetricsAggregation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.github.wnameless.json.flattener.JsonFlattener;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonObject;
/**
* Elasticsearch Interpreter for Zeppelin.
*/
public class ElasticsearchInterpreter extends Interpreter {
private static Logger logger = LoggerFactory.getLogger(ElasticsearchInterpreter.class);
private static final String HELP = "Elasticsearch interpreter:\n"
+ "General format: <command> /<indices>/<types>/<id> <option> <JSON>\n"
+ " - indices: list of indices separated by commas (depends on the command)\n"
+ " - types: list of document types separated by commas (depends on the command)\n"
+ "Commands:\n"
+ " - search /indices/types <query>\n"
+ " . indices and types can be omitted (at least, you have to provide '/')\n"
+ " . a query is either a JSON-formatted query, nor a lucene query\n"
+ " - size <value>\n"
+ " . defines the size of the result set (default value is in the config)\n"
+ " . if used, this command must be declared before a search command\n"
+ " - count /indices/types <query>\n"
+ " . same comments as for the search\n"
+ " - get /index/type/id\n"
+ " - delete /index/type/id\n"
+ " - index /index/type/id <json-formatted document>\n"
+ " . the id can be omitted, elasticsearch will generate one";
protected static final List<String> COMMANDS = Arrays.asList(
"count", "delete", "get", "help", "index", "search");
private static final Pattern FIELD_NAME_PATTERN = Pattern.compile("\\[\\\\\"(.+)\\\\\"\\](.*)");
public static final String ELASTICSEARCH_HOST = "elasticsearch.host";
public static final String ELASTICSEARCH_PORT = "elasticsearch.port";
public static final String ELASTICSEARCH_CLIENT_TYPE = "elasticsearch.client.type";
public static final String ELASTICSEARCH_CLUSTER_NAME = "elasticsearch.cluster.name";
public static final String ELASTICSEARCH_RESULT_SIZE = "elasticsearch.result.size";
public static final String ELASTICSEARCH_BASIC_AUTH_USERNAME = "elasticsearch.basicauth.username";
public static final String ELASTICSEARCH_BASIC_AUTH_PASSWORD = "elasticsearch.basicauth.password";
private final Gson gson = new GsonBuilder().setPrettyPrinting().create();
private ElasticsearchClient elsClient;
private int resultSize = 10;
public ElasticsearchInterpreter(Properties property) {
super(property);
}
@Override
public void open() {
logger.info("Properties: {}", getProperty());
String clientType = getProperty(ELASTICSEARCH_CLIENT_TYPE);
clientType = clientType == null ? null : clientType.toLowerCase();
try {
this.resultSize = Integer.parseInt(getProperty(ELASTICSEARCH_RESULT_SIZE));
}
catch (final NumberFormatException e) {
this.resultSize = 10;
logger.error("Unable to parse " + ELASTICSEARCH_RESULT_SIZE + " : " +
property.get(ELASTICSEARCH_RESULT_SIZE), e);
}
try {
if (StringUtils.isEmpty(clientType) || "transport".equals(clientType)) {
elsClient = new TransportBasedClient(getProperty());
}
else if ("http".equals(clientType)) {
elsClient = new HttpBasedClient(getProperty());
}
else {
logger.error("Unknown type of Elasticsearch client: " + clientType);
}
}
catch (final IOException e) {
logger.error("Open connection with Elasticsearch", e);
}
}
@Override
public void close() {
if (elsClient != null) {
elsClient.close();
}
}
@Override
public InterpreterResult interpret(String cmd, InterpreterContext interpreterContext) {
logger.info("Run Elasticsearch command '" + cmd + "'");
if (StringUtils.isEmpty(cmd) || StringUtils.isEmpty(cmd.trim())) {
return new InterpreterResult(InterpreterResult.Code.SUCCESS);
}
int currentResultSize = resultSize;
if (elsClient == null) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Problem with the Elasticsearch client, please check your configuration (host, port,...)");
}
String[] items = StringUtils.split(cmd.trim(), " ", 3);
// Process some specific commands (help, size, ...)
if ("help".equalsIgnoreCase(items[0])) {
return processHelp(InterpreterResult.Code.SUCCESS, null);
}
if ("size".equalsIgnoreCase(items[0])) {
// In this case, the line with size must be followed by a search,
// so we will continue with the next lines
final String[] lines = StringUtils.split(cmd.trim(), "\n", 2);
if (lines.length < 2) {
return processHelp(InterpreterResult.Code.ERROR,
"Size cmd must be followed by a search");
}
final String[] sizeLine = StringUtils.split(lines[0], " ", 2);
if (sizeLine.length != 2) {
return processHelp(InterpreterResult.Code.ERROR, "Right format is : size <value>");
}
currentResultSize = Integer.parseInt(sizeLine[1]);
items = StringUtils.split(lines[1].trim(), " ", 3);
}
if (items.length < 2) {
return processHelp(InterpreterResult.Code.ERROR, "Arguments missing");
}
final String method = items[0];
final String url = items[1];
final String data = items.length > 2 ? items[2].trim() : null;
final String[] urlItems = StringUtils.split(url.trim(), "/");
try {
if ("get".equalsIgnoreCase(method)) {
return processGet(urlItems, interpreterContext);
}
else if ("count".equalsIgnoreCase(method)) {
return processCount(urlItems, data, interpreterContext);
}
else if ("search".equalsIgnoreCase(method)) {
return processSearch(urlItems, data, currentResultSize, interpreterContext);
}
else if ("index".equalsIgnoreCase(method)) {
return processIndex(urlItems, data);
}
else if ("delete".equalsIgnoreCase(method)) {
return processDelete(urlItems);
}
return processHelp(InterpreterResult.Code.ERROR, "Unknown command");
}
catch (final Exception e) {
return new InterpreterResult(InterpreterResult.Code.ERROR, "Error : " + e.getMessage());
}
}
@Override
public void cancel(InterpreterContext interpreterContext) {
// Nothing to do
}
@Override
public FormType getFormType() {
return FormType.SIMPLE;
}
@Override
public int getProgress(InterpreterContext interpreterContext) {
return 0;
}
@Override
public List<InterpreterCompletion> completion(String s, int i,
InterpreterContext interpreterContext) {
final List suggestions = new ArrayList<>();
for (final String cmd : COMMANDS) {
if (cmd.toLowerCase().contains(s)) {
suggestions.add(new InterpreterCompletion(cmd, cmd, CompletionType.command.name()));
}
}
return suggestions;
}
private void addAngularObject(InterpreterContext interpreterContext, String prefix, Object obj) {
interpreterContext.getAngularObjectRegistry().add(
prefix + "_" + interpreterContext.getParagraphId().replace("-", "_"),
obj, null, null);
}
private String[] getIndexTypeId(String[] urlItems) {
if (urlItems.length < 3) {
return null;
}
final String index = urlItems[0];
final String type = urlItems[1];
final String id = StringUtils.join(Arrays.copyOfRange(urlItems, 2, urlItems.length), '/');
if (StringUtils.isEmpty(index)
|| StringUtils.isEmpty(type)
|| StringUtils.isEmpty(id)) {
return null;
}
return new String[] { index, type, id };
}
private InterpreterResult processHelp(InterpreterResult.Code code, String additionalMessage) {
final StringBuffer buffer = new StringBuffer();
if (additionalMessage != null) {
buffer.append(additionalMessage).append("\n");
}
buffer.append(HELP).append("\n");
return new InterpreterResult(code, InterpreterResult.Type.TEXT, buffer.toString());
}
/**
* Processes a "get" request.
*
* @param urlItems Items of the URL
* @param interpreterContext Instance of the context
* @return Result of the get request, it contains a JSON-formatted string
*/
private InterpreterResult processGet(String[] urlItems, InterpreterContext interpreterContext) {
final String[] indexTypeId = getIndexTypeId(urlItems);
if (indexTypeId == null) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type/id)");
}
final ActionResponse response = elsClient.get(indexTypeId[0], indexTypeId[1], indexTypeId[2]);
if (response.isSucceeded()) {
final JsonObject json = response.getHit().getSourceAsJsonObject();
final String jsonStr = gson.toJson(json);
addAngularObject(interpreterContext, "get", json);
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TEXT,
jsonStr);
}
return new InterpreterResult(InterpreterResult.Code.ERROR, "Document not found");
}
/**
* Processes a "count" request.
*
* @param urlItems Items of the URL
* @param data May contains the JSON of the request
* @param interpreterContext Instance of the context
* @return Result of the count request, it contains the total hits
*/
private InterpreterResult processCount(String[] urlItems, String data,
InterpreterContext interpreterContext) {
if (urlItems.length > 2) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index1,index2,.../type1,type2,...)");
}
final ActionResponse response = searchData(urlItems, data, 0);
addAngularObject(interpreterContext, "count", response.getTotalHits());
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TEXT,
"" + response.getTotalHits());
}
/**
* Processes a "search" request.
*
* @param urlItems Items of the URL
* @param data May contains the JSON of the request
* @param size Limit of result set
* @param interpreterContext Instance of the context
* @return Result of the search request, it contains a tab-formatted string of the matching hits
*/
private InterpreterResult processSearch(String[] urlItems, String data, int size,
InterpreterContext interpreterContext) {
if (urlItems.length > 2) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index1,index2,.../type1,type2,...)");
}
final ActionResponse response = searchData(urlItems, data, size);
addAngularObject(interpreterContext, "search",
(response.getAggregations() != null && response.getAggregations().size() > 0) ?
response.getAggregations() : response.getHits());
return buildResponseMessage(response);
}
/**
* Processes a "index" request.
*
* @param urlItems Items of the URL
* @param data JSON to be indexed
* @return Result of the index request, it contains the id of the document
*/
private InterpreterResult processIndex(String[] urlItems, String data) {
if (urlItems.length < 2 || urlItems.length > 3) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type or /index/type/id)");
}
final ActionResponse response = elsClient.index(
urlItems[0], urlItems[1], urlItems.length == 2 ? null : urlItems[2], data);
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TEXT,
response.getHit().getId());
}
/**
* Processes a "delete" request.
*
* @param urlItems Items of the URL
* @return Result of the delete request, it contains the id of the deleted document
*/
private InterpreterResult processDelete(String[] urlItems) {
final String[] indexTypeId = getIndexTypeId(urlItems);
if (indexTypeId == null) {
return new InterpreterResult(InterpreterResult.Code.ERROR,
"Bad URL (it should be /index/type/id)");
}
final ActionResponse response =
elsClient.delete(indexTypeId[0], indexTypeId[1], indexTypeId[2]);
if (response.isSucceeded()) {
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TEXT,
response.getHit().getId());
}
return new InterpreterResult(InterpreterResult.Code.ERROR, "Document not found");
}
private ActionResponse searchData(String[] urlItems, String query, int size) {
String[] indices = null;
String[] types = null;
if (urlItems.length >= 1) {
indices = StringUtils.split(urlItems[0], ",");
}
if (urlItems.length > 1) {
types = StringUtils.split(urlItems[1], ",");
}
return elsClient.search(indices, types, query, size);
}
private InterpreterResult buildAggResponseMessage(Aggregations aggregations) {
// Only the result of the first aggregation is returned
//
final Aggregation agg = aggregations.asList().get(0);
InterpreterResult.Type resType = InterpreterResult.Type.TEXT;
String resMsg = "";
if (agg instanceof InternalMetricsAggregation) {
resMsg = XContentHelper.toString((InternalMetricsAggregation) agg).toString();
}
else if (agg instanceof InternalSingleBucketAggregation) {
resMsg = XContentHelper.toString((InternalSingleBucketAggregation) agg).toString();
}
else if (agg instanceof InternalMultiBucketAggregation) {
final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
final InternalMultiBucketAggregation multiBucketAgg = (InternalMultiBucketAggregation) agg;
for (final MultiBucketsAggregation.Bucket bucket : multiBucketAgg.getBuckets()) {
try {
final XContentBuilder builder = XContentFactory.jsonBuilder();
bucket.toXContent(builder, null);
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(builder.string());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
catch (final IOException e) {
logger.error("Processing bucket: " + e.getMessage(), e);
}
}
final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (final String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);
for (final Map<String, Object> bucket : buckets) {
buffer.append("\n");
for (final String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}
resType = InterpreterResult.Type.TABLE;
resMsg = buffer.toString();
}
return new InterpreterResult(InterpreterResult.Code.SUCCESS, resType, resMsg);
}
private InterpreterResult buildAggResponseMessage(List<AggWrapper> aggregations) {
final InterpreterResult.Type resType = InterpreterResult.Type.TABLE;
String resMsg = "";
final Set<String> headerKeys = new HashSet<>();
final List<Map<String, Object>> buckets = new LinkedList<>();
for (final AggWrapper aggregation: aggregations) {
final Map<String, Object> bucketMap = JsonFlattener.flattenAsMap(aggregation.getResult());
headerKeys.addAll(bucketMap.keySet());
buckets.add(bucketMap);
}
final StringBuffer buffer = new StringBuffer();
final String[] keys = headerKeys.toArray(new String[0]);
for (final String key: keys) {
buffer.append("\t" + key);
}
buffer.deleteCharAt(0);
for (final Map<String, Object> bucket : buckets) {
buffer.append("\n");
for (final String key: keys) {
buffer.append(bucket.get(key)).append("\t");
}
buffer.deleteCharAt(buffer.length() - 1);
}
resMsg = buffer.toString();
return new InterpreterResult(InterpreterResult.Code.SUCCESS, resType, resMsg);
}
private String buildSearchHitsResponseMessage(ActionResponse response) {
if (response.getHits() == null || response.getHits().size() == 0) {
return "";
}
//First : get all the keys in order to build an ordered list of the values for each hit
//
final List<Map<String, Object>> flattenHits = new LinkedList<>();
final Set<String> keys = new TreeSet<>();
for (final HitWrapper hit : response.getHits()) {
final String json = hit.getSourceAsString();
final Map<String, Object> flattenJsonMap = JsonFlattener.flattenAsMap(json);
final Map<String, Object> flattenMap = new HashMap<>();
for (final Iterator<String> iter = flattenJsonMap.keySet().iterator(); iter.hasNext(); ) {
// Replace keys that match a format like that : [\"keyname\"][0]
final String fieldName = iter.next();
final Matcher fieldNameMatcher = FIELD_NAME_PATTERN.matcher(fieldName);
if (fieldNameMatcher.matches()) {
flattenMap.put(fieldNameMatcher.group(1) + fieldNameMatcher.group(2),
flattenJsonMap.get(fieldName));
}
else {
flattenMap.put(fieldName, flattenJsonMap.get(fieldName));
}
}
flattenHits.add(flattenMap);
for (final String key : flattenMap.keySet()) {
keys.add(key);
}
}
// Next : build the header of the table
//
final StringBuffer buffer = new StringBuffer();
for (final String key : keys) {
buffer.append(key).append('\t');
}
buffer.replace(buffer.lastIndexOf("\t"), buffer.lastIndexOf("\t") + 1, "\n");
// Finally : build the result by using the key set
//
for (final Map<String, Object> hit : flattenHits) {
for (final String key : keys) {
final Object val = hit.get(key);
if (val != null) {
buffer.append(val);
}
buffer.append('\t');
}
buffer.replace(buffer.lastIndexOf("\t"), buffer.lastIndexOf("\t") + 1, "\n");
}
return buffer.toString();
}
private InterpreterResult buildResponseMessage(ActionResponse response) {
final List<AggWrapper> aggregations = response.getAggregations();
if (aggregations != null && aggregations.size() > 0) {
return buildAggResponseMessage(aggregations);
}
return new InterpreterResult(
InterpreterResult.Code.SUCCESS,
InterpreterResult.Type.TABLE,
buildSearchHitsResponseMessage(response));
}
}