package com.thinkbiganalytics.nifi.v2.elasticsearch;
/*-
* #%L
* thinkbig-nifi-elasticsearch-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.nifi.processor.AbstractNiFiProcessor;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.util.StandardValidators;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONObject;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import java.io.IOException;
import java.io.InputStream;
import java.net.InetAddress;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
/**
 * NiFi processor that reads a FlowFile whose content is a JSON array and bulk-indexes every
 * element of that array into Elasticsearch through the transport client.
 *
 * <p>The FlowFile is routed to {@link #REL_SUCCESS} when the bulk request reports no failures and
 * to {@link #REL_FAILURE} when the bulk request reports failures or any exception is raised while
 * reading, parsing or sending the content.
 */
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
@Tags({"elasticsearch", "thinkbig"})
@CapabilityDescription("Write FlowFile from a JSON array to Elasticsearch (V2)")
public class IndexElasticSearch extends AbstractNiFiProcessor {

    /**
     * Success relationship for JSON objects that are successfully indexed in Elasticsearch.
     */
    public static final Relationship REL_SUCCESS = new Relationship.Builder()
        .name("success")
        .description("Json objects that are successfully indexed in elasticsearch are transferred to this relationship")
        .build();

    /**
     * Failure relationship for JSON objects that fail to be indexed in Elasticsearch.
     */
    public static final Relationship REL_FAILURE = new Relationship.Builder()
        .name("failure")
        .description(
            "Json objects that are un-successfully indexed in elasticsearch are transferred to this relationship")
        .build();

    /**
     * Property for the name of the index.
     */
    public static final PropertyDescriptor INDEX_NAME = new PropertyDescriptor.Builder()
        .name("IndexName")
        .description("The name of the index")
        .required(true)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Property for the index type.
     */
    public static final PropertyDescriptor TYPE = new PropertyDescriptor.Builder()
        .name("Type")
        .description("Elasticsearch type")
        .required(true)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Property for the Elasticsearch host name.
     */
    public static final PropertyDescriptor HOST_NAME = new PropertyDescriptor.Builder()
        .name("HostName")
        .description("Elasticsearch host")
        .required(true)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Property for the Elasticsearch cluster name.
     */
    public static final PropertyDescriptor CLUSTER_NAME = new PropertyDescriptor.Builder()
        .name("ClusterName")
        .description("Elasticsearch cluster")
        .required(true)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Property naming the JSON field whose value is used as the document id. When it is not set,
     * a random UUID is generated for every document.
     */
    public static final PropertyDescriptor ID_FIELD = new PropertyDescriptor.Builder()
        .name("IdField")
        .description("Id that you want to use for indexing into elasticsearch. If it is empty then a UUID will be generated")
        .required(false)
        .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
        .expressionLanguageSupported(true)
        .build();

    /**
     * Charset used to decode the FlowFile content. JSON is decoded as UTF-8 so the result does not
     * depend on the platform default charset of the NiFi JVM.
     */
    private static final Charset JSON_CHARSET = Charset.forName("UTF-8");

    /**
     * Port of the Elasticsearch transport protocol that the client connects to.
     */
    private static final int TRANSPORT_PORT = 9300;

    private final Set<Relationship> relationships;
    private final List<PropertyDescriptor> propDescriptors;

    /**
     * Default constructor; builds the immutable relationship and property collections.
     */
    public IndexElasticSearch() {
        final Set<Relationship> r = new HashSet<>();
        r.add(REL_SUCCESS);
        r.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(r);

        final List<PropertyDescriptor> pds = new ArrayList<>();
        pds.add(INDEX_NAME);
        pds.add(TYPE);
        pds.add(HOST_NAME);
        pds.add(CLUSTER_NAME);
        pds.add(ID_FIELD);
        propDescriptors = Collections.unmodifiableList(pds);
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        return propDescriptors;
    }

    /**
     * Reads the incoming FlowFile as a JSON array, indexes it in Elasticsearch and routes the
     * FlowFile to {@link #REL_SUCCESS} or {@link #REL_FAILURE}.
     *
     * @param context the process context used to evaluate the processor properties
     * @param session the session the FlowFile is read from and transferred on
     * @throws ProcessException declared by the processor contract; exceptions raised while
     *                          handling the FlowFile are logged and routed to failure instead
     */
    @Override
    public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
        final ComponentLog logger = getLog();
        FlowFile flowFile = session.get();
        if (flowFile == null) {
            return;
        }

        try {
            /* Connection and indexing parameters, evaluated against the FlowFile attributes */
            String indexName = context.getProperty(INDEX_NAME).evaluateAttributeExpressions(flowFile).getValue();
            String type = context.getProperty(TYPE).evaluateAttributeExpressions(flowFile).getValue();
            String hostName = context.getProperty(HOST_NAME).evaluateAttributeExpressions(flowFile).getValue();
            String clusterName = context.getProperty(CLUSTER_NAME).evaluateAttributeExpressions(flowFile).getValue();
            String idField = context.getProperty(ID_FIELD).evaluateAttributeExpressions(flowFile).getValue();

            final StringBuilder content = new StringBuilder();
            session.read(flowFile, new InputStreamCallback() {
                @Override
                public void process(InputStream in) throws IOException {
                    content.append(IOUtils.toString(in, JSON_CHARSET));
                }
            });
            final String json = content.toString();

            // Guard the concatenation: the payload can be large and debug is normally disabled.
            if (logger.isDebugEnabled()) {
                logger.debug("The json that was received is: " + json);
            }

            boolean success = sendToElasticSearch(json, hostName, indexName, type, clusterName, idField);

            /* Route the FlowFile according to the outcome of the bulk request */
            if (!success) {
                logger.info("*** Completed with failed status ");
                session.transfer(flowFile, REL_FAILURE);
            } else {
                logger.info("*** Completed with status ");
                session.transfer(flowFile, REL_SUCCESS);
            }
        } catch (final Exception e) {
            // Pass the throwable separately so the stack trace is logged and the FlowFile is
            // rendered through the message placeholder.
            logger.error("Unable to execute Elasticsearch job for {}", new Object[]{flowFile}, e);
            session.transfer(flowFile, REL_FAILURE);
        }
    }

    /**
     * Indexes every object of the given JSON array in Elasticsearch with a single bulk request.
     *
     * <p>Each object gets a {@code post_date} field holding the current time in milliseconds (as
     * a string) before it is added to the request. The transport client created for the request
     * is always closed before this method returns.
     *
     * @param json        the JSON array to index, as text
     * @param hostName    the Elasticsearch host to connect to
     * @param index       the name of the target index
     * @param type        the Elasticsearch type of the documents
     * @param clusterName the name of the Elasticsearch cluster
     * @param idField     the JSON field that supplies the document id; when {@code null} or
     *                    empty a random UUID is used for every document
     * @return {@code true} if the array was empty or the bulk request reported no failures,
     *         {@code false} if the bulk request reported failures
     * @throws Exception if the JSON cannot be parsed, an object lacks the id field, the host
     *                   cannot be resolved or the request to Elasticsearch fails
     */
    private boolean sendToElasticSearch(String json, String hostName, String index, String type, String clusterName, String idField) throws Exception {
        final ComponentLog logger = getLog();

        // Parse and resolve before creating the client so that bad input cannot leak a client.
        final JSONArray array = new JSONArray(json);
        if (array.length() == 0) {
            // A bulk request without any action is rejected, so there is nothing to send.
            logger.debug("The json array is empty, nothing to index");
            return true;
        }
        final InetAddress address = InetAddress.getByName(hostName);

        final Settings settings = Settings.settingsBuilder()
            .put("cluster.name", clusterName).build();
        final TransportClient client = TransportClient.builder().settings(settings).build();
        try {
            client.addTransportAddress(new InetSocketTransportAddress(address, TRANSPORT_PORT));

            final boolean useIdField = idField != null && idField.length() > 0;
            final BulkRequestBuilder bulkRequest = client.prepareBulk();
            for (int i = 0; i < array.length(); i++) {
                JSONObject jsonObj = array.getJSONObject(i);
                String id = useIdField ? jsonObj.getString(idField) : UUID.randomUUID().toString();
                jsonObj.put("post_date", String.valueOf(System.currentTimeMillis()));
                bulkRequest.add(client.prepareIndex(index, type, id)
                                    .setSource(jsonObj.toString())
                );
            }

            BulkResponse bulkResponse = bulkRequest.get();
            if (bulkResponse.hasFailures()) {
                logger.error("Error occurred while batch updating: " + bulkResponse.buildFailureMessage());
                return false;
            }
            return true;
        } finally {
            // A new client is created per invocation; release its connections and threads.
            client.close();
        }
    }
}