/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.github.fhuss.storm.elasticsearch.bolt;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
import com.github.fhuss.storm.elasticsearch.ClientFactory;
import com.github.fhuss.storm.elasticsearch.Document;
import com.github.fhuss.storm.elasticsearch.commons.RichTickTupleBolt;
import com.github.fhuss.storm.elasticsearch.mapper.TupleMapper;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
/**
* Simple Bolt to index documents batch into an elasticsearch cluster.
*
* @author fhussonnois
*/
public class IndexBatchBolt<T> extends RichTickTupleBolt {

    private static final Logger LOGGER = LoggerFactory.getLogger(IndexBatchBolt.class);

    public static final TimeUnit DEFAULT_TIME_UNIT = TimeUnit.SECONDS;
    public static final long DEFAULT_EMIT_FREQUENCY = 10;

    /** Maximum number of tuples buffered before a bulk flush is forced ahead of the tick. */
    private static final int QUEUE_MAX_SIZE = 1000;

    private OutputCollector outputCollector;

    private Client client;

    private ClientFactory clientFactory;

    /** Buffer of pending tuples, drained on each tick or when full. */
    private LinkedBlockingQueue<Tuple> queue;

    private TupleMapper<Document<T>> mapper;

    /**
     * Creates a new {@link IndexBatchBolt} instance.
     *
     * @param clientFactory the elasticsearch client factory
     * @param mapper the document tuple mapper
     * @param emitFrequency the batch frequency
     * @param unit the time unit of the emit frequency
     */
    public IndexBatchBolt(ClientFactory clientFactory, TupleMapper<Document<T>> mapper, long emitFrequency, TimeUnit unit) {
        super(emitFrequency, unit);
        this.clientFactory = clientFactory;
        this.mapper = mapper;
    }

    /**
     * Creates a new {@link IndexBatchBolt} instance which uses SECONDS as time unit for the batch frequency.
     *
     * @param clientFactory the elasticsearch client factory
     * @param mapper the document tuple mapper
     * @param emitFrequency the batch frequency
     */
    public IndexBatchBolt(ClientFactory clientFactory, TupleMapper<Document<T>> mapper, long emitFrequency) {
        this(clientFactory, mapper, emitFrequency, DEFAULT_TIME_UNIT);
    }

    /**
     * Creates a new {@link IndexBatchBolt} instance with a default batch frequency set to 10 seconds.
     *
     * @param clientFactory the elasticsearch client factory
     * @param mapper the document tuple mapper
     */
    public IndexBatchBolt(ClientFactory clientFactory, TupleMapper<Document<T>> mapper) {
        this(clientFactory, mapper, DEFAULT_EMIT_FREQUENCY, DEFAULT_TIME_UNIT);
    }

    /**
     * (non-Javadoc)
     * @see backtype.storm.task.IBolt#prepare(java.util.Map, backtype.storm.task.TopologyContext, backtype.storm.task.OutputCollector)
     */
    @Override
    public void prepare(Map stormConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.outputCollector = outputCollector;
        this.client = clientFactory.makeClient(stormConf);
        this.queue = new LinkedBlockingQueue<>(QUEUE_MAX_SIZE);
    }

    /**
     * Flushes all buffered tuples as a single bulk request on each tick.
     *
     * @param tuple the tick tuple, acked once the flush has been attempted
     */
    @Override
    protected void executeTickTuple(Tuple tuple) {
        bulkUpdateIndexes();
        outputCollector.ack(tuple);
    }

    /**
     * Buffers the incoming tuple; when the buffer is full, flushes it first.
     *
     * @param tuple the tuple to buffer for the next bulk request
     */
    @Override
    protected void executeTuple(Tuple tuple) {
        if (!queue.offer(tuple)) {
            bulkUpdateIndexes();
            // The queue was just drained, so this should always succeed; guard anyway so an
            // IllegalStateException from add() can never escape execute() and kill the worker.
            if (!queue.offer(tuple)) {
                LOGGER.error("Queue still full after flush, failing tuple");
                outputCollector.fail(tuple);
            }
        }
    }

    /**
     * Drains the buffered tuples, maps each one to an index request and executes them
     * as a single bulk request, acking or failing each tuple according to its outcome.
     */
    protected void bulkUpdateIndexes( ) {
        List<Tuple> inputs = new ArrayList<>(queue.size());
        queue.drainTo(inputs);
        if (inputs.isEmpty()) {
            return;
        }

        BulkRequestBuilder bulkRequest = client.prepareBulk();
        for (Tuple input : inputs) {
            Document<T> doc = mapper.map(input);
            IndexRequestBuilder request = client.prepareIndex(doc.getName(), doc.getType(), doc.getId()).setSource((String)doc.getSource());
            if (doc.getParentId() != null) {
                request.setParent(doc.getParentId());
            }
            bulkRequest.add(request);
        }

        try {
            if (bulkRequest.numberOfActions() > 0) {
                BulkResponse bulkItemResponses = bulkRequest.execute().actionGet();
                if (bulkItemResponses.hasFailures()) {
                    // Bulk item responses are returned in request order, so index i
                    // corresponds to the i-th drained tuple.
                    BulkItemResponse[] items = bulkItemResponses.getItems();
                    for (int i = 0; i < items.length; i++) {
                        ackOrFail(items[i], inputs.get(i));
                    }
                } else {
                    ackAll(inputs);
                }
            }
        } catch (ElasticsearchException e) {
            LOGGER.error("Unable to process bulk request, {} tuples are in failure", inputs.size(), e);
            outputCollector.reportError(e.getRootCause());
            failAll(inputs);
        }
    }

    /**
     * Acks the tuple when its bulk item succeeded, otherwise logs the failure reason and fails it.
     */
    private void ackOrFail(BulkItemResponse item, Tuple tuple) {
        if (item.isFailed()) {
            LOGGER.error("Failed to process tuple {} : {}", mapper.map(tuple), item.getFailureMessage());
            outputCollector.fail(tuple);
        } else {
            outputCollector.ack(tuple);
        }
    }

    /** Acks every tuple in {@code inputs}. */
    protected void ackAll(List<Tuple> inputs) {
        for (Tuple t : inputs) {
            outputCollector.ack(t);
        }
    }

    /** Fails every tuple in {@code inputs}. */
    protected void failAll(List<Tuple> inputs) {
        for (Tuple t : inputs) {
            outputCollector.fail(t);
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        /* no-ouput */
    }

    /** Closes the elasticsearch client when the bolt is torn down. */
    @Override
    public void cleanup() {
        if (this.client != null) this.client.close();
    }
}