package com.lucidworks.storm.solr;

import static com.lucidworks.storm.spring.SpringBolt.ExecuteResult;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Timer;
import com.lucidworks.storm.spring.StreamingDataAction;
import com.lucidworks.storm.spring.TickTupleAware;
import com.ryantenney.metrics.annotation.Metric;
import org.apache.log4j.Logger;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.springframework.beans.factory.annotation.Autowired;

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

/**
 * A simple Spring-managed POJO for sending messages processed by a Storm topology to SolrCloud.
 * Bean implementations do not need to be thread-safe, but should be created in the prototype scope
 * to support multiple bolts running in the same JVM in the same Storm topology.
 */
public class SolrBoltAction implements StreamingDataAction, TickTupleAware, Closeable {

  public static Logger log = Logger.getLogger(SolrBoltAction.class);

  @Metric
  public Timer sendBatchToSolr;

  @Metric
  public Counter indexedCounter;

  @Metric
  public Counter tuplesReceived;

  protected CloudSolrClient cloudSolrClient;
  protected SolrInputDocumentMapper solrInputDocumentMapper;
  protected int maxBufferSize = 100; // avoids sending hundreds of requests per second to Solr in high-throughput environments
  protected long bufferTimeoutMs = 500L;
  protected SolrUpdateRequestStrategy updateRequestStrategy;
  protected DocumentAssignmentStrategy documentAssignmentStrategy;

  // used internally for buffering docs before sending to Solr
  private Map<String,DocBuffer> buffers = new HashMap<String,DocBuffer>();

  @Autowired
  public SolrBoltAction(CloudSolrClient cloudSolrClient) {
    this.cloudSolrClient = cloudSolrClient;
    this.cloudSolrClient.connect();
  }

  public ExecuteResult onTick() {
    boolean anyNeedsFlush = false;
    for (DocBuffer b : buffers.values()) {
      // this catches the case where we have buffered docs, but don't see any more docs flowing in for a while
      if (b.shouldFlushBuffer()) {
        anyNeedsFlush = true;
        break;
      }
    }

    if (anyNeedsFlush) {
      // have to flush them all so we can ack correctly
      for (DocBuffer b : buffers.values()) {
        flushBufferedDocs(b);
      }
      return ExecuteResult.ACK;
    }

    // todo: remove old DocBuffer objects from the map
    // todo: could pro-actively create collections that will be needed soon here

    return ExecuteResult.IGNORED;
  }

  public ExecuteResult execute(Tuple input, OutputCollector outputCollector) {
    if (tuplesReceived != null) {
      tuplesReceived.inc();
    }

    String docId = input.getString(0);
    Object docObj = input.getValue(1);
    if (docId == null || docObj == null) {
      log.warn("Ignored tuple: " + input);
      return ExecuteResult.IGNORED; // nothing to index
    }

    try {
      return processInputDoc(docId, docObj);
    } catch (Exception exc) {
      log.error("Failed to process " + docId + " due to: " + exc);
      if (exc instanceof RuntimeException) {
        throw (RuntimeException) exc;
      } else {
        throw new RuntimeException(exc);
      }
    }
  }

  /**
   * Process an input document that has already been validated; a good place to start for subclasses
   * to plug in their own input Tuple processing logic.
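   * <p>
   * As an illustrative sketch only (the subclass name and filtering rule below are assumptions,
   * not part of this project), a subclass might override this method to pre-filter tuples before
   * delegating back to the default mapping and buffering behavior:
   * <pre>
   *   public class FilteringSolrBoltAction extends SolrBoltAction {
   *     public FilteringSolrBoltAction(CloudSolrClient cloudSolrClient) {
   *       super(cloudSolrClient);
   *     }
   *
   *     protected ExecuteResult processInputDoc(String docId, Object docObj) throws Exception {
   *       // hypothetical rule: only index Map-based payloads, ignore everything else
   *       if (!(docObj instanceof Map))
   *         return ExecuteResult.IGNORED;
   *
   *       return super.processInputDoc(docId, docObj);
   *     }
   *   }
   * </pre>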
   */
  protected ExecuteResult processInputDoc(String docId, Object docObj) throws Exception {
    // default if not auto-wired
    if (solrInputDocumentMapper == null)
      solrInputDocumentMapper = new DefaultSolrInputDocumentMapper();

    SolrInputDocument doc = solrInputDocumentMapper.toInputDoc(docId, docObj);
    if (doc == null)
      return ExecuteResult.IGNORED; // mapper doesn't want this object indexed

    if (documentAssignmentStrategy == null) {
      // relies on the CloudSolrClient having a default collection specified
      documentAssignmentStrategy = new DefaultDocumentAssignmentStrategy();
    }

    return bufferDoc(documentAssignmentStrategy.getCollectionForDoc(cloudSolrClient, doc), doc);
  }

  public int getMaxBufferSize() {
    return maxBufferSize;
  }

  public void setMaxBufferSize(int maxBufferSize) {
    this.maxBufferSize = maxBufferSize;
  }

  public long getBufferTimeoutMs() {
    return bufferTimeoutMs;
  }

  public void setBufferTimeoutMs(long bufferTimeoutMs) {
    this.bufferTimeoutMs = bufferTimeoutMs;
  }

  public SolrInputDocumentMapper getSolrInputDocumentMapper() {
    return solrInputDocumentMapper;
  }

  public void setSolrInputDocumentMapper(SolrInputDocumentMapper solrInputDocumentMapper) {
    this.solrInputDocumentMapper = solrInputDocumentMapper;
  }

  public SolrUpdateRequestStrategy getUpdateRequestStrategy() {
    return updateRequestStrategy;
  }

  public void setUpdateRequestStrategy(SolrUpdateRequestStrategy updateRequestStrategy) {
    this.updateRequestStrategy = updateRequestStrategy;
  }

  public DocumentAssignmentStrategy getDocumentAssignmentStrategy() {
    return documentAssignmentStrategy;
  }

  public void setDocumentAssignmentStrategy(DocumentAssignmentStrategy documentAssignmentStrategy) {
    this.documentAssignmentStrategy = documentAssignmentStrategy;
  }

  public UpdateRequest createUpdateRequest(String collection) {
    UpdateRequest req = new UpdateRequest();
    req.setParam("collection", collection);
    return req;
  }

  protected ExecuteResult bufferDoc(String collection, SolrInputDocument doc) {
    DocBuffer docBuffer = buffers.get(collection);
    if (docBuffer == null) {
      docBuffer = new DocBuffer(collection, maxBufferSize, bufferTimeoutMs);
      buffers.put(collection, docBuffer);
    }
    docBuffer.add(doc);
    return docBuffer.shouldFlushBuffer() ? flushBufferedDocs(docBuffer) : ExecuteResult.BUFFERED;
  }

  protected ExecuteResult flushBufferedDocs(DocBuffer b) {
    int numDocsInBatch = b.buffer.size();
    if (numDocsInBatch == 0) {
      b.reset();
      return ExecuteResult.ACK;
    }

    Timer.Context timer = (sendBatchToSolr != null) ?
        sendBatchToSolr.time() : null;
    try {
      sendBatchToSolr(b);
    } finally {
      if (timer != null)
        timer.stop();

      if (indexedCounter != null)
        indexedCounter.inc(numDocsInBatch);

      b.reset();
    }

    return ExecuteResult.ACK;
  }

  protected void sendBatchToSolr(DocBuffer b) {
    if (log.isDebugEnabled())
      log.debug("Sending buffer of " + b.buffer.size() + " to collection " + b.collection);

    UpdateRequest req = createUpdateRequest(b.collection);
    req.add(b.buffer);
    updateRequestStrategy.sendUpdateRequest(cloudSolrClient, b.collection, req);
  }

  public void close() throws IOException {
    // flush any buffered docs before shutting down
    for (DocBuffer b : buffers.values()) {
      if (!b.buffer.isEmpty()) {
        try {
          flushBufferedDocs(b);
        } catch (Exception exc) {
          log.error("Failed to flush buffered docs for " + b.collection + " before shutting down due to: " + exc, exc);
        }
      }
    }
    buffers.clear();

    if (documentAssignmentStrategy instanceof Closeable) {
      try {
        ((Closeable) documentAssignmentStrategy).close();
      } catch (Exception ignore) {
        log.warn("Error when trying to close the documentAssignmentStrategy due to: " + ignore);
      }
    }

    if (updateRequestStrategy instanceof Closeable) {
      try {
        ((Closeable) updateRequestStrategy).close();
      } catch (Exception ignore) {
        log.warn("Error when trying to close the updateRequestStrategy due to: " + ignore);
      }
    }

    if (solrInputDocumentMapper instanceof Closeable) {
      try {
        ((Closeable) solrInputDocumentMapper).close();
      } catch (Exception ignore) {
        log.warn("Error when trying to close the solrInputDocumentMapper due to: " + ignore);
      }
    }
  }
}
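/*
 * Illustrative configuration sketch (an assumption about typical usage, not part of this source
 * file): because instances are not thread-safe, the class-level Javadoc calls for declaring this
 * action as a prototype-scoped Spring bean so each bolt task gets its own instance. A minimal XML
 * wiring might look like the following; the bean ids, ZooKeeper address, and collection name are
 * hypothetical values, and the buffer settings simply echo this class's defaults.
 *
 *   <bean id="cloudSolrClient" class="org.apache.solr.client.solrj.impl.CloudSolrClient">
 *     <constructor-arg value="localhost:2181"/>
 *     <property name="defaultCollection" value="collection1"/>
 *   </bean>
 *
 *   <bean id="solrBoltAction" class="com.lucidworks.storm.solr.SolrBoltAction" scope="prototype">
 *     <constructor-arg ref="cloudSolrClient"/>
 *     <property name="maxBufferSize" value="100"/>
 *     <property name="bufferTimeoutMs" value="500"/>
 *   </bean>
 */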