/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.hadoop;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.response.UpdateResponse;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.ExecutorUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Enables adding batches of documents to an EmbeddedSolrServer.
 *
 * <p>Batches are written either synchronously on the calling thread (when
 * {@code writerThreads == 0}) or by a fixed-size thread pool fed from a bounded
 * queue. A failure while writing a batch is recorded in a single slot and
 * re-thrown by a later call to {@link #queueBatch(Collection)}.
 */
class BatchWriter {

  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** The embedded Solr instance that receives every batch. */
  private final EmbeddedSolrServer solr;

  /**
   * The most recent failure recorded by {@link #runUpdate(List)}, or
   * {@code null} if none is pending. Volatile because it is written by pool
   * threads and read by the thread that queues batches.
   */
  private volatile Exception batchWriteException = null;

  /**
   * @return the pending batch write failure, or {@code null} if there is none
   */
  public Exception getBatchWriteException() {
    return batchWriteException;
  }

  /**
   * Records (or, when passed {@code null}, clears) the pending batch write
   * failure.
   *
   * @param batchWriteException the failure to record, may be {@code null}
   */
  public void setBatchWriteException(Exception batchWriteException) {
    this.batchWriteException = batchWriteException;
  }

  /** The number of writing threads; zero selects the single threaded mode. */
  final int writerThreads;

  /** Capacity of the queue that holds batches waiting for a writer thread. */
  final int queueSize;

  /** Pool executing the batches, or {@code null} in the single threaded case. */
  private final ThreadPoolExecutor batchPool;

  /** Task id passed to the counter updates. */
  private final TaskID taskId;

  /**
   * The number of batches currently inside {@link Batch#run()}. This class
   * only increments and decrements it; {@link #close(TaskAttemptContext)}
   * waits on the pool's termination rather than on this counter.
   */
  AtomicInteger executingBatches = new AtomicInteger(0);

  /**
   * A unit of work holding a private copy of the documents of one batch.
   * Running it sends the documents to Solr through
   * {@link BatchWriter#runUpdate(List)} and keeps the response.
   */
  final class Batch implements Runnable {
    private List<SolrInputDocument> documents;
    private UpdateResponse result;

    /**
     * @param batch the documents to write; they are copied, so the caller may
     *              reuse the collection afterwards
     */
    public Batch(Collection<SolrInputDocument> batch) {
      documents = new ArrayList<>(batch);
    }

    /**
     * Writes the documents and stores the response, keeping
     * {@link BatchWriter#executingBatches} up to date while doing so.
     */
    @Override
    public void run() {
      // Increment outside the try so the decrement in finally is always
      // paired with exactly one increment.
      executingBatches.getAndIncrement();
      try {
        result = runUpdate(documents);
      } finally {
        executingBatches.getAndDecrement();
      }
    }

    protected List<SolrInputDocument> getDocuments() {
      return documents;
    }

    protected void setDocuments(List<SolrInputDocument> documents) {
      this.documents = documents;
    }

    /**
     * @return the response of the last run, or {@code null} if the batch has
     *         not run yet or the write failed
     */
    protected UpdateResponse getResult() {
      return result;
    }

    protected void setResult(UpdateResponse result) {
      this.result = result;
    }

    /**
     * Replaces the content of this batch with a copy of the given documents
     * and forgets the previous result.
     */
    protected void reset(List<SolrInputDocument> documents) {
      if (this.documents == null) {
        this.documents = new ArrayList<>(documents);
      } else {
        this.documents.clear();
        this.documents.addAll(documents);
      }
      result = null;
    }

    /**
     * Replaces the content of this batch with the single given document and
     * forgets the previous result.
     */
    protected void reset(SolrInputDocument document) {
      if (this.documents == null) {
        this.documents = new ArrayList<>();
      } else {
        this.documents.clear();
      }
      this.documents.add(document);
      result = null;
    }
  }

  /**
   * Adds the documents to Solr and updates the job counters. This method
   * never throws: any failure is logged, counted, recorded through
   * {@link #setBatchWriteException(Exception)} and reported by returning
   * {@code null}.
   *
   * @param batchToWrite the documents to add
   * @return the Solr response, or {@code null} if the write failed
   */
  protected UpdateResponse runUpdate(List<SolrInputDocument> batchToWrite) {
    try {
      UpdateResponse result = solr.add(batchToWrite);
      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1);
      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size());
      // NOTE(review): the write time counter is only updated when debug
      // logging is enabled - confirm this coupling is intended.
      if (LOG.isDebugEnabled()) {
        SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime());
      }
      return result;
    } catch (Throwable e) {
      // The failure slot holds an Exception, so anything else is wrapped.
      if (e instanceof Exception) {
        setBatchWriteException((Exception) e);
      } else {
        setBatchWriteException(new Exception(e));
      }
      SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1);
      LOG.error("Unable to process batch", e);
      return null;
    }
  }

  /**
   * Creates the batch writer and, unless {@code writerThreads} is zero, the
   * thread pool that executes the batches.
   *
   * @param solr          the server that receives the documents
   * @param batchSize     not used by this class
   * @param tid           the task id used for counter updates
   * @param writerThreads the number of writer threads; zero means batches are
   *                      written on the thread calling
   *                      {@link #queueBatch(Collection)}
   * @param queueSize     the capacity of the queue of pending batches
   */
  public BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid,
      int writerThreads, int queueSize) {
    this.solr = solr;
    this.writerThreads = writerThreads;
    this.queueSize = queueSize;
    this.taskId = tid;
    if (writerThreads != 0) {
      // Fixed size pool over a bounded queue. With CallerRunsPolicy a batch
      // rejected because the queue is full is executed by the submitting
      // thread instead of being dropped.
      batchPool = new ExecutorUtil.MDCAwareThreadPoolExecutor(writerThreads, writerThreads, 5,
          TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(queueSize),
          new ThreadPoolExecutor.CallerRunsPolicy());
    } else { // single threaded case
      batchPool = null;
    }
  }

  /**
   * Queues a copy of the given documents for writing. In the single threaded
   * case the batch is written before this method returns and its failure, if
   * any, is thrown immediately. Otherwise a failure surfaces on a later call.
   *
   * @param batch the documents to write
   * @throws IOException         if a previous (or, when single threaded, this)
   *                             batch write failed with an IO or other error
   * @throws SolrServerException if such a write failed with a Solr exception
   */
  public void queueBatch(Collection<SolrInputDocument> batch)
      throws IOException, SolrServerException {
    throwIf();
    Batch b = new Batch(batch);
    if (batchPool != null) {
      batchPool.execute(b);
    } else { // single threaded case
      b.run();
      throwIf();
    }
  }

  /**
   * Waits for all queued batches to finish, then commits, optimizes down to
   * the configured number of segments, commits again and closes Solr.
   *
   * <p>NOTE(review): a failure recorded by one of the last asynchronous
   * batches is not re-thrown here; callers that care can check
   * {@link #getBatchWriteException()} afterwards - confirm this is intended.
   *
   * @param context the task context used for status, configuration and counters
   * @throws InterruptedException if interrupted while waiting for the pool
   * @throws SolrServerException  on a Solr failure during commit or optimize
   * @throws IOException          on a low level IO error
   */
  public synchronized void close(TaskAttemptContext context)
      throws InterruptedException, SolrServerException, IOException {
    if (batchPool != null) {
      context.setStatus("Waiting for batches to complete");
      batchPool.shutdown();
      while (!batchPool.isTerminated()) {
        LOG.info(String.format(Locale.ENGLISH,
            "Waiting for %d items and %d threads to finish executing", batchPool
            .getQueue().size(), batchPool.getActiveCount()));
        batchPool.awaitTermination(5, TimeUnit.SECONDS);
      }
    }
    context.setStatus("Committing Solr Phase 1");
    solr.commit(true, false);
    context.setStatus("Optimizing Solr");
    int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
    LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
    long start = System.nanoTime();
    solr.optimize(true, false, maxSegments);
    // Measure once so the counter and the log line report the same duration.
    long elapsedNanos = System.nanoTime() - start;
    context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(elapsedNanos);
    // Nanoseconds to seconds. Note that "10^9" would be a bitwise XOR (== 3)
    // in Java, not a power of ten.
    float secs = elapsedNanos / (float) TimeUnit.SECONDS.toNanos(1);
    LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
    context.setStatus("Committing Solr Phase 2");
    solr.commit(true, false);
    context.setStatus("Shutting down Solr");
    solr.close();
  }

  /**
   * Throw a legal exception if a previous batch write had an exception. The
   * previous state is cleared. Uses {@link #batchWriteException} for the state
   * from the last exception.
   *
   * This will lose individual exceptions if the exceptions happen rapidly,
   * but a recorded failure is never discarded without one being thrown.
   *
   * @throws IOException On low level IO error
   * @throws SolrServerException On Solr Exception
   */
  private void throwIf() throws IOException, SolrServerException {
    final Exception last = batchWriteException;
    if (last == null) {
      // Do not write null here: a writer thread may have stored a failure
      // since the read above, and overwriting it would drop it silently.
      return;
    }
    batchWriteException = null;
    if (last instanceof SolrServerException) {
      throw (SolrServerException) last;
    }
    if (last instanceof IOException) {
      throw (IOException) last;
    }
    throw new IOException("Batch Write Failure", last);
  }
}