package org.solrmarc.driver;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import org.apache.log4j.Logger;
import org.apache.solr.common.SolrInputDocument;
import org.marc4j.marc.Record;
/**
 * <h1>ChunkIndexerWorker</h1>
*
 * This class sends batches of documents to Solr. It implements retries to cope with
 * the problem that one bad document in a batch causes all subsequent Solr input documents
 * in that batch to be skipped.
* <br/>
 * To accomplish this, the class divides the failing batch into smaller segments and retries
 * sending those. Eventually the sub-batch containing the problem record is sent one record
 * at a time, ensuring that all valid documents are correctly sent to Solr while only the
 * documents containing errors are skipped.
* <br/>
 * If the parameter errQ is not null, records that cause an error are appended to that queue
 * and can subsequently be logged, or fixed and retried.
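 * <br/>
 * A minimal usage sketch; the executor and the way the Indexer and the record collection
 * are obtained are illustrative assumptions, not part of this class:
 * <pre>{@code
 * BlockingQueue<RecordAndDoc> errQ = new LinkedBlockingQueue<>();
 * Runnable worker = new ChunkIndexerWorker("SolrUpdate_chunk1", recordAndDocs, errQ, indexer);
 * executor.execute(worker);  // or worker.run() to send the chunk on the current thread
 * }</pre>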
 *
* @author rh9ec
*
*/
public class ChunkIndexerWorker implements Runnable
{
private final static Logger logger = Logger.getLogger(ChunkIndexerWorker.class);
final String threadName;
final Collection<SolrInputDocument> docs;
final Collection<RecordAndDoc> recordAndDocs;
final Indexer indexer;
final BlockingQueue<RecordAndDoc> errQ;
String firstDocId = null;
String lastDocId = null;
boolean trackProgress = false;
public ChunkIndexerWorker(String threadName, Collection<RecordAndDoc> recordAndDocs,
BlockingQueue<RecordAndDoc> errQ, Indexer indexer)
{
this.threadName = threadName;
this.recordAndDocs = recordAndDocs;
this.docs = buildDocList(recordAndDocs);
this.errQ = errQ;
this.indexer = indexer;
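        // Optional progress logging, enabled by setting -Dsolrmarc.track.solr.progress=true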
this.trackProgress = Boolean.parseBoolean(System.getProperty("solrmarc.track.solr.progress", "false"));
}
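    /**
     * Extracts the SolrInputDocument from each RecordAndDoc pair, recording the control
     * numbers of the first and last records so log messages can identify this chunk.
     */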
private Collection<SolrInputDocument> buildDocList(final Collection<RecordAndDoc> recordAndDocs)
{
Collection<SolrInputDocument> docs = new ArrayList<>(recordAndDocs.size());
for (RecordAndDoc recDoc : recordAndDocs)
{
String docID = controlNumOrDefault(recDoc.getRec(), "Rec with No 001");
if (firstDocId == null) firstDocId = docID;
docs.add(recDoc.doc);
lastDocId = docID;
}
return docs;
}
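    /**
     * Returns the record's control number (001 field), or the supplied label when the
     * record has no control number.
     */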
    private String controlNumOrDefault(final Record rec, final String label)
    {
        String docID = rec.getControlNumber();
        if (docID == null) docID = label;
        return docID;
    }
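    /**
     * Sends the whole chunk to Solr in a single request. If that request fails, chunks of
     * more than 20 documents are split into four sub-chunks that are retried recursively;
     * chunks of 2 to 20 documents are resubmitted one document at a time; and a single
     * failing document is reported as an error (and appended to errQ when one was provided).
     */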
@Override
public void run()
{
Thread.currentThread().setName(threadName);
int inChunk = docs.size();
logger.debug("Adding chunk of "+inChunk+ " documents -- starting with id : "+firstDocId);
try {
// If all goes well, this is all we need. Add the docs, count the docs, and if desired return the docs with errors
int cnt = indexer.solrProxy.addDocs(docs);
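            // Counter slot 2 tracks the number of documents sent to Solr
            // (reported below as "Total sent so far").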
indexer.addToCnt(2, cnt);
logger.debug("Added chunk of "+cnt+ " documents -- starting with id : "+firstDocId);
if (trackProgress || logger.isDebugEnabled())
{
logger.info("Total sent so far: " + indexer.getCounts()[2]);
}
if (errQ != null)
{
for (RecordAndDoc recDoc : recordAndDocs)
{
if (!recDoc.errLocs.isEmpty())
{
errQ.add(recDoc);
}
}
}
}
catch (Exception e)
{
Iterator<RecordAndDoc> recDocI = recordAndDocs.iterator();
if (inChunk == 1)
{
RecordAndDoc recDoc = recDocI.next();
indexer.singleRecordSolrError(recDoc, e, errQ);
}
            else if (inChunk > 20)
            {
                logger.debug("Failed on chunk of "+inChunk+ " documents -- starting with id : "+firstDocId);
                // Use ceiling division so the four sub-chunks cover the entire chunk;
                // plain integer division (inChunk / 4) would silently drop the remainder records.
                int newChunkSize = (inChunk + 3) / 4;
                Runnable[] subChunk = new Runnable[4];
                for (int i = 0; i < 4; i++)
                {
                    List<RecordAndDoc> newRecDoc = new ArrayList<>(newChunkSize);
                    String id1 = null, id2 = null;
                    for (int j = 0; j < newChunkSize && recDocI.hasNext(); j++)
                    {
                        RecordAndDoc recDoc = recDocI.next();
                        newRecDoc.add(recDoc);
                        // Label records that have no 001 by their position within the failed chunk.
                        String docID = controlNumOrDefault(recDoc.getRec(), "RecCnt_" + (newChunkSize * i + j));
                        if (id1 == null) id1 = docID;
                        id2 = docID;
                    }
                    // Split the chunk into 4 sub-chunks and retry each with a ChunkIndexerWorker.
                    // Note that run() is invoked directly, so the sub-chunks are sent
                    // sequentially on this thread rather than on new threads.
                    subChunk[i] = new ChunkIndexerWorker("SolrUpdateOnError_"+id1+"_"+id2, newRecDoc, errQ, indexer);
                    subChunk[i].run();
                }
            }
            // 2 to 20 records in the chunk: resubmit them one-by-one
            else
{
logger.debug("Failed on chunk of "+inChunk+ " documents -- starting with id : "+firstDocId);
// error on bulk update, resubmit one-by-one
while (recDocI.hasNext())
{
RecordAndDoc recDoc = recDocI.next();
indexer.indexSingleDocument(recDoc);
}
}
}
}
}