package org.solrmarc.driver; import org.apache.log4j.Logger; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; import org.marc4j.MarcError; import org.marc4j.MarcException; import org.marc4j.MarcReader; import org.marc4j.marc.Record; import org.solrmarc.driver.RecordAndDoc.eErrorLocationVal; import org.solrmarc.index.indexer.AbstractValueIndexer; import org.solrmarc.index.indexer.IndexerSpecException; import org.solrmarc.index.indexer.IndexerSpecException.eErrorSeverity; import org.solrmarc.index.indexer.ValueIndexerFactory; import org.solrmarc.solr.SolrProxy; import org.solrmarc.solr.SolrRuntimeException; import org.solrmarc.tools.SolrMarcIndexerException; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; /** * The single-threaded reference implementation of the indexing process. * Reads a record, builds the SolrInputDocument, and sends it to the SolrProxy. * The various methods that it uses to accomplish these tasks are used by the * various multi-threaded classes. * * @author rh9ec * */ public class Indexer { private final static Logger logger = Logger.getLogger(Indexer.class); protected final List<AbstractValueIndexer<?>> indexers; protected SolrProxy solrProxy; protected final BlockingQueue<RecordAndDoc> errQ; protected final BlockingQueue<String> delQ; protected boolean shuttingDown = false; protected boolean viaInterrupt = false; protected boolean isShutDown = false; private int cnts[] = new int[] { 0, 0, 0 }; EnumSet<eErrorHandleVal> errHandle = EnumSet.noneOf(eErrorHandleVal.class); public enum eErrorHandleVal { RETURN_ERROR_RECORDS, INDEX_ERROR_RECORDS; }; public Indexer(final List<AbstractValueIndexer<?>> indexers, final SolrProxy solrProxy) { this.indexers = indexers; this.solrProxy = solrProxy; errQ = new LinkedBlockingQueue<RecordAndDoc>(); delQ = new LinkedBlockingQueue<String>(); } protected Indexer(Indexer toClone) { indexers = new ArrayList<AbstractValueIndexer<?>>(); for (AbstractValueIndexer<?> indexer : toClone.indexers) { this.indexers.add(ValueIndexerFactory.makeThreadSafeCopy(indexer)); } this.solrProxy = toClone.solrProxy; this.errQ = toClone.errQ; this.delQ = toClone.delQ; this.errHandle = toClone.errHandle; } protected Indexer makeThreadSafeCopy() { return (new Indexer(this)); } boolean isSet(eErrorHandleVal val) { return (errHandle.contains(val)); } void setErr(eErrorHandleVal val) { errHandle.add(val); } /** * indexToSolr - Reads in a MARC Record, produces SolrInputDocument for it, * sends that document to solr This is the single threaded version that does * each of those action sequentially * * @param reader * @return array containing number of records read, number of records * indexed, and number of records sent to solr */ public int[] indexToSolr(final MarcReader reader) { resetCnts(); while (!shuttingDown) { RecordAndCnt recordAndCnt = getRecord(reader); if (recordAndCnt == null) break; logger.debug("record read : " + recordAndCnt.getRecord().getControlNumber()); RecordAndDoc recDoc = null; try { recDoc = getIndexDoc(recordAndCnt.getRecord(), recordAndCnt.getCnt()); } catch (SolrMarcIndexerException smie) { break; } if (recDoc != null) { indexSingleDocument(recDoc); } } if (shuttingDown) { endProcessing(); } return (cnts); } protected void indexSingleDocument(RecordAndDoc recDoc) { try { if (recDoc.getDoc() != null) { solrProxy.addDoc(recDoc.getDoc()); incrementCnt(2); if (recDoc.getErrLvl() != eErrorSeverity.NONE && isSet(eErrorHandleVal.RETURN_ERROR_RECORDS)) { if (isSet(eErrorHandleVal.RETURN_ERROR_RECORDS)) { errQ.add(recDoc); } } } } catch (SolrRuntimeException sse) { singleRecordSolrError(recDoc, sse, errQ); } catch (Exception e) { singleRecordSolrError(recDoc, e, errQ); } } void resetCnts() { cnts[0] = cnts[1] = cnts[2] = 0; } int incrementCnt(int cntNum) { return(++cnts[cntNum]); } int addToCnt(int cntNum, int amount) { cnts[cntNum] += amount; return(cnts[cntNum]); } int[] getCounts() { return(cnts); } RecordAndCnt getRecord(MarcReader reader) { Record record = null; while (record == null) { try { if (reader.hasNext()) record = reader.next(); else return(null); } catch (MarcException me) { logger.error("Unrecoverable Error in MARC record data", me); if (Boolean.parseBoolean(System.getProperty("solrmarc.terminate.on.marc.exception", "true"))) { return(null); } else { logger.warn("Trying to continue after MARC record data error"); record = null; } } } int cnt = incrementCnt(0); return (new RecordAndCnt(record, cnt)); } RecordAndDoc getIndexDoc(Record record, int count) { RecordAndDoc recDoc = null; recDoc = indexToSolrDoc(record); if (recDoc.getSolrMarcIndexerException() != null) { SolrMarcIndexerException smie = recDoc.getSolrMarcIndexerException(); String recCtrlNum = recDoc.rec.getControlNumber(); String idMessage = smie.getMessage() != null ? smie.getMessage() : ""; if (smie.getLevel() == SolrMarcIndexerException.IGNORE) { logger.info("Record will be Ignored " + (recCtrlNum != null ? recCtrlNum : "") + " " + idMessage + " (record count " + count + ")"); return(null); } else if (smie.getLevel() == SolrMarcIndexerException.DELETE) { logger.info("Record will be Deleted " + (recCtrlNum != null ? recCtrlNum : "") + " " + idMessage + " (record count " + count + ")"); delQ.add(recCtrlNum); return(null); } else if (smie.getLevel() == SolrMarcIndexerException.EXIT) { logger.info("Serious Error flagged in record " + (recCtrlNum != null ? recCtrlNum : "") + " " + idMessage + " (record count " + count + ")"); logger.info("Terminating indexing."); throw new SolrMarcIndexerException(SolrMarcIndexerException.EXIT); } } if (recDoc.getErrLvl() != eErrorSeverity.NONE) { if (isSet(eErrorHandleVal.RETURN_ERROR_RECORDS) && !isSet(eErrorHandleVal.INDEX_ERROR_RECORDS)) { errQ.add(recDoc); } if (recDoc.getErrLvl() == eErrorSeverity.FATAL && recDoc.ise != null) { String recCtrlNum = recDoc.rec.getControlNumber(); String idMessage = recDoc.ise.getMessage() != null ? recDoc.ise.getMessage() : ""; String indSpec = recDoc.ise.getSpecMessage() != null ? recDoc.ise.getSpecMessage() : ""; logger.info("Fatal Error returned for record " + (recCtrlNum != null ? recCtrlNum : "") + " : " + idMessage + " (record count " + count + ")"); logger.info("Fatal Error from by index spec " + (recCtrlNum != null ? recCtrlNum : "") + " : " + indSpec + " (record count " + count + ")"); logger.info("Terminating indexing."); throw new SolrMarcIndexerException(SolrMarcIndexerException.EXIT); } if (!isSet(eErrorHandleVal.INDEX_ERROR_RECORDS)) { logger.debug("Skipping error record: " + recDoc.rec.getControlNumber()); return(null); } } incrementCnt(1); return(recDoc); } protected SolrInputDocument combineDocWithErrors(SolrInputDocument[] documentParts, boolean includeErrors) { SolrInputDocument result = documentParts[0]; if (includeErrors) { if (!documentParts[1].isEmpty()) result.putAll(documentParts[1]); if (!documentParts[2].isEmpty()) result.putAll(documentParts[2]); } return result; } private eErrorSeverity addExceptionsToMap(SolrInputDocument document, List<IndexerSpecException> perRecordExceptions, eErrorSeverity errLvl) { if (perRecordExceptions != null) { for (IndexerSpecException e : perRecordExceptions) { @SuppressWarnings("unused") String specMessage = e.getSpecMessage(); document.addField("marc_error", e.getMessage()); errLvl = eErrorSeverity.max(errLvl, e.getErrLvl()); for (Throwable cause = e.getCause(); cause != null; cause = cause.getCause()) { document.addField("marc_error", e.getSolrField() + " : " + cause.getMessage()); } } } return (errLvl); } private void addMarcErrorsToMap(SolrInputDocument document, List<MarcError> errors) { for (Object err : errors) { document.addField("marc_error", err.toString()); } } protected RecordAndDoc indexToSolrDoc(final Record record) { Map<String, SolrInputField> emptyMap = new LinkedHashMap<>(); SolrInputDocument[] inputDocs = new SolrInputDocument[] { new SolrInputDocument(emptyMap), new SolrInputDocument(emptyMap), new SolrInputDocument(emptyMap) }; RecordAndDoc recDoc = new RecordAndDoc(record); eErrorSeverity errLvl = eErrorSeverity.NONE; ValueIndexerFactory.instance().clearPerRecordErrors(); for (final AbstractValueIndexer<?> indexer : indexers) { try { final Collection<String> data = indexer.getFieldData(record); for (String fieldName : indexer.getSolrFieldNames()) { if (data.size() == 0) { /* do_nothing() */ } else { for (String dataVal : data) { inputDocs[0].addField(fieldName, dataVal, 1.0f); } } } } catch (OutOfMemoryError oome) { logger.error("OOMError in record: " + recDoc.rec.getControlNumber()); logger.error("while processing index specification: " + indexer.getSpecLabel()); logger.error("number of per record exceptions: " + ((ValueIndexerFactory.instance().getPerRecordErrors() != null) ? ValueIndexerFactory.instance().getPerRecordErrors().size() : 0)); inputDocs[2].addField("marc_error", indexer.getSolrFieldNames().toString() + oome.getMessage()); errLvl = eErrorSeverity.FATAL; recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } catch (InvocationTargetException ioe) { Throwable wrapped = ioe.getTargetException(); // Exception wrappedE = (wrapped instanceof Exception) ? // (Exception)wrapped : null; if (wrapped != null && wrapped instanceof IndexerSpecException) { logger.debug("Exception in record: " + recDoc.rec.getControlNumber()); logger.debug("while processing index specification: " + indexer.getSpecLabel()); errLvl = eErrorSeverity.max(errLvl, ((IndexerSpecException) wrapped).getErrLvl()); ((IndexerSpecException)wrapped).setSolrFieldAndSpec(indexer.getSolrFieldNamesStr(), indexer.getSpecLabel()); recDoc.setIndexerSpecException((IndexerSpecException)wrapped); } else if (wrapped != null && wrapped instanceof OutOfMemoryError) { logger.error("OOMError in record: " + recDoc.rec.getControlNumber()); logger.error("while processing index specification: " + indexer.getSpecLabel()); inputDocs[2].addField("marc_error", indexer.getSolrFieldNames().toString() + wrapped.getMessage()); errLvl = eErrorSeverity.FATAL; recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } else if (wrapped != null && wrapped instanceof IllegalArgumentException) { logger.warn("Exception in record: " + recDoc.rec.getControlNumber()); logger.warn("while processing index specification: " + indexer.getSpecLabel()); if (wrapped != null) { logger.debug(wrapped); } errLvl = eErrorSeverity.ERROR; } else { logger.warn("Exception in record: " + recDoc.rec.getControlNumber()); logger.warn("while processing index specification: " + indexer.getSpecLabel()); if (wrapped != null) { logger.warn(wrapped); } errLvl = eErrorSeverity.ERROR; } inputDocs[2].addField("marc_error", indexer.getSolrFieldNames().toString() + wrapped.getMessage()); recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } catch (SolrMarcIndexerException e) { recDoc.setSolrMarcIndexerException(e); } catch (IndexerSpecException e) { logger.warn("Exception in record: " + recDoc.rec.getControlNumber()); logger.warn("while processing index specification: " + indexer.getSpecLabel()); inputDocs[2].addField("marc_error", indexer.getSolrFieldNames().toString() + e.getMessage()); errLvl = eErrorSeverity.max(errLvl, e.getErrLvl()); recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } catch (Exception e) { logger.warn("Exception in record: " + recDoc.rec.getControlNumber()); logger.warn("while processing index specification: " + indexer.getSpecLabel()); inputDocs[2].addField("marc_error", indexer.getSolrFieldNames().toString() + e.getMessage()); errLvl = eErrorSeverity.ERROR; recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } } if (record.hasErrors()) { addMarcErrorsToMap(inputDocs[1], record.getErrors()); recDoc.addErrLoc(eErrorLocationVal.MARC_ERROR); } List<IndexerSpecException> perRecordExceptions = ValueIndexerFactory.instance().getPerRecordErrors(); if (perRecordExceptions != null) { errLvl = addExceptionsToMap(inputDocs[2], perRecordExceptions, errLvl); recDoc.addErrLoc(eErrorLocationVal.INDEXING_ERROR); } recDoc.setDoc(combineDocWithErrors(inputDocs, isSet(eErrorHandleVal.INDEX_ERROR_RECORDS))); recDoc.setMaxErrLvl(errLvl); ValueIndexerFactory.instance().doneWithRecord(record); return recDoc; } protected void singleRecordSolrError(RecordAndDoc recDoc, Exception e1, BlockingQueue<RecordAndDoc> errQ) { logger.error("Failed on single doc with id : " + recDoc.getRec().getControlNumber()); if (e1 instanceof SolrRuntimeException && e1.getCause() instanceof SolrException) { SolrException cause = (SolrException) e1.getCause(); logger.error(cause.getMessage()); } else if (e1 instanceof SolrRuntimeException && e1.getCause() instanceof InvocationTargetException) { InvocationTargetException cause = (InvocationTargetException) e1.getCause(); Throwable target = cause.getTargetException(); logger.error(target.getMessage()); } else { logger.error(e1); } if (errQ != null) { recDoc.addErrLoc(eErrorLocationVal.SOLR_ERROR); recDoc.errLvl = eErrorSeverity.ERROR; errQ.add(recDoc); } } boolean isShutDown() { return isShutDown; } void setIsShutDown() { isShutDown = true; } void shutDown(boolean viaInterrupt) { this.viaInterrupt = viaInterrupt; shuttingDown = true; } void endProcessing() { if (delQ.size() > 0) { logger.info("Deleting "+delQ.size()+ " records "); } for (String recCtrlNum : delQ) { // String recCtrlNum = recDoc.rec.getControlNumber(); logger.debug("Deleting record " + (recCtrlNum != null ? recCtrlNum : "")); try { solrProxy.delete(recCtrlNum); } catch (SolrRuntimeException e) { // TODO Auto-generated catch block e.printStackTrace(); } } try { logger.info("Commiting updates to Solr"); solrProxy.commit(false); } catch (SolrRuntimeException e) { } } private static long time(long time, TimeUnit unit) { return unit.convert(time, TimeUnit.NANOSECONDS); } void reportPerMethodTime() { logger.info("Elapsed time per indexing method:"); for (final AbstractValueIndexer<?> indexer : indexers) { long elaspedTime = indexer.getTotalElapsedTime(); long minutes = time(elaspedTime, TimeUnit.MINUTES); long seconds = time(elaspedTime, TimeUnit.SECONDS); long millis = time(elaspedTime, TimeUnit.MILLISECONDS); millis -= seconds * 1000; seconds -= minutes * 60; String elapsedStr = String.format("%d min, %d.%03d sec", minutes, seconds, millis); logger.info(elapsedStr + " ---" + indexer.getSolrFieldNames().toString() + ":" + indexer.getSpecLabel()); } } }