package org.solrmarc.driver;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.marc4j.MarcReader;
import org.marc4j.MarcReaderConfig;
import org.marc4j.MarcReaderFactory;
import org.solrmarc.driver.RecordAndDoc.eErrorLocationVal;
import org.solrmarc.index.indexer.AbstractValueIndexer;
import org.solrmarc.index.indexer.IndexerSpecException;
import org.solrmarc.index.indexer.IndexerSpecException.eErrorSeverity;
import org.solrmarc.index.indexer.ValueIndexerFactory;
import org.solrmarc.marc.SolrMarcMarcReaderFactory;
//import org.solrmarc.marc.MarcReaderFactory;
import org.solrmarc.solr.DevNullProxy;
import org.solrmarc.solr.SolrCoreLoader;
import org.solrmarc.solr.SolrProxy;
import org.solrmarc.solr.SolrRuntimeException;
import org.solrmarc.solr.StdOutProxy;
import org.solrmarc.tools.PropertyUtils;
/**
* Uses the command-line arguments to create a MarcReader, a collection of AbstractValueIndexer
* objects, and a SolrProxy object and then passes them to the Indexer class which loops through
* the MARC records, builds SolrInputDocuments and then sends them to Solr
*
* @author rh9ec
*
*/
public class IndexDriver extends BootableMain
{
// log4j logger used by the driver and by the nested shutdown thread classes.
private final static Logger logger = Logger.getLogger(IndexDriver.class);
// Reader properties; created (and loaded when a property file is given) in configureReaderProps.
private Properties readerProps;
// marc4j reader configuration built from readerProps; stays null when no property file
// is given or when the MarcReaderConfig class cannot be loaded.
private MarcReaderConfig readerConfig;
// Factory that compiles index specifications; set in execute().
private ValueIndexerFactory indexerFactory = null;
// Value indexers created from the index specification files in configureIndexer.
private List<AbstractValueIndexer<?>> indexers;
// Either an Indexer or a ThreadedIndexer, chosen in configureIndexer.
private Indexer indexer;
// MARC record source created in configureReader.
private MarcReader reader;
// Output destination (stdout, dev-null or a remote Solr) chosen in configureOutput.
private SolrProxy solrProxy;
// Result of indexer.indexToSolr; entries 0, 1 and 2 are reported as the number of
// records read, indexed and sent to Solr respectively.
private int numIndexed[];
// Command-line arguments stored by the constructor and consumed by execute().
private String[] args;
// Wall-clock time (milliseconds) at which processInput started indexing.
private long startTime;
// Helper thread started in processInput; see ShutdownSimulator.
private Thread shutdownSimulator = null;
/**
 * Program entry point for the SolrMarc indexing process (typically invoked by the Boot class).
 * Wraps the arguments in a new driver instance and immediately runs it.
 *
 * @param args - The command-line arguments passed to the program
 */
public static void main(String[] args)
{
    new IndexDriver(args).execute();
}
/**
 * Provided as an optional entry point for the SolrMarc indexing process. It merely stores the
 * command-line arguments so that they can be used later by the method {@link #execute()}.
 * No parsing or validation happens here.
 *
 * @param args - The command-line arguments passed to the program
 */
public IndexDriver(String[] args)
{
this.args = args;
}
/**
 * Runs a complete indexing session. The stored command-line arguments are parsed, the
 * ValueIndexerFactory is initialized for the configured home directories, and the reader
 * properties, SolrProxy and Indexer are set up from the options. A MarcReader is then opened
 * over the input files, any delete-by-id file is queued on the indexer, and finally
 * processInput hands the MarcReader to the Indexer to index all of the MARC records.
 */
public void execute()
{
    processArgs(args, true);
    indexerFactory = ValueIndexerFactory.initialize(homeDirStrs);
    initializeFromOptions();

    final List<String> marcInputs = options.valuesOf(files);
    logger.info("Opening input files: " + Arrays.toString(marcInputs.toArray()));
    configureReader(marcInputs);

    // Deletions are only queued when the option was actually supplied.
    final boolean hasDeleteFile = deleteRecordByIdFile.value(options) != null;
    if (hasDeleteFile)
    {
        processDeletes();
    }
    processInput();
}
/**
 * Queues record ids for deletion. The file named by the deleteRecordByIdFile option is read
 * line by line and every trimmed, non-empty line is added to the indexer's delete queue.
 * A missing or unreadable file, or an I/O failure while reading, is logged and otherwise
 * ignored so that indexing can still proceed.
 */
private void processDeletes()
{
    File deleteFile = deleteRecordByIdFile.value(options);
    if (!deleteFile.exists() || !deleteFile.canRead())
    {
        logger.warn("Delete-by-id file does not exist or cannot be read: " + deleteFile.getAbsolutePath());
        return;
    }
    // try-with-resources guarantees the reader is closed even when readLine() fails.
    try (BufferedReader delReader = new BufferedReader(new FileReader(deleteFile)))
    {
        String line;
        while ((line = delReader.readLine()) != null)
        {
            String id = line.trim();
            // Blank lines (e.g. a trailing newline) do not name a record; skip them.
            if (!id.isEmpty())
            {
                indexer.delQ.add(id);
            }
        }
    }
    catch (IOException e)
    {
        // FileNotFoundException is an IOException, so one handler covers both cases.
        logger.error("Error reading delete-by-id file: " + deleteFile.getAbsolutePath(), e);
    }
}
/**
 * Builds the reader properties, the output proxy and the indexer from the parsed
 * command-line options. Any failure is logged and terminates the JVM with a distinct
 * exit code:
 * <ul>
 * <li>1 - the reader properties could not be opened or read</li>
 * <li>6 - the Solr output could not be configured</li>
 * <li>2 - the index specifications could not be opened, read or instantiated</li>
 * <li>5 - the index specifications were read but produced validation exceptions</li>
 * </ul>
 */
private void initializeFromOptions()
{
    // getPropertyFileAbsoluteURL receives this one-element array; its first entry is
    // only used below to name the property source in the error message.
    String inputSource[] = new String[1];
    String propertyFileAsURLStr = PropertyUtils.getPropertyFileAbsoluteURL(homeDirStrs, options.valueOf(readOpts), true, inputSource);
    try
    {
        configureReaderProps(propertyFileAsURLStr);
    }
    catch (IOException e1)
    {
        // Pass the exception to the logger so the underlying cause is not lost.
        logger.fatal("Fatal error: Exception opening reader properties input stream: " + inputSource[0], e1);
        logger.error("Exiting...");
        System.exit(1);
    }

    String solrJClassName = solrjClass.value(options);
    // Output target: an explicit Solr URL, else "devnull" when -null is given, else stdout.
    String solrURL = options.has("solrURL") ? options.valueOf("solrURL").toString() : options.has("null") ? "devnull" : "stdout";
    // Multi-threaded indexing is only used when writing to Solr and not debugging.
    boolean multithread = options.has("solrURL") && !options.has("debug");
    try
    {
        this.configureOutput(solrURL, solrJClassName);
    }
    catch (SolrRuntimeException sre)
    {
        logger.error("Error connecting to solr at URL " + solrURL + " : " + sre.getMessage());
        logger.debug("", sre);
        logger.error("Exiting...");
        System.exit(6);
    }

    String specs = options.valueOf(configSpecs);
    try
    {
        logger.info("Reading and compiling index specifications: " + specs);
        this.configureIndexer(specs, multithread);
    }
    catch (IOException | IllegalAccessException | InstantiationException e1)
    {
        logger.error("Error opening or reading index configurations: " + specs, e1);
        logger.error("Exiting...");
        System.exit(2);
    }

    List<IndexerSpecException> exceptions = this.indexerFactory.getValidationExceptions();
    if (!exceptions.isEmpty())
    {
        logger.error("Error processing index configurations: " + specs);
        logTextForExceptions(exceptions);
        logger.error("Exiting...");
        System.exit(5);
    }
}
/**
 * Names of the "solrmarc." properties that configureReaderProps copies from the reader
 * property file into the JVM system properties, provided no system property of the same
 * name is already set.
 */
final static String [] solrmarcPropertyStrings = {
"solrmarc.indexer.chunksize",
"solrmarc.indexer.threadcount",
"solrmarc.solrj.threadcount",
"solrmarc.track.solr.progress",
"solrmarc.terminate.on.marc.exception",
"solrmarc.output.redirect",
"solrmarc.indexer.test.fire.method",
"solrmarc.method.report",
};
/**
 * Loads the reader properties and derives the marc4j reader configuration from them.
 * <p>
 * readerProps is always replaced by a fresh Properties object. When a property file URL is
 * given, the file is loaded, every whitelisted "solrmarc." property (see
 * solrmarcPropertyStrings) that is not already defined as a system property is copied into
 * the system properties, and a MarcReaderConfig is built. If the MarcReaderConfig class is
 * not available at runtime, readerConfig is set to null.
 *
 * @param propertyFileURLStr URL string of the reader property file, or null for none
 * @throws FileNotFoundException if the property file cannot be found
 * @throws IOException if the property file cannot be read or closed
 */
private void configureReaderProps(String propertyFileURLStr) throws FileNotFoundException, IOException
{
    List<String> propertyStringsToCopy = Arrays.asList(solrmarcPropertyStrings);
    readerProps = new Properties();
    if (propertyFileURLStr == null)
    {
        return;
    }

    // Close the stream once loaded; the original left it open (resource leak).
    try (java.io.InputStream propertyStream = PropertyUtils.getPropertyFileInputStream(propertyFileURLStr))
    {
        readerProps.load(propertyStream);
    }

    for (String propertyName : readerProps.stringPropertyNames())
    {
        // An explicitly set system property (e.g. -D on the command line) takes precedence.
        if (propertyName.startsWith("solrmarc.") && propertyStringsToCopy.contains(propertyName) && System.getProperty(propertyName) == null)
        {
            System.setProperty(propertyName, readerProps.getProperty(propertyName));
        }
    }

    try
    {
        readerConfig = new MarcReaderConfig(readerProps);
    }
    catch (NoClassDefFoundError ncdfe)
    {
        // MarcReaderConfig is missing from the marc4j on the classpath; configureReader
        // has a fallback that works from readerProps instead.
        readerConfig = null;
    }
}
/**
 * Creates the MarcReader over the given input files and stores it in the reader field.
 * The marc4j MarcReaderFactory is tried first using readerConfig; if that fails with a
 * NoClassDefFoundError, the SolrMarcMarcReaderFactory is used instead, driven by readerProps.
 *
 * @param inputFilenames names of the MARC input files to read
 * @throws IllegalArgumentException if the marc4j factory reports an IOException; the
 *         original exception is kept as the cause
 */
private void configureReader(List<String> inputFilenames)
{
try
{
reader = MarcReaderFactory.makeReader((MarcReaderConfig)readerConfig, ValueIndexerFactory.instance().getHomeDirs(), inputFilenames);
}
catch (IOException e)
{
throw new IllegalArgumentException(e.getMessage(), e);
}
catch(NoClassDefFoundError ncdfe)
{
// The marc4j classes needed above are not on the classpath; warn and fall back to
// SolrMarc's own reader factory.
logger.warn("Using SolrMarc with a marc4j version < 2.8 uses deprecated code in SolrMarc");
reader = SolrMarcMarcReaderFactory.instance().makeReader(readerProps, ValueIndexerFactory.instance().getHomeDirs(), inputFilenames);
}
// reader = MarcReaderFactory.makeReader(readerProps, ValueIndexerFactory.instance().getHomeDirs(), inputFilenames);
}
/**
 * Compiles the index specifications and creates the Indexer that will use them.
 *
 * @param indexSpecifications comma-separated list of index specification file names;
 *        names that are not absolute paths are looked up in the home directories
 * @param multiThreaded true to create a ThreadedIndexer, false for a plain Indexer
 * @throws IllegalAccessException propagated from creating the value indexers
 * @throws InstantiationException propagated from creating the value indexers
 * @throws IOException propagated from creating the value indexers
 */
private void configureIndexer(String indexSpecifications, boolean multiThreaded)
        throws IllegalAccessException, InstantiationException, IOException
{
    final String[] specNames = indexSpecifications.split("[ ]*,[ ]*");
    final File[] specFiles = new File[specNames.length];
    for (int pos = 0; pos < specNames.length; pos++)
    {
        final String specName = specNames[pos];
        File resolved = new File(specName);
        if (!resolved.isAbsolute())
        {
            resolved = PropertyUtils.findFirstExistingFile(homeDirStrs, specName);
        }
        logger.info("Opening index spec file: " + resolved);
        specFiles[pos] = resolved;
    }
    indexers = indexerFactory.createValueIndexers(specFiles);

    final boolean includeErrors = Boolean.parseBoolean(PropertyUtils.getProperty(readerProps, "marc.include_errors", "false"));
    final boolean returnErrors = Boolean.parseBoolean(PropertyUtils.getProperty(readerProps, "marc.return_errors", "false"));
    final int chunkSize = Integer.parseInt(System.getProperty("solrmarc.indexer.chunksize", "640"));

    if (multiThreaded)
    {
        indexer = new ThreadedIndexer(indexers, solrProxy, chunkSize);
    }
    else
    {
        indexer = new Indexer(indexers, solrProxy);
    }

    // Applied in this order on purpose: when both flags are set, the later call
    // (INDEX_ERROR_RECORDS) is the one left in effect.
    if (returnErrors)
    {
        indexer.setErr(Indexer.eErrorHandleVal.RETURN_ERROR_RECORDS);
    }
    if (includeErrors)
    {
        indexer.setErr(Indexer.eErrorHandleVal.INDEX_ERROR_RECORDS);
    }
}
/**
 * Selects the SolrProxy that receives the indexed documents.
 *
 * @param solrURL "stdout" to print documents to a UTF-8 System.out, "devnull" to discard
 *        them, anything else is treated as the URL of a remote Solr server
 * @param solrJClassName SolrJ class name handed to SolrCoreLoader for the remote case
 */
private void configureOutput(String solrURL, String solrJClassName)
{
    switch (solrURL)
    {
        case "stdout":
            try
            {
                // Replace System.out with an auto-flushing UTF-8 stream and write to it.
                PrintStream utf8Out = new PrintStream(System.out, true, "UTF-8");
                System.setOut(utf8Out);
                solrProxy = new StdOutProxy(utf8Out);
            }
            catch (UnsupportedEncodingException impossible)
            {
                // since the encoding is hard-coded, and is valid, this Exception cannot occur.
            }
            break;

        case "devnull":
            solrProxy = new DevNullProxy();
            break;

        default:
            solrProxy = SolrCoreLoader.loadRemoteSolrServer(solrURL, solrJClassName, true);
            break;
    }
}
/**
 * Runs the indexer over the configured MarcReader and reports the outcome.
 * <p>
 * Before indexing starts, a ShutdownSimulator thread is started and a MyShutdownThread is
 * registered as a JVM shutdown hook. After indexing, the hook is removed again (unless the
 * run ended via an interrupt), the indexer is told to end processing, timing and record
 * counts are logged, and any per-record errors are summarized.
 * <p>
 * If indexToSolr throws, the exception is logged with its stack trace and the results
 * report is skipped because no record counts are available.
 */
private void processInput()
{
    boolean inEclipse = "true".equalsIgnoreCase(System.getProperty("runInEclipse"));
    String systemClassPath = System.getProperty("java.class.path");
    logger.debug("System Class Path = " + systemClassPath);
    // A class path without "solrmarc_core" is also treated as running inside Eclipse.
    if (!systemClassPath.contains("solrmarc_core"))
    {
        inEclipse = true;
    }

    shutdownSimulator = new ShutdownSimulator(inEclipse);
    shutdownSimulator.start();
    Thread shutdownHook = new MyShutdownThread(indexer, shutdownSimulator);
    Runtime.getRuntime().addShutdownHook(shutdownHook);

    // Clear any stale counts so a failed run is recognizable below.
    numIndexed = null;
    startTime = System.currentTimeMillis();
    try
    {
        numIndexed = indexer.indexToSolr(reader);
    }
    catch (Exception e)
    {
        // Log with the throwable so the stack trace is preserved.
        logger.fatal("ERROR: Error while invoking indexToSolr", e);
    }
    long endTime = System.currentTimeMillis();

    // removeShutdownHook must not be called once shutdown is in progress, hence the guard.
    if (!indexer.viaInterrupt) Runtime.getRuntime().removeShutdownHook(shutdownHook);
    indexer.endProcessing();

    boolean perMethodReport = Boolean.parseBoolean(PropertyUtils.getProperty(readerProps, "solrmarc.method.report", "false"));
    if (numIndexed != null)
    {
        reportResultsAndTime(numIndexed, startTime, endTime, indexer, (indexer.shuttingDown) ? false : perMethodReport);
    }
    else
    {
        // indexToSolr failed before returning counts; reporting would dereference null.
        logger.error("Indexing terminated abnormally; no record counts are available to report");
    }

    if (!indexer.viaInterrupt && indexer.errQ.size() > 0)
    {
        handleRecordErrors();
    }
    if (!indexer.viaInterrupt && shutdownSimulator != null) shutdownSimulator.interrupt();
    indexer.setIsShutDown();

    if (indexer.shuttingDown && indexer.viaInterrupt)
    {
        // Pause before returning while an interrupt-driven shutdown is under way.
        try
        {
            Thread.sleep(5000);
        }
        catch (InterruptedException ie)
        {
            // Preserve the interrupt status for whoever runs after this method.
            Thread.currentThread().interrupt();
        }
    }
}
/**
 * Logs the record counts and the elapsed indexing time at INFO level.
 *
 * @param numIndexed counts to report: [0] records read, [1] records indexed,
 *        [2] records sent to Solr
 * @param startTime start of the run in milliseconds
 * @param endTime end of the run in milliseconds
 * @param indexer indexer asked for its per-method timing report
 * @param perMethodReport true to also emit the indexer's per-method timing report
 */
private void reportResultsAndTime(int[] numIndexed, long startTime, long endTime, Indexer indexer, boolean perMethodReport)
{
    logger.info(numIndexed[0] + " records read");
    logger.info(numIndexed[1] + " records indexed and ");

    final long elapsedMillis = endTime - startTime;
    final long wholeSeconds = elapsedMillis / 1000;
    final long minutes = wholeSeconds / 60;
    final long seconds = wholeSeconds % 60;
    // Adding 100 and dropping the leading character zero-pads the value to two digits.
    final long paddedHundredths = (elapsedMillis / 10) % 100 + 100;
    final String hundredthsText = Long.toString(paddedHundredths).substring(1);

    final StringBuilder elapsedText = new StringBuilder();
    if (minutes > 0)
    {
        elapsedText.append(minutes).append(" minute").append((minutes != 1) ? "s " : " ");
    }
    elapsedText.append(seconds).append(".").append(hundredthsText).append(" seconds");

    logger.info(numIndexed[2] + " records sent to Solr in " + elapsedText);
    if (perMethodReport)
    {
        indexer.reportPerMethodTime();
    }
}
/**
 * Summarizes the records in the indexer's error queue. Records are counted per error
 * location (MARC, indexing, Solr) and per severity level, and one INFO line is logged for
 * every non-zero combination. A record with several error locations is counted once for
 * each of them, always under the record's own error level.
 */
private void handleRecordErrors()
{
    Collection<RecordAndDoc> errQ = indexer.errQ;
    // One row per error location; the columns are sized from the enum so that
    // indexing by ordinal() cannot run past the end of a row.
    int[][] errTypeCnt = new int[3][eErrorSeverity.values().length];
    for (final RecordAndDoc entry : errQ)
    {
        if (!entry.errLocs.isEmpty())
        {
            logger.debug("Error Rec id = " + entry.rec.getControlNumber());
        }
        if (entry.errLocs.contains(eErrorLocationVal.MARC_ERROR))
        {
            errTypeCnt[0][entry.getErrLvl().ordinal()]++;
        }
        if (entry.errLocs.contains(eErrorLocationVal.INDEXING_ERROR))
        {
            errTypeCnt[1][entry.getErrLvl().ordinal()]++;
        }
        if (entry.errLocs.contains(eErrorLocationVal.SOLR_ERROR))
        {
            errTypeCnt[2][entry.getErrLvl().ordinal()]++;
        }
    }
    showErrReport("MARC", errTypeCnt[0]);
    showErrReport("Index", errTypeCnt[1]);
    showErrReport("Solr", errTypeCnt[2]);
}
/**
 * Logs one INFO line for each severity level that has a non-zero record count.
 *
 * @param errLocStr label of the error location used in the message (e.g. "MARC")
 * @param errorLvlCnt record counts, indexed by the ordinal of eErrorSeverity
 */
private void showErrReport(String errLocStr, int[] errorLvlCnt)
{
    int levelIndex = 0;
    for (int recordCount : errorLvlCnt)
    {
        if (recordCount > 0)
        {
            final String levelName = eErrorSeverity.values()[levelIndex].toString();
            logger.info(recordCount + " records have " + errLocStr + " errors of level: " + levelName);
        }
        levelIndex++;
    }
}
/**
 * Renders the given index specification exceptions as a multi-line text. For each
 * exception the specification message is written first (only when it differs from the
 * previous exception's one), followed by the exception message and one line per chained
 * cause, prefixed with the Solr field name.
 *
 * @param exceptions the exceptions to render
 * @return the rendered text, one entry per line
 */
@SuppressWarnings("unused")
private String getTextForExceptions(List<IndexerSpecException> exceptions)
{
    StringBuilder text = new StringBuilder();
    String lastSpec = "";
    for (IndexerSpecException e : exceptions)
    {
        String specMessage = e.getSpecMessage();
        if (specMessage != null && !specMessage.equals(lastSpec))
        {
            text.append(specMessage).append("\n");
        }
        // Remember the header just handled; without this every exception repeated it.
        lastSpec = specMessage;
        text.append(e.getMessage()).append("\n");
        for (Throwable cause = e.getCause(); cause != null; cause = cause.getCause())
        {
            text.append(e.getSolrField()).append(" : ").append(cause.getMessage()).append("\n");
        }
    }
    return (text.toString());
}
/**
 * Logs the given index specification exceptions, each at the log4j priority matching its
 * severity. For each exception the specification message is logged first (only when it
 * differs from the previous exception's one), followed by the exception message and one
 * line per chained cause, prefixed with the Solr field name.
 *
 * @param exceptions the exceptions to log
 */
private void logTextForExceptions(List<IndexerSpecException> exceptions)
{
    String lastSpec = "";
    for (IndexerSpecException e : exceptions)
    {
        eErrorSeverity level = e.getErrLvl();
        Priority priority = getPriorityForSeverity(level);
        String specMessage = e.getSpecMessage();
        if (specMessage != null && !specMessage.equals(lastSpec))
        {
            logger.log(priority, specMessage);
        }
        // Remember the header just handled; without this every exception repeated it.
        lastSpec = specMessage;
        logger.log(priority, e.getMessage());
        for (Throwable cause = e.getCause(); cause != null; cause = cause.getCause())
        {
            logger.log(priority, e.getSolrField() + " : " + cause.getMessage());
        }
    }
}
/**
 * Translates an indexer error severity into the log4j level used to log it.
 * NONE, and any severity not listed explicitly, maps to DEBUG.
 *
 * @param level the severity to translate
 * @return the corresponding log4j level
 */
private Priority getPriorityForSeverity(eErrorSeverity level)
{
    final Level mapped;
    switch (level)
    {
        case FATAL:
            mapped = Level.FATAL;
            break;
        case ERROR:
            mapped = Level.ERROR;
            break;
        case WARN:
            mapped = Level.WARN;
            break;
        case INFO:
            mapped = Level.INFO;
            break;
        case NONE:
        default:
            mapped = Level.DEBUG;
            break;
    }
    return mapped;
}
/**
 * <h1>MyShutdownThread</h1> This class implements a shutdown hook that is
 * installed in the Java Runtime. If a user attempts to terminate the import
 * process, this hook asks the indexer to shut down (via
 * <code>indexer.shutDown(true)</code>) and then waits until the indexer
 * reports that it has shut down before allowing the program to terminate.
 * NOTE(review): the earlier description said the import threads are signalled
 * via Thread.interrupt and that changes are committed to Solr first; that
 * happens (if at all) inside Indexer and cannot be confirmed from this class.
 *
 * @author rh9ec
 *
 */
class MyShutdownThread extends Thread
{
// Indexer to stop and wait for.
private Indexer indexer;
// Thread interrupted at the very end of run(); processInput passes the ShutdownSimulator.
private Thread killItToDie;
/**
 * @param ind the indexer to shut down when the hook runs
 * @param shutdownSimulator the thread to interrupt once shutdown has finished
 */
public MyShutdownThread(Indexer ind, Thread shutdownSimulator)
{
indexer = ind;
killItToDie = shutdownSimulator;
}
/**
 * Requests the indexer shutdown if it has not happened yet, polls every two seconds
 * until the indexer reports it is shut down, shuts down log4j, waits one more second
 * and finally interrupts the thread given to the constructor.
 */
@Override
public void run()
{
// System.err.println("Starting Shutdown hook");
logger.info("Starting Shutdown hook");
if (!indexer.isShutDown())
{
logger.info("Stopping main indexer loop");
indexer.shutDown(true);
}
// Poll until the indexer reports completion; an interrupt only cuts the current
// sleep short, the loop condition is still re-checked.
while (!indexer.isShutDown())
{
try
{
sleep(2000);
}
catch (InterruptedException e)
{
}
}
logger.info("Finished Shutdown hook");
// No logging is attempted after this point.
LogManager.shutdown();
try
{
sleep(1000);
}
catch (InterruptedException e)
{
indexer = null;
}
killItToDie.interrupt();
}
}
/**
 * <h1>ShutdownSimulator</h1> A small helper thread that is only useful for
 * debugging the shutdown hook. The Eclipse Java Development Environment
 * cannot stop a process it is running in a way that invokes the shutdown
 * hook; it simply destroys the process. <br/>
 * When this thread is created with <code>inEclipse == true</code> (for
 * example by defining the system property "runInEclipse" as true via
 * -DrunInEclipse=true in the VM arguments of the debug configuration), it
 * watches standard input: click in the Console window and press [ENTER] to
 * make it call System.exit(0), which simulates a CTRL-C being sent to the
 * program. Otherwise it just sleeps until it is interrupted.
 *
 * @author rh9ec
 *
 */
class ShutdownSimulator extends Thread
{
    boolean inEclipse;

    /**
     * @param inEclipse true to watch System.in for input and exit the JVM when some arrives
     */
    public ShutdownSimulator(boolean inEclipse)
    {
        this.inEclipse = inEclipse;
    }

    /**
     * Polls in two-second steps until interrupted, until reading System.in fails, or
     * (in Eclipse mode) until input arrives and System.exit(0) is called.
     */
    @Override
    public void run()
    {
        setName("Eclipse-Shutdown-Simulator-Thread");
        if (inEclipse)
        {
            System.out.println("You're using Eclipse; click in this console and "
                    + "press ENTER to call System.exit() and run the shutdown routine.");
        }
        try
        {
            for (;;)
            {
                final boolean inputWaiting = inEclipse && System.in.available() > 0;
                if (!inputWaiting)
                {
                    sleep(2000);
                    continue;
                }
                System.in.read();
                System.exit(0);
            }
        }
        catch (IOException | InterruptedException stop)
        {
            // Either condition simply ends this thread.
        }
    }
}
}