package org.solrmarc.driver;

import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.log4j.Logger;
import org.marc4j.MarcError;
import org.marc4j.MarcException;
import org.marc4j.MarcJsonWriter;
import org.marc4j.MarcReader;
import org.marc4j.MarcReaderConfig;
import org.marc4j.MarcReaderFactory;
import org.marc4j.MarcStreamWriter;
import org.marc4j.MarcWriter;
import org.marc4j.MarcXmlWriter;
import org.marc4j.converter.impl.UnicodeToAnsel;
import org.marc4j.marc.Record;
import org.solrmarc.index.indexer.AbstractValueIndexer;
import org.solrmarc.index.indexer.ValueIndexerFactory;
//import org.solrmarc.marc.MarcReaderFactory;
import org.solrmarc.solr.SolrProxy;
import org.solrmarc.tools.PropertyUtils;

/**
 * Command-line driver that reads MARC records through a configured
 * {@link MarcReader}, logs any errors attached to each record, and writes the
 * records back out through a {@link MarcWriter} chosen by an output mode
 * ({@code to_xml}, {@code to_json}, {@code to_utf8}, {@code to_marc8} or
 * {@code to_ncr}).
 *
 * <p>Option parsing ({@code processArgs}, {@code options}, {@code files},
 * {@code readOpts}, {@code homeDirStrs}) is not declared in this class; it is
 * presumably inherited from {@link BootableMain}.
 */
public class RecordFixer extends BootableMain
{
    /** Class logger. */
    public final static Logger logger = Logger.getLogger(RecordFixer.class);

    Properties readerProps;
    MarcReaderConfig readerConfig;
    List<AbstractValueIndexer<?>> indexers;
    Indexer indexer;
    MarcReader reader;
    SolrProxy solrProxy;
    boolean verbose;
    int numIndexed[];
    String[] args;
    long startTime;
    Thread shutdownSimulator = null;

    /** Destination for processed records; stays {@code null} when no writer matches the output mode. */
    private MarcWriter writer;

    /**
     * Program entry point: builds a {@code RecordFixer} from the command-line
     * arguments and runs it.
     *
     * @param args the raw command-line arguments
     */
    public static void main(String[] args)
    {
        RecordFixer driver = new RecordFixer(args);
        driver.execute();
    }

    /**
     * Stores the command-line arguments; no parsing happens until
     * {@link #execute()} is called.
     *
     * @param args the raw command-line arguments
     */
    public RecordFixer(String[] args)
    {
        this.args = args;
    }

    /**
     * Runs the whole pipeline: parses the arguments, initializes the
     * {@link ValueIndexerFactory}, configures reader properties and output,
     * opens the input files and processes every record.
     */
    public void execute()
    {
        processArgs(args, true);
        ValueIndexerFactory.initialize(homeDirStrs);
        initializeFromOptions();

        List<String> inputFiles = options.valuesOf(files);
        logger.info("Opening input files: " + Arrays.toString(inputFiles.toArray()));
        this.configureReader(inputFiles);
        this.processInput();
    }

    /**
     * Loads the reader properties named by the {@code readOpts} option and
     * selects the output mode.
     *
     * <p>If the reader properties cannot be read the failure is logged and the
     * JVM exits with status 1. The output mode is the value of the
     * {@code solrURL} option when present, otherwise {@code "devnull"} when the
     * {@code null} option is present, otherwise {@code "stdout"}.
     */
    public void initializeFromOptions()
    {
        String inputSource[] = new String[1];
        String propertyFileAsURLStr = PropertyUtils.getPropertyFileAbsoluteURL(
                homeDirStrs, options.valueOf(readOpts), true, inputSource);
        try
        {
            configureReaderProps(propertyFileAsURLStr);
        }
        catch (IOException e1)
        {
            // Pass the exception along so the underlying cause reaches the log.
            logger.fatal("Fatal error: Exception opening reader properties input stream: " + inputSource[0], e1);
            logger.error("Exiting...");
            System.exit(1);
        }

        String outputType = options.has("solrURL") ? options.valueOf("solrURL").toString()
                          : options.has("null") ? "devnull"
                          : "stdout";
        this.configureOutput(outputType);
    }

    /**
     * Loads the reader properties from the given location and builds the
     * {@link MarcReaderConfig} from them.
     *
     * @param propertyFileURLStr location of the properties file, or
     *        {@code null} to use an empty property set
     * @throws FileNotFoundException if the properties file cannot be found
     * @throws IOException if the properties file cannot be read
     */
    public void configureReaderProps(String propertyFileURLStr) throws FileNotFoundException, IOException
    {
        readerProps = new Properties();
        if (propertyFileURLStr != null)
        {
            // Properties.load leaves the stream open, so close it here.
            InputStream propertyStream = PropertyUtils.getPropertyFileInputStream(propertyFileURLStr);
            try
            {
                readerProps.load(propertyStream);
            }
            finally
            {
                if (propertyStream != null)
                {
                    propertyStream.close();
                }
            }
        }
        readerConfig = new MarcReaderConfig(readerProps);
    }

    /**
     * Creates the {@link MarcReader} for the given input files using the
     * previously loaded reader configuration.
     *
     * @param inputFilenames names of the input files to read
     * @throws IllegalArgumentException if the reader cannot be created because
     *         of an I/O error; the original exception is attached as the cause
     */
    public void configureReader(List<String> inputFilenames)
    {
        try
        {
            reader = MarcReaderFactory.makeReader(readerConfig,
                    ValueIndexerFactory.instance().getHomeDirs(), inputFilenames);
        }
        catch (IOException e)
        {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }

    /**
     * Selects the {@link MarcWriter} for the given output mode.
     *
     * <p>Output goes to {@code System.out} unless the system property
     * {@code solrmarc.output.redirect} names a file. Recognized modes are
     * {@code to_xml}, {@code to_json}, {@code to_utf8}, {@code to_marc8} and
     * {@code to_ncr}. For any other mode no writer is installed, so
     * {@link #processInput()} reads and checks records without writing them; a
     * warning is logged unless the mode is {@code "devnull"}.
     *
     * <p>NOTE(review): an additional mode {@code untranslateNCRifneeded} was
     * sketched in commented-out code here but never implemented. Whether the
     * default {@code "stdout"} mode should select a writer is also open.
     *
     * @param mode the output mode
     * @throws IllegalArgumentException if the redirect file cannot be opened
     */
    public void configureOutput(String mode)
    {
        PrintStream marcOut = openOutputStream();

        if (mode.equals("to_xml"))
        {
            writer = new MarcXmlWriter(marcOut, "UTF-8", true);
        }
        else if (mode.equals("to_json"))
        {
            writer = new MarcJsonWriter(marcOut, MarcJsonWriter.MARC_IN_JSON);
        }
        else if (mode.equals("to_utf8"))
        {
            writer = new MarcStreamWriter(marcOut, "UTF-8", true);
        }
        else if (mode.equals("to_marc8"))
        {
            writer = new MarcStreamWriter(marcOut, "ISO8859_1", true);
            writer.setConverter(new UnicodeToAnsel());
        }
        else if (mode.equals("to_ncr"))
        {
            writer = new MarcStreamWriter(marcOut, "ISO8859_1", true);
            writer.setConverter(new UnicodeToAnsel(true));
        }
        else if (!mode.equals("devnull"))
        {
            logger.warn("Unrecognized output mode '" + mode
                    + "'; records will be read and checked but not written");
        }
    }

    /**
     * Returns the stream records are written to: a buffered, auto-flushing
     * stream over the file named by {@code solrmarc.output.redirect}, or
     * {@code System.out} when that property is not set.
     */
    private PrintStream openOutputStream()
    {
        String sysoutRedir = System.getProperty("solrmarc.output.redirect", null);
        if (sysoutRedir == null)
        {
            return System.out;
        }
        try
        {
            return new PrintStream(new BufferedOutputStream(new FileOutputStream(sysoutRedir)), true);
        }
        catch (FileNotFoundException e)
        {
            // Fail here with a clear message instead of carrying a null stream forward.
            throw new IllegalArgumentException("Cannot open output redirect file: " + sysoutRedir, e);
        }
    }

    /**
     * Reads every record from the reader, logs any errors attached to it, and
     * writes it to the configured writer (if there is one).
     *
     * <p>When the reader throws a {@link MarcException} the error is logged.
     * Processing then stops unless the system property
     * {@code solrmarc.terminate.on.marc.exception} is set to something other
     * than {@code true}, in which case the loop tries the next record. The
     * writer is closed when processing ends, including when it ends with an
     * exception.
     */
    public void processInput()
    {
        try
        {
            while (true)
            {
                Record record = null;
                try
                {
                    if (!reader.hasNext())
                    {
                        break;
                    }
                    record = reader.next();
                }
                catch (MarcException me)
                {
                    logger.error("Unrecoverable Error in MARC record data", me);
                    if (Boolean.parseBoolean(System.getProperty("solrmarc.terminate.on.marc.exception", "true")))
                    {
                        break;
                    }
                    logger.warn("Trying to continue after MARC record data error");
                    continue;
                }

                if (record.hasErrors())
                {
                    reportMarcErrors(record, record.getErrors());
                }
                if (writer != null)
                {
                    writer.write(record);
                }
            }
        }
        finally
        {
            // Close even on failure so already-written output is flushed.
            if (writer != null)
            {
                writer.close();
            }
        }
    }

    /**
     * Logs each error of a record at INFO level, prefixed with the record's
     * control number, or {@code No_001_Field} when it has none.
     *
     * @param record the record the errors belong to
     * @param errors the errors to report
     */
    private void reportMarcErrors(Record record, List<MarcError> errors)
    {
        String id = record.getControlNumber();
        if (id == null)
        {
            id = "No_001_Field";
        }
        for (MarcError err : errors)
        {
            logger.info(id + " : " + err.toString());
        }
    }
}