package org.solrmarc.marc;

import java.util.ArrayList;
import java.util.List;

import org.apache.log4j.Logger;
import org.marc4j.MarcException;
import org.marc4j.MarcReader;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.DataField;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.marc4j.marc.VariableField;

/**
 * @author rh9ec
 *
 * Binary Marc records have a maximum size of 99999 bytes. In the data dumps from
 * the Sirsi/Dynix Virgo system, if a record with all of its holdings information
 * attached would be greater than that size, the record is written out multiple
 * times, with each subsequent record containing a subset of the total holdings information.
 * This class reads ahead to determine when the next record in a Marc file is actually
 * a continuation of the same record. When this occurs, the holdings information in the
 * next record is appended to/merged with the in-memory Marc record representation already
 * read.
 *
 */
public class MarcCombiningReader implements MarcReader
{
    /** Fully assembled record waiting to be handed out by {@link #next()}, or null. */
    Record currentRecord = null;

    /** Look-ahead record already pulled from the underlying reader, or null. */
    Record nextRecord = null;

    /** Underlying reader that supplies the raw (possibly split) records. */
    MarcReader reader;

    /** Regular expression selecting the tags copied from continuation records. */
    String idsToMerge = null;

    /** Field spec used for matching on the current record (null means the control number). */
    String leftControlField = null;

    /** Field spec used for matching on the next record (null means the control number). */
    String rightControlField = null;

    // Initialize logging category
    static Logger logger = Logger.getLogger(MarcCombiningReader.class.getName());

    /**
     * Constructor for a "combining" Marc reader, that looks ahead at the Marc file to determine
     * when the next record is a continuation of the currently read record.
     *
     * @param reader - The lower level MarcReader that returns Marc4J Record objects that are read from a Marc file.
     * @param idsToMerge - string representing a regular expression matching those fields to be merged for continuation records.
     * @param leftControlField - string representing a control field in the current record to use for matching purposes (null to default to 001).
     * @param rightControlField - string representing a control field in the next record to use for matching purposes (null to default to 001).
     */
    public MarcCombiningReader(MarcReader reader, String idsToMerge, String leftControlField, String rightControlField)
    {
        this.reader = reader;
        this.idsToMerge = idsToMerge;
        this.leftControlField = leftControlField;
        this.rightControlField = rightControlField;
    }

    /**
     * Reports whether another (combined) record is available. The record is
     * assembled eagerly and buffered so that the following call to
     * {@link #next()} returns it.
     *
     * @return true if a record is available, false otherwise
     * @throws MarcException if the underlying reader fails while reading ahead
     */
    public boolean hasNext()
    {
        if (currentRecord == null)
        {
            currentRecord = next();
        }
        return currentRecord != null;
    }

    /**
     * Returns the next record, with the selected fields of any immediately
     * following continuation records merged into it.
     *
     * @return the next combined record, or null when the underlying reader is exhausted
     * @throws MarcException if the underlying reader fails to deliver a record
     */
    public Record next()
    {
        // Loop until a complete record has been assembled or the input runs out.
        while (currentRecord == null)
        {
            // Promote the look-ahead record left over from the previous call.
            if (nextRecord != null)
            {
                currentRecord = nextRecord;
                nextRecord = null;
            }
            if (!reader.hasNext())
            {
                break;
            }
            nextRecord = readFromUnderlyingReader();

            // Fold in every continuation record that directly follows.
            while (recordsMatch(currentRecord, nextRecord))
            {
                currentRecord = combineRecords(currentRecord, nextRecord, idsToMerge);
                nextRecord = reader.hasNext() ? readFromUnderlyingReader() : null;
            }
        }

        // Hand the buffered record out exactly once.
        Record result = currentRecord;
        currentRecord = null;
        return result;
    }

    /**
     * Reads one record from the underlying reader, converting any failure into
     * a MarcException that names the last successfully read record (when there
     * is one) and keeps the original exception as its cause.
     */
    private Record readFromUnderlyingReader()
    {
        try
        {
            return reader.next();
        }
        catch (Exception e)
        {
            if (currentRecord != null)
            {
                String recCntlNum = currentRecord.getControlNumber();
                throw new MarcException("Couldn't get next record after " + (recCntlNum != null ? recCntlNum : "") + " -- " + e.toString(), e);
            }
            throw new MarcException("Marc record couldn't be read -- " + e.toString(), e);
        }
    }

    /**
     * Support method to find a specific control field within a record and return
     * its contents as a string. For data fields the contents of a single
     * subfield are returned instead.
     *
     * @param record - record to search
     * @param tag - tag number to search for; the first three characters are the
     *              field tag, optionally followed by a subfield code (see
     *              {@link #subfieldCode(String)})
     * @return the data of the first matching field/subfield, or null if none is found
     */
    private String findControlField(Record record, String tag)
    {
        String tagstart = tag.substring(0, 3);
        List<VariableField> fields = record.getVariableFields(tagstart);
        for (VariableField field : fields)
        {
            if (!field.getTag().matches(tagstart))
            {
                continue;
            }
            if (field instanceof ControlField)
            {
                return ((ControlField) field).getData();
            }
            if (field instanceof DataField)
            {
                Subfield sf = ((DataField) field).getSubfield(subfieldCode(tag));
                if (sf != null)
                {
                    return sf.getData();
                }
            }
        }
        return null;
    }

    /**
     * Extracts the subfield code from a field spec. A spec of five or more
     * characters carries the code at index 4 (index 3 being a separator); a
     * four-character spec carries it at index 3; a bare three-character tag
     * defaults to subfield 'a'.
     */
    private static char subfieldCode(String tag)
    {
        if (tag.length() > 4)
        {
            return tag.charAt(4);
        }
        if (tag.length() == 4)
        {
            // Previously this case read index 4 and threw StringIndexOutOfBoundsException.
            return tag.charAt(3);
        }
        return 'a';
    }

    /**
     * Support method to detect if two records match.
     *
     * @param left - left side of the comparison (current record)
     * @param right - right side of the comparison (next record)
     * @return true only when both records exist and their match values are non-null and equal
     */
    private boolean recordsMatch(Record left, Record right)
    {
        // Records can't match if they don't exist!
        if (left == null || right == null)
        {
            return false;
        }

        // For both sides of the match (left and right), check to see if the user
        // provided a control field setting. If no preference was provided, we'll
        // match using the record ID. If a preference exists, we need to look up
        // the specified control field in the record.
        String leftStr = (leftControlField == null)
                ? left.getControlNumber()
                : findControlField(left, leftControlField);
        String rightStr = (rightControlField == null)
                ? right.getControlNumber()
                : findControlField(right, rightControlField);

        // Check for a match and return an appropriate status:
        return leftStr != null && rightStr != null && leftStr.equals(rightStr);
    }

    /**
     * Merges a continuation record into the current record by appending every
     * field of nextRecord whose tag matches idsToMerge, and by carrying over
     * any errors attached to nextRecord.
     *
     * @param currentRecord - record that receives the merged fields (modified in place)
     * @param nextRecord - continuation record supplying the fields
     * @param idsToMerge - regular expression matched against each field tag
     * @return currentRecord, after the merge
     */
    public static Record combineRecords(Record currentRecord, Record nextRecord, String idsToMerge)
    {
        appendMatchingFields(currentRecord, nextRecord, idsToMerge);
        copyErrors(currentRecord, nextRecord);
        return currentRecord;
    }

    /**
     * Merges a continuation record into the current record like
     * {@link #combineRecords(Record, Record, String)}, but first removes the
     * fields of currentRecord whose tag matches fieldInsertBefore and re-adds
     * them after the merged fields have been added.
     *
     * @param currentRecord - record that receives the merged fields (modified in place)
     * @param nextRecord - continuation record supplying the fields
     * @param idsToMerge - regular expression matched against each field tag of nextRecord
     * @param fieldInsertBefore - regular expression selecting the fields of currentRecord
     *                            that are removed and re-added after the merge
     * @return currentRecord, after the merge
     */
    public static Record combineRecords(Record currentRecord, Record nextRecord, String idsToMerge, String fieldInsertBefore)
    {
        // Iterate over a snapshot so that removing fields from the record cannot
        // disturb the iteration, whatever kind of list the record returns.
        List<VariableField> existingFields = new ArrayList<VariableField>(currentRecord.getVariableFields());
        List<VariableField> fieldsToMove = new ArrayList<VariableField>();

        // temporarily remove some existing fields
        for (VariableField field : existingFields)
        {
            if (field.getTag().matches(fieldInsertBefore))
            {
                fieldsToMove.add(field);
                currentRecord.removeVariableField(field);
            }
        }

        appendMatchingFields(currentRecord, nextRecord, idsToMerge);

        // now add back the temporarily removed fields
        for (VariableField field : fieldsToMove)
        {
            currentRecord.addVariableField(field);
        }

        copyErrors(currentRecord, nextRecord);
        return currentRecord;
    }

    /** Adds to target every field of source whose tag matches the given regular expression. */
    private static void appendMatchingFields(Record target, Record source, String idsToMerge)
    {
        for (VariableField field : source.getVariableFields())
        {
            if (field.getTag().matches(idsToMerge))
            {
                target.addVariableField(field);
            }
        }
    }

    /** Carries the errors recorded on source, if any, over to target. */
    private static void copyErrors(Record target, Record source)
    {
        if (source.hasErrors())
        {
            target.addErrors(source.getErrors());
        }
    }
}