package org.solrmarc.marc;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.marc4j.MarcException;
import org.marc4j.MarcReader;
import org.marc4j.marc.ControlField;
import org.marc4j.marc.DataField;
import org.marc4j.marc.MarcFactory;
import org.marc4j.marc.Record;
import org.marc4j.marc.Subfield;
import org.marc4j.marc.VariableField;
import org.marc4j.marc.impl.Verifier;
import org.solrmarc.index.SolrIndexer;
import org.solrmarc.tools.PropertyUtils;
import org.solrmarc.tools.Utils;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
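/**
 * A {@link MarcReader} that wraps another reader and only returns the records
 * that pass the configured "include if field present" / "include if field
 * missing" filters. Before filtering, each record may have subfields deleted
 * and may be rewritten (or rejected) by a set of remap rules loaded from a
 * properties file.
 *
 * @author Robert Haschart
 * @version $Id: MarcFilteredReader.java 1718 2013-11-08 21:35:12Z rh9ec@virginia.edu $
 *
 */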
public class MarcFilteredReader implements MarcReader
{
// Field spec (the part of the "if present" argument before the first '/'); when set,
// a record is included if SolrIndexer.getFieldList() finds values for this spec.
String includeRecordIfFieldPresent = null;
// Optional text (the part of the "if present" argument after the first '/'); when set,
// the values found for includeRecordIfFieldPresent are checked with Utils.setItemContains().
String includeRecordIfFieldContains = null;
// Field spec (the part of the "if missing" argument before the first '/'); when set,
// a record is included if no values are found for this spec.
String includeRecordIfFieldMissing = null;
// Optional text (the part of the "if missing" argument after the first '/'); when set,
// a record is included if Utils.setItemContains() reports no match in the values found.
String includeRecordIfFieldDoesntContain = null;
// Colon-separated list of tag[subfield] specs handled by deleteSubfields(); null disables deletion.
String deleteSubfieldsSpec = null;
// Record read ahead by hasNext() and handed out by the following next() call.
Record currentRecord = null;
// The wrapped reader that supplies the unfiltered records.
MarcReader reader;
// SolrMarcException exception;
// private String remapPropertiesFilename;
// Remap rules loaded by the five-argument constructor; null when no remapping is configured.
private Properties remapProperties = null;
// Initialize logging category
static Logger logger = Logger.getLogger(MarcFilteredReader.class.getName());
/**
 * Creates a filtering reader on top of an existing reader.
 *
 * @param r               the reader that supplies the unfiltered records
 * @param ifFieldPresent  "fieldspec" or "fieldspec/text": include a record when the
 *                        field spec yields values (and, if text is given, when those
 *                        values contain it); may be null
 * @param ifFieldMissing  "fieldspec" or "fieldspec/text": include a record when the
 *                        field spec yields no values (or, if text is given, when the
 *                        values do not contain it); may be null
 * @param deleteSubfields colon-separated list of tag[subfield] specs to delete from
 *                        every record before filtering; may be null
 */
public MarcFilteredReader(MarcReader r, String ifFieldPresent, String ifFieldMissing, String deleteSubfields)
{
    reader = r;
    deleteSubfieldsSpec = deleteSubfields;

    if (ifFieldPresent != null)
    {
        // Only the first '/' separates the field spec from the optional text.
        String[] presentParts = ifFieldPresent.split("/", 2);
        includeRecordIfFieldPresent = presentParts[0];
        includeRecordIfFieldContains = (presentParts.length > 1) ? presentParts[1] : null;
    }

    if (ifFieldMissing != null)
    {
        String[] missingParts = ifFieldMissing.split("/", 2);
        includeRecordIfFieldMissing = missingParts[0];
        includeRecordIfFieldDoesntContain = (missingParts.length > 1) ? missingParts[1] : null;
    }
}
/**
 * Creates a filtering reader that additionally applies remap rules to every record.
 *
 * @param r               the reader that supplies the unfiltered records
 * @param ifFieldPresent  include filter, see the four-argument constructor; may be null
 * @param ifFieldMissing  include filter, see the four-argument constructor; may be null
 * @param deleteSubfields colon-separated list of tag[subfield] specs to delete; may be null
 * @param remapURL        location handed to {@code PropertyUtils.loadProperties} to
 *                        load the remap rules
 */
public MarcFilteredReader(MarcReader r, String ifFieldPresent, String ifFieldMissing, String deleteSubfields, String remapURL)
{
    this(r, ifFieldPresent, ifFieldMissing, deleteSubfields);
    this.remapProperties = PropertyUtils.loadProperties(remapURL);
}
/**
 * Reports whether another record that passes the filters is available.
 * When no record is buffered yet, one is read ahead with {@link #next()} and
 * kept in {@code currentRecord} until the following {@link #next()} call.
 *
 * @return true if the iteration has more records, false otherwise
 */
public boolean hasNext()
{
    if (currentRecord != null)
    {
        return true;
    }
    currentRecord = next();
    return currentRecord != null;
}
// static boolean recordHasMatch(Record rec, String tag, String pattern)
// {
// for (VariableField field : rec.getVariableFields(tag))
// {
// if (field.find(pattern))
// return(true);
// }
// return false;
// }
//
// static boolean recordHasField(Record rec, String tag)
// {
// for (VariableField field : rec.getVariableFields(tag))
// {
// if (field.find(pattern))
// return(true);
// }
// return false;
// }
/**
 * Returns the next record in the iteration that passes the configured filters.
 * <p>
 * A record buffered by {@link #hasNext()} is returned first. Otherwise records
 * are pulled from the wrapped reader; each one has the configured subfields
 * deleted, is run through the remap rules (which may reject it), and is then
 * checked against the include filters. The accepted record is returned directly
 * and is NOT left in the read-ahead buffer, so calling {@code next()} repeatedly
 * without {@code hasNext()} never yields the same record twice.
 *
 * @return the next accepted record, or null when the wrapped reader is exhausted
 * @throws MarcException if the wrapped reader fails to read a record (logged, then rethrown)
 */
public Record next()
{
    // Hand out the record that hasNext() read ahead, if there is one.
    if (currentRecord != null)
    {
        Record buffered = currentRecord;
        currentRecord = null;
        return buffered;
    }

    while (reader.hasNext())
    {
        Record rec;
        try
        {
            rec = reader.next();
        }
        catch (MarcException me)
        {
            logger.error("Error reading Marc Record.");
            logger.error(me.getMessage());
            throw me;
        }

        // Nothing to delete, remap or filter; ask the wrapped reader again.
        if (rec == null)
        {
            continue;
        }

        if (deleteSubfieldsSpec != null)
        {
            deleteSubfields(rec);
        }

        if (remapProperties != null && !remapRecord(rec))
        {
            logger.info("Remap Rules say record "+rec.getControlNumber()+" should be skipped");
            continue;
        }

        if (passesFilters(rec))
        {
            return rec;
        }
    }
    return null;
}

/**
 * Decides whether a record satisfies the include filters. With no filter
 * configured every record passes; otherwise satisfying either the "present"
 * filter or the "missing" filter is enough.
 */
private boolean passesFilters(Record rec)
{
    if (includeRecordIfFieldPresent == null && includeRecordIfFieldMissing == null)
    {
        return true;
    }

    if (includeRecordIfFieldPresent != null)
    {
        Set<String> found = SolrIndexer.instance().getFieldList(rec, includeRecordIfFieldPresent);
        if (found.size() != 0
            && (includeRecordIfFieldContains == null || Utils.setItemContains(found, includeRecordIfFieldContains)))
        {
            return true;
        }
    }

    if (includeRecordIfFieldMissing != null)
    {
        Set<String> found = SolrIndexer.instance().getFieldList(rec, includeRecordIfFieldMissing);
        boolean missing = (includeRecordIfFieldDoesntContain == null)
                ? found.size() == 0
                : !Utils.setItemContains(found, includeRecordIfFieldDoesntContain);
        if (missing)
        {
            return true;
        }
    }
    return false;
}
/**
 * Applies {@code deleteSubfieldsSpec} to a record. The spec is a colon-separated
 * list of entries; the first three characters of an entry are the field tag.
 * An entry that is only a tag removes every data field with that tag. An entry
 * with more characters removes, from every data field with that tag, all
 * subfields whose code equals the character right after the tag (only that
 * first character is used). Entries shorter than a tag are skipped.
 *
 * @param rec the record to modify in place
 */
void deleteSubfields(Record rec)
{
    for (String fieldSpec : deleteSubfieldsSpec.split(":"))
    {
        if (fieldSpec.length() < 3)
        {
            // e.g. the empty entry produced by "245::650"; substring(0,3) would throw.
            if (fieldSpec.length() != 0)
            {
                logger.warn("Ignoring malformed delete-subfields entry: " + fieldSpec);
            }
            continue;
        }
        String tag = fieldSpec.substring(0, 3);
        String subfield = (fieldSpec.length() > 3) ? fieldSpec.substring(3) : null;

        // Iterate over a copy so removing fields from the record is safe even if
        // the record hands back a live view of its field list.
        List<VariableField> matching = new ArrayList<VariableField>((List<VariableField>)rec.getVariableFields(tag));
        for (VariableField field : matching)
        {
            if (!(field instanceof DataField))
            {
                continue;
            }
            DataField df = (DataField)field;
            if (subfield == null)
            {
                rec.removeVariableField(df);
                continue;
            }

            List<Subfield> sfs = (List<Subfield>)df.getSubfields(subfield.charAt(0));
            if (sfs == null || sfs.size() == 0)
            {
                continue;
            }
            // The field is taken out of the record while it is edited and added back afterwards.
            rec.removeVariableField(df);
            for (Subfield sf : new ArrayList<Subfield>(sfs))
            {
                df.removeSubfield(sf);
            }
            rec.addVariableField(df);
        }
    }
}
/**
 * Applies the remap rules to a record.
 * <p>
 * For every field whose tag is a key in {@code remapProperties}, the rules
 * {@code <tag>_0}, {@code <tag>_1}, ... are applied in order. If the key
 * {@code once} is present, the rules {@code once_0}, {@code once_1}, ... are
 * then applied a single time with no current field. Field deletions and
 * insertions requested by the rules are collected and carried out at the end,
 * and only if the record is kept.
 *
 * @param rec the record to modify in place
 * @return false if any applied rule rejected the record, true otherwise
 */
private boolean remapRecord(Record rec)
{
    List<VariableField> fields = rec.getVariableFields();
    List<VariableField> fToDelete = new ArrayList<VariableField>();
    List<VariableField> fToInsert = new ArrayList<VariableField>();
    boolean keepRecord = true;

    for (VariableField field : fields)
    {
        String tag = field.getTag();
        if (remapProperties.containsKey(tag))
        {
            // Decide by the field's actual type: a tag test that only recognises
            // 001 would send the other control fields down the data-field path.
            if (field instanceof ControlField)
            {
                keepRecord &= applyRules(tag, field, null, fToDelete, fToInsert, rec);
            }
            else
            {
                List<Subfield> sfToDelete = new ArrayList<Subfield>();
                keepRecord &= applyRules(tag, field, sfToDelete, fToDelete, fToInsert, rec);
                if (field instanceof DataField)
                {
                    for (Subfield sf : sfToDelete)
                    {
                        ((DataField)field).removeSubfield(sf);
                    }
                }
            }
        }
        if (!keepRecord) break;
    }

    if (keepRecord && remapProperties.containsKey("once"))
    {
        keepRecord &= applyRules("once", null, null, fToDelete, fToInsert, rec);
    }

    if (!keepRecord)
    {
        return false;
    }

    for (VariableField field : fToDelete)
    {
        if (field != null)
        {
            rec.removeVariableField(field);
        }
    }

    for (VariableField field : fToInsert)
    {
        if (field instanceof DataField)
        {
            // Insert before the first data field whose tag is not smaller.
            int index = 0;
            for (DataField df : (List<DataField>)rec.getDataFields())
            {
                if (df.getTag().compareTo(field.getTag()) >= 0)
                    break;
                index++;
            }
            rec.getDataFields().add(index, (DataField)field);
        }
        else if (field.getTag().equals("001"))
        {
            rec.addVariableField(field);
        }
        else if (field instanceof ControlField)
        {
            // Same tag-ordered insertion for the remaining control fields.
            int index = 0;
            for (ControlField cf : (List<ControlField>)rec.getControlFields())
            {
                if (cf.getTag().compareTo(field.getTag()) >= 0)
                    break;
                index++;
            }
            rec.getControlFields().add(index, (ControlField)field);
        }
    }
    return true;
}

/**
 * Applies the numbered rules {@code <prefix>_0}, {@code <prefix>_1}, ... to one
 * field. A rule has the form {@code condition=>command}; surrounding whitespace
 * is ignored and a rule without {@code =>} is logged and skipped. Every rule is
 * applied even after one of them rejects the record.
 *
 * @return false if any command rejected the record
 */
private boolean applyRules(String prefix, VariableField field, List<Subfield> sfToDelete,
                           List<VariableField> fToDelete, List<VariableField> fToInsert, Record rec)
{
    boolean keep = true;
    for (int i = 0; remapProperties.containsKey(prefix+"_"+i); i++)
    {
        String remapString = remapProperties.getProperty(prefix+"_"+i);
        // Limit 2: a "=>" inside the command's own arguments stays part of the command.
        String[] mapParts = (remapString == null) ? new String[0] : remapString.split("=>", 2);
        if (mapParts.length < 2)
        {
            logger.warn("Ignoring malformed remap rule " + prefix + "_" + i + ": " + remapString);
            continue;
        }
        if (eval(mapParts[0].trim(), field, rec))
        {
            keep &= process(mapParts[1].trim(), field, sfToDelete, fToDelete, fToInsert, rec);
        }
    }
    return keep;
}
/**
 * Evaluates the condition part of a remap rule.
 * <p>
 * Recognised forms: {@code true()}, {@code not(c)}, {@code and(c1, c2)},
 * {@code or(c1, c2)}, {@code indicatormatches("i1","i2")} ('*' matches any
 * indicator), {@code subfieldmatches("code","regex")},
 * {@code subfieldcontains("code","text")}, {@code subfieldexists("code")} and
 * {@code fieldexists("tag","code","value-or-regex")}. For a control field the
 * subfield tests are applied to the field data and {@code subfieldexists} is
 * always true. An unrecognised condition, or one whose arguments cannot be
 * parsed, evaluates to false (the latter is logged) instead of throwing.
 *
 * @param conditional the condition text
 * @param field       the field the rule is attached to; null for "once" rules
 * @param record      the record being remapped
 * @return the value of the condition
 */
private boolean eval(String conditional, VariableField field, Record record)
{
    if (conditional.startsWith("true()"))
    {
        return true;
    }
    if (conditional.startsWith("not("))
    {
        String arg = getOneConditional(conditional);
        return arg != null && !eval(arg, field, record);
    }
    if (conditional.startsWith("indicatormatches("))
    {
        String[] args = getTwoArgs(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        if (!(field instanceof DataField) || args[0].length() != 1 || args[1].length() != 1)
        {
            return false;
        }
        DataField df = (DataField)field;
        char want1 = args[0].charAt(0);
        char want2 = args[1].charAt(0);
        return (want1 == '*' || want1 == df.getIndicator1())
            && (want2 == '*' || want2 == df.getIndicator2());
    }
    if (conditional.startsWith("subfieldmatches("))
    {
        String[] args = getTwoArgs(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        if (field instanceof DataField)
        {
            if (args[0].length() == 1)
            {
                List<Subfield> subfields = ((DataField)field).getSubfields(args[0].charAt(0));
                for (Subfield sf : subfields)
                {
                    if (sf.getData().matches(args[1]))
                        return true;
                }
            }
            return false;
        }
        return field instanceof ControlField && ((ControlField)field).getData().matches(args[1]);
    }
    if (conditional.startsWith("subfieldcontains("))
    {
        String[] args = getTwoArgs(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        if (field instanceof DataField)
        {
            if (args[0].length() == 1)
            {
                List<Subfield> subfields = ((DataField)field).getSubfields(args[0].charAt(0));
                for (Subfield sf : subfields)
                {
                    if (sf.getData().contains(args[1]))
                        return true;
                }
            }
            return false;
        }
        return field instanceof ControlField && ((ControlField)field).getData().contains(args[1]);
    }
    if (conditional.startsWith("subfieldexists("))
    {
        if (field instanceof DataField)
        {
            String arg = getOneArg(conditional);
            if (arg == null)
            {
                return malformedConditional(conditional);
            }
            if (arg.length() != 1)
            {
                return false;
            }
            List<Subfield> subfields = ((DataField)field).getSubfields(arg.charAt(0));
            return subfields.size() > 0;
        }
        // A control field has no subfields; its presence alone satisfies the test.
        return field instanceof ControlField;
    }
    if (conditional.startsWith("and("))
    {
        String[] args = getTwoConditionals(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        return eval(args[0], field, record) && eval(args[1], field, record);
    }
    if (conditional.startsWith("or("))
    {
        String[] args = getTwoConditionals(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        return eval(args[0], field, record) || eval(args[1], field, record);
    }
    if (conditional.startsWith("fieldexists("))
    {
        String[] args = getThreeArgs(conditional);
        if (args == null)
        {
            return malformedConditional(conditional);
        }
        if (args[0].matches("[0-9][0-9][0-9]") && args[1].length() == 1)
        {
            for (VariableField vf : (List<VariableField>)record.getVariableFields(args[0]))
            {
                if (vf instanceof DataField)
                {
                    for (Subfield sf : (List<Subfield>)((DataField)vf).getSubfields(args[1].charAt(0)))
                    {
                        // The third argument is tried both as a literal and as a regex.
                        if (sf.getData().equals(args[2]) || sf.getData().matches(args[2]))
                            return true;
                    }
                }
            }
        }
        return false;
    }
    return false;
}

/** Logs a condition whose arguments could not be parsed; such a condition is false. */
private boolean malformedConditional(String conditional)
{
    logger.warn("Cannot parse remap condition, treating it as false: " + conditional);
    return false;
}
/**
 * Executes the command part of a remap rule.
 * <p>
 * Recognised commands: {@code replace("code","regex","replacement")},
 * {@code append("code","text")}, {@code prepend("code","text")},
 * {@code deletesubfield("code")}, {@code deletefield()},
 * {@code deleteotherfield("tag","code","value-or-regex")},
 * {@code insertfield("field spec")},
 * {@code insertparameterizedfield("field spec","code","regex")},
 * {@code both(cmd1, cmd2)} and {@code reject()}. For a control field the
 * subfield commands act on the field data, and {@code deletesubfield} queues
 * the whole field for deletion. Deletions and insertions are only queued in
 * the supplied lists; the caller carries them out.
 * <p>
 * A command whose arguments cannot be parsed is logged and ignored. An
 * unrecognised command does nothing.
 *
 * @param command    the command text
 * @param field      the field the rule is attached to; null for "once" rules
 * @param sfToDelete collects subfields of {@code field} to delete; may be null
 * @param fToDelete  collects fields to delete from the record
 * @param fToInsert  collects new fields to insert into the record
 * @param record     the record being remapped
 * @return false if the command (or a command nested in {@code both}) rejects
 *         the record, true otherwise
 */
private boolean process(String command, VariableField field, List<Subfield> sfToDelete, List<VariableField> fToDelete, List<VariableField> fToInsert, Record record)
{
    if (command.startsWith("replace("))
    {
        String[] args = getThreeArgs(command);
        if (args == null)
        {
            return ignoreMalformedCommand(command);
        }
        if (field instanceof DataField)
        {
            if (args[0].length() == 1)
            {
                List<Subfield> subfields = ((DataField)field).getSubfields(args[0].charAt(0));
                for (Subfield sf : subfields)
                {
                    String newData = sf.getData().replaceAll(args[1], args[2]);
                    if (!newData.equals(sf.getData()))
                    {
                        sf.setData(newData);
                    }
                }
            }
        }
        else if (field instanceof ControlField)
        {
            ControlField cf = (ControlField)field;
            String newData = cf.getData().replaceAll(args[1], args[2]);
            if (!newData.equals(cf.getData()))
            {
                cf.setData(newData);
            }
        }
    }
    else if (command.startsWith("append(") || command.startsWith("prepend("))
    {
        boolean append = command.startsWith("append(");
        String[] args = getTwoArgs(command);
        if (args == null)
        {
            return ignoreMalformedCommand(command);
        }
        if (field instanceof DataField)
        {
            if (args[0].length() == 1)
            {
                List<Subfield> subfields = ((DataField)field).getSubfields(args[0].charAt(0));
                for (Subfield sf : subfields)
                {
                    String newData = append ? sf.getData() + args[1] : args[1] + sf.getData();
                    if (!newData.equals(sf.getData()))
                    {
                        sf.setData(newData);
                    }
                }
            }
        }
        else if (field instanceof ControlField)
        {
            ControlField cf = (ControlField)field;
            cf.setData(append ? cf.getData() + args[1] : args[1] + cf.getData());
        }
    }
    else if (command.startsWith("deletesubfield("))
    {
        if (field instanceof DataField)
        {
            String arg = getOneArg(command);
            if (arg == null)
            {
                return ignoreMalformedCommand(command);
            }
            if (arg.length() == 1 && sfToDelete != null)
            {
                List<Subfield> subfields = ((DataField)field).getSubfields(arg.charAt(0));
                sfToDelete.addAll(subfields);
            }
        }
        else if (field instanceof ControlField)
        {
            // A control field has no subfields, so the field itself is queued.
            fToDelete.add(field);
        }
    }
    else if (command.startsWith("both("))
    {
        String[] args = getTwoConditionals(command);
        if (args == null)
        {
            return ignoreMalformedCommand(command);
        }
        // Run both commands unconditionally, then report a rejection from either
        // one so that a nested reject() is not lost.
        boolean first = process(args[0], field, sfToDelete, fToDelete, fToInsert, record);
        boolean second = process(args[1], field, sfToDelete, fToDelete, fToInsert, record);
        return first && second;
    }
    else if (command.startsWith("deletefield("))
    {
        // "once" rules have no current field; there is nothing to delete then.
        if (field != null)
        {
            fToDelete.add(field);
        }
    }
    else if (command.startsWith("deleteotherfield("))
    {
        String[] args = getThreeArgs(command);
        if (args == null)
        {
            return ignoreMalformedCommand(command);
        }
        if (args[0].matches("[0-9][0-9][0-9]") && args[1].length() == 1)
        {
            for (VariableField vf : (List<VariableField>)record.getVariableFields(args[0]))
            {
                if (!(vf instanceof DataField))
                {
                    continue;
                }
                List<Subfield> subfields = ((DataField)vf).getSubfields(args[1].charAt(0));
                for (Subfield sf : subfields)
                {
                    if (sf.getData().equals(args[2]) || sf.getData().matches(args[2]))
                    {
                        fToDelete.add(vf);
                        break;  // one match is enough; do not queue the field twice
                    }
                }
            }
        }
    }
    else if (command.startsWith("insertfield("))
    {
        String arg = getOneArg(command);
        if (arg == null)
        {
            return ignoreMalformedCommand(command);
        }
        VariableField vf = createFieldFromString(arg, null);
        if (vf != null) fToInsert.add(vf);
    }
    else if (command.startsWith("insertparameterizedfield("))
    {
        String[] args = getThreeArgs(command);
        if (args == null)
        {
            return ignoreMalformedCommand(command);
        }
        Pattern p = Pattern.compile(args[2]);

        // The text the regex is matched against: the first subfield with the given
        // code for a data field, the field data for a control field.
        String source = null;
        if (field instanceof DataField)
        {
            if (!args[1].isEmpty())
            {
                Subfield sf = ((DataField)field).getSubfield(args[1].charAt(0));
                if (sf != null) source = sf.getData();
            }
        }
        else if (field instanceof ControlField)
        {
            source = ((ControlField)field).getData();
        }

        // No source text is handled like a non-matching regex: the template is
        // used without parameter substitution.
        String[] groups = null;
        if (source != null)
        {
            Matcher m = p.matcher(source);
            if (m.matches())
            {
                groups = stringsFromMatcher(m);
            }
        }
        VariableField vf = createFieldFromString(args[0], groups);
        if (vf != null) fToInsert.add(vf);
    }
    else if (command.startsWith("reject()"))
    {
        return false;
    }
    return true;
}

/** Logs a command whose arguments could not be parsed; the record is kept. */
private boolean ignoreMalformedCommand(String command)
{
    logger.warn("Cannot parse remap command, ignoring it: " + command);
    return true;
}
/**
 * Copies the groups of a successful match into an array: index 0 holds the
 * whole match and index i holds capturing group i.
 *
 * @param m a matcher on which a match has already succeeded
 * @return an array of length {@code m.groupCount() + 1}
 */
private String[] stringsFromMatcher(Matcher m)
{
    final int groupTotal = m.groupCount();
    String[] groups = new String[groupTotal + 1];
    for (int g = 0; g <= groupTotal; g++)
    {
        groups[g] = m.group(g);
    }
    return groups;
}
// Control field spec: optional '=', a tag 000-009, a space, an optional second space, then the data.
static Pattern newControlFieldDef = Pattern.compile("=?([0][0][0-9]) [ ]?(.*)");
// Data field spec: optional '=', a three-digit tag, a space, an optional second space,
// two indicator characters (digit, space or '|'), then the subfields starting with '$'.
static Pattern newDataFieldDef = Pattern.compile("=?([0-9][0-9][0-9]) [ ]?([0-9 \\|])([0-9 \\|])([$].*)");
// One subfield: '$', a code [a-z0-9], its data (which may contain ${n} placeholders),
// then the remainder of the spec.
// NOTE(review): the middle alternative is the regex \[$] - a literal '[' followed by an
// end-of-input anchor and ']' - which can never match; an escaped dollar sign was
// presumably intended. Left unchanged; confirm before altering.
static Pattern newSubfieldDef = Pattern.compile("[$]([a-z0-9])(([^$]|\\[$]|[$][{][0-9]*[}])*)(.*)");
// Created on first use by createFieldFromString().
static MarcFactory factory = null;
/**
 * Builds a control field or data field from a textual field spec such as
 * {@code "008 data"} or {@code "999 1 $atext$b${1}"}.
 *
 * @param arg        the field spec; a spec that matches the control field
 *                   pattern is treated as a control field
 * @param argmatches regex groups used to fill {@code ${n}} placeholders in the
 *                   field data, or null to leave the data as written
 * @return the new field, or null if the spec is null, matches neither pattern,
 *         or contains a subfield section that cannot be parsed
 */
private VariableField createFieldFromString(String arg, String argmatches[])
{
    if (arg == null)
    {
        return null;
    }
    if (factory == null) factory = MarcFactory.newInstance();

    Matcher cdf = newControlFieldDef.matcher(arg);
    if (cdf.matches()) // make a control field
    {
        // Read the tag from the control-field matcher; the data-field matcher
        // has not been matched at this point and would throw.
        ControlField cf = factory.newControlField(cdf.group(1));
        String data = cdf.group(2);
        if (argmatches != null)
        {
            data = fillParameters(data, argmatches);
        }
        cf.setData(data);
        return cf;
    }

    Matcher mdf = newDataFieldDef.matcher(arg);
    if (!mdf.matches())
    {
        return null;
    }

    // Anything that is not a digit ('|' or a space) becomes a blank indicator.
    char ind1 = mdf.group(2).charAt(0);
    if (ind1 < '0' || ind1 > '9') ind1 = ' ';
    char ind2 = mdf.group(3).charAt(0);
    if (ind2 < '0' || ind2 > '9') ind2 = ' ';
    DataField df = factory.newDataField(mdf.group(1), ind1, ind2);

    String sfData = mdf.group(4);
    while (!sfData.isEmpty())
    {
        Matcher sm = newSubfieldDef.matcher(sfData);
        if (!sm.matches())
        {
            // Without this exit the loop would spin forever on the same text.
            logger.warn("Cannot parse subfields \"" + sfData + "\" in field definition: " + arg);
            return null;
        }
        char code = sm.group(1).charAt(0);
        String data = sm.group(2);
        if (argmatches != null)
        {
            data = fillParameters(data, argmatches);
        }
        df.addSubfield(factory.newSubfield(code, data));
        sfData = sm.group(4);   // always shorter than before: at least "$x" was consumed
    }
    return df;
}
/**
 * Replaces the placeholders {@code ${1}}, {@code ${2}}, ... in a template with
 * the corresponding regex groups. The group text is inserted literally, so
 * '$' and '\' in it are not interpreted. A null group (one that did not take
 * part in the match) is inserted as the empty string. Placeholders without a
 * corresponding group, and {@code ${0}}, are left untouched.
 *
 * @param data       the template text
 * @param argmatches the groups; index 0 is the whole match and is not used as
 *                   a placeholder value
 * @return the template with all known placeholders filled in
 */
private String fillParameters(String data, String argmatches[])
{
    if (data == null || argmatches == null)
    {
        return data;
    }
    // Start at 1 and stop at the last real index: placeholder n maps to argmatches[n].
    for (int group = 1; group < argmatches.length; group++)
    {
        String value = (argmatches[group] == null) ? "" : argmatches[group];
        data = data.replace("${" + group + "}", value);
    }
    return data;
}
// name("arg") - a lower-case name, '(', one double-quoted argument, optional spaces, ')'.
static Pattern oneArg = Pattern.compile("[a-z]*[(]\"((\\\"|[^\"])*)\"[ ]*[)]");
/**
 * Extracts the single quoted argument from text of the form {@code name("arg")}.
 * NOTE(review): the regex passed to replaceAll below is just a double quote, so
 * the call replaces each quote with itself; unescaping of backslash-quote was
 * presumably intended - confirm.
 *
 * @param conditional the condition or command text; it is trimmed before matching
 * @return the argument, or null if the text does not have the expected form
 */
private String getOneArg(String conditional)
{
    Matcher m = oneArg.matcher(conditional.trim());
    if (!m.matches())
    {
        return null;
    }
    String quoted = m.group(1);
    return quoted.replaceAll("\\\"", "\"");
}
// name("arg1", "arg2") - two double-quoted arguments separated by a comma and optional spaces.
static Pattern twoArgs = Pattern.compile("[a-z]*[(]\"((\\\"|[^\"])*)\",[ ]*\"((\\\"|[^\"])*)\"[)]");
/**
 * Extracts the two quoted arguments from text of the form {@code name("a", "b")}.
 *
 * @param conditional the condition or command text; it is trimmed before matching
 * @return a two-element array with the arguments, or null if the text does
 *         not have the expected form
 */
private String[] getTwoArgs(String conditional)
{
    Matcher m = twoArgs.matcher(conditional.trim());
    if (!m.matches())
    {
        return null;
    }
    // The arguments are capturing groups 1 and 3; groups 2 and 4 are the inner repetition groups.
    String[] parsed = new String[2];
    for (int k = 0; k < parsed.length; k++)
    {
        parsed[k] = m.group(2 * k + 1).replaceAll("\\\"", "\"");
    }
    return parsed;
}
// name("arg1", "arg2", "arg3") - three double-quoted arguments; spaces are also allowed after '('.
static Pattern threeArgs = Pattern.compile("[a-z]*[(][ ]*\"((\\\"|[^\"])*)\",[ ]*\"((\\\"|[^\"])*)\",[ ]*\"((\\\"|[^\"])*)\"[)]");
/**
 * Extracts the three quoted arguments from text of the form
 * {@code name("a", "b", "c")}.
 *
 * @param conditional the condition or command text; it is trimmed before matching
 * @return a three-element array with the arguments, or null if the text does
 *         not have the expected form
 */
private String[] getThreeArgs(String conditional)
{
    Matcher m = threeArgs.matcher(conditional.trim());
    if (!m.matches())
    {
        return null;
    }
    // The arguments are capturing groups 1, 3 and 5; the even groups are the inner repetition groups.
    String[] parsed = new String[3];
    for (int k = 0; k < parsed.length; k++)
    {
        parsed[k] = m.group(2 * k + 1).replaceAll("\\\"", "\"");
    }
    return parsed;
}
// name(inner1(...), inner2(...)) - two nested calls separated by a comma and optional spaces.
static Pattern twoConditionals = Pattern.compile("[a-z]*[(]([a-z]*[(].*[)]),[ ]*([a-z]*[(].*[)])[)]");
/**
 * Splits text of the form {@code name(inner1(...), inner2(...))} into its two
 * nested calls.
 *
 * @param conditional the condition or command text; it is trimmed before matching
 * @return a two-element array with the nested calls, or null if the text does
 *         not have the expected form
 */
private String[] getTwoConditionals(String conditional)
{
    Matcher m = twoConditionals.matcher(conditional.trim());
    if (!m.matches())
    {
        return null;
    }
    String first = m.group(1);
    String second = m.group(2);
    return new String[]{first, second};
}
// name(inner(...)) - a single nested call.
static Pattern oneConditional = Pattern.compile("[a-z]*[(]([a-z]*[(].*[)])[)]");
/**
 * Extracts the nested call from text of the form {@code name(inner(...))}.
 *
 * @param conditional the condition text; it is trimmed before matching
 * @return the nested call, or null if the text does not have the expected form
 */
private String getOneConditional(String conditional)
{
    Matcher m = oneConditional.matcher(conditional.trim());
    return m.matches() ? m.group(1) : null;
}
// static Pattern argAndConditional = Pattern.compile("[a-z]*[(][ ]*\"((\\\"|[^\"])*)\",[ ]*([a-z]*[(].*[)])[)]");
// private String[] getArgAndConditional(String conditional)
// {
// Matcher m = argAndConditional.matcher(conditional.trim());
// if (m.matches())
// {
// String result[] = new String[]{m.group(1), m.group(2)};
// return(result);
// }
// return null;
// }
}