package org.solrmarc.index;
import static org.junit.Assert.*;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.marc4j.MarcReader;
import org.marc4j.MarcReaderConfig;
import org.marc4j.MarcReaderFactory;
import org.marc4j.marc.Record;
import org.solrmarc.index.indexer.AbstractValueIndexer;
import org.solrmarc.index.indexer.MultiValueIndexer;
import org.solrmarc.index.indexer.ValueIndexerFactory;
import org.solrmarc.tools.PropertyUtils;
@RunWith(Parameterized.class)
public class ParameterizedIndexTest
{
static int cnt = 0;
String testNumber;
String config;
String recordFilename;
String indexSpec;
String expectedValue;
Pattern parameterParse = Pattern.compile("([^(]*)([(][^)]*[)])");
Pattern indexSpecParse = Pattern.compile("(([^,]*[.]properties)([ ]*,[ ]*(.*[.]properties))*)[(]([^)]*)[)]");
Pattern singleSpecParse = Pattern.compile("(([-A-Za-z0-9_]*)[ ]*=[ ]*)?(.*)");
static String dataDirectory;
static String dataFile;
private static ValueIndexerFactory factory;
static
{
factory = ValueIndexerFactory.initialize(new String[]{System.getProperty("test.data.dir", "test/data")});
}
/**
 * Creates one test case from a four element parameter array.
 *
 * @param config         reader configuration specification (may be empty)
 * @param recordFilename MARC record file name, optionally followed by "(recordId)"
 * @param indexSpec      index specification to evaluate
 * @param expectedValue  expected field values separated by "|"
 */
public ParameterizedIndexTest(String config, String recordFilename, String indexSpec, String expectedValue)
{
    this.config = config;
    this.recordFilename = recordFilename;
    this.indexSpec = indexSpec;
    this.expectedValue = expectedValue;
    // Each constructed instance takes the next ordinal from the shared counter.
    this.testNumber = String.valueOf(cnt++);
}
/**
 * Runs a single indexing check: reads the configured record, evaluates the
 * index specification against it and compares the produced values with the
 * expected ones.
 * <p>
 * The expected value is a "|" separated list. When it starts with
 * "*ordered*" the produced values must appear in exactly that order;
 * otherwise any order is accepted. An empty expected value means that no
 * values may be produced.
 *
 * @throws Exception if the reader properties cannot be loaded or indexing fails
 */
@Test
public void verifyIndexingResults() throws Exception
{
    final String orderedMarker = "*ordered*";

    Properties readerProps = setReaderProperties(factory, config);
    MarcReaderConfig readerConfig = new MarcReaderConfig(readerProps);
    Record record = getRecord(factory, readerConfig, recordFilename);
    // Fail with a clear message instead of handing a null record to the indexers.
    assertNotNull("No record could be read from " + recordFilename, record);

    Collection<MultiValueIndexer> indexers = createIndexer(factory, indexSpec);
    assertNotNull("No indexer could be created for index spec: " + indexSpec, indexers);

    // Collect the values of every matching indexer, in indexer order.
    Collection<String> result = new ArrayList<String>();
    for (MultiValueIndexer indexer : indexers)
    {
        result.addAll(indexer.getFieldData(record));
    }

    boolean ordered = expectedValue.startsWith(orderedMarker);
    if (ordered)
    {
        expectedValue = expectedValue.substring(orderedMarker.length()).trim();
    }
    String[] expected = (expectedValue.length() > 0) ? expectedValue.split("[|]") : new String[0];

    if (ordered)
    {
        assertThat(result, contains(expected));
    }
    else
    {
        assertThat(result, containsInAnyOrder(expected));
    }
    // Only reached when the assertion above passed.
    System.out.println("Test " + testNumber + " : " + config + " : " + recordFilename + " : " + indexSpec + " --> " + expectedValue);
}
/**
 * Builds the indexers described by an index specification.
 * <p>
 * Two forms are recognized:
 * <ul>
 * <li>"a.properties[, b.properties...](fieldName)" - the listed properties
 *     files are loaded and every indexer that produces {@code fieldName}
 *     is returned;</li>
 * <li>"[fieldName = ]spec" - a single indexer is created directly from the
 *     specification; when no field name is given, "test_" plus the test
 *     number is used.</li>
 * </ul>
 *
 * @param factory   factory used to create the indexers
 * @param indexSpec index specification in one of the forms above
 * @return the matching indexers; possibly empty, never {@code null}
 * @throws IllegalStateException    if the properties files cannot be turned into indexers
 * @throws IllegalArgumentException if the specification matches neither form
 */
private Collection<MultiValueIndexer> createIndexer(ValueIndexerFactory factory, String indexSpec)
{
    Matcher indexSpecMatcher = indexSpecParse.matcher(indexSpec);
    if (indexSpecMatcher.matches())
    {
        String specName = indexSpecMatcher.group(5);
        String[] specFileNames = indexSpecMatcher.group(1).split("[ ]*,[ ]*");
        File[] specFiles = new File[specFileNames.length];
        for (int i = 0; i < specFileNames.length; i++)
        {
            // Resolve each listed file on its own; relative names are looked up in the home directories.
            File specFile = new File(specFileNames[i]);
            if (!specFile.isAbsolute())
            {
                specFile = PropertyUtils.findFirstExistingFile(factory.getHomeDirs(), specFileNames[i]);
            }
            specFiles[i] = specFile;
        }

        Collection<MultiValueIndexer> matching = new ArrayList<MultiValueIndexer>();
        try
        {
            List<AbstractValueIndexer<?>> indexers = factory.createValueIndexers(specFiles);
            for (AbstractValueIndexer<?> candidate : indexers)
            {
                for (String fieldName : candidate.getSolrFieldNames())
                {
                    if (fieldName.equals(specName))
                    {
                        matching.add((MultiValueIndexer) candidate);
                    }
                }
            }
        }
        catch (IllegalAccessException | InstantiationException | IOException e)
        {
            // Surface the failure: an empty indexer list would otherwise be compared against the expected values.
            throw new IllegalStateException("Unable to create indexers from: " + indexSpec, e);
        }
        return matching;
    }

    // Not a properties file reference, so treat it as a single inline specification.
    Matcher singleSpecMatcher = singleSpecParse.matcher(indexSpec);
    if (!singleSpecMatcher.matches())
    {
        throw new IllegalArgumentException("Unparseable index specification: " + indexSpec);
    }
    String indexName = singleSpecMatcher.group(2);
    String fullSpec = singleSpecMatcher.group(3);
    if (indexName == null || indexName.length() == 0)
    {
        indexName = "test_" + testNumber;
    }
    MultiValueIndexer single = factory.createValueIndexer(indexName, fullSpec);
    return Collections.singletonList(single);
}
/**
 * Loads the MARC reader properties for a test and applies any inline overrides.
 * <p>
 * Accepted forms of {@code config}:
 * <ul>
 * <li>"" - load "marcreader.properties" unchanged;</li>
 * <li>"key=value[,key=value...]" - load "marcreader.properties", then apply the overrides;</li>
 * <li>"file(key=value[,key=value...])" - load {@code file}, then apply the overrides;
 *     an empty file name means "marcreader.properties";</li>
 * <li>anything else - taken as the name of the properties file to load.</li>
 * </ul>
 * Overrides are split on "," and each one on its first "="; keys and values
 * are trimmed. Entries without "=" or with an empty key are ignored.
 *
 * @param factory factory whose home directories are searched for the properties file
 * @param config  reader configuration specification
 * @return the loaded properties with overrides applied
 * @throws IOException if the properties file cannot be read
 */
private Properties setReaderProperties(ValueIndexerFactory factory, String config) throws IOException
{
    String configFile = "marcreader.properties";
    String overrides = "";
    Matcher configMatcher = parameterParse.matcher(config);
    if (config.length() == 0)
    {
        // Nothing specified: keep the default file and no overrides.
    }
    else if (config.matches("[-A-Za-z0-9_.]*[ ]*=.*"))
    {
        overrides = config;
    }
    else if (configMatcher.matches())
    {
        String fileName = configMatcher.group(1).trim();
        if (fileName.length() > 0)
        {
            configFile = fileName;
        }
        // Group 2 still carries the surrounding parentheses; strip them so they
        // do not end up inside the first key and the last value.
        String parenthesized = configMatcher.group(2);
        overrides = parenthesized.substring(1, parenthesized.length() - 1);
    }
    else
    {
        configFile = config;
    }

    Properties readerProps = new Properties();
    String propertyFileAsURLStr = PropertyUtils.getPropertyFileAbsoluteURL(factory.getHomeDirs(), configFile, false, null);
    // NOTE(review): assumes getPropertyFileInputStream returns a java.io.InputStream - confirm.
    try (java.io.InputStream propertyStream = PropertyUtils.getPropertyFileInputStream(propertyFileAsURLStr))
    {
        readerProps.load(propertyStream);
    }

    for (String override : overrides.split(","))
    {
        // Split on the first "=" only, so values may themselves contain "=".
        int separator = override.indexOf('=');
        if (separator < 0)
        {
            continue;
        }
        String key = override.substring(0, separator).trim();
        if (key.length() > 0)
        {
            readerProps.setProperty(key, override.substring(separator + 1).trim());
        }
    }
    return readerProps;
}
/**
 * Reads the record a test should be run against.
 * <p>
 * {@code recordFilename} is either a plain file name, in which case the first
 * record of the file is used, or "file(recordId)", in which case the record
 * whose control number equals {@code recordId} is used. The file is looked up
 * in the "records" subdirectory of the test data directory.
 *
 * @param factory        factory whose home directories are passed to the reader factory
 * @param readerConfig   configuration for the MARC reader
 * @param recordFilename file name, optionally followed by "(recordId)"
 * @return the selected record, or {@code null} if the file contains no records
 * @throws IllegalArgumentException if the file cannot be opened, or if a record id
 *         was requested and no record in the file has that control number
 */
private Record getRecord(ValueIndexerFactory factory, MarcReaderConfig readerConfig, String recordFilename)
{
    String recordToLookAt = null; // null means just get the first record from the named file
    if (recordFilename.matches("[^(]*[(][^)]*[)]"))
    {
        // The pattern guarantees a "(" and a trailing ")"; slice between them
        // rather than splitting, so that "file()" does not fail.
        int open = recordFilename.indexOf('(');
        String requestedId = recordFilename.substring(open + 1, recordFilename.length() - 1);
        recordFilename = recordFilename.substring(0, open);
        if (requestedId.length() > 0)
        {
            recordToLookAt = requestedId;
        }
    }
    String fullPath = dataDirectory + File.separator + "records" + File.separator + recordFilename;
    MarcReader reader;
    try
    {
        reader = MarcReaderFactory.makeReader(readerConfig, factory.getHomeDirs(), fullPath);
    }
    catch (IOException e)
    {
        throw new IllegalArgumentException("Fatal error: Exception opening InputStream " + fullPath, e);
    }

    if (recordToLookAt == null)
    {
        return reader.hasNext() ? reader.next() : null;
    }
    while (reader.hasNext())
    {
        Record candidate = reader.next();
        // Compare from the known non-null side; a record may lack a control number.
        if (recordToLookAt.equals(candidate.getControlNumber()))
        {
            return candidate;
        }
    }
    // Do not fall back to an unrelated record: the test would then check the wrong data.
    throw new IllegalArgumentException("Record " + recordToLookAt + " not found in " + fullPath);
}
/**
 * Supplies the parameters for the tests.
 * <p>
 * The tests are read from the file named by the system property
 * "test.data.file" (default "indextest.txt") in the directory named by the
 * system property "test.data.dir" (default "test/data"). If that file does
 * not exist, a built-in set of tests is returned instead.
 * <p>
 * Each returned array has four elements, matching the constructor parameters:
 * <ul>
 * <li>[0] reader configuration specification (may be empty)</li>
 * <li>[1] name of the file containing the MARC record(s), optionally followed by "(recordId)"</li>
 * <li>[2] index specification to evaluate</li>
 * <li>[3] expected value(s)</li>
 * </ul>
 *
 * @return the test parameter arrays
 * @throws IllegalStateException if the test file exists but cannot be read completely
 */
@Parameters
public static Collection<String[]> indexValues()
{
    dataDirectory = System.getProperty("test.data.dir", "test/data");
    dataFile = System.getProperty("test.data.file", "indextest.txt");
    File file = new File(dataDirectory + File.separator + dataFile);
    try (BufferedReader rIn = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")))
    {
        return parseTestFile(rIn);
    }
    catch (FileNotFoundException e)
    {
        // No test file: clear the field, as before, and fall back to the built-in tests.
        dataFile = null;
        return builtInTests();
    }
    catch (IOException e)
    {
        // A partially read file would silently run fewer tests, so fail loudly instead.
        throw new IllegalStateException("Unable to read index test file " + file, e);
    }
}

/**
 * Parses test definitions. Blank lines and lines starting with "#" are
 * skipped. Lines starting with "readerProps:", "record:" and "indexSpec:"
 * set the corresponding element of the current test; a line starting with
 * "expect:" sets the expected value and completes the test. Elements that
 * are not set are empty strings. Any other line is ignored.
 */
private static List<String[]> parseTestFile(BufferedReader rIn) throws IOException
{
    List<String[]> tests = new ArrayList<String[]>();
    String[] current = new String[]{"", "", "", ""};
    String line;
    while ((line = rIn.readLine()) != null)
    {
        if (line.startsWith("#") || line.trim().length() == 0)
        {
            continue;
        }
        if (line.startsWith("readerProps:"))
        {
            current[0] = line.substring("readerProps:".length()).trim();
        }
        else if (line.startsWith("record:"))
        {
            current[1] = line.substring("record:".length()).trim();
        }
        else if (line.startsWith("indexSpec:"))
        {
            current[2] = line.substring("indexSpec:".length()).trim();
        }
        else if (line.startsWith("expect:"))
        {
            current[3] = line.substring("expect:".length()).trim();
            tests.add(current);
            current = new String[]{"", "", "", ""};
        }
    }
    return tests;
}

/** Returns the tests that are used when no test definition file is present. */
private static List<String[]> builtInTests()
{
    List<String[]> tests = new ArrayList<String[]>();
    tests.add(new String[]
    {
        "",
        "specTestRecs.mrc(u8)",
        "id = 001, first",
        "u8"
    });
    tests.add(new String[]
    {
        "",
        "1156470.mrc",
        "035a, map(\"[(][Oo][Cc][Oo][Ll][Cc][)][^0-9]*[0]*([0-9]+)=>$1\",\"ocm[0]*([0-9]+)[ ]*[0-9]*=>$1\",\"ocn[0]*([0-9]+).*=>$1\", \"on[0]*([0-9]+).*=>$1\")",
        "12275114"
    });
    tests.add(new String[]
    {
        "",
        "1156470.mrc",
        "oclc_pattern_map_test.properties(oclc_num)",
        "12275114"
    });
    tests.add(new String[]
    {
        "",
        "1156470.mrc",
        "oclc_p_num = 035a, oclc_num_pattern_map.properties(oclc_num)",
        "12275114"
    });
    tests.add(new String[]
    {
        "",
        "specTestRecs.xml",
        "subject_facet =600[a-z]:610[a-z]:611[a-z]:630[a-z]:650[a-z]:651[a-z]:655[a-z]:690[a-z], join (\" -- \"), cleanEnd, unique",
        "Translating and interpreting -- Soviet Union -- History|Russian literature -- Translations from foreign languages -- History and criticism"
    });
    tests.add(new String[]
    {
        "",
        "title_k.mrc",
        "custom, getSortableTitle",
        "morton hoffman papers"
    });
    tests.add(new String[]
    {
        "marc.permissive=true,marc.to_utf_8=true,marc.unicode_normalize=C",
        "u4.mrc(u4)",
        "245a",
        // "The princes of Hà-tiên (1682-1867) /" written with precomposed accented letters
        "The princes of H\u00E0-ti\u00EAn (1682-1867) /"
    });
    tests.add(new String[]
    {
        "marc.permissive=true,marc.to_utf_8=true,marc.unicode_normalize=false",
        "u4.mrc(u4)",
        "245a",
        // "The princes of Hà-tiên (1682-1867) /" written with base letters followed by combining accents
        "The princes of Ha\u0300-tie\u0302n (1682-1867) /"
    });
    return tests;
}
}