/* Copyright 2013 Fabian Steeg. Licensed under the Eclipse Public License 1.0 */
package org.lobid.lodmill;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.SortedSet;
import java.util.TreeSet;
import org.culturegraph.mf.framework.DefaultStreamPipe;
import org.culturegraph.mf.framework.ObjectReceiver;
import org.culturegraph.mf.morph.Metamorph;
import org.culturegraph.mf.morph.MorphErrorHandler;
import org.culturegraph.mf.stream.pipe.ObjectTee;
import org.culturegraph.mf.stream.reader.Reader;
import org.culturegraph.mf.stream.sink.ObjectWriter;
import org.culturegraph.mf.stream.source.FileOpener;
import org.junit.Assert;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Ingest the ZVDD MARC-XML export.
*
* Run as Java application to use metaflow definitions; run as JUnit test to
* print some stats, transform the fields, and output results as N-Triples.
*
* @author Fabian Steeg (fsteeg)
*/
@SuppressWarnings("javadoc")
public abstract class AbstractIngestTests {
private static final Logger LOG =
LoggerFactory.getLogger(AbstractIngestTests.class);
private final String dataFile;
private final Reader reader;
protected Metamorph metamorph;
private final String statsMorphFile;
public AbstractIngestTests(final String dataFile, final String morphFile,
final String statsMorphFile, final Reader reader) {
this.dataFile = dataFile;
this.statsMorphFile = statsMorphFile;
metamorph = new Metamorph(Thread.currentThread().getContextClassLoader()
.getResourceAsStream(morphFile));
this.reader = reader;
}
/**
* Tests if the generated triples equals the triples in the test file
*
* @param testF<ileName The test file name, residing in the resource folder
* @param generatedFileName The to be generated file name .
* @param dsp A DefaultStreampipe
*/
public void triples(final String testFileName, final String generatedFileName,
final DefaultStreamPipe<ObjectReceiver<String>> dsp) {
setUpErrorHandler(metamorph);
final File generatedFile = new File(generatedFileName);
process(dsp, generatedFile);
File testFile;
try {
testFile = new File(Thread.currentThread().getContextClassLoader()
.getResource(testFileName).toURI());
compareFilesDefaultingBNodes(generatedFile, testFile);
} catch (URISyntaxException e) {
LOG.error(e.getMessage(), e);
}
generatedFile.deleteOnExit();
}
private static SortedSet<String> linesInFileToSetDefaultingBNodes(
final File file) {
SortedSet<String> set = null;
try (Scanner scanner = new Scanner(file)) {
set = asSet(scanner);
} catch (IOException e) {
LOG.error(e.getMessage(), e);
}
return set;
}
/**
* Tests if two files are of equal content. As BNodes are not fix they are not
* comparable and thus they are defaulted to "_:bnodeDummy" to make the files
* comparable anyhow.
*
* @param generatedFile the actually generated file
* @param testFile the file which defines how the generatedFile should look
* like
*/
public static void compareFilesDefaultingBNodes(final File generatedFile,
final File testFile) {
assertSetSize(linesInFileToSetDefaultingBNodes(testFile),
linesInFileToSetDefaultingBNodes(generatedFile));
assertSetElements(linesInFileToSetDefaultingBNodes(generatedFile),
linesInFileToSetDefaultingBNodes(testFile));
}
/**
* Tests if content of one file is not part of the second file.
*
* @param generatedFile the actually generated file
* @param testFile the file which musn't have any lines also part of the
* generatedFile
*/
public static void checkIfNoIntersection(final File generatedFile,
final File testFile) {
assertSetNoIntersection(linesInFileToSetDefaultingBNodes(testFile),
linesInFileToSetDefaultingBNodes(generatedFile));
}
private static void assertSetNoIntersection(
final SortedSet<String> notExpectedSet,
final SortedSet<String> actualSet) {
final Iterator<String> notExpectedIterator = notExpectedSet.iterator();
boolean assertionError = false;
for (int i = 0; i < notExpectedSet.size(); i++) {
String notExpected = notExpectedIterator.next();
if (actualSet.contains(notExpected)) {
LOG.error("Not expected: " + notExpected + " to be part of the data");
assertionError = true;
}
}
if (assertionError) {
throw new AssertionError();
}
}
private static void assertSetSize(final SortedSet<String> expectedSet,
final SortedSet<String> actualSet) {
if (expectedSet.size() != actualSet.size()) {
final SortedSet<String> missingSet = new TreeSet<>(expectedSet);
missingSet.removeAll(actualSet);
LOG.error("Missing expected result set entries: " + missingSet);
}
Assert.assertEquals(expectedSet.size(), actualSet.size());
}
private static SortedSet<String> asSet(final Scanner scanner) {
final SortedSet<String> set = new TreeSet<>();
while (scanner.hasNextLine()) {
String actual = scanner.nextLine();
if (!actual.isEmpty()) {
actual =
actual.replaceFirst("(^_:\\w* )|( _:\\w* ?.$)", "_:bnodeDummy ");
set.add(actual);
}
}
return set;
}
private static void assertSetElements(final SortedSet<String> expectedSet,
final SortedSet<String> actualSet) {
final Iterator<String> expectedIterator = expectedSet.iterator();
final Iterator<String> actualIterator = actualSet.iterator();
for (int i = 0; i < expectedSet.size(); i++) {
String expected = expectedIterator.next();
String actual = actualIterator.next();
if (!expected.equals(actual)) {
LOG.error("Expected: " + expected + "\n but was:" + actual);
}
}
Assert.assertEquals(expectedSet, actualSet);
}
public void dot(final String fname) {
setUpErrorHandler(metamorph);
final File file = new File(fname);
process(new PipeEncodeDot(), file);
Assert.assertTrue(file.exists());
file.deleteOnExit();
}
public void stats(final String fileName) throws IOException {
final File file = new File(fileName);
metamorph = new Metamorph(Thread.currentThread().getContextClassLoader()
.getResourceAsStream(statsMorphFile));
setUpErrorHandler(metamorph);
final Stats stats = new Stats();
reader.setReceiver(metamorph).setReceiver(stats);
processFile();
reader.closeStream();
final List<Entry<String, Integer>> entries =
stats.sortedByValuesDescending();
Stats.writeTextileMappingTable(entries, new ArrayList<>(), file);
Assert.assertTrue("We should have some values", entries.size() > 1);
Assert.assertTrue("Values should have descending frequency", entries.get(0)
.getValue() >= entries.get(entries.size() - 1).getValue());
Assert.assertTrue("Mapping table should exist", file.exists());
file.deleteOnExit();
}
protected void process(
final DefaultStreamPipe<ObjectReceiver<String>> encoder,
final File file) {
final ObjectTee<String> tee = outputTee(file);
reader.setReceiver(metamorph).setReceiver(encoder).setReceiver(tee);
processFile();
reader.closeStream();
Assert.assertTrue("File should exist", file.exists());
Assert.assertTrue("File should not be empty", file.length() > 0);
}
private void processFile() {
FileOpener fileOpener = null;
fileOpener = new FileOpener();
fileOpener.setReceiver(reader);
fileOpener.process(dataFile);
}
private static ObjectTee<String> outputTee(final File triples) {
final ObjectTee<String> tee = new ObjectTee<>();
tee.addReceiver(new ObjectWriter<String>("stdout"));
tee.addReceiver(new ObjectWriter<String>(triples.getAbsolutePath()));
return tee;
}
protected static void setUpErrorHandler(final Metamorph metamorph) {
metamorph.setErrorHandler(new MorphErrorHandler() {
@Override
public void error(final Exception exception) {
LOG.error(exception.getMessage(), exception);
}
});
}
public static File concatenateGeneratedFilesIntoOneFile(String targetPath,
String testFilename) throws FileNotFoundException, IOException {
StringBuilder triples = new StringBuilder();
concatenateGeneratedFilesIntoOneString(targetPath, triples);
File testFile = new File(testFilename);
try (FileOutputStream fos = new FileOutputStream(testFile)) {
if (triples.length() > 1) {
fos.write(triples.toString().getBytes());
fos.close();
}
}
return testFile;
}
/**
*
* @param targetPath the main path of the
* @param subPath
* @param testFilename
* @return the File with the content of all teh other files
* @throws FileNotFoundException
* @throws IOException
*/
private static StringBuilder concatenateGeneratedFilesIntoOneString(
String targetPath, StringBuilder triples)
throws FileNotFoundException, IOException {
File parentPath = new File(targetPath + "/");
for (String filename : parentPath.list()) {
File newFile = new File(parentPath + "/" + filename);
if (newFile.isDirectory())
concatenateGeneratedFilesIntoOneString(
parentPath.getPath() + "/" + filename, triples);
else
triples.append(getFileContent(newFile));
}
return triples;
}
private static String getFileContent(File file) {
StringBuilder ntriples = new StringBuilder();
try (Scanner scanner = new Scanner(file)) {
while (scanner.hasNextLine()) {
final String actual = scanner.nextLine();
if (!actual.isEmpty()) {
ntriples.append(actual + "\n");
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
}
return ntriples.toString();
}
}