/* Copyright 2014 hbz, Pascal Christoph.
* Licensed under the Eclipse Public License 1.0 */
package org.lobid.lodmill;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.commons.io.FileUtils;
import org.culturegraph.mf.morph.Metamorph;
import org.culturegraph.mf.runner.Flux;
import org.culturegraph.mf.stream.converter.xml.XmlDecoder;
import org.culturegraph.mf.stream.pipe.StreamTee;
import org.culturegraph.mf.stream.source.FileOpener;
import org.junit.Test;
/**
 * Transform hbz01 MAB2 catalog data, using the wikidata concordance table
 * (which was built with {@link WikidataGeoJson2Mysql}). The port is
 * deliberately hardwired to 3306. Skip this test if you already have a running
 * daemon on port 3306.
 *
 * @author Pascal Christoph (dr0i)
 *
 */
@SuppressWarnings("javadoc")
public final class MabXmlWikidata2lobidIntegrationTest {
  private static final String TARGET_PATH = "tmp";
  private static final String TEST_FILENAME = "hbz01.nt";
  private static final String DB_PROTOCOL_AND_ADDRESS =
      "jdbc:mysql://localhost:3306/";
  // NOTE(review): credentials are hard-coded in source; consider supplying
  // them through configuration instead.
  private static final String DB_PASSWORD = "tzSblDEUGC1XhJB7";
  private static final String DB_DBNAME = "lobid";

  /**
   * Runs the transformation flow, dumps the resulting database table into a
   * file and compares that file against the expected triples (positive test)
   * and against triples that must not appear (negative test).
   *
   * @throws IOException if the database dump cannot be read or written
   * @throws URISyntaxException if a test resource URL is not a valid URI
   */
  @SuppressWarnings("static-method")
  @Test
  public void testFlow() throws IOException, URISyntaxException {
    final RdfModelMysqlWriter modelWriter = buildFlow();
    final File testFile = dumpMysqlToFile(modelWriter);
    // positive test
    AbstractIngestTests.compareFilesDefaultingBNodes(testFile,
        resourceFile(TEST_FILENAME));
    // negative test
    AbstractIngestTests.checkIfNoIntersection(testFile,
        resourceFile("hbz01negatives.ttl"));
    testFile.deleteOnExit();
  }

  /**
   * Resolves a test resource on the context class loader's classpath.
   *
   * @param name the resource name
   * @return the resource as a file
   * @throws URISyntaxException if the resource URL is not a valid URI
   * @throws IllegalStateException if the resource is not on the classpath
   */
  private static File resourceFile(final String name)
      throws URISyntaxException {
    final java.net.URL url =
        Thread.currentThread().getContextClassLoader().getResource(name);
    if (url == null) {
      // getResource() signals "not found" with null; fail with a clear message
      // instead of a NullPointerException.
      throw new IllegalStateException(
          "Test resource not found on classpath: " + name);
    }
    return new File(url.toURI());
  }

  /**
   * Wires up and runs the hbz catalog transformation: the bzip2-compressed tar
   * archive is opened, decoded as XML and fed into an {@link XmlTee}. One
   * branch goes through the MAB XML handler and the morph into a
   * {@link StreamTee} (statistics plus triple encoding into the MySQL model
   * writer); the other branch splits the XML into entities and writes them to
   * files.
   *
   * @return the model writer that received the transformed data
   */
  private static RdfModelMysqlWriter buildFlow() {
    // hbz catalog transformation
    final FileOpener opener = new FileOpener();
    opener.setCompression("BZIP2");
    final TarReader tarReader = new TarReader();
    final XmlDecoder xmlDecoder = new XmlDecoder();
    final XmlTee xmlTee = new XmlTee();
    final MabXmlHandler handler = new MabXmlHandler();
    final Metamorph morph =
        new Metamorph("src/main/resources/morph-hbz01-to-lobid.xml");
    final Triples2RdfModel triple2model = new Triples2RdfModel();
    triple2model.setInput("N-TRIPLE");
    final RdfModelMysqlWriter modelWriter = createModelWriter();
    triple2model.setReceiver(modelWriter);
    final StreamTee streamTee = new StreamTee();
    final Stats stats = new Stats();
    stats.setFilename("tmp.stats.csv");
    streamTee.addReceiver(stats);
    final StreamTee streamTeeGeo = new StreamTee();
    streamTee.addReceiver(streamTeeGeo);
    final PipeEncodeTriples encoder = new PipeEncodeTriples();
    streamTeeGeo.addReceiver(encoder);
    encoder.setReceiver(triple2model);
    final XmlEntitySplitter xmlEntitySplitter = new XmlEntitySplitter();
    xmlEntitySplitter.setEntityName("ListRecords");
    xmlEntitySplitter.setTopLevelElement("OAI-PMH");
    final XmlFilenameWriter xmlFilenameWriter = createXmlFilenameWriter();
    xmlTee.setReceiver(handler).setReceiver(morph).setReceiver(streamTee);
    xmlTee.addReceiver(xmlEntitySplitter);
    xmlEntitySplitter.setReceiver(xmlFilenameWriter);
    opener.setReceiver(tarReader).setReceiver(xmlDecoder).setReceiver(xmlTee);
    opener.process(
        new File("src/test/resources/hbz01XmlClobs.tar.bz2").getAbsolutePath());
    opener.closeStream();
    return modelWriter;
  }

  /**
   * Reads every row of the {@code resourcesAll} table through the writer's
   * connection and concatenates the second column of each row into the file
   * {@value #TEST_FILENAME} (presumably the serialized triples; verify against
   * {@link RdfModelMysqlWriter}).
   *
   * @param modelWriter the writer whose database connection is queried
   * @return the file holding the dumped table content
   * @throws IOException if the table cannot be read or the file cannot be
   *         written
   */
  private static File dumpMysqlToFile(RdfModelMysqlWriter modelWriter)
      throws IOException {
    final File testFile = new File(TEST_FILENAME);
    final StringBuilder sb = new StringBuilder();
    try {
      final PreparedStatement ps =
          modelWriter.conn.prepareStatement("SELECT * FROM resourcesAll ");
      try {
        final ResultSet res = ps.executeQuery();
        try {
          while (res.next()) {
            sb.append(res.getString(2));
          }
        } finally {
          res.close();
        }
      } finally {
        ps.close();
      }
    } catch (SQLException e) {
      // A failed query must fail the test; otherwise an empty dump would be
      // compared and the negative check would pass vacuously.
      throw new IOException("Could not dump table resourcesAll", e);
    }
    FileUtils.writeStringToFile(testFile, sb.toString(), false);
    return testFile;
  }

  /**
   * Creates the writer that stores the split XML entities as bz2-compressed
   * files below {@code TARGET_PATH + "/xml"}.
   *
   * @return the configured XML filename writer
   */
  private static XmlFilenameWriter createXmlFilenameWriter() {
    final XmlFilenameWriter xmlFilenameWriter = new XmlFilenameWriter();
    xmlFilenameWriter.setStartIndex(2);
    xmlFilenameWriter.setEndIndex(7);
    xmlFilenameWriter.setTarget(TARGET_PATH + "/xml");
    xmlFilenameWriter.setProperty(
        "/OAI-PMH/ListRecords/record/metadata/record/datafield[@tag='001']/subfield[@code='a']");
    xmlFilenameWriter.setCompression("bz2");
    xmlFilenameWriter.setFileSuffix("");
    xmlFilenameWriter.setEncoding("utf8");
    return xmlFilenameWriter;
  }

  /**
   * Creates the MySQL model writer configured for the {@code resourcesAll}
   * table of the test database.
   *
   * @return the configured model writer
   */
  private static RdfModelMysqlWriter createModelWriter() {
    final RdfModelMysqlWriter modelWriter = new RdfModelMysqlWriter();
    modelWriter.setProperty("http://purl.org/lobid/lv#hbzID");
    modelWriter.setDbname(DB_DBNAME);
    modelWriter.setTablename("resourcesAll");
    modelWriter.setUsername("debian-sys-maint");
    modelWriter.setPassword(DB_PASSWORD);
    modelWriter.setDbProtocolAndAdress(DB_PROTOCOL_AND_ADDRESS);
    return modelWriter;
  }

  /**
   * Runs the flux script {@code hbz01-to-lobid.flux} and fails if the run
   * throws.
   *
   * @throws URISyntaxException if the flux resource URL is not a valid URI
   */
  @SuppressWarnings("static-method")
  @Test
  public void testFlux() throws URISyntaxException {
    final File fluxFile = resourceFile("hbz01-to-lobid.flux");
    try {
      Flux.main(new String[] { fluxFile.getAbsolutePath() });
    } catch (Exception e) {
      // Propagate instead of printing, so a failing flux run fails the test.
      throw new IllegalStateException(
          "Running flux failed: " + fluxFile.getAbsolutePath(), e);
    }
  }
}