/* Copyright 2014 hbz, Pascal Christoph.
 * Licensed under the Eclipse Public License 1.0 */

package org.lobid.lodmill;

import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;

import org.culturegraph.mf.morph.Metamorph;
import org.culturegraph.mf.runner.Flux;
import org.culturegraph.mf.stream.converter.xml.XmlDecoder;
import org.culturegraph.mf.stream.pipe.StreamTee;
import org.junit.Test;

/**
 * Gets the data out of an Elasticsearch index. The sink is a MySQL DB. The
 * port is deliberately hardwired to 3306. Skip this test if you already have
 * a daemon running on port 3306.
 * <p>
 * The Elasticsearch endpoint and the database credentials are hardwired in
 * this class, so both tests need those services to be reachable.
 *
 * @author Pascal Christoph (dr0i)
 *
 */
@SuppressWarnings("javadoc")
public final class MabXmlElasticsearch2lobidIntegrationOnlineTest {
	private static final String TARGET_PATH = "tmp";
	private static final String DB_PROTOCOL_AND_ADDRESS = "jdbc:mysql://localhost:3306/";
	// NOTE(review): credentials are committed in source; consider moving them
	// to external configuration.
	private static final String DB_PASSWORD = "tzSblDEUGC1XhJB7";
	private static final String DB_DBNAME = "lobid";
	/** Name of the flux script that {@link #testFlux()} loads from the classpath. */
	private static final String FLUX_RESOURCE = "hbz01ES-to-lobid.flux";

	/**
	 * Wires the pipeline programmatically and runs it once.
	 * <p>
	 * The Elasticsearch reader feeds an XML decoder whose output is teed into
	 * two branches: (1) MAB-XML handler, Metamorph, a stream tee that feeds
	 * both the statistics receiver and the triple encoder, which in turn feeds
	 * the RDF model and the MySQL writer; (2) an entity splitter on
	 * "ListRecords" that feeds the XML file writer.
	 */
	@SuppressWarnings("static-method")
	@Test
	public void testFlow() {
		// hbz catalog transformation
		final ElasticsearchReader opener = new ElasticsearchReader();
		opener.setClustername("quaoar");
		opener.setHostname("193.30.112.171");
		opener.setIndexname("hbz01");
		opener.setShards("0,1,2,3,4");

		final XmlDecoder xmlDecoder = new XmlDecoder();
		final XmlTee xmlTee = new XmlTee();
		final MabXmlHandler handler = new MabXmlHandler();
		final Metamorph morph = new Metamorph("src/main/resources/morph-hbz01-to-lobid.xml");

		// RDF branch: triples are collected into a model and written to MySQL.
		final Triples2RdfModel triple2model = new Triples2RdfModel();
		triple2model.setInput("N-TRIPLE");
		final RdfModelMysqlWriter modelWriter = createModelWriter();
		triple2model.setReceiver(modelWriter);

		// The morphed stream goes to the statistics receiver and to a second
		// tee, whose only receiver here is the triple encoder.
		final StreamTee streamTee = new StreamTee();
		final Stats stats = new Stats();
		stats.setFilename("tmp.stats.csv");
		streamTee.addReceiver(stats);
		final StreamTee streamTeeGeo = new StreamTee();
		streamTee.addReceiver(streamTeeGeo);
		final PipeEncodeTriples encoder = new PipeEncodeTriples();
		streamTeeGeo.addReceiver(encoder);
		encoder.setReceiver(triple2model);

		// XML branch: split on "ListRecords" and hand the parts to the file writer.
		final XmlEntitySplitter xmlEntitySplitter = new XmlEntitySplitter();
		xmlEntitySplitter.setEntityName("ListRecords");
		final XmlFilenameWriter xmlFilenameWriter = createXmlFilenameWriter();

		// Receiver registration order is kept exactly as it was.
		xmlTee.setReceiver(handler).setReceiver(morph).setReceiver(streamTee);
		xmlTee.addReceiver(xmlEntitySplitter);
		xmlEntitySplitter.setReceiver(xmlFilenameWriter);

		opener.setReceiver(xmlDecoder).setReceiver(xmlTee);
		opener.process("");
		opener.closeStream();
	}

	/**
	 * Builds the writer for the XML branch, targeting {@code TARGET_PATH/xml}
	 * with bz2 compression, an empty file suffix and utf8 encoding.
	 *
	 * @return the configured writer
	 */
	private static XmlFilenameWriter createXmlFilenameWriter() {
		final XmlFilenameWriter xmlFilenameWriter = new XmlFilenameWriter();
		// NOTE(review): the indexes presumably select a substring of the
		// property value for the file path; confirm against XmlFilenameWriter.
		xmlFilenameWriter.setStartIndex(2);
		xmlFilenameWriter.setEndIndex(7);
		xmlFilenameWriter.setTarget(TARGET_PATH + "/xml");
		xmlFilenameWriter.setProperty(
				"/OAI-PMH/ListRecords/record/metadata/record/datafield[@tag='001']/subfield[@code='a']");
		xmlFilenameWriter.setCompression("bz2");
		xmlFilenameWriter.setFileSuffix("");
		xmlFilenameWriter.setEncoding("utf8");
		return xmlFilenameWriter;
	}

	/**
	 * Builds the MySQL writer for the RDF branch, configured with the database
	 * constants of this class and the table name "resources".
	 *
	 * @return the configured writer
	 */
	private static RdfModelMysqlWriter createModelWriter() {
		final RdfModelMysqlWriter modelWriter = new RdfModelMysqlWriter();
		modelWriter.setProperty("http://purl.org/lobid/lv#hbzID");
		modelWriter.setDbname(DB_DBNAME);
		modelWriter.setTablename("resources");
		modelWriter.setUsername("debian-sys-maint");
		modelWriter.setPassword(DB_PASSWORD);
		modelWriter.setDbProtocolAndAdress(DB_PROTOCOL_AND_ADDRESS);
		return modelWriter;
	}

	/**
	 * Runs the flux script {@code hbz01ES-to-lobid.flux} found on the
	 * classpath.
	 * <p>
	 * Any failure of the flux run makes the test fail instead of only being
	 * printed to stderr.
	 *
	 * @throws URISyntaxException if the resource URL cannot be converted to a
	 *           URI
	 * @throws IllegalStateException if the flux resource is missing or the
	 *           flux run throws a checked exception
	 */
	@SuppressWarnings("static-method")
	@Test
	public void testFlux() throws URISyntaxException {
		final URL fluxResource =
				Thread.currentThread().getContextClassLoader().getResource(FLUX_RESOURCE);
		if (fluxResource == null) {
			// getResource returns null for a missing resource; fail with a
			// message that names it rather than with a bare NPE.
			throw new IllegalStateException(
					"Flux resource not found on classpath: " + FLUX_RESOURCE);
		}
		final File fluxFile = new File(fluxResource.toURI());
		try {
			Flux.main(new String[] { fluxFile.getAbsolutePath() });
		} catch (RuntimeException e) {
			// Unchecked failures propagate unchanged.
			throw e;
		} catch (Exception e) {
			// Wrap checked failures so the signature stays the same while the
			// cause is preserved.
			throw new IllegalStateException(
					"Running flux file failed: " + fluxFile.getAbsolutePath(), e);
		}
	}
}