package org.xbib.tools; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.xbib.common.settings.Settings; import org.xbib.io.Connection; import org.xbib.io.Session; import org.xbib.io.StringPacket; import org.xbib.io.archive.file.Finder; import org.xbib.io.archive.tar2.TarConnectionFactory; import org.xbib.io.archive.tar2.TarSession; import org.xbib.metric.MeterMetric; import org.xbib.util.concurrent.AbstractWorker; import org.xbib.util.concurrent.ForkJoinPipeline; import org.xbib.util.concurrent.URIWorkerRequest; import org.xbib.util.concurrent.Worker; import org.xbib.util.concurrent.WorkerProvider; import java.io.IOException; import java.io.Reader; import java.io.Writer; import java.net.InetAddress; import java.net.NetworkInterface; import java.net.URI; import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.Queue; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import static org.xbib.common.settings.Settings.settingsBuilder; public abstract class Converter<P extends Worker<URIWorkerRequest>> extends AbstractWorker<URIWorkerRequest> implements CommandLineInterpreter { private final static Logger logger = LogManager.getLogger(Converter.class.getName()); protected Reader reader; protected Writer writer; protected static Settings settings; protected static Session<StringPacket> session; protected ForkJoinPipeline<URIWorkerRequest, Worker<URIWorkerRequest>> pipeline; @Override public Converter<P> reader(Reader reader) { this.reader = reader; setSettings(settingsBuilder().loadFromReader(reader).build()); return this; } @Override public Converter<P> writer(Writer writer) { this.writer = writer; return this; } @Override public void run() throws Exception { try { setQueue(newQueue()); logger.info("preparing sink"); prepareSink(); logger.info("preparing source"); prepareSource(); int concurrency = settings.getAsInt("concurrency", 1); logger.info("preparing pipeline"); pipeline = new ForkJoinPipeline<URIWorkerRequest, Worker<URIWorkerRequest>>() .setConcurrency(concurrency) .setQueue(getQueue()) .setProvider(provider()) .prepare(); logger.info("executing pipeline with {} workers", concurrency); pipeline.execute().waitFor(); logger.info("execution completed"); } catch (Throwable t) { logger.error(t.getMessage(), t); } finally { cleanup(); if (pipeline != null) { pipeline.shutdown(); for (Worker worker : pipeline.getWorkers()) { writeMetrics(worker.getMetric(), writer); } } } } @Override public void close() throws IOException { logger.info("worker close (no op)"); } @Override public void newRequest(Worker<URIWorkerRequest> worker, URIWorkerRequest request) { try { URI uri = request.get(); logger.info("processing URI {}", uri); process(uri); } catch (Throwable ex) { logger.error(request.get() + ": error while processing input: " + ex.getMessage(), ex); } } public void setSettings(Settings newSettings) { settings = newSettings; } protected void prepareSink() throws IOException { } protected void prepareSource() throws IOException { if (settings.get("runhost") != null) { logger.info("preparing input queue only on runhost={}", settings.get("runhost")); boolean found = false; Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces(); for (NetworkInterface netint : Collections.list(nets)) { Enumeration<InetAddress> inetAddresses = netint.getInetAddresses(); for (InetAddress addr : Collections.list(inetAddresses)) { if (addr.getHostName().equals(settings.get("runhost"))) { found = true; } } } if (!found) { logger.error("configured run host {} not found, exiting", settings.get("runhost")); System.exit(1); } } if (settings.getAsArray("uri").length > 0) { logger.info("preparing input queue from uri array={}", Arrays.asList(settings.getAsArray("uri"))); String[] inputs = settings.getAsArray("uri"); setQueue(new ArrayBlockingQueue<URIWorkerRequest>(inputs.length, true)); for (String input : inputs) { URIWorkerRequest request = new URIWorkerRequest(); request.set(URI.create(input)); getQueue().offer(request); } } else if (settings.get("uri") != null) { logger.info("preparing input queue from uri={}", settings.get("uri")); String input = settings.get("uri"); URIWorkerRequest element = new URIWorkerRequest(); element.set(URI.create(input)); getQueue().offer(element); // parallel URI into queue? if (settings.getAsBoolean("parallel", false)) { for (int i = 1; i < settings.getAsInt("concurrency", 1); i++) { element = new URIWorkerRequest(); element.set(URI.create(input)); getQueue().offer(element); } } } else if (settings.get("path") != null) { logger.info("preparing input queue from pattern={}", settings.get("pattern")); Queue<URI> uris = new Finder(settings.get("pattern")) .find(settings.get("path")) .pathSorted(settings.getAsBoolean("isPathSorted", false)) .chronologicallySorted(settings.getAsBoolean("isChronologicallySorted", false)) .getURIs(); logger.info("input from path = {}", uris); setQueue(new ArrayBlockingQueue<URIWorkerRequest>(uris.size(), true)); for (URI uri : uris) { URIWorkerRequest element = new URIWorkerRequest(); element.set(uri); getQueue().offer(element); } } else if (settings.get("archive") != null) { logger.info("preparing input queue from archive={}", settings.get("archive")); URIWorkerRequest element = new URIWorkerRequest(); element.set(URI.create(settings.get("archive"))); getQueue().offer(element); TarConnectionFactory factory = new TarConnectionFactory(); Connection<TarSession> connection = factory.getConnection(URI.create(settings.get("archive"))); session = connection.createSession(); session.open(Session.Mode.READ); } } protected Converter<P> cleanup() throws IOException { if (session != null) { session.close(); } return this; } protected void writeMetrics(MeterMetric metric, Writer writer) throws Exception { // TODO } protected BlockingQueue<URIWorkerRequest> newQueue() { return new ArrayBlockingQueue<URIWorkerRequest>(32, true); } protected abstract WorkerProvider<Worker<URIWorkerRequest>> provider(); protected abstract void process(URI uri) throws Exception; }