/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.streams.sysomos.provider; import org.apache.streams.config.ComponentConfigurator; import org.apache.streams.config.StreamsConfiguration; import org.apache.streams.config.StreamsConfigurator; import org.apache.streams.core.StreamsDatum; import org.apache.streams.core.StreamsProvider; import org.apache.streams.core.StreamsResultSet; import org.apache.streams.data.util.RFC3339Utils; import org.apache.streams.jackson.StreamsJacksonMapper; import org.apache.streams.sysomos.SysomosConfiguration; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import com.google.common.base.Splitter; import com.google.common.util.concurrent.Uninterruptibles; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import com.typesafe.config.ConfigParseOptions; import org.apache.commons.lang.NotImplementedException; import org.joda.time.DateTime; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.PrintStream; import java.math.BigInteger; import java.util.HashSet; import java.util.Map; import java.util.Queue; import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; /** * Streams Provider for the Sysomos Heartbeat API * <p/> * Configuration: * The provider takes either a Map[String,Object] containing the mode (backfill and terminate OR continuous) and a * Map[String,String] of heartbeat IDs to document target ids or a string of the format * ${heartbeatId}:${documentId},...,${heartbeatId}:${documentId} * This configuration will configure the provider to backfill to the specified document and either terminate or not * depending on the mode flag. Continuous mode is assumed, and is the ony mode supported by the String configuration. */ public class SysomosProvider implements StreamsProvider { public static final String STREAMS_ID = "SysomosProvider"; public static final String ENDING_TIME_KEY = "addedBefore"; public static final String STARTING_TIME_KEY = "addedAfter"; public static final String MODE_KEY = "mode"; public static final String STARTING_DOCS_KEY = "startingDocs"; public static final int LATENCY = 10000; //Default minLatency for querying the Sysomos API in milliseconds public static final long PROVIDER_BATCH_SIZE = 10000L; //Default maximum size of the queue public static final long API_BATCH_SIZE = 1000L; //Default maximum size of an API request private static final Logger LOGGER = LoggerFactory.getLogger(SysomosProvider.class); private final ReadWriteLock lock = new ReentrantReadWriteLock(); private final Set<String> completedHeartbeats = new HashSet<>(); private final long maxQueued; private final long minLatency; private final long scheduledLatency; private final long maxApiBatch; protected volatile Queue<StreamsDatum> providerQueue; private SysomosClient client; private SysomosConfiguration config; private ScheduledExecutorService stream; private Map<String, String> documentIds; private Map<String, String> addedBefore; private Map<String, String> addedAfter; private Mode mode = Mode.CONTINUOUS; private boolean started = false; private AtomicInteger count; /** * SysomosProvider constructor. * @param sysomosConfiguration SysomosConfiguration */ public SysomosProvider(SysomosConfiguration sysomosConfiguration) { this.config = sysomosConfiguration; this.client = new SysomosClient(sysomosConfiguration.getApiKey()); this.maxQueued = sysomosConfiguration.getMaxBatchSize() == null ? PROVIDER_BATCH_SIZE : sysomosConfiguration.getMaxBatchSize(); this.minLatency = sysomosConfiguration.getMinDelayMs() == null ? LATENCY : sysomosConfiguration.getMinDelayMs(); this.scheduledLatency = sysomosConfiguration.getScheduledDelayMs() == null ? (LATENCY * 15) : sysomosConfiguration.getScheduledDelayMs(); this.maxApiBatch = sysomosConfiguration.getMinDelayMs() == null ? API_BATCH_SIZE : sysomosConfiguration.getApiBatchSize(); this.count = new AtomicInteger(); } /** * To use from command line: * <p/> * Supply configuration similar to src/test/resources/rss.conf * <p/> * Launch using: * <p/> * mvn exec:java -Dexec.mainClass=org.apache.streams.rss.provider.RssStreamProvider -Dexec.args="rss.conf articles.json" * @param args args * @throws Exception Exception */ public static void main(String[] args) throws Exception { Preconditions.checkArgument(args.length >= 2); String configfile = args[0]; String outfile = args[1]; Config reference = ConfigFactory.load(); File file = new File(configfile); assert (file.exists()); Config testResourceConfig = ConfigFactory.parseFileAnySyntax(file, ConfigParseOptions.defaults().setAllowMissing(false)); Config typesafe = testResourceConfig.withFallback(reference).resolve(); StreamsConfiguration streamsConfiguration = StreamsConfigurator.detectConfiguration(typesafe); SysomosConfiguration config = new ComponentConfigurator<>(SysomosConfiguration.class).detectConfiguration(typesafe, "rss"); SysomosProvider provider = new SysomosProvider(config); ObjectMapper mapper = StreamsJacksonMapper.getInstance(); PrintStream outStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(outfile))); provider.prepare(config); provider.startStream(); do { Uninterruptibles.sleepUninterruptibly(streamsConfiguration.getBatchFrequencyMs(), TimeUnit.MILLISECONDS); for (StreamsDatum datum : provider.readCurrent()) { String json; try { json = mapper.writeValueAsString(datum.getDocument()); outStream.println(json); } catch (JsonProcessingException ex) { System.err.println(ex.getMessage()); } } } while (provider.isRunning()); provider.cleanUp(); outStream.flush(); } public SysomosConfiguration getConfig() { return config; } public void setConfig(SysomosConfiguration config) { this.config = config; } public Mode getMode() { return mode; } public long getMinLatency() { return minLatency; } public long getMaxApiBatch() { return maxApiBatch; } public SysomosClient getClient() { return client; } @Override public String getId() { return STREAMS_ID; } @Override public void startStream() { LOGGER.trace("Starting Producer"); if (!started) { LOGGER.trace("Producer not started. Initializing"); stream = Executors.newScheduledThreadPool(getConfig().getHeartbeatIds().size() + 1); for (String heartbeatId : getConfig().getHeartbeatIds()) { Runnable task = createStream(heartbeatId); stream.scheduleWithFixedDelay(task, 0, this.scheduledLatency, TimeUnit.MILLISECONDS); LOGGER.info("Started producer task for heartbeat {}", heartbeatId); } started = true; } } @Override public StreamsResultSet readCurrent() { StreamsResultSet current; try { lock.writeLock().lock(); LOGGER.debug("Creating new result set for {} items", providerQueue.size()); count.addAndGet(providerQueue.size()); current = new StreamsResultSet(providerQueue); providerQueue = constructQueue(); } finally { lock.writeLock().unlock(); } return current; } @Override public StreamsResultSet readNew(BigInteger bigInteger) { throw new NotImplementedException("readNew not currently implemented"); } @Override public StreamsResultSet readRange(DateTime dateTime, DateTime dateTime2) { throw new NotImplementedException("readRange not currently implemented"); } //If the provider queue still has data, we are still running. If not, we are running if we have not been signaled //by all completed heartbeats so long as the thread pool is alive @Override public boolean isRunning() { return providerQueue.size() > 0 || (completedHeartbeats.size() < this.getConfig().getHeartbeatIds().size() && !(stream.isTerminated() || stream.isShutdown())); } @Override public void prepare(Object configurationObject) { this.providerQueue = constructQueue(); if (configurationObject instanceof Map) { extractConfigFromMap((Map)configurationObject); } else if (configurationObject instanceof String) { documentIds = Splitter.on(";").trimResults().withKeyValueSeparator("=").split((String)configurationObject); } } @Override public void cleanUp() { stream.shutdown(); // Disable new tasks from being submitted try { // Wait a while for existing tasks to terminate if (!stream.awaitTermination(60, TimeUnit.SECONDS)) { stream.shutdownNow(); // Cancel currently executing tasks // Wait a while for tasks to respond to being cancelled if (!stream.awaitTermination(60, TimeUnit.SECONDS)) { LOGGER.error("Stream did not terminate"); } } } catch (InterruptedException ie) { // (Re-)Cancel if current thread also interrupted stream.shutdownNow(); // Preserve interrupt status Thread.currentThread().interrupt(); } } /** * signalComplete. * @param heartbeatId heartbeatId */ public void signalComplete(String heartbeatId) { try { this.lock.writeLock().lock(); this.completedHeartbeats.add(heartbeatId); if (!this.isRunning()) { this.cleanUp(); } } finally { this.lock.writeLock().unlock(); } } protected void enqueueItem(StreamsDatum datum) { boolean success; do { try { pauseForSpace(); //Dont lock before this pause. We don't want to block the readCurrent method lock.readLock().lock(); success = providerQueue.offer(datum); Thread.yield(); } finally { lock.readLock().unlock(); } } while (!success); } protected SysomosHeartbeatStream createStream(String heartbeatId) { String afterTime = addedAfter != null && addedAfter.containsKey(heartbeatId) ? addedAfter.get(heartbeatId) : null; String beforeTime = addedBefore != null && addedBefore.containsKey(heartbeatId) ? addedBefore.get(heartbeatId) : null; if (documentIds != null && documentIds.containsKey(heartbeatId)) { return new SysomosHeartbeatStream(this, heartbeatId, documentIds.get(heartbeatId)); } if (afterTime != null) { if (beforeTime != null) { return new SysomosHeartbeatStream(this, heartbeatId, RFC3339Utils.parseToUTC(beforeTime), RFC3339Utils.parseToUTC(afterTime)); } else { return new SysomosHeartbeatStream(this, heartbeatId, null, RFC3339Utils.parseToUTC(afterTime)); } } return new SysomosHeartbeatStream(this, heartbeatId); } /** * Wait for the queue size to be below threshold before allowing execution to continue on this thread. */ protected void pauseForSpace() { while (this.providerQueue.size() >= maxQueued) { LOGGER.trace("Sleeping the current thread due to a full queue"); try { Thread.sleep(100); LOGGER.trace("Resuming thread after wait period"); } catch (InterruptedException ex) { LOGGER.warn("Thread was interrupted", ex); } } } @SuppressWarnings("unchecked") protected void extractConfigFromMap(Map configMap) { if (configMap.containsKey(MODE_KEY)) { Object configMode = configMap.get(MODE_KEY); if (!(configMode instanceof Mode)) { throw new IllegalStateException("Invalid configuration. Mode must be an instance of the Mode enum but was " + configMode); } this.mode = (Mode)configMode; } if (configMap.containsKey(STARTING_DOCS_KEY)) { Object configIds = configMap.get(STARTING_DOCS_KEY); if (!(configIds instanceof Map)) { throw new IllegalStateException("Invalid configuration. StartingDocs must be an instance of Map<String,String> but was " + configIds); } this.documentIds = (Map)configIds; } if (configMap.containsKey(STARTING_TIME_KEY)) { Object configIds = configMap.get(STARTING_TIME_KEY); if (!(configIds instanceof Map)) { throw new IllegalStateException("Invalid configuration. Added after key must be an instance of Map<String,String> but was " + configIds); } this.addedAfter = (Map)configIds; } if (configMap.containsKey(ENDING_TIME_KEY)) { Object configIds = configMap.get(ENDING_TIME_KEY); if (!(configIds instanceof Map)) { throw new IllegalStateException("Invalid configuration. Added before key must be an instance of Map<String,String> but was " + configIds); } this.addedBefore = (Map)configIds; } } private Queue<StreamsDatum> constructQueue() { return new ConcurrentLinkedQueue<>(); } public int getCount() { return this.count.get(); } public enum Mode {CONTINUOUS, BACKFILL_AND_TERMINATE} }