/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.streams.rss.provider;
import org.apache.streams.config.ComponentConfigurator;
import org.apache.streams.config.StreamsConfiguration;
import org.apache.streams.config.StreamsConfigurator;
import org.apache.streams.core.StreamsDatum;
import org.apache.streams.core.StreamsProvider;
import org.apache.streams.core.StreamsResultSet;
import org.apache.streams.jackson.StreamsJacksonMapper;
import org.apache.streams.rss.RssStreamConfiguration;
import org.apache.streams.rss.provider.perpetual.RssFeedScheduler;
import org.apache.streams.util.ComponentUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.Uninterruptibles;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigParseOptions;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.math.BigInteger;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * RSS {@link org.apache.streams.core.StreamsProvider} that provides content from rss feeds in boilerpipe format.
 *
 * <p>Lifecycle: {@link #prepare(Object)} creates the executor, the data queue and the
 * {@link RssFeedScheduler}; it must therefore be called before {@link #startStream()},
 * {@link #readCurrent()}, {@link #isRunning()} or {@link #cleanUp()}, all of which
 * dereference that state.
 */
public class RssStreamProvider implements StreamsProvider {

  private static final String STREAMS_ID = "RssStreamProvider";
  private static final Logger LOGGER = LoggerFactory.getLogger(RssStreamProvider.class);

  /** Upper bound on the number of datums returned by a single {@link #readCurrent()} call. */
  private static final int MAX_SIZE = 1000;

  private final RssStreamConfiguration config;
  private final boolean perpetual;

  // The fields below are (re)created by prepare(Object).
  private ExecutorService executor;
  private BlockingQueue<StreamsDatum> dataQueue;
  private AtomicBoolean isComplete;

  @VisibleForTesting
  protected RssFeedScheduler scheduler;

  /**
   * Creates a non-perpetual provider configured from the "rss" section of the
   * configuration returned by {@link StreamsConfigurator#getConfig()}.
   */
  public RssStreamProvider() {
    this(new ComponentConfigurator<>(RssStreamConfiguration.class)
        .detectConfiguration(StreamsConfigurator.getConfig().getConfig("rss")), false);
  }

  /**
   * Creates a provider configured from the "rss" section of the configuration
   * returned by {@link StreamsConfigurator#getConfig()}.
   *
   * @param perpetual selects which {@link RssFeedScheduler} constructor is used, see
   *                  {@link #getScheduler(BlockingQueue)}
   */
  public RssStreamProvider(boolean perpetual) {
    this(new ComponentConfigurator<>(RssStreamConfiguration.class)
        .detectConfiguration(StreamsConfigurator.getConfig().getConfig("rss")), perpetual);
  }

  /**
   * Creates a non-perpetual provider using the supplied configuration.
   *
   * @param config rss stream configuration
   */
  public RssStreamProvider(RssStreamConfiguration config) {
    this(config, false);
  }

  /**
   * Creates a provider using the supplied configuration.
   *
   * @param config    rss stream configuration; its feeds are handed to the scheduler
   * @param perpetual selects which {@link RssFeedScheduler} constructor is used, see
   *                  {@link #getScheduler(BlockingQueue)}
   */
  public RssStreamProvider(RssStreamConfiguration config, boolean perpetual) {
    this.perpetual = perpetual;
    this.config = config;
  }

  @Override
  public String getId() {
    return STREAMS_ID;
  }

  /**
   * Submits the scheduler to the executor. Requires a prior call to {@link #prepare(Object)}.
   */
  @Override
  public void startStream() {
    LOGGER.trace("Starting Rss Scheduler");
    this.executor.submit(this.scheduler);
  }

  /**
   * Drains up to {@link #MAX_SIZE} datums from the data queue and updates the completion flag.
   *
   * <p>The provider is marked complete only when the scheduler reports completion AND this
   * read produced nothing AND the queue is empty, so a final empty read is needed before
   * {@link #isRunning()} turns false.
   *
   * @return result set wrapping the drained batch (possibly empty)
   */
  @Override
  public StreamsResultSet readCurrent() {
    Queue<StreamsDatum> batch = new ConcurrentLinkedQueue<>();
    int batchSize = 0;
    while (!this.dataQueue.isEmpty() && batchSize < MAX_SIZE) {
      StreamsDatum datum = ComponentUtils.pollWhileNotEmpty(this.dataQueue);
      if (datum != null) {
        ++batchSize;
        batch.add(datum);
      }
    }
    this.isComplete.set(this.scheduler.isComplete() && batch.isEmpty() && this.dataQueue.isEmpty());
    return new StreamsResultSet(batch);
  }

  /**
   * Not implemented by this provider.
   *
   * @param sequence ignored
   * @return always {@code null}
   */
  @Override
  public StreamsResultSet readNew(BigInteger sequence) {
    return null;
  }

  /**
   * Not implemented by this provider.
   *
   * @param start ignored
   * @param end   ignored
   * @return always {@code null}
   */
  @Override
  public StreamsResultSet readRange(DateTime start, DateTime end) {
    return null;
  }

  /**
   * Reports whether more data may still arrive.
   *
   * @return {@code true} until {@link #readCurrent()} has flagged the stream as complete
   */
  @Override
  public boolean isRunning() {
    return !this.isComplete.get();
  }

  /**
   * Creates the executor, data queue, scheduler and completion flag.
   *
   * @param configurationObject not used by this implementation
   */
  @Override
  public void prepare(Object configurationObject) {
    this.executor = new ThreadPoolExecutor(1, 4, 15L, TimeUnit.SECONDS, new LinkedBlockingQueue<>());
    this.dataQueue = new LinkedBlockingQueue<>();
    this.scheduler = getScheduler(this.dataQueue);
    this.isComplete = new AtomicBoolean(false);
  }

  /**
   * Builds the scheduler that feeds {@code queue}.
   *
   * <p>NOTE(review): in non-perpetual mode the scheduler is constructed with a trailing
   * {@code 0} argument; presumably this is a wait interval that makes the scheduler run a
   * single pass - confirm against {@link RssFeedScheduler}.
   *
   * @param queue queue the scheduler writes datums to
   * @return a new scheduler bound to this provider's executor and configured feeds
   */
  @VisibleForTesting
  protected RssFeedScheduler getScheduler(BlockingQueue<StreamsDatum> queue) {
    if (this.perpetual) {
      return new RssFeedScheduler(this.executor, this.config.getFeeds(), queue);
    } else {
      return new RssFeedScheduler(this.executor, this.config.getFeeds(), queue, 0);
    }
  }

  /**
   * Stops the scheduler and shuts down the executor.
   */
  @Override
  public void cleanUp() {
    this.scheduler.stop();
    ComponentUtils.shutdownExecutor(this.executor, 10, 10);
  }

  /**
   * To use from command line:
   *
   * <p>
   * Supply configuration similar to src/test/resources/rss.conf
   *
   * <p>
   * Launch using:
   *
   * <p>
   * mvn exec:java -Dexec.mainClass=org.apache.streams.rss.provider.RssStreamProvider -Dexec.args="rss.conf articles.json"
   *
   * <p>
   * Each document is written to the output file as one line of UTF-8 encoded JSON.
   *
   * @param args args: configuration file path, output file path
   * @throws Exception Exception
   */
  public static void main(String[] args) throws Exception {
    Preconditions.checkArgument(args.length >= 2,
        "Usage: RssStreamProvider <config-file> <output-file>");

    String configfile = args[0];
    String outfile = args[1];

    Config reference = ConfigFactory.load();
    File file = new File(configfile);
    // A plain `assert` is skipped unless the JVM runs with -ea, so validate explicitly.
    Preconditions.checkArgument(file.exists(), "Configuration file does not exist: %s", configfile);

    Config testResourceConfig = ConfigFactory.parseFileAnySyntax(file, ConfigParseOptions.defaults().setAllowMissing(false));
    Config typesafe = testResourceConfig.withFallback(reference).resolve();

    StreamsConfiguration streamsConfiguration = StreamsConfigurator.detectConfiguration(typesafe);
    RssStreamConfiguration config = new ComponentConfigurator<>(RssStreamConfiguration.class).detectConfiguration(typesafe, "rss");
    RssStreamProvider provider = new RssStreamProvider(config);

    ObjectMapper mapper = StreamsJacksonMapper.getInstance();

    // try-with-resources guarantees the output file is flushed and closed even when the
    // read loop fails; the charset is pinned so the JSON does not depend on the platform default.
    try (PrintStream outStream = new PrintStream(
        new BufferedOutputStream(new FileOutputStream(outfile)), false, "UTF-8")) {
      provider.prepare(config);
      try {
        provider.startStream();
        do {
          Uninterruptibles.sleepUninterruptibly(streamsConfiguration.getBatchFrequencyMs(), TimeUnit.MILLISECONDS);
          for (StreamsDatum datum : provider.readCurrent()) {
            String json;
            try {
              json = mapper.writeValueAsString(datum.getDocument());
              outStream.println(json);
            } catch (JsonProcessingException ex) {
              // Best effort: report the document that could not be serialized and keep going.
              System.err.println(ex.getMessage());
            }
          }
        }
        while (provider.isRunning());
      } finally {
        // Always stop the scheduler and executor, otherwise their threads can keep the JVM alive.
        provider.cleanUp();
      }
      outStream.flush();
    }
  }
}