/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.streams.rss.provider;

import org.apache.streams.core.StreamsDatum;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.testng.annotations.Test;

import java.net.URL;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

/**
 * Integration tests for {@link org.apache.streams.rss.provider.RssStreamProviderTask}.
 *
 * <p>Every test feeds RSS fixtures to {@code queueFeedEntries(URL)} and compares the size of the
 * returned batch of entry ids with the number of datums placed on the output queue.
 *
 * <p>Each test uses its own feed key when constructing the task and when recording a batch in
 * {@code PREVIOUSLY_SEEN}, so that a batch recorded by one test cannot suppress entries in another.
 * NOTE(review): this presumes {@code PREVIOUSLY_SEEN} is keyed by the feed name handed to the
 * constructor and may be shared between task instances - confirm against RssStreamProviderTask.
 */
public class RssStreamProviderTaskIT {

  /** First RSS fixture; the tests below expect it to yield a batch of 20 entry ids. */
  private static final String ECONOMIST_1 = "resource:///test_rss_xml/economist1.xml";

  /** Second RSS fixture; the tests below expect it to yield a batch of 25 entry ids. */
  private static final String ECONOMIST_2 = "resource:///test_rss_xml/economist2.xml";

  /** Failure message used when every entry of a batch is expected to have been queued. */
  private static final String SIZE_MISMATCH =
      "Expected batch size to be the same as amount of queued datums";

  /**
   * Builds the "published since" cut-off shared by the time-framed tests.
   *
   * <p>The value starts from the current instant and only replaces year, day of month and month
   * (2014-09-05), so its time of day is whatever the clock reads when the test runs.
   * NOTE(review): that makes the cut-off depend on the time of day of the run; consider a fixed
   * instant once the publication times in the fixtures have been checked.
   *
   * @return the cut-off, expressed in UTC
   */
  private static DateTime publishedSinceCutoff() {
    return new DateTime()
        .withYear(2014)
        .withDayOfMonth(5)
        .withMonthOfYear(9)
        .withZone(DateTimeZone.UTC);
  }

  /**
   * Test that a task can read a valid rss from a url and queue the data, and that a
   * non-perpetual task queues the same entries again on a second pull.
   *
   * @throws Exception Exception
   */
  @Test
  public void testNonPerpetualNoTimeFramePull() throws Exception {
    // Presumably installs the handler for the "resource:" URL scheme used by the fixtures.
    com.healthmarketscience.common.util.resource.Handler.init();
    final String feedKey = "fake url testNonPerpetualNoTimeFramePull";
    BlockingQueue<StreamsDatum> queue = new LinkedBlockingQueue<>();
    RssStreamProviderTask task = new RssStreamProviderTask(queue, feedKey);

    Set<String> batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), batch.size(), SIZE_MISMATCH);
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that it will output previously seen articles
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), batch.size(), SIZE_MISMATCH);
  }

  /**
   * Test that perpetual streams will not output previously seen articles.
   *
   * @throws Exception Exception
   */
  @Test
  public void testPerpetualNoTimeFramePull() throws Exception {
    // Presumably installs the handler for the "resource:" URL scheme used by the fixtures.
    com.healthmarketscience.common.util.resource.Handler.init();
    final String feedKey = "fake url testPerpetualNoTimeFramePull";
    BlockingQueue<StreamsDatum> queue = new LinkedBlockingQueue<>();
    RssStreamProviderTask task =
        new RssStreamProviderTask(queue, feedKey, new DateTime().minusYears(10), 10000, true);

    Set<String> batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), batch.size(), SIZE_MISMATCH);
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that it will not output previously seen articles
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), 0);
    assertEquals(batch.size(), 20);
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that not seen urls aren't blocked.
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_2));
    assertEquals(queue.size(), batch.size());
    assertEquals(queue.size(), 25);
    assertEquals(batch.size(), 25);
  }

  /**
   * Test that a task will only output articles published after a certain time.
   *
   * @throws Exception Exception
   */
  @Test
  public void testNonPerpetualTimeFramedPull() throws Exception {
    // Presumably installs the handler for the "resource:" URL scheme used by the fixtures.
    com.healthmarketscience.common.util.resource.Handler.init();
    final String feedKey = "fake url testNonPerpetualTimeFramedPull";
    BlockingQueue<StreamsDatum> queue = new LinkedBlockingQueue<>();
    RssStreamProviderTask task =
        new RssStreamProviderTask(queue, feedKey, publishedSinceCutoff(), 10000, false);

    Set<String> batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), 15);
    assertEquals(batch.size(), 20);
    assertTrue(queue.size() < batch.size());
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that it will output previously seen articles
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), 15);
    assertEquals(batch.size(), 20);
    assertTrue(queue.size() < batch.size());
  }

  /**
   * Test that a task will only output articles published after a certain time that it has not
   * seen before.
   *
   * @throws Exception Exception
   */
  @Test
  public void testPerpetualTimeFramedPull() throws Exception {
    // Presumably installs the handler for the "resource:" URL scheme used by the fixtures.
    com.healthmarketscience.common.util.resource.Handler.init();
    final String feedKey = "fake url testPerpetualTimeFramedPull";
    BlockingQueue<StreamsDatum> queue = new LinkedBlockingQueue<>();
    RssStreamProviderTask task =
        new RssStreamProviderTask(queue, feedKey, publishedSinceCutoff(), 10000, true);

    Set<String> batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), 15);
    assertEquals(batch.size(), 20);
    assertTrue(queue.size() < batch.size());
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that it will not output previously seen articles
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_1));
    assertEquals(queue.size(), 0);
    assertEquals(batch.size(), 20);
    assertTrue(queue.size() < batch.size());
    task.PREVIOUSLY_SEEN.put(feedKey, batch);

    // Test that unseen articles are still subject to the published-since cut-off.
    // The queue was just asserted empty; it is cleared anyway to mirror the other phases.
    queue.clear();
    batch = task.queueFeedEntries(new URL(ECONOMIST_2));
    assertTrue(queue.size() < batch.size());
    assertEquals(queue.size(), 3);
    assertEquals(batch.size(), 25);
  }
}