/**
* Licensed to DigitalPebble Ltd under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* DigitalPebble licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.digitalpebble.storm.crawler.spout;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.utils.Utils;
import com.digitalpebble.storm.crawler.util.StringTabScheme;
/**
 * Spout that emits URLs picked at random from a finite set. Useful for
 * testing.
 * <p>
 * Each call to {@link #nextTuple()} on an active spout sleeps for 100 ms and
 * then emits at most one URL. By default the emitted entry is removed from the
 * set, so the spout stops emitting once every entry has been sent; use
 * {@link #setRemoveAfterSending(boolean)} to keep entries and emit forever.
 */
@SuppressWarnings("serial")
public class RandomURLSpout extends BaseRichSpout {

    private SpoutOutputCollector _collector;

    private Random _rand;

    private StringTabScheme scheme = new StringTabScheme();

    /** Toggled by {@link #activate()} / {@link #deactivate()}. */
    private boolean active = true;

    /** Whether an entry is dropped from {@link #urls} once emitted. */
    private boolean removeAfterSending = true;

    /** Remaining candidate URLs; replaced by a shorter array on removal. */
    private String[] urls = new String[] { "http://www.lequipe.fr/",
            "http://www.lemonde.fr/", "http://www.bbc.co.uk/",
            "http://www.facebook.com/", "http://www.rmc.fr" };

    /**
     * Creates a spout emitting from the given URLs instead of the built-in
     * defaults.
     *
     * @param urls
     *            the URLs to pick from; a {@code null} array is treated as an
     *            empty set. The array is copied, so later changes made by
     *            the caller do not affect the spout.
     */
    public RandomURLSpout(String... urls) {
        // Guard against an explicit null varargs array, which would
        // otherwise only surface as a NullPointerException in nextTuple().
        this.urls = (urls == null) ? new String[0] : urls.clone();
    }

    /** Creates a spout emitting from the built-in default URLs. */
    public RandomURLSpout() {
    }

    /**
     * Sets whether a URL is removed from the set after it has been emitted.
     * Defaults to {@code true}.
     *
     * @param remove
     *            {@code true} to drop each entry once emitted, {@code false}
     *            to keep picking from the full set
     **/
    public void setRemoveAfterSending(boolean remove) {
        removeAfterSending = remove;
    }

    /**
     * Stores the collector and creates the random number generator. The
     * configuration and topology context are not used.
     */
    @Override
    public void open(@SuppressWarnings("rawtypes") Map conf,
            TopologyContext context, SpoutOutputCollector collector) {
        _collector = collector;
        _rand = new Random();
    }

    /**
     * Emits one randomly chosen URL, using the URL string as the message id.
     * Does nothing while the spout is deactivated; otherwise sleeps 100 ms
     * first, and emits nothing if no URLs are left.
     */
    @Override
    public void nextTuple() {
        if (!active) {
            return;
        }

        Utils.sleep(100);

        if (urls.length == 0) {
            return;
        }

        int pos = _rand.nextInt(urls.length);
        String url = urls[pos];

        // The tuple values are built by the scheme from the URL's UTF-8 bytes.
        _collector.emit(
                scheme.deserialize(url.getBytes(StandardCharsets.UTF_8)), url);

        if (removeAfterSending) {
            urls = withoutIndex(urls, pos);
        }
    }

    /** Returns a copy of {@code source} with the element at {@code pos} removed. */
    private static String[] withoutIndex(String[] source, int pos) {
        String[] remaining = new String[source.length - 1];
        System.arraycopy(source, 0, remaining, 0, pos);
        System.arraycopy(source, pos + 1, remaining, pos, source.length - pos
                - 1);
        return remaining;
    }

    /** Declares the output fields defined by the scheme. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(scheme.getOutputFields());
    }

    /** Resumes emission in {@link #nextTuple()}. */
    @Override
    public void activate() {
        super.activate();
        active = true;
    }

    /** Makes {@link #nextTuple()} return immediately without emitting. */
    @Override
    public void deactivate() {
        super.deactivate();
        active = false;
    }
}