package focusedCrawler.link.frontier.selector;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Random;

import com.google.common.collect.MinMaxPriorityQueue;

import focusedCrawler.link.frontier.LinkRelevance;

/**
 * Implements a link selection strategy that picks links from the storage at random.
 *
 * <p>Each evaluated link with a relevance greater than zero is tagged with a random
 * sort key and offered to a size-bounded queue ordered by that key. Once the queue is
 * full, the bound evicts the entries with the largest keys, so what remains is a random
 * subset of at most {@code numberOfLinks} eligible links.
 *
 * <p>Expected call sequence: {@link #startSelection(int)}, then
 * {@link #evaluateLink(LinkRelevance)} for every candidate, then
 * {@link #getSelectedLinks()}.
 */
public class RandomLinkSelector implements LinkSelector {

    /** Orders entries by their random sort key, ascending. */
    private static final Comparator<RandomLink> BY_SORT_KEY = new Comparator<RandomLink>() {
        @Override
        public int compare(RandomLink o1, RandomLink o2) {
            return Double.compare(o1.sortKey, o2.sortKey);
        }
    };

    private Random random = new Random();

    /** Candidates of the selection round in progress; {@code null} when no round is active. */
    private MinMaxPriorityQueue<RandomLink> links;

    /**
     * Pairs a link with the random key that decides whether it survives in the bounded
     * queue. Declared {@code static} so queued entries do not each hold a hidden
     * reference to the enclosing selector.
     */
    private static final class RandomLink {

        /** Random value drawn for this link; unrelated to the link's own relevance. */
        final double sortKey;
        final LinkRelevance link;

        RandomLink(LinkRelevance link, double sortKey) {
            this.link = link;
            this.sortKey = sortKey;
        }
    }

    /**
     * Begins a new selection round, discarding any candidates from a previous one.
     *
     * @param numberOfLinks maximum number of links to keep. NOTE(review): Guava's
     *        {@code MinMaxPriorityQueue} builder is documented to reject non-positive
     *        maximum sizes with an {@code IllegalArgumentException} - confirm callers
     *        never pass zero.
     */
    @Override
    public void startSelection(int numberOfLinks) {
        links = MinMaxPriorityQueue
                .orderedBy(BY_SORT_KEY)
                .maximumSize(numberOfLinks) // keep only top-k items
                .create();
    }

    /**
     * Offers a link to the current selection round. Links whose relevance is not
     * greater than zero are ignored.
     *
     * @param link the candidate link
     * @throws NullPointerException if no selection round is active, i.e.
     *         {@link #startSelection(int)} has not been called since construction or
     *         since the last {@link #getSelectedLinks()}
     */
    @Override
    public void evaluateLink(LinkRelevance link) {
        if (link.getRelevance() > 0) {
            this.links.add(new RandomLink(link, random.nextDouble()));
        }
    }

    /**
     * Ends the current selection round and returns the links that were kept.
     *
     * <p>The internal queue is released, so a new round must be started with
     * {@link #startSelection(int)} before evaluating more links. If no round is active
     * (this method was already called, or {@code startSelection} never was), an empty
     * list is returned instead of failing.
     *
     * @return a new mutable list with the selected links, in the queue's iteration order
     */
    @Override
    public List<LinkRelevance> getSelectedLinks() {
        MinMaxPriorityQueue<RandomLink> sampled = this.links;
        this.links = null; // clean-up reference
        if (sampled == null) {
            return new ArrayList<>();
        }
        List<LinkRelevance> selectedLinks = new ArrayList<>(sampled.size());
        for (RandomLink entry : sampled) {
            selectedLinks.add(entry.link);
        }
        return selectedLinks;
    }
}