package uk.bl.monitrix.model;
import java.util.List;
/**
 * Read/query access to the list of hosts known to (i.e. crawled by) the system.
 *
 * <p>Besides simple counting and lookup operations, the interface offers a set of
 * paginated searches (by keyword query, top-level domain, and several per-host
 * crawl statistics).
 *
 * @author Rainer Simon <rainer.simon@ait.ac.at>
 */
public interface KnownHostList {

    /**
     * Counts all known hosts.
     *
     * @return the total number of hosts in the list
     */
    long count();

    /**
     * Counts the hosts for which at least one URL was successfully resolved.
     *
     * @return the number of successfully crawled hosts
     */
    long countSuccessful();

    /**
     * Returns the largest average delay (fetch duration) encountered over all hosts.
     *
     * @return the maximum average delay
     */
    long getMaxFetchDuration();

    /**
     * Tests whether the given hostname is already contained in the known hosts list.
     *
     * @param hostname the hostname to look for
     * @return {@code true} if the host is already in the list
     */
    boolean isKnown(String hostname);

    /**
     * Looks up the host record stored in the list for the given hostname.
     *
     * @param hostname the hostname to look up
     * @return the known host record
     */
    KnownHost getKnownHost(String hostname);

    /**
     * Runs a (e.g. keyword) query against the host list. Which kinds of queries are
     * supported depends on the implementation; consult the documentation of the
     * specific implementation in use.
     *
     * @param query the search query
     * @param limit the maximum number of results to return
     * @param offset the result page offset
     * @return the search result
     */
    SearchResult searchHosts(String query, int limit, int offset);

    /**
     * Finds the hosts registered under the given top-level domain, with pagination.
     *
     * @param tld the top-level domain
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the search result
     */
    SearchResult searchByTopLevelDomain(String tld, int limit, int offset);

    /**
     * Finds the hosts whose average delay lies within the given bracket.
     *
     * @param min the minimum average delay
     * @param max the maximum average delay
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the search result
     */
    SearchResult searchByAverageFetchDuration(long min, long max, int limit, int offset);

    /**
     * Finds the hosts whose average retry rate lies within the given bracket.
     *
     * @param min the minimum number of retries
     * @param max the maximum number of retries
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the search result
     */
    SearchResult searchByAverageRetries(int min, int max, int limit, int offset);

    /**
     * Finds the hosts for which the percentage of fetch attempts precluded by
     * robots.txt lies within the given range.
     *
     * @param min the minimum robots.txt-block percentage
     * @param max the maximum robots.txt-block percentage
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the search result
     */
    SearchResult searchByRobotsBlockPercentage(double min, double max, int limit, int offset);

    /**
     * Finds the hosts for which the percentage of redirects (HTTP 3xx) lies within
     * the given range.
     *
     * @param min the minimum redirect percentage
     * @param max the maximum redirect percentage
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the search result
     */
    SearchResult searchByRedirectPercentage(double min, double max, int limit, int offset);

    /**
     * Returns the hosts which have been crawled since the given timestamp.
     *
     * @param since the timestamp
     * @return the list of hosts visited since the timestamp
     */
    List<KnownHost> getCrawledHosts(long since);

    /**
     * Lists the top-level domains that were encountered during the crawl.
     *
     * @return the list of top-level domains
     */
    List<String> getTopLevelDomains();

    /**
     * Counts the hosts registered under the given top-level domain.
     *
     * @param tld the top-level domain
     * @return the number of hosts registered under the top-level domain
     */
    long countForTopLevelDomain(String tld);

}