package uk.bl.monitrix.model;
import java.util.Iterator;
import java.util.List;
/**
 * Abstract base class for crawl log back-ends. Concrete subclasses supply the
 * storage-specific queries; this class adds a few utility methods that are
 * derived purely from those queries.
 *
 * <p>All timestamps handled by this class are expected to be in milliseconds
 * since the UNIX epoch: {@link #isIdle()} compares them directly against
 * {@link System#currentTimeMillis()}.
 *
 * @author Rainer Simon <rainer.simon@ait.ac.at>
 */
public abstract class CrawlLog {

    /** Time without crawl activity (in milliseconds) after which the crawl is considered idle: 5 minutes. */
    private static final long IDLE_THRESHOLD_MILLIS = 5 * 60 * 1000L;

    /**
     * Utility method: returns true if the log is empty, i.e. if
     * {@link #countEntries()} reports zero entries.
     * @return <code>true</code> if the log is empty
     */
    public boolean isEmpty() {
        return countEntries() == 0;
    }

    /**
     * Returns the UNIX timestamp (in milliseconds) of the crawl start time,
     * i.e. the timestamp of the first entry written to the log.
     * @return crawl start time
     */
    public abstract long getCrawlStartTime();

    /**
     * Returns the UNIX timestamp (in milliseconds) of the last crawl activity,
     * i.e. the timestamp of the last entry written to the log.
     * @return last crawl activity timestamp
     */
    public abstract long getTimeOfLastCrawlActivity();

    /**
     * Utility method: returns the duration of the crawl so far (in
     * milliseconds), computed as the difference between the time of the last
     * crawl activity and the crawl start time.
     * @return the duration of the crawl
     */
    public long getCrawlDuration() {
        return getTimeOfLastCrawlActivity() - getCrawlStartTime();
    }

    /**
     * Utility method: returns true if the last crawl activity was
     * more than 5 minutes ago (in which case we consider the crawl idle).
     * @return <code>true</code> if the crawl is idle
     */
    public boolean isIdle() {
        long millisSinceLastActivity = System.currentTimeMillis() - getTimeOfLastCrawlActivity();
        return millisSinceLastActivity > IDLE_THRESHOLD_MILLIS;
    }

    /**
     * Returns the N most recent entries in the log.
     * @param n the number of entries to return
     * @return the log entries
     */
    public abstract List<CrawlLogEntry> getMostRecentEntries(int n);

    /**
     * Returns the total number of log entries.
     * @return the total number of log entries
     */
    public abstract long countEntries();

    /**
     * Returns the total number of revisit records.
     * @return the total number of revisit records
     */
    public abstract long countRevisits();

    /**
     * Returns the log file IDs that occur in the DB.
     * @return the list of log IDs
     */
    public abstract List<String> listLogIds();

    /**
     * Returns the total number of log entries received from a specific crawler log file.
     * @param logId the ID of the source log file (see {@link #listLogIds()})
     * @return the number of entries in the DB originating from that file
     */
    public abstract long countEntriesForLog(String logId);

    /**
     * Returns all log entries that exist for the specified URL.
     * @param url the URL
     * @return the log entries
     */
    public abstract List<CrawlLogEntry> getEntriesForURL(String url);

    /**
     * Searches the crawl log with the specified (e.g. keyword) query.
     * Refer to the documentation of specific implementations for the types of
     * queries supported! (Note: on MongoDB only <em>exact matches</em> are
     * supported! This means that - effectively - {@link #searchByURL(String, int, int)}
     * and {@link #getEntriesForURL(String)} return identical data!)
     * @param query the search query
     * @param limit the max number of results to return
     * @param offset the result page offset
     * @return the search result
     */
    public abstract SearchResult searchByURL(String query, int limit, int offset);

    /**
     * Returns the log entries that carry the specified annotation.
     * @param annotation the annotation
     * @param limit the max number of results to return
     * @param offset the result page offset
     * @return the log entries with that annotation
     */
    public abstract SearchResult searchByAnnotation(String annotation, int limit, int offset);

    /**
     * Returns the number of log entries that fall within a specified compressability bracket.
     * @param from compressability value, lower bound
     * @param to compressability value, upper bound
     * @return the number of log entries in the compressability bracket
     */
    public abstract long countByCompressability(double from, double to);

    /**
     * Returns the log entries that fall within a specified compressability bracket.
     * @param from compressability value, lower bound
     * @param to compressability value, upper bound
     * @param limit the pagination limit
     * @param offset the pagination offset
     * @return the log entries in the compressability bracket
     */
    public abstract SearchResult searchByCompressability(double from, double to, int limit, int offset);

    /**
     * Counts the log entries for a specific host.
     * @param hostname the host name
     * @return the number of log entries for the host
     */
    public abstract long countEntriesForHost(String hostname);

    /**
     * Returns an iterator over the log entries for a specific host.
     * @param hostname the host name
     * @return the log entries for the host
     */
    public abstract Iterator<CrawlLogEntry> getEntriesForHost(String hostname);

    /**
     * A helper function which extracts from the log the list of distinct host names
     * which have any URLs with the specified annotation.
     * @param annotation the annotation
     * @return the list of hosts
     */
    public abstract List<String> extractHostsForAnnotation(String annotation);

}