package com.soulgalore.crawler.core;

import java.util.Collections;
import java.util.Map;

import com.soulgalore.crawler.util.HeaderUtil;

/**
 * Configuration for a crawl.
 *
 * <p>Instances are created through {@link Builder} (see {@link #builder()}); the constructor and
 * all setters are private, so a built configuration exposes only getters.
 */
public final class CrawlerConfiguration {

  // Property names for System properties.

  /**
   * System property for max number of http threads.
   */
  public static final String MAX_THREADS_PROPERTY_NAME = "com.soulgalore.crawler.nrofhttpthreads";

  /**
   * System property for socket timeout (in ms).
   */
  public static final String SOCKET_TIMEOUT_PROPERTY_NAME =
      "com.soulgalore.crawler.http.socket.timeout";

  /**
   * System property for connection timeout (in ms).
   */
  public static final String CONNECTION_TIMEOUT_PROPERTY_NAME =
      "com.soulgalore.crawler.http.connection.timeout";

  /**
   * System property for auth.
   */
  public static final String AUTH_PROPERTY_NAME = "com.soulgalore.crawler.auth";

  /**
   * System property for proxy config.
   */
  public static final String PROXY_PROPERTY_NAME = "com.soulgalore.crawler.proxy";

  /**
   * The default crawl level if none is supplied.
   */
  public static final int DEFAULT_CRAWL_LEVEL = 1;

  /**
   * The default value for whether URLs should be verified to be ok or not.
   */
  public static final boolean DEFAULT_SHOULD_VERIFY_URLS = true;

  private int maxLevels = DEFAULT_CRAWL_LEVEL;
  private String notOnPath = "";
  private String onlyOnPath = "";
  private String requestHeaders = "";
  private String startUrl;
  private Map<String, String> requestHeadersMap = Collections.emptyMap();
  private boolean verifyUrls = DEFAULT_SHOULD_VERIFY_URLS;

  private CrawlerConfiguration() {
  }

  /**
   * Get the request headers exactly as they were supplied to the builder.
   *
   * @return the raw request header string
   */
  public String getRequestHeaders() {
    return requestHeaders;
  }

  /**
   * Get the request headers as a map, as produced by
   * {@code HeaderUtil.createHeadersFromString} from the raw header string.
   *
   * @return the parsed request headers; an empty map if no headers have been set
   */
  public Map<String, String> getRequestHeadersMap() {
    return requestHeadersMap;
  }

  /**
   * Get the configured max number of levels.
   *
   * @return the max number of levels
   */
  public int getMaxLevels() {
    return maxLevels;
  }

  /**
   * Get the configured "not on path" value.
   *
   * @return the value, the empty string by default
   */
  public String getNotOnPath() {
    return notOnPath;
  }

  /**
   * Get the configured "only on path" value.
   *
   * @return the value, the empty string by default
   */
  public String getOnlyOnPath() {
    return onlyOnPath;
  }

  /**
   * Get the start url.
   *
   * @return the start url, or {@code null} if none has been set
   */
  public String getStartUrl() {
    return startUrl;
  }

  /**
   * Tell whether URLs should be verified.
   *
   * @return {@code true} if URLs should be verified
   */
  public boolean isVerifyUrls() {
    return verifyUrls;
  }

  /**
   * Create a new configuration carrying the same values as this one. The header map of the copy
   * is rebuilt from the raw header string by {@link #setRequestHeaders(String)}.
   */
  private CrawlerConfiguration copy() {
    final CrawlerConfiguration conf = new CrawlerConfiguration();
    conf.setMaxLevels(getMaxLevels());
    conf.setNotOnPath(getNotOnPath());
    conf.setOnlyOnPath(getOnlyOnPath());
    conf.setStartUrl(getStartUrl());
    conf.setVerifyUrls(isVerifyUrls());
    conf.setRequestHeaders(getRequestHeaders());
    return conf;
  }

  /** Store the raw header string and keep the parsed map in sync with it. */
  private void setRequestHeaders(String requestHeaders) {
    this.requestHeaders = requestHeaders;
    requestHeadersMap = HeaderUtil.getInstance().createHeadersFromString(requestHeaders);
  }

  private void setMaxLevels(int maxLevels) {
    this.maxLevels = maxLevels;
  }

  private void setNotOnPath(String notOnPath) {
    this.notOnPath = notOnPath;
  }

  private void setOnlyOnPath(String onlyOnPath) {
    this.onlyOnPath = onlyOnPath;
  }

  private void setStartUrl(String startUrl) {
    this.startUrl = startUrl;
  }

  private void setVerifyUrls(boolean verifyUrls) {
    this.verifyUrls = verifyUrls;
  }

  /**
   * Hash over every configured value, including the raw request headers.
   *
   * <p>The parsed header map is left out because it is only ever assigned from the raw header
   * string.
   */
  @Override
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + maxLevels;
    result = prime * result + ((notOnPath == null) ? 0 : notOnPath.hashCode());
    result = prime * result + ((onlyOnPath == null) ? 0 : onlyOnPath.hashCode());
    result = prime * result + ((requestHeaders == null) ? 0 : requestHeaders.hashCode());
    result = prime * result + ((startUrl == null) ? 0 : startUrl.hashCode());
    result = prime * result + (verifyUrls ? 1231 : 1237);
    return result;
  }

  /**
   * Two configurations are equal when all their configured values are equal: max levels, the
   * two path values, the raw request headers, the start url and the verify flag.
   *
   * @param obj the object to compare with
   * @return {@code true} if {@code obj} is a {@code CrawlerConfiguration} with the same values
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    final CrawlerConfiguration other = (CrawlerConfiguration) obj;
    return maxLevels == other.maxLevels
        && verifyUrls == other.verifyUrls
        && sameText(notOnPath, other.notOnPath)
        && sameText(onlyOnPath, other.onlyOnPath)
        // Without this, configurations that differ only in their headers would compare equal.
        && sameText(requestHeaders, other.requestHeaders)
        && sameText(startUrl, other.startUrl);
  }

  /** Null-safe string comparison: two nulls are equal, a null never equals a non-null. */
  private static boolean sameText(String left, String right) {
    return left == null ? right == null : left.equals(right);
  }

  /**
   * Builder for {@link CrawlerConfiguration}. Each setter returns the builder so calls can be
   * chained; {@link #build()} returns a copy of the values collected so far, so the builder can
   * be modified afterwards without affecting configurations already built.
   */
  public static class Builder {

    private final CrawlerConfiguration configuration = new CrawlerConfiguration();

    public Builder() {}

    /**
     * Build a configuration from the values set so far.
     *
     * @return a new configuration, independent of this builder
     */
    public CrawlerConfiguration build() {
      return configuration.copy();
    }

    public Builder setMaxLevels(int maxLevels) {
      configuration.setMaxLevels(maxLevels);
      return this;
    }

    public Builder setNotOnPath(String notOnPath) {
      configuration.setNotOnPath(notOnPath);
      return this;
    }

    public Builder setOnlyOnPath(String onlyOnPath) {
      configuration.setOnlyOnPath(onlyOnPath);
      return this;
    }

    public Builder setStartUrl(String startUrl) {
      configuration.setStartUrl(startUrl);
      return this;
    }

    public Builder setVerifyUrls(boolean verifyUrls) {
      configuration.setVerifyUrls(verifyUrls);
      return this;
    }

    public Builder setRequestHeaders(String requestHeaders) {
      configuration.setRequestHeaders(requestHeaders);
      return this;
    }
  }

  /**
   * Create a new builder.
   *
   * @return a fresh {@link Builder}
   */
  public static Builder builder() {
    return new Builder();
  }
}