package focusedCrawler.link;
import java.io.IOException;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonUnwrapped;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import focusedCrawler.util.storage.StorageConfig;
public class LinkStorageConfig {
public static class BackSurferConfig {
@JsonProperty("link_storage.backsurfer.moz.access_id")
private String mozAccessId = null;
@JsonProperty("link_storage.backsurfer.moz.secret_key")
private String mozKey = null;
public BackSurferConfig() { }
public String getMozAccessId() {
return mozAccessId;
}
public String getMozKey() {
return mozKey;
}
}
@JsonProperty("link_storage.max_pages_per_domain")
private int maxPagesPerDomain = 100;
@JsonProperty("link_storage.link_classifier.type")
private String typeOfClassifier = "LinkClassifierBaseline";
@JsonProperty("link_storage.link_strategy.outlinks")
private boolean getOutlinks = true;
@JsonProperty("link_storage.link_strategy.use_scope")
private boolean useScope = false;
@JsonProperty("link_storage.directory")
private String linkDirectory = "data_url/dir";
@JsonProperty("link_storage.max_size_cache_urls")
private int maxCacheUrlsSize = 200000;
@JsonProperty("link_storage.link_strategy.backlinks")
private boolean getBacklinks = false;
@JsonProperty("link_storage.online_learning.enabled")
private boolean useOnlineLearning = false;
@JsonProperty("link_storage.online_learning.type")
private String onlineMethod = "FORWARD_CLASSIFIER_BINARY";
@JsonProperty("link_storage.online_learning.learning_limit")
private int learningLimit = 500;
@JsonProperty("link_storage.link_selector")
private String linkSelector = "TopkLinkSelector";
// TODO Remove target storage folder dependency from link storage
private String targetStorageDirectory = "data_target/";
@JsonUnwrapped
private BackSurferConfig backSurferConfig = new BackSurferConfig();
private final StorageConfig serverConfig;
@JsonProperty("link_storage.download_sitemap_xml")
private boolean downloadSitemapXml = false;
@JsonProperty("link_storage.scheduler.host_min_access_interval")
private int schedulerHostMinAccessInterval = 5000;
@JsonProperty("link_storage.scheduler.max_links")
private int schedulerMaxLinks = 10000;
public LinkStorageConfig(JsonNode config, ObjectMapper objectMapper) throws IOException {
objectMapper.readerForUpdating(this).readValue(config);
this.serverConfig = StorageConfig.create(config, "link_storage.server.");
}
public int getMaxPagesPerDomain() {
return maxPagesPerDomain;
}
public String getTypeOfClassifier() {
return typeOfClassifier;
}
public boolean getOutlinks() {
return getOutlinks;
}
public boolean isUseScope() {
return useScope;
}
public String getLinkDirectory() {
return linkDirectory;
}
public int getMaxCacheUrlsSize() {
return maxCacheUrlsSize;
}
public boolean getBacklinks() {
return getBacklinks;
}
public boolean isUseOnlineLearning() {
return useOnlineLearning;
}
public String getOnlineMethod() {
return onlineMethod;
}
public int getLearningLimit() {
return learningLimit;
}
public String getTargetStorageDirectory() {
return targetStorageDirectory;
}
public BackSurferConfig getBackSurferConfig() {
return backSurferConfig;
}
public String getLinkSelector() {
return linkSelector;
}
public StorageConfig getStorageServerConfig() {
return serverConfig;
}
public boolean getDownloadSitemapXml() {
return downloadSitemapXml;
}
public int getSchedulerHostMinAccessInterval() {
return schedulerHostMinAccessInterval;
}
public int getSchedulerMaxLinks() {
return schedulerMaxLinks;
}
}