package com.constellio.app.modules.es.connectors.http.fetcher; import java.net.MalformedURLException; import java.net.URL; import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; import com.constellio.app.modules.es.model.connectors.http.ConnectorHttpInstance; public class ConnectorUrlAcceptor implements UrlAcceptor { private ConnectorHttpInstance connectorHttpInstance; public ConnectorUrlAcceptor(ConnectorHttpInstance connectorHttpInstance) { this.connectorHttpInstance = connectorHttpInstance; } @Override public boolean isAccepted(String url) { if (!isValidUrlPattern(url)) { return false; } else if (isSeed(url)) { return true; } else if (isExcluded(url)) { return false; } else if (isIncluded(url)) { return true; } return isFromSeed(url); } private boolean isValidUrlPattern(String pattern) { if (StringUtils.isBlank(pattern)) { return false; } try { new URL(pattern); } catch (MalformedURLException e) { return false; } return true; } private boolean isFromSeed(String url) { List<String> seeds = connectorHttpInstance.getSeedsList(); for (String seed : seeds) { if (StringUtils.isNotBlank(seed)) { if (StringUtils.startsWith(url, seed)) { return true; } } } return false; } private boolean isSeed(String url) { List<String> seeds = connectorHttpInstance.getSeedsList(); for (String seed : seeds) { if (StringUtils.equals(seed, url)) { return true; } } return false; } private boolean isExcluded(String url) { String patterns = connectorHttpInstance.getExcludePatterns(); String[] regexes = StringUtils.split(patterns, "\n"); if (matches(url, regexes)) { return true; } return false; } private boolean isIncluded(String url) { String patterns = connectorHttpInstance.getIncludePatterns(); String[] regexes = StringUtils.split(patterns, "\n"); if (matches(url, regexes)) { return true; } return false; } private boolean matches(String url, String[] regexes) { if (regexes != null) { for (String excludedSite : regexes) { Pattern pattern = Pattern.compile(excludedSite); Matcher matcher = pattern.matcher(url); if (matcher.find()) { return true; } } } return false; } }