package com.geccocrawler.gecco.downloader.proxy; import java.io.File; import java.net.URL; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.HttpHost; import com.google.common.base.Charsets; import com.google.common.io.Files; import com.google.common.io.Resources; /** * 代理ip从classpath下的proxys文件里加载 * 多代理支持,classpath根目录下放置proxys文件,文件格式如下 * 127.0.0.1:8888 * 127.0.0.1:8889 * 支持记录代理成功率,自动发现无效代理 * 支持在线添加代理 * * @author huchengyi * */ public class FileProxys implements Proxys { private static Log log = LogFactory.getLog(FileProxys.class); private ConcurrentLinkedQueue<Proxy> proxyQueue; private Map<String, Proxy> proxys = null; public FileProxys() { try { proxys = new ConcurrentHashMap<String, Proxy>(); proxyQueue = new ConcurrentLinkedQueue<Proxy>(); URL url = Resources.getResource("proxys"); File file = new File(url.getPath()); List<String> lines = Files.readLines(file, Charsets.UTF_8); if(lines.size() > 0) { for(String line : lines) { line = line.trim(); if(line.startsWith("#")) { continue; } String[] hostPort = line.split(":"); if(hostPort.length == 2) { String host = hostPort[0]; int port = NumberUtils.toInt(hostPort[1], 80); addProxy(host, port); } } } } catch(Exception ex) { log.info("proxys not load"); } } @Override public boolean addProxy(String host, int port) { return addProxy(host, port, null); } @Override public boolean addProxy(String host, int port, String src) { Proxy proxy = new Proxy(host, port); if(StringUtils.isNotEmpty(src)) { proxy.setSrc(src); } if(proxys.containsKey(proxy.toHostString())) { return false; } else { proxys.put(host+":"+port, proxy); proxyQueue.offer(proxy); if(log.isDebugEnabled()) { log.debug("add proxy : " + host + ":" + port); } return true; } } @Override public void failure(String host, int port) { Proxy proxy = proxys.get(host+":"+port); if(proxy != null) { long failure = proxy.getFailureCount().incrementAndGet(); long success = proxy.getSuccessCount().get(); reProxy(proxy, success, failure); } } @Override public void success(String host, int ip) { Proxy proxy = proxys.get(host+":"+ip); if(proxy != null) { long success = proxy.getSuccessCount().incrementAndGet(); long failure = proxy.getFailureCount().get(); reProxy(proxy, success, failure); } } private void reProxy(Proxy proxy, long success, long failure) { long sum = failure + success; if(sum < 20) { proxyQueue.offer(proxy); } else { if((success / (float)sum) >= 0.5f) { proxyQueue.offer(proxy); } } } @Override public HttpHost getProxy() { if(proxys == null || proxys.size() == 0) { return null; } Proxy proxy = proxyQueue.poll(); if(log.isDebugEnabled()) { log.debug("use proxy : " + proxy); } if(proxy == null) { return null; } return proxy.getHttpHost(); } }