package com.jackson.common.task; import com.jackson.bean.ContextSrc; import com.jackson.common.control.ProxyController; import com.jackson.common.source.CommonSource; import com.jackson.db.po.Proxy; import com.jackson.db.po.Url; import com.jackson.db.service.UrlService; import com.jackson.task.RequestParserTask; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; /** * Created by Jackson on 2016/11/7. * 不要使用,会有问题的 */ @Deprecated public class SimpleGetTask extends RequestParserTask { private static Logger logger = LogManager.getLogger(SimpleGetTask.class.getName()); private final UrlService urlService; private final Url url; private Proxy proxy; private final CommonSource source; public SimpleGetTask(Proxy proxy, Url url, UrlService urlService,CommonSource source) { this.url = url; this.proxy = proxy; this.source = source; this.urlService = urlService; } public SimpleGetTask(Url url, UrlService urlService,CommonSource source) { this(null,url,urlService,source); } @Override public HttpRequestBase getHttpRequest() { return source.pollHttpGet(getUrl()); } @Override public void onRequestException(Exception e, Url url, HttpRequestBase httpRequest) { logger.error("请求异常 url:{} Exception:{}", url.getUrl(),e.toString()); } @Override public void onRequestFinish(boolean isRequestOk, CloseableHttpClient httpClient, HttpRequestBase httpRequest, Url url) { if (!isRequestOk) { urlService.add(getUrl());//如果失败,就重新放入数据库,等着下次接着用,如果成功等着解析,解析成功后回收 } source.offerHttpGet((HttpGet) httpRequest); } @Override protected Url getUrl() { return url; } @Override protected Proxy getProxy() { return proxy; } @Override protected ProxyController getProxyController() { return null; } @Override protected UrlService getUrlService() { return urlService; } @Override protected boolean onStatusCode(int statusCode, CloseableHttpResponse response) { if(!(statusCode >= 200 && statusCode<300)){ if(getProxy()!=null){ logger.error("请求失败 statusCode:{} url:{} proxy:{}", statusCode,getUrl().getUrl(),proxy); }else { logger.error("请求失败 statusCode:{} url:{}", statusCode,getUrl().getUrl()); } } return statusCode >= 200 &&statusCode<300; } @Override protected ContextSrc getContextSrc() { return null; } @Override protected void onParseException(Exception e, Url url, String content) { logger.error("解析异常 url:{},parser:{},Exception{},content:{}", url.getUrl(),url.getParserClass(),e.toString(), content);//记录错误日志,方便排查问题,改解析代码 } @Override protected void onParseFinish(boolean parseSuccess, Url url, String content) { //解析失败一次,proxy 就放弃使用 //解析失败超过4次 ,url 放弃使用 if (parseSuccess){ urlService.completeUrl(getUrl());//设置成完成状态 if(proxy!=null){ // ProxyController.getInstance().getProxyPool().offer(proxy); } } else { if(url.getParserFailureTime()<4){ url.setParserFailureTime(url.getParserFailureTime()+1); urlService.add(url);//存入内存,下次接着爬取 logger.error("解析错误 错误次数为:{} proxy:{}:{},url:{},parser:{}",url.getParserFailureTime(),proxy.getHost(),proxy.getPort(),url.getUrl(),url.getParserClass());//记录错误日志,方便排查问题,改解析代码 }else { if(proxy!=null) logger.error("解析失败 url:{},proxy host:{} proxy port:{},parserName{},content:{}", url.getUrl(),proxy.getHost(),proxy.getPort(), url.getParserClass(), content);//记录错误日志,方便排查问题,改解析代码 else logger.error("解析失败 url:{},parserName{},content:{}", url.getUrl(), url.getParserClass(), content);//记录错误日志,方便排查问题,改解析代码 } } } }