package com.jackson.task.queue;
import com.jackson.bean.Bundle;
import com.jackson.bean.ContextSrc;
import com.jackson.common.control.ProxyController;
import com.jackson.db.po.Proxy;
import com.jackson.db.po.Url;
import com.jackson.net.HttpClientCreater;
import com.jackson.reservoir.ParserPool;
import com.jackson.task.ITask;
import com.jackson.task.RequestParserTask;
import com.jackson.utils.StringUtil;
import com.sun.istack.internal.NotNull;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.conn.ConnectTimeoutException;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.net.SocketTimeoutException;
/**
 * Queue task that performs one HTTP request and then hands the result to a parse callback.
 * <p>
 * {@link #doTask(Bundle)} runs in two stages:
 * <ol>
 *   <li>Request: {@code onInit} -> {@code getHttpRequest} -> {@link #getHttpClient()} ->
 *       {@link #doRequest} -> {@link #onStatusCode}. The response is closed and
 *       {@code onRequestFinish} is called whether or not the request succeeded.</li>
 *   <li>Parse (only when the request stage succeeded): {@link #onResponseHandle}, then
 *       {@link #onParseFinish}. When the request stage failed, {@link #onRequestFailed()}
 *       supplies the result instead.</li>
 * </ol>
 * The most recent response is kept in the {@code response} field so that {@code doTask} can
 * inspect it after {@code doRequest} has returned the body text.
 * <p>
 * Created by Jackson on 2017/2/10.
 */
public abstract class RequestQueueTask extends QueueTask implements ITask {

    private static final Logger logger = LogManager.getLogger(RequestQueueTask.class.getName());

    // NOTE(review): not referenced anywhere in this class; kept because removing it would
    // also remove whatever the ParserPool constructor does at class-initialisation time.
    private static ParserPool parserPool = new ParserPool();

    /** Proxy used when {@link #getContextSrc()} returns null; may itself be null. */
    private Proxy proxy;

    /**
     * Response of the most recent {@link #doRequest} call. It is reset at the start of every
     * {@link #doTask(Bundle)} run and closed at the end of the request stage.
     */
    CloseableHttpResponse response = null;

    public RequestQueueTask() {
    }

    /**
     * Executes the request stage followed by the parse stage.
     *
     * @param bundle passed through unchanged to {@link #onResponseHandle}
     * @return the value of {@link #onRequestFailed()} when the request stage failed,
     *         otherwise the value of {@link #onParseFinish}
     */
    @Override
    protected QueueTaskCollection.TaskProcess doTask(Bundle bundle) {
        // Forget the response of any earlier run so a request that fails before
        // client.execute(...) never inspects or closes a stale response.
        response = null;

        // ---- request stage ----
        onInit(getUrl(), getProxy());
        String content = null;
        boolean isRequestOk = false;
        HttpRequestBase httpRequest = null;
        CloseableHttpClient httpClient = null;
        try {
            httpRequest = getHttpRequest();
            httpClient = getHttpClient();
            // doRequest stores the response in the field and returns the body text.
            content = doRequest(httpClient, httpRequest);
            StatusLine statusLine = response.getStatusLine();
            isRequestOk = onStatusCode(statusLine.getStatusCode(), response);
        } catch (Exception e) {
            isRequestOk = false;
            onRequestException(e, getUrl(), httpRequest);
        } finally {
            closeCurrentResponse();
            onRequestFinish(isRequestOk, httpClient, httpRequest, getUrl());
        }

        // ---- parse stage ----
        if (!isRequestOk) {
            return onRequestFailed();
        }
        boolean parseSuccess = false;
        try {
            parseSuccess = onResponseHandle(getProxy(), getProxyController(), getUrl(),
                    httpRequest, response, content, bundle);
        } catch (Exception e) {
            // parseSuccess is still false here: the assignment above never completed.
            notifyParseException(e, content);
        }
        // Deliberately placed after the try/catch rather than in a finally block: a return
        // inside finally would silently discard Errors thrown while parsing.
        return onParseFinish(parseSuccess, getUrl(), content);
    }

    /** Closes the current response, if any; a failure to close is logged, not propagated. */
    private void closeCurrentResponse() {
        if (response == null) {
            return;
        }
        try {
            response.close();
        } catch (IOException e) {
            logger.warn("failed to close http response", e);
        }
    }

    /**
     * Forwards a parse failure to {@link #onParseException}. A runtime exception thrown by
     * the callback itself is logged and contained so that {@link #onParseFinish} still runs.
     */
    private void notifyParseException(Exception cause, String content) {
        try {
            onParseException(cause, getUrl(), content);
        } catch (RuntimeException handlerFailure) {
            logger.error("onParseException callback threw while handling a parse failure", handlerFailure);
        }
    }

    /**
     * Supplies the task result when the request stage failed (an exception was thrown or
     * {@link #onStatusCode} returned false).
     *
     * @return the result {@link #doTask(Bundle)} should return
     */
    protected abstract QueueTaskCollection.TaskProcess onRequestFailed();

    /**
     * Implementations must return the same url on every call, and it must not be null.
     *
     * @return the url this task requests
     */
    public abstract Url getUrl();

    /**
     * Returns the same proxy on every call.
     *
     * @return the proxy set via {@link #setProxy(Proxy)}; may be null
     */
    protected Proxy getProxy() {
        return proxy;
    }

    public void setProxy(Proxy proxy) {
        this.proxy = proxy;
    }

    /**
     * @return the proxy controller handed to {@link #onResponseHandle}
     */
    protected abstract ProxyController getProxyController();

    /**
     * Callback invoked before the request is built and sent. Does nothing by default.
     *
     * @param url   the url about to be requested
     * @param proxy the value of {@link #getProxy()}; may be null
     */
    protected void onInit(Url url, Proxy proxy) {
    }

    /**
     * Decides whether the response status is acceptable. It is called after
     * {@link #doRequest} has returned, so the body has already been read by then.
     * The default implementation accepts every status.
     *
     * @param statusCode HTTP status code of the response
     * @param response   the response, still open at this point
     * @return true if the status is acceptable; false marks the request as failed:
     *         {@code onRequestFinish} receives false as its first argument,
     *         {@link #onRequestFailed()} supplies the task result, and
     *         {@link #onResponseHandle}, {@link #onParseException} and
     *         {@link #onParseFinish} are not called
     */
    protected boolean onStatusCode(int statusCode, CloseableHttpResponse response) {
        return true;
    }

    /**
     * Handles (parses) a successful response.
     *
     * @param proxy           the value of {@link #getProxy()}; may be null
     * @param proxyController the value of {@link #getProxyController()}
     * @param url             the requested url
     * @param request         the request that was sent
     * @param response        the response; it has already been closed when this is called,
     *                        so use {@code content} for the body
     * @param content         the value returned by {@link #doRequest}
     * @param bundle          the bundle given to {@link #doTask(Bundle)}
     * @return true if parsing succeeded, false if it failed
     */
    protected abstract boolean onResponseHandle(Proxy proxy, ProxyController proxyController, Url url, HttpRequestBase request, CloseableHttpResponse response, String content, Bundle bundle);

    /**
     * @return the context source to use for the request, or null to send the request
     *         without one (per the original author: null means cookies are not used)
     */
    protected abstract ContextSrc getContextSrc();

    /**
     * Callback invoked when {@link #onResponseHandle} throws. Typically it marks the url as
     * finished and logs the error: such a failure points at a code problem, and handling it
     * this way avoids parsing the same broken input again and makes it easier to track down.
     *
     * @param e       the exception thrown while parsing
     * @param url     the requested url
     * @param content the body text that was being parsed
     */
    protected abstract void onParseException(Exception e, Url url, String content);

    /**
     * Callback invoked at the end of the parse stage, whether or not parsing succeeded.
     * It is not called when the request stage failed. Typically used to recycle the url
     * and mark it as finished.
     *
     * @param parseSuccess true if {@link #onResponseHandle} returned true
     * @param url          the requested url
     * @param content      the body text that was parsed
     * @return the result {@link #doTask(Bundle)} should return
     */
    protected abstract @NotNull QueueTaskCollection.TaskProcess onParseFinish(boolean parseSuccess, Url url, String content);

    /**
     * Picks the shared HTTP or HTTPS client according to the protocol state of the url.
     *
     * @return the client to execute the request with
     * @throws RuntimeException if the protocol state is neither HTTP nor HTTPS
     */
    @Override
    public CloseableHttpClient getHttpClient() {
        final Url url = getUrl();
        if (url.getProtocolState() == Url.PROTOCOL_STATE_HTTP) {
            return HttpClientCreater.instance.getHttpClient();
        }
        if (url.getProtocolState() == Url.PROTOCOL_STATE_HTTPS) {
            return HttpClientCreater.instance.getHttpsClient();
        }
        throw new RuntimeException("RequestParserTask 协议类型错误");
    }

    /**
     * Sends the request, stores the response in the {@code response} field and returns the
     * body decoded as UTF-8.
     * <p>
     * When {@link #getContextSrc()} is non-null, its user agent, proxy and http context are
     * used; otherwise the proxy from {@link #getProxy()} (if any) is used.
     *
     * @param client      client to execute the request with
     * @param requestBase request to send; a User-Agent header may be set on it
     * @return the response body, or a fixed placeholder string when the url has no parser class
     * @throws RuntimeException if the proxy's protocol state differs from the url's
     * @throws IOException      if executing the request or reading the body fails
     */
    @Override
    public String doRequest(CloseableHttpClient client, HttpRequestBase requestBase) throws ClientProtocolException, SocketTimeoutException, ConnectTimeoutException, IOException {
        final Url url = getUrl();
        // Read once: the null check and the later dereferences must see the same object.
        final ContextSrc contextSrc = getContextSrc();

        if (contextSrc != null) {
            final String userAgent = contextSrc.getUserAgent();
            if (!StringUtil.isEmpty(userAgent)) {
                requestBase.setHeader("User-Agent", userAgent);
            }
            final Proxy contextProxy = contextSrc.getProxy();
            if (contextProxy != null) {
                ensureSameProtocol(contextProxy, url);
                HttpHost httpHost = new HttpHost(contextProxy.getHost(), contextProxy.getPort(), url.getProtocolScheme());
                logger.info("请求 url:{} proxy:{}:{} userAgent:{}", url.getUrl(), contextProxy.getHost(), contextProxy.getPort(), userAgent);
                response = client.execute(httpHost, requestBase, contextSrc.getHttpContext());
            } else {
                logger.info("请求 url:{} userAgent:{}", url.getUrl(), userAgent);
                response = client.execute(requestBase, contextSrc.getHttpContext());
            }
        } else {
            final Proxy taskProxy = getProxy();
            if (taskProxy != null) {
                ensureSameProtocol(taskProxy, url);
                HttpHost httpHost = new HttpHost(taskProxy.getHost(), taskProxy.getPort(), url.getProtocolScheme());
                logger.info("请求 url:{} proxy:{}:{}", url.getUrl(), taskProxy.getHost(), taskProxy.getPort());
                response = client.execute(httpHost, requestBase);
            } else {
                logger.info("请求 url:{}", url.getUrl());
                response = client.execute(requestBase);
            }
        }

        // No parser class configured: the body is not read and a placeholder is returned.
        if (url.getParserClass() == null) {
            return "没有解析器";
        }
        // NOTE(review): getEntity() can be null for responses without a body, and
        // EntityUtils.toString rejects a null entity; that currently surfaces through
        // onRequestException - confirm this is the intended handling.
        HttpEntity entity = response.getEntity();
        return EntityUtils.toString(entity, "utf-8");
    }

    /** Rejects a proxy whose protocol state differs from the one the url requires. */
    private static void ensureSameProtocol(Proxy proxy, Url url) {
        if (proxy.getProtocolState() != url.getProtocolState()) {
            throw new RuntimeException("RequestParserTask proxy协议类型和 url需要的协议类型不匹配");
        }
    }
}