package com.brucezee.jspider;
import com.brucezee.jspider.common.enums.ResponseType;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.apache.http.HttpStatus;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * The response to a request: the URL that was fetched, the HTTP status code,
 * the response headers and the response payload, plus the list of follow-up
 * requests collected while processing this page.
 *
 * <p>Additions to the target-request list are synchronized on the list itself.
 * {@link #getTargetRequests()} returns that same live list without copying, so
 * callers that iterate it while other threads add requests must synchronize on
 * the returned list themselves.
 *
 * <p>NOTE(review): the class is {@link Serializable} but holds an
 * {@code InputStream}/{@code Document} in some states; confirm whether
 * serialization of such instances is actually required.
 *
 * Created by brucezee on 2017/1/4.
 */
public class Page implements Serializable, Closeable {
    private String currentUrl;          // URL this response content belongs to
    private int statusCode;             // HTTP status code of the response
    private Header[] headers;           // response headers
    private Object result;              // response payload; its type depends on the Request's responseType
    private List<Request> targetRequests = new ArrayList<Request>();    // newly discovered target requests
    private Document document;          // response text parsed into a jsoup Document (lazily created)

    /**
     * Creates a page for a finished request.
     *
     * @param currentUrl URL the response was fetched from
     * @param statusCode HTTP status code of the response
     * @param headers    response headers
     * @param result     response payload (text, bytes or stream)
     */
    public Page(String currentUrl, int statusCode, Header[] headers, Object result) {
        this.currentUrl = currentUrl;
        this.statusCode = statusCode;
        this.headers = headers;
        this.result = result;
    }

    /**
     * Returns the payload as text.
     *
     * @return the payload cast to {@code String}
     * @throws ClassCastException if the payload is not a {@code String}
     */
    public String getTextResult() {
        return (String) result;
    }

    /**
     * Returns the payload as a byte array.
     *
     * @return the payload cast to {@code byte[]}
     * @throws ClassCastException if the payload is not a {@code byte[]}
     */
    public byte[] getBytesResult() {
        return (byte[]) result;
    }

    /**
     * Returns the payload as a stream.
     *
     * @return the payload cast to {@code InputStream}
     * @throws ClassCastException if the payload is not an {@code InputStream}
     */
    public InputStream getStreamResult() {
        return (InputStream) result;
    }

    /**
     * Returns the response text parsed as a jsoup {@link Document}, using the
     * current URL as base URI. The document is parsed on first call and cached.
     *
     * @return the parsed document
     * @throws IllegalStateException if the response text is null or blank
     * @throws ClassCastException    if the payload is not a {@code String}
     */
    public Document document() {
        if (document == null) {
            String text = getTextResult();
            if (StringUtils.isBlank(text)) {
                throw new IllegalStateException("Response text is empty!");
            }
            document = Jsoup.parse(text, currentUrl);
        }
        return document;
    }

    /**
     * Returns the live list of target requests collected so far (not a copy).
     *
     * @return the internal target-request list
     */
    public List<Request> getTargetRequests() {
        return targetRequests;
    }

    /**
     * Adds target requests for the given URLs with response type
     * {@link ResponseType#TEXT}.
     *
     * @param requests URLs to add; may be null, invalid entries are skipped
     */
    public void addTargetRequests(List<String> requests) {
        addTargetRequests(requests, ResponseType.TEXT);
    }

    /**
     * Adds target requests for the given URLs. Each URL that passes
     * {@link #isValidUrl(String)} is resolved against the current URL before
     * being added; other entries are skipped.
     *
     * @param requests     URLs to add; a null list is ignored
     * @param responseType response type to request for every URL
     */
    public void addTargetRequests(List<String> requests, ResponseType responseType) {
        if (requests == null) {
            return;
        }
        // Resolve outside the lock so the monitor is only held for the actual insertion.
        List<Request> resolved = new ArrayList<Request>(requests.size());
        for (String url : requests) {
            if (isValidUrl(url)) {
                resolved.add(new Request(StringUtil.resolve(currentUrl, url), responseType));
            }
        }
        synchronized (targetRequests) {
            targetRequests.addAll(resolved);
        }
    }

    /**
     * Adds a target request for the given URL with response type
     * {@link ResponseType#TEXT}.
     *
     * @param url URL to add; ignored when it is not a valid link target
     */
    public void addTargetRequest(String url) {
        addTargetRequest(url, ResponseType.TEXT);
    }

    /**
     * Adds a target request for the given URL, resolved against the current URL.
     *
     * @param url          URL to add; ignored when it is not a valid link target
     * @param responseType response type to request
     */
    public void addTargetRequest(String url, ResponseType responseType) {
        if (isValidUrl(url)) {
            Request request = new Request(StringUtil.resolve(currentUrl, url), responseType);
            synchronized (targetRequests) {
                targetRequests.add(request);
            }
        }
    }

    /**
     * Adds an already constructed target request as-is.
     *
     * @param request request to add
     */
    public void addTargetRequest(Request request) {
        synchronized (targetRequests) {
            targetRequests.add(request);
        }
    }

    /**
     * Tells whether a link value is worth following: it must contain
     * non-whitespace text, must not be a bare {@code #} anchor and must not be
     * a {@code javascript:} pseudo URL.
     *
     * @param url raw link value, may be null
     * @return true when the URL should be turned into a target request
     */
    private boolean isValidUrl(String url) {
        // isBlank (not isEmpty): a whitespace-only value would otherwise pass
        // and be resolved against the current URL.
        if (StringUtils.isBlank(url)) {
            return false;
        }
        String trimmed = url.trim();
        // startsWithIgnoreCase avoids default-locale lower-casing, which breaks
        // the comparison for "JAVASCRIPT:" under e.g. a Turkish locale.
        return !"#".equals(trimmed)
                && !StringUtils.startsWithIgnoreCase(trimmed, "javascript:");
    }

    /**
     * @return HTTP status code of the response
     */
    public int getStatusCode() {
        return statusCode;
    }

    /**
     * @return response headers as passed to the constructor
     */
    public Header[] getHeaders() {
        return headers;
    }

    /**
     * @return URL this response was fetched from
     */
    public String getCurrentUrl() {
        return currentUrl;
    }

    /**
     * @return raw response payload
     */
    public Object getResult() {
        return result;
    }

    /**
     * @return true when the status code is 200 (OK) and a payload is present
     */
    public boolean isSuccess() {
        return statusCode == HttpStatus.SC_OK && result != null;
    }

    /**
     * Closes the payload when it is an {@link InputStream}; otherwise does nothing.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        // instanceof is already false for null, no separate null check needed
        if (result instanceof InputStream) {
            ((InputStream) result).close();
        }
    }

    @Override
    public String toString() {
        return "Page{" +
                "currentUrl=" + currentUrl +
                ", statusCode=" + statusCode +
                ", headers=" + Arrays.toString(headers) +
                ", result=" + result +
                ", targetRequests=" + targetRequests +
                '}';
    }
}