package org.arong.egdownloader.spider;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.httpclient.ConnectTimeoutException;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.arong.util.https.HttpsUtils;
/**
 * Static helpers for fetching the body, the cookies or the raw byte stream of
 * a remote URL, either through Apache Commons HttpClient (POST helpers) or
 * through the JDK's {@link HttpURLConnection} (GET helpers).
 *
 * <p>All requests use a 20 second connect/read timeout and follow at most
 * {@link #MAX_REDIRECTS} redirects.
 *
 * @author 阿荣
 * @since 2013-8-18
 */
public class WebClient {

    /** Encoding used by the overloads that do not take one explicitly. */
    private static final String DEFAULT_ENCODING = "utf-8";

    /** Connect and read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 20000;

    /** Upper bound on followed redirects, so a redirect cycle cannot loop forever. */
    private static final int MAX_REDIRECTS = 20;

    /** Size of the scratch buffers used when copying response bodies. */
    private static final int BUFFER_SIZE = 4096;

    private static final String BROWSER_USER_AGENT = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36";

    private static final String LEGACY_USER_AGENT = "Mozilla/4.0 (compatible; MSIE 7.0;)";

    private static final String ACCEPT_HEADER = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, */*";

    // ------------------------------------------------------------------
    // POST helpers (Commons HttpClient)
    // ------------------------------------------------------------------

    /**
     * POSTs to {@code url} without parameters or cookies and returns the body,
     * using utf-8 as the fallback response charset.
     *
     * @see #postRequestWithCookie(String, String, Map, String, boolean)
     */
    public static String postRequest(String url) throws ConnectTimeoutException, SocketTimeoutException, WebClientException {
        return postRequestWithCookie(url, DEFAULT_ENCODING, null, null);
    }

    /**
     * POSTs to {@code url} without parameters or cookies and returns the body.
     *
     * @see #postRequestWithCookie(String, String, Map, String, boolean)
     */
    public static String postRequest(String url, String encoding) throws ConnectTimeoutException, SocketTimeoutException, WebClientException {
        return postRequestWithCookie(url, encoding, null, null);
    }

    /**
     * POSTs to {@code url} with the given {@code Cookie} header and returns the
     * body, using utf-8 as the fallback response charset.
     *
     * @see #postRequestWithCookie(String, String, Map, String, boolean)
     */
    public static String postRequestWithCookie(String url, String cookieInfo) throws ConnectTimeoutException, SocketTimeoutException, WebClientException {
        return postRequestWithCookie(url, DEFAULT_ENCODING, null, cookieInfo);
    }

    /**
     * POSTs to {@code url}, following 302 redirects, and returns the body.
     *
     * @see #postRequestWithCookie(String, String, Map, String, boolean)
     */
    public static String postRequestWithCookie(String url, String encoding, Map<String, String> rawParams, String cookieInfo) throws WebClientException, ConnectTimeoutException, SocketTimeoutException {
        return postRequestWithCookie(url, encoding, rawParams, cookieInfo, true);
    }

    /**
     * POSTs the given form parameters to {@code url} and returns the response body.
     *
     * @param url             URL the request is sent to
     * @param encoding        charset used to decode the body when the response
     *                        does not declare one itself
     * @param rawParams       form parameters, or {@code null} for none
     * @param cookieInfo      value of the {@code Cookie} request header, or
     *                        {@code null} to send none
     * @param requestLocation when {@code true} a 302 response is followed (the
     *                        POST is repeated against the target); when
     *                        {@code false} the raw {@code Location} header value
     *                        is returned instead
     * @return the response body for a 200/201 response, the {@code Location}
     *         value as described above, or {@code null} for any other status
     * @throws WebClientException      on I/O failure, on a 302 without a
     *                                 {@code Location} header, or when the
     *                                 redirect limit is exceeded
     * @throws ConnectTimeoutException when connecting times out or the HTTP
     *                                 exchange fails at protocol level
     * @throws SocketTimeoutException  when reading the response times out
     */
    public static String postRequestWithCookie(String url, String encoding, Map<String, String> rawParams, String cookieInfo, boolean requestLocation) throws WebClientException, ConnectTimeoutException, SocketTimeoutException {
        String currentUrl = url;
        for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
            HttpClient httpClient = newTimedClient();
            PostMethod postMethod = newPostMethod(currentUrl, rawParams, cookieInfo);
            String location = null;
            try {
                int statusCode = httpClient.executeMethod(postMethod);
                if (isSuccess(statusCode)) {
                    // Only a fallback: a charset declared by the response's
                    // Content-Type header still takes precedence.
                    postMethod.getParams().setParameter(HttpMethodParams.HTTP_CONTENT_CHARSET, encoding);
                    return postMethod.getResponseBodyAsString();
                }
                if (statusCode != 302) {
                    return null;
                }
                location = locationOf(postMethod);
            } catch (SocketTimeoutException e) {
                throw e;
            } catch (ConnectTimeoutException e) {
                throw e;
            } catch (HttpException e) {
                throw withCause(new ConnectTimeoutException(currentUrl + ":连接异常"), e);
            } catch (IOException e) {
                throw withCause(new WebClientException(currentUrl + ":IO异常,请检查网络是否正常"), e);
            } finally {
                // Always released before the next hop is requested.
                postMethod.releaseConnection();
            }
            if (location == null) {
                throw new WebClientException(currentUrl + ":重定向响应缺少Location头");
            }
            if (!requestLocation) {
                return location;
            }
            currentUrl = absolutize(currentUrl, location);
        }
        throw new WebClientException(url + ":重定向次数过多");
    }

    /**
     * POSTs without sending any cookie and returns the cookies held by the
     * client afterwards.
     *
     * @see #getCookieByPostWithCookie(String, String, Map, String)
     */
    public static String getCookieByPostWithoutCookie(String url, String encoding, Map<String, String> rawParams) throws WebClientException, ConnectTimeoutException, SocketTimeoutException {
        return getCookieByPostWithCookie(url, encoding, rawParams, null);
    }

    /**
     * POSTs the given form parameters (following 302 redirects) and returns the
     * cookies held by the client state after a 200/201 response.
     *
     * @param url        URL the request is sent to
     * @param encoding   unused by this method; kept for signature compatibility
     * @param rawParams  form parameters, or {@code null} for none
     * @param cookieInfo value of the {@code Cookie} request header, or
     *                   {@code null} to send none
     * @return every cookie rendered with {@code toString()} and terminated by
     *         {@code ';'}; an empty string when there are none or when the
     *         final status is neither 200 nor 201
     * @throws WebClientException      on I/O failure, on a 302 without a
     *                                 {@code Location} header, or when the
     *                                 redirect limit is exceeded
     * @throws ConnectTimeoutException when connecting times out or the HTTP
     *                                 exchange fails at protocol level
     * @throws SocketTimeoutException  when reading the response times out
     */
    public static String getCookieByPostWithCookie(String url, String encoding, Map<String, String> rawParams, String cookieInfo) throws WebClientException, ConnectTimeoutException, SocketTimeoutException {
        String currentUrl = url;
        for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
            HttpClient httpClient = newTimedClient();
            PostMethod postMethod = newPostMethod(currentUrl, rawParams, cookieInfo);
            String location = null;
            try {
                int statusCode = httpClient.executeMethod(postMethod);
                if (isSuccess(statusCode)) {
                    return joinCookies(httpClient.getState().getCookies());
                }
                if (statusCode != 302) {
                    return "";
                }
                location = locationOf(postMethod);
            } catch (SocketTimeoutException e) {
                throw e;
            } catch (ConnectTimeoutException e) {
                throw e;
            } catch (HttpException e) {
                throw withCause(new ConnectTimeoutException(currentUrl + ":连接异常"), e);
            } catch (IOException e) {
                throw withCause(new WebClientException(currentUrl + ":IO异常,请检查网络是否正常"), e);
            } finally {
                postMethod.releaseConnection();
            }
            if (location == null) {
                throw new WebClientException(currentUrl + ":重定向响应缺少Location头");
            }
            currentUrl = absolutize(currentUrl, location);
        }
        throw new WebClientException(url + ":重定向次数过多");
    }

    /**
     * POSTs to {@code url} and returns the body as a stream, or {@code null}
     * when the request fails with a {@link WebClientException} (which is only
     * printed, because this signature cannot propagate it).
     *
     * @see #postRequestAsStreamWithCookie(String, String, Map, String)
     */
    public static InputStream postRequestAsStream(String url) throws ConnectTimeoutException, SocketTimeoutException {
        try {
            return postRequestAsStreamWithCookie(url, DEFAULT_ENCODING, null, null);
        } catch (WebClientException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Same as {@link #postRequestAsStream(String)}; {@code encoding} is passed
     * through to the underlying request.
     */
    public static InputStream postRequestAsStream(String url, String encoding) throws ConnectTimeoutException, SocketTimeoutException {
        try {
            return postRequestAsStreamWithCookie(url, encoding, null, null);
        } catch (WebClientException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Same as {@link #postRequestAsStream(String)} but sends the given
     * {@code Cookie} header.
     */
    public static InputStream postRequestAsStreamWithCookie(String url, String cookieInfo) throws ConnectTimeoutException, SocketTimeoutException {
        try {
            return postRequestAsStreamWithCookie(url, DEFAULT_ENCODING, null, cookieInfo);
        } catch (WebClientException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * POSTs the given form parameters (following 302 redirects) and returns the
     * response body as a stream.
     *
     * <p>The underlying connection stays open while the returned stream is in
     * use, so the caller must close the stream. In every other outcome
     * (redirect hop, non-2xx status, exception) the connection is released here.
     *
     * @param url        URL the request is sent to
     * @param encoding   unused by this method; kept for signature compatibility
     * @param rawParams  form parameters, or {@code null} for none
     * @param cookieInfo value of the {@code Cookie} request header, or
     *                   {@code null} to send none
     * @return the body stream for a 200/201 response, otherwise {@code null}
     * @throws WebClientException      on I/O failure, on a 302 without a
     *                                 {@code Location} header, or when the
     *                                 redirect limit is exceeded
     * @throws ConnectTimeoutException when connecting times out or the HTTP
     *                                 exchange fails at protocol level
     * @throws SocketTimeoutException  when reading the response times out
     */
    public static InputStream postRequestAsStreamWithCookie(String url, String encoding, Map<String, String> rawParams, String cookieInfo) throws WebClientException, ConnectTimeoutException, SocketTimeoutException {
        String currentUrl = url;
        for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
            HttpClient httpClient = newTimedClient();
            PostMethod postMethod = newPostMethod(currentUrl, rawParams, cookieInfo);
            String location = null;
            boolean streamHandedToCaller = false;
            try {
                int statusCode = httpClient.executeMethod(postMethod);
                if (isSuccess(statusCode)) {
                    InputStream body = postMethod.getResponseBodyAsStream();
                    streamHandedToCaller = body != null;
                    return body;
                }
                if (statusCode != 302) {
                    return null;
                }
                location = locationOf(postMethod);
            } catch (SocketTimeoutException e) {
                throw e;
            } catch (ConnectTimeoutException e) {
                throw e;
            } catch (HttpException e) {
                throw withCause(new ConnectTimeoutException(currentUrl + ":连接异常"), e);
            } catch (IOException e) {
                throw withCause(new WebClientException(currentUrl + ":IO异常,请检查网络是否正常"), e);
            } finally {
                // Releasing would invalidate a stream the caller is about to read.
                if (!streamHandedToCaller) {
                    postMethod.releaseConnection();
                }
            }
            if (location == null) {
                throw new WebClientException(currentUrl + ":重定向响应缺少Location头");
            }
            currentUrl = absolutize(currentUrl, location);
        }
        throw new WebClientException(url + ":重定向次数过多");
    }

    // ------------------------------------------------------------------
    // GET helpers (HttpURLConnection)
    // ------------------------------------------------------------------

    /**
     * GETs {@code urlString} without a cookie and returns the body stream.
     *
     * @see #getStreamUseJavaWithCookie(String, String)
     */
    public static InputStream getStreamUseJava(final String urlString)
            throws IOException, SocketTimeoutException, ConnectTimeoutException, KeyManagementException, NoSuchAlgorithmException {
        return getStreamUseJavaWithCookie(urlString, null);
    }

    /**
     * GETs {@code urlString} (following redirects) and returns the response
     * body as a stream.
     *
     * @param urlString target URL; when it starts with neither {@code http://}
     *                  nor {@code https://} it is prefixed with {@code http:}
     * @param cookie    value of the {@code Cookie} request header, or
     *                  {@code null} to send none
     * @return the body stream for a 200/201 response; {@code null} for any
     *         other final status, in which case the error body (if any) is
     *         printed to standard output
     * @throws IOException on I/O failure (including timeouts), on a redirect
     *                     without a {@code Location} header, or when the
     *                     redirect limit is exceeded
     */
    public static InputStream getStreamUseJavaWithCookie(final String urlString, final String cookie)
            throws IOException, SocketTimeoutException, ConnectTimeoutException, KeyManagementException, NoSuchAlgorithmException {
        String currentUrl = withScheme(urlString);
        for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
            HttpURLConnection connection = openConnection(currentUrl);
            configureGet(connection, cookie, BROWSER_USER_AGENT);
            int responseCode = connection.getResponseCode();
            if (isRedirect(responseCode)) {
                currentUrl = nextUrl(connection, currentUrl);
                continue;
            }
            if (isSuccess(responseCode)) {
                return connection.getInputStream();
            }
            // The error stream is null when the server sent no error body.
            InputStream errorStream = connection.getErrorStream();
            if (errorStream != null) {
                System.out.println(new String(drain(errorStream)));
            }
            return null;
        }
        throw new IOException(urlString + ":重定向次数过多");
    }

    /**
     * GETs {@code urlString} without a cookie and returns the body as text.
     *
     * @see #getRequestUseJavaWithCookie(String, String, String)
     */
    public static String getRequestUseJava(final String urlString,
            final String encoding)
            throws IOException, KeyManagementException, NoSuchAlgorithmException {
        return getRequestUseJavaWithCookie(urlString, encoding, null);
    }

    /**
     * GETs {@code urlString} (following redirects) using the JDK API and
     * returns the response body as text. For a non-2xx final status the error
     * body is returned instead.
     *
     * @param urlString target URL; when it starts with neither {@code http://}
     *                  nor {@code https://} it is prefixed with {@code http:}
     * @param encoding  charset used to decode the body, or {@code null} to use
     *                  the platform default
     * @param cookie    value of the {@code Cookie} request header, or
     *                  {@code null} to send none
     * @return the decoded body, or {@code null} when the request timed out or
     *         the server sent no body for an error status
     * @throws IOException on I/O failure other than a timeout, on an
     *                     unsupported {@code encoding}, on a redirect without a
     *                     {@code Location} header, or when the redirect limit
     *                     is exceeded
     */
    public static String getRequestUseJavaWithCookie(final String urlString,
            final String encoding, String cookie)
            throws IOException, KeyManagementException, NoSuchAlgorithmException {
        String currentUrl = withScheme(urlString);
        try {
            for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
                HttpURLConnection connection = openConnection(currentUrl);
                configureGet(connection, cookie, LEGACY_USER_AGENT);
                int responseCode = connection.getResponseCode();
                if (isRedirect(responseCode)) {
                    currentUrl = nextUrl(connection, currentUrl);
                    continue;
                }
                InputStream body = isSuccess(responseCode)
                        ? connection.getInputStream()
                        : connection.getErrorStream();
                if (body == null) {
                    return null;
                }
                if (encoding != null) {
                    return readText(body, encoding);
                }
                return new String(drain(body));
            }
            throw new IOException(urlString + ":重定向次数过多");
        } catch (SocketTimeoutException e) {
            // A timeout is deliberately not an error here: give up and report "no content".
            return null;
        }
    }

    /**
     * GETs {@code urlString} (following redirects) using the JDK API and
     * returns the {@code Set-Cookie} response header.
     *
     * @param urlString target URL; when it starts with neither {@code http://}
     *                  nor {@code https://} it is prefixed with {@code http:}
     * @param encoding  unused by this method; kept for signature compatibility
     * @return for a 200/201 response the value {@link HttpURLConnection}
     *         reports for {@code set-cookie} ({@code null} when the header is
     *         absent); an empty string for any other status or on timeout
     * @throws IOException on I/O failure other than a timeout, on a redirect
     *                     without a {@code Location} header, or when the
     *                     redirect limit is exceeded
     */
    public static String getCookieUseJava(final String urlString,
            final String encoding)
            throws IOException {
        URL url = new URL(withScheme(urlString));
        try {
            for (int hop = 0; hop <= MAX_REDIRECTS; hop++) {
                HttpURLConnection connection;
                if (Proxy.getNetProxy() != null) {
                    connection = (HttpURLConnection) url.openConnection(Proxy.getNetProxy());
                    addProxyAuthorization(connection);
                } else {
                    connection = (HttpURLConnection) url.openConnection();
                }
                configureGet(connection, null, LEGACY_USER_AGENT);
                int responseCode = connection.getResponseCode();
                if (isRedirect(responseCode)) {
                    String location = connection.getHeaderField("Location");
                    if (location == null || location.length() == 0) {
                        throw new IOException(url + ":重定向响应缺少Location头");
                    }
                    // Resolving against the current URL also accepts relative targets.
                    url = new URL(url, location);
                    continue;
                }
                if (isSuccess(responseCode)) {
                    return connection.getHeaderField("set-cookie");
                }
                return "";
            }
            throw new IOException(urlString + ":重定向次数过多");
        } catch (SocketTimeoutException e) {
            // A timeout is deliberately not an error here: report "no cookie".
            return "";
        }
    }

    // ------------------------------------------------------------------
    // Stream / URL utilities
    // ------------------------------------------------------------------

    /**
     * Reads {@code in} to its end, discards the first {@code start} bytes and
     * decodes the remainder as utf-8. The stream is always closed.
     *
     * <p>Read errors are printed and the content read so far is returned.
     *
     * @param in    stream to consume
     * @param start number of leading bytes to skip; {@code null} or a negative
     *              value skips nothing
     * @return the decoded text after the skipped prefix (empty when the stream
     *         holds no more than {@code start} bytes)
     */
    public static String read(final InputStream in, final Integer start) {
        long remainingToSkip = (start == null || start.intValue() < 0) ? 0 : start.intValue();
        byte[] buffer = new byte[BUFFER_SIZE];
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            int count;
            while ((count = in.read(buffer)) != -1) {
                if (remainingToSkip >= count) {
                    remainingToSkip -= count;
                    continue;
                }
                // Skipping is done by hand because InputStream.skip may skip
                // fewer bytes than requested on network streams.
                int from = (int) remainingToSkip;
                out.write(buffer, from, count - from);
                remainingToSkip = 0;
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        try {
            return new String(out.toByteArray(), "utf-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Appends {@code key=value&} for every entry of {@code params} to
     * {@code url} and removes one trailing {@code '&'} or {@code '?'}.
     *
     * <p>The caller supplies the separator: {@code url} is expected to already
     * end with {@code '?'} (or {@code '&'}). Keys and values are appended
     * verbatim, without URL encoding.
     *
     * @param url    base URL
     * @param params query parameters, or {@code null} to return {@code url}
     *               unchanged
     * @return the assembled URL
     */
    public static String getUrlStr(String url, Map<String, ?> params) {
        if (params == null) {
            return url;
        }
        StringBuilder builder = new StringBuilder().append(url);
        for (Entry<String, ?> entry : params.entrySet()) {
            builder.append(entry.getKey()).append('=').append(entry.getValue()).append('&');
        }
        // Trim only a dangling separator; never a real character of the URL.
        int last = builder.length() - 1;
        if (last >= 0 && (builder.charAt(last) == '&' || builder.charAt(last) == '?')) {
            builder.setLength(last);
        }
        return builder.toString();
    }

    // ------------------------------------------------------------------
    // Private helpers
    // ------------------------------------------------------------------

    /** Obtains the shared-configuration client and applies the connect/read timeouts. */
    private static HttpClient newTimedClient() {
        HttpClient httpClient = Proxy.getHttpClient();
        httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(TIMEOUT_MILLIS);
        httpClient.getHttpConnectionManager().getParams().setSoTimeout(TIMEOUT_MILLIS);
        return httpClient;
    }

    /** Builds a non-redirecting POST carrying the optional form parameters and Cookie header. */
    private static PostMethod newPostMethod(String url, Map<String, String> rawParams, String cookieInfo) {
        PostMethod postMethod = new PostMethod(url);
        postMethod.setDoAuthentication(true);
        postMethod.setFollowRedirects(false);
        if (rawParams != null) {
            List<NameValuePair> params = new ArrayList<NameValuePair>(rawParams.size());
            for (Entry<String, String> entry : rawParams.entrySet()) {
                params.add(new NameValuePair(entry.getKey(), entry.getValue()));
            }
            postMethod.setRequestBody(params.toArray(new NameValuePair[params.size()]));
        }
        if (cookieInfo != null) {
            postMethod.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
            postMethod.setRequestHeader("Cookie", cookieInfo);
        }
        return postMethod;
    }

    /** Returns the non-empty Location header value of the response, or {@code null}. */
    private static String locationOf(PostMethod postMethod) {
        // Header extends NameValuePair, which is the type this file already imports.
        NameValuePair header = postMethod.getResponseHeader("Location");
        String location = header == null ? null : header.getValue();
        return (location == null || location.length() == 0) ? null : location;
    }

    /** Joins the cookies as {@code toString()} values each terminated by ';'. */
    private static String joinCookies(Cookie[] cookies) {
        if (cookies.length == 0) {
            System.out.println("None");
            return "";
        }
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < cookies.length; i++) {
            joined.append(cookies[i].toString()).append(';');
        }
        return joined.toString();
    }

    /** Attaches {@code cause} to {@code failure} unless the exception already fixed its cause. */
    private static <T extends Throwable> T withCause(T failure, Throwable cause) {
        try {
            failure.initCause(cause);
        } catch (IllegalStateException ignored) {
            // The exception type set its cause in the constructor; keep it as is.
        }
        return failure;
    }

    /** Prefixes {@code http:} when the URL carries no http/https scheme. */
    private static String withScheme(String urlString) {
        if (urlString.startsWith("http://") || urlString.startsWith("https://")) {
            return urlString;
        }
        return "http:" + urlString;
    }

    /** Resolves a possibly relative redirect target against the URL that produced it. */
    private static String absolutize(String baseUrl, String location) {
        if (location.startsWith("http://") || location.startsWith("https://")) {
            return location;
        }
        try {
            return new URL(new URL(baseUrl), location).toString();
        } catch (IOException e) {
            // Not resolvable (malformed base or target): fall back to the raw value.
            return location;
        }
    }

    /** Returns the absolute redirect target of {@code connection}; fails when Location is missing. */
    private static String nextUrl(HttpURLConnection connection, String requestUrl) throws IOException {
        String location = connection.getHeaderField("Location");
        if (location == null || location.length() == 0) {
            throw new IOException(requestUrl + ":重定向响应缺少Location头");
        }
        return absolutize(requestUrl, location);
    }

    private static boolean isSuccess(int statusCode) {
        return statusCode == 200 || statusCode == 201;
    }

    private static boolean isRedirect(int statusCode) {
        return statusCode == 301 || statusCode == 302 || statusCode == 303
                || statusCode == 307 || statusCode == 308;
    }

    /** Opens a connection through HttpsUtils, going through the configured proxy when there is one. */
    private static HttpURLConnection openConnection(String url)
            throws IOException, KeyManagementException, NoSuchAlgorithmException {
        HttpURLConnection connection;
        if (Proxy.getNetProxy() != null) {
            connection = HttpsUtils.getConnection(url, Proxy.getNetProxy());
            addProxyAuthorization(connection);
        } else {
            connection = HttpsUtils.getConnection(url, null);
        }
        return connection;
    }

    /** Adds a Basic Proxy-Authorization header when proxy credentials are configured. */
    private static void addProxyAuthorization(HttpURLConnection connection) throws UnsupportedEncodingException {
        if (Proxy.username != null && !"".equals(Proxy.username) && Proxy.pwd != null && !"".equals(Proxy.pwd)) {
            byte[] credentials = (Proxy.username + ":" + Proxy.pwd).getBytes("UTF-8");
            // encodeBase64 returns bytes; they must be turned into text before
            // concatenation, otherwise the header carries the array's identity string.
            String token = new String(Base64.encodeBase64(credentials), "US-ASCII");
            connection.setRequestProperty("Proxy-Authorization", "Basic " + token);
        }
    }

    /** Applies the common GET settings: no auto-redirect, no cache, timeouts and browser-like headers. */
    private static void configureGet(HttpURLConnection connection, String cookie, String userAgent) throws IOException {
        connection.setDoInput(true);
        connection.setUseCaches(false);
        // Redirects are followed manually so that each hop gets the same headers.
        connection.setInstanceFollowRedirects(false);
        connection.setRequestMethod("GET");
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        connection.setReadTimeout(TIMEOUT_MILLIS);
        connection.setRequestProperty("User-Agent", userAgent);
        connection.setRequestProperty("Accept", ACCEPT_HEADER);
        if (cookie != null) {
            connection.setRequestProperty("Cookie", cookie);
        }
    }

    /** Reads {@code in} to its end and closes it. */
    private static byte[] drain(InputStream in) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
        } finally {
            in.close();
        }
        return out.toByteArray();
    }

    /**
     * Decodes {@code in} with {@code encoding} and closes it. Read errors are
     * printed and the text decoded so far is returned (best effort).
     *
     * @throws UnsupportedEncodingException when {@code encoding} is not supported
     */
    private static String readText(InputStream in, String encoding) throws UnsupportedEncodingException {
        InputStreamReader reader;
        try {
            reader = new InputStreamReader(in, encoding);
        } catch (UnsupportedEncodingException e) {
            try {
                in.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
            throw e;
        }
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[BUFFER_SIZE];
        try {
            int count;
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return text.toString();
    }
}