package org.jcommons.net;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Map;
import org.jsoup.Connection;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Static helpers that download a page with jsoup and parse the body into a
 * {@link Document}.
 *
 * <p>All overloads share the same policy, implemented in
 * {@code executeWithRetry}:
 * <ul>
 *   <li>HTTP 200: the body is parsed and returned.</li>
 *   <li>HTTP 404: treated as a permanent failure; {@code null} is returned
 *       immediately without retrying.</li>
 *   <li>Any other status, or an {@link IOException} (timeout, connection
 *       failure, ...): counts as one failed attempt and the request is retried
 *       until the retry budget is used up, after which {@code null} is
 *       returned.</li>
 * </ul>
 * Retries are issued back to back with no delay between them.
 */
public class JsoupUtils {

    /** Timeout passed to {@code Connection.timeout} for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;

    /** Retry budget used by the overloads that do not take an explicit {@code times}. */
    private static final int DEFAULT_RETRIES = 1000;

    /** Retry budget used by the overload that reports the final URL. */
    private static final int REAL_URL_RETRIES = 100;

    /** User-Agent header sent by {@link #fetch(String, boolean)}. */
    private static final String FIREFOX_USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0";

    /**
     * Downloads {@code url} and reports the URL the response was finally served from.
     *
     * @param url     address to download
     * @param force   has no effect on the result; retained so existing callers keep compiling
     * @param realUrl receives (via {@code append}) the URL of the successful response;
     *                left untouched on failure; may be {@code null} if the caller
     *                does not need the URL
     * @return the parsed document, or {@code null} if no HTTP 200 response was obtained
     */
    public static Document fetch(String url, boolean force, StringBuffer realUrl) {
        Response res = executeWithRetry(url, null, null, REAL_URL_RETRIES);
        if (res == null) {
            return null;
        }
        if (realUrl != null) {
            realUrl.append(res.url().toString());
        }
        return Jsoup.parse(res.body());
    }

    /**
     * Downloads {@code url} while sending a desktop Firefox User-Agent header.
     *
     * @param url   address to download
     * @param force has no effect on the result; retained so existing callers keep compiling
     * @return the parsed document, or {@code null} if no HTTP 200 response was obtained
     */
    public static Document fetch(String url, boolean force) {
        Response res = executeWithRetry(url, null, FIREFOX_USER_AGENT, DEFAULT_RETRIES);
        return res == null ? null : Jsoup.parse(res.body());
    }

    /**
     * Downloads {@code url} while sending the given cookies.
     *
     * @param url     address to download
     * @param cookies cookies to attach to the request; {@code null} sends none
     * @param force   has no effect on the result; retained so existing callers keep compiling
     * @return the parsed document, or {@code null} if no HTTP 200 response was obtained
     */
    public static Document fetch(String url, Map<String, String> cookies, boolean force) {
        Response res = executeWithRetry(url, cookies, null, DEFAULT_RETRIES);
        return res == null ? null : Jsoup.parse(res.body());
    }

    /**
     * Downloads {@code url} with a caller-supplied retry budget.
     *
     * @param url   address to download
     * @param times number of retries allowed after the first attempt; values
     *              below zero are treated as zero, so one attempt is always made
     * @param force has no effect on the result; retained so existing callers keep compiling
     * @return the parsed document, or {@code null} if no HTTP 200 response was obtained
     */
    public static Document fetch(String url, int times, boolean force) {
        Response res = executeWithRetry(url, null, null, times);
        return res == null ? null : Jsoup.parse(res.body());
    }

    /**
     * Executes a GET for {@code url}, retrying failed attempts.
     *
     * @param url       address to download
     * @param cookies   cookies to send, or {@code null} for none
     * @param userAgent value for the User-Agent header, or {@code null} to leave it unset here
     * @param retries   retries allowed after the first attempt (negative counts as zero)
     * @return the HTTP 200 response, or {@code null} on HTTP 404 or once the
     *         retry budget is exhausted
     */
    private static Response executeWithRetry(String url, Map<String, String> cookies,
            String userAgent, int retries) {
        // long arithmetic so that retries == Integer.MAX_VALUE cannot overflow.
        long maxAttempts = Math.max(retries, 0) + 1L;
        for (long attempt = 0; attempt < maxAttempts; attempt++) {
            try {
                // A fresh connection per attempt. ignoreHttpErrors(true) makes jsoup hand
                // back 4xx/5xx responses instead of throwing, so the status can be
                // inspected and a 404 can stop the loop instead of being retried.
                Connection connection = Jsoup.connect(url)
                        .timeout(TIMEOUT_MILLIS)
                        .ignoreHttpErrors(true);
                if (userAgent != null) {
                    connection.header("User-Agent", userAgent);
                }
                if (cookies != null) {
                    connection.cookies(cookies);
                }
                Response res = connection.execute();
                int status = res.statusCode();
                if (status == 200) {
                    return res;
                }
                if (status == 404) {
                    // The page does not exist; retrying cannot help.
                    return null;
                }
                // Any other status falls through and consumes one attempt.
            } catch (IOException ignored) {
                // Deliberately swallowed: the public contract is "null on failure",
                // and an I/O error simply consumes one attempt before the next try.
            }
        }
        return null;
    }
}