package reptile.bcgm;
import java.io.File;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.FileUtils;
import utils.HttpUtils;
public class BCGMUtils {
public static String DOMAIN_URL = "http://www.a-hospital.com/w/";
public static String MAIN_URL = DOMAIN_URL
+ "%E6%9C%AC%E8%8D%89%E7%BA%B2%E7%9B%AE#.E8.8D.89.E9.83.A8";
public static String IMG_URL = DOMAIN_URL
+ "%E4%B8%AD%E8%8D%AF%E5%9B%BE%E5%85%B8";
public static List<CaoMain> getMainUrl() {
// String response = HttpUtils.getString(mainUrl);
String response = FileUtils.readToString(
new File("temp" + File.separator + "reptile" + File.separator
+ "bcgm_main.txt"), "UTF-8");
String type = null;
List<CaoMain> caos = new ArrayList<CaoMain>();
Document parse = Jsoup.parse(response);
Elements allElements = parse.getAllElements();
for (int i = 0; i < allElements.size(); i++) {
Element element = allElements.get(i);
// <span class="mw-headline"
// id=".E8.8D.89.E9.83.A8">草部</span>
String attrClass = element.attr("class");
if ("mw-headline".equals(attrClass + "")) {
type = element.text();
continue;
}
// <a
// href="/w/%E6%9C%AC%E8%8D%89%E7%BA%B2%E7%9B%AE/%E7%94%98%E8%8D%89"
// title="本草纲目/甘草">甘草</a>
String title = element.attr("title");
String href = element.attr("href");
if (type != null && href != null && title != null
&& title.startsWith("本草纲目/")) {
CaoMain cao = new CaoMain();
cao.setType(type);
cao.setName(element.text());
cao.setHref(DOMAIN_URL + href);
caos.add(cao);
}
}
return caos;
}
public static List<CaoImg> getImgData(String url) throws Exception {
String response = HttpUtils.getString(url);
Document parse = Jsoup.parse(response);
Elements allElements = parse.getAllElements();
List<CaoImg> caoImgs = new ArrayList<CaoImg>();
for (int i = 0; i < allElements.size(); i++) {
Element element = allElements.get(i);
// <table class="wikitable"
// style="width: 22em; position: absolute; top: 0px; left: 0px;">
String nodeName = element.nodeName();
String attrClass = element.attr("class");
if (nodeName.equals("table") && "wikitable".equals(attrClass + "")) {
String title = element.getElementsByAttribute("title").get(0)
.attr("title");
Elements imgElement = element.getElementsByTag("img");
String src = imgElement.attr("src");
Elements styleElements = element
.getElementsByAttributeValueContaining("style",
"font-size");
String otherName = null;
String intro = null;
if (styleElements.size() == 1) {
intro = styleElements.get(0).text();
} else {
otherName = styleElements.get(0).text();
intro = styleElements.get(1).text();
}
CaoImg caoImg = new CaoImg();
caoImg.setName(title);
caoImg.setImg(src);
caoImg.setOtherName(otherName);
caoImg.setIntro(intro);
caoImgs.add(caoImg);
}
}
return caoImgs;
}
public static String getDetailData(String caoName) throws Exception {
String detail = null;
String response = null;
response = HttpUtils.getString(DOMAIN_URL
+ URLEncoder.encode(caoName, "UTF-8"));
// String response = FileUtils.readToString(
// new File("temp" + File.separator + "reptile" + File.separator
// + "bcgm_detail.txt"), "UTF-8");
Document parse = Jsoup.parse(response);
StringBuilder sb = new StringBuilder();
Elements pes = parse.getElementsByTag("p");
for (Element e : pes) {
String text = e.text();
if (text.startsWith("--")) {
break;
}
sb.append(e.text() + "<br/>");
}
detail = sb.toString().trim();
return detail;
}
}