package com.jzh.news.control;
import java.io.BufferedReader;
import java.io.IOException;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.jzh.news.dao.News_contentDaoImpl;
import com.jzh.news.entity.News_content;
/**
 * Command-line scraper: loads a blog index page, follows the link of every
 * {@code list_item} entry on it, extracts the article text and image URLs and
 * stores one {@link News_content} row per article through
 * {@link News_contentDaoImpl#save}.
 *
 * <p>NOTE(review): several string literals below contain non-ASCII text that
 * looks mis-encoded (probably a charset mismatch when the file was saved).
 * They are left untouched because the intended text cannot be recovered from
 * this file alone.
 */
public class saveNews {

    /** Browser-like User-Agent header sent with every request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.4; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2";

    /** Prefix prepended to the {@code href} values found on the index page. */
    private static final String BLOG_HOST = "http://blog.csdn.net";

    /** Entry point; runs a single scrape pass. */
    public static void main(String[] args) {
        runTask();
    }

    // Not used inside this class; kept because they are package-visible and
    // other classes in the package may reference them.
    static URL url;
    static BufferedReader br;

    /** Most recently fetched index page, or {@code null} if no fetch has succeeded yet. */
    static Document doc = null;

    /**
     * Scrapes every configured index page, then prints the completion time to
     * standard output.
     */
    public static void runTask() {
        String[] geturls = { "http://blog.csdn.net/lmj623565791?viewmode=contents" };
        for (String indexUrl : geturls) {
            getweather(indexUrl);
        }
        // "HH" is the 24-hour clock. The previous "hh" pattern printed a
        // 12-hour value with no AM/PM marker, which is ambiguous.
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        System.out.println("time end:" + sdf.format(new Date()));
    }

    /**
     * Loads the index page at {@code urlstring} and saves every article it
     * lists, walking the {@code list_item} entries from last to first.
     *
     * <p>If the index page cannot be fetched the error is reported and the
     * method returns without processing anything. The method name appears to
     * be a leftover from an earlier purpose; it is kept for callers.
     *
     * @param urlstring absolute URL of the index page
     */
    public static void getweather(String urlstring) {
        Document listing;
        try {
            listing = fetch(urlstring);
        } catch (IOException e) {
            // Without a page there is nothing to iterate. Falling through
            // would dereference a null (or stale) document.
            System.err.println("Could not load index page " + urlstring);
            e.printStackTrace();
            return;
        }
        doc = listing;

        Elements items = listing.getElementsByClass("list_item");
        for (int i = items.size() - 1; i >= 0; i--) {
            saveArticle(items.get(i));
        }
    }

    /** Fetches and parses {@code pageUrl} using the shared User-Agent header. */
    private static Document fetch(String pageUrl) throws IOException {
        return Jsoup.connect(pageUrl).header("User-Agent", USER_AGENT).get();
    }

    /**
     * Downloads the article linked from one index entry and stores it.
     * Entries that lack a title link, or whose page has no
     * {@code article_content} element, are reported and skipped so that one
     * malformed entry does not abort the whole run.
     */
    private static void saveArticle(Element item) {
        Elements titleNodes = item.getElementsByClass("link_title");
        if (titleNodes.isEmpty()) {
            System.err.println("Skipping list item without a link_title element");
            return;
        }
        String title = titleNodes.text();
        String date = item.getElementsByClass("link_postdate").text();
        String href = titleNodes.get(0).getElementsByTag("a").attr("href");

        System.out
                .println("----------------------------------��ʼ----------------------------------");
        System.out.println(title + "\t" + date + "\t" + href);

        String articleUrl = BLOG_HOST + href;
        try {
            Document article = fetch(articleUrl);
            Element body = article.getElementById("article_content");
            if (body == null) {
                System.err.println("No article_content element in " + articleUrl + "; skipping");
                return;
            }

            List<String> chunks = new ArrayList<String>();
            List<String> imageUrls = new ArrayList<String>();
            collectContent(body, chunks, imageUrls);
            // The stored content is the List#toString() rendering of the chunks.
            String content = chunks.toString();

            News_contentDaoImpl cdi = new News_contentDaoImpl();
            News_content news = new News_content();
            news.setCcontent(content);
            if (imageUrls.size() > 0) {
                // The first image of the article is stored as the entry's image.
                news.setCimage(imageUrls.get(0));
            }
            news.setCtime(date);
            news.setCtype("android����");
            news.setCtitle(title);
            news.setCauthor("hongyang");
            news.setCzhaiyao(content);
            news.setCpinglun("0");
            if (cdi.save(news)) {
                System.out.println("����ɹ���");
            }
            System.out
                    .println("----------------------------------����----------------------------------");
        } catch (IOException e) {
            // A single unreachable article must not stop the remaining ones.
            System.err.println("Could not load article " + articleUrl);
            e.printStackTrace();
        }
    }

    /**
     * Splits the article body into text chunks and collects its image URLs.
     * A chunk is closed after every image, so each image marker
     * ({@code ;;src;;}) ends the chunk it belongs to. The final chunk carries
     * the trailing label plus the combined text of all {@code span} elements.
     *
     * @param body      the {@code article_content} element
     * @param chunks    receives the text chunks in document order
     * @param imageUrls receives the {@code src} of every image in document order
     */
    private static void collectContent(Element body, List<String> chunks, List<String> imageUrls) {
        Elements paragraphs = body.getElementsByTag("p");
        StringBuilder sb = new StringBuilder();
        // NOTE(review): iteration starts at index 1, so the first <p> is
        // skipped - presumably intentional; confirm.
        for (int j = 1; j < paragraphs.size(); j++) {
            Element paragraph = paragraphs.get(j);
            sb.append(paragraph.text()).append("\n\n");
            for (Element image : paragraph.getElementsByTag("img")) {
                // Read src from this image. Elements#attr on the collection
                // returns the first match only, which repeated the first
                // image's URL for every image in the paragraph.
                String src = image.attr("src");
                sb.append(";;").append(src).append(";;").append("\n\n");
                chunks.add(sb.toString());
                imageUrls.add(src);
                sb = new StringBuilder();
            }
        }
        sb.append("������Ϣ��\n\n");
        sb.append(body.getElementsByTag("span").text()).append("\n\n");
        chunks.add(sb.toString());
    }
}