package com.roboo.like.google.news.list.utils;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedList;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.roboo.like.google.models.NewsItem;
import com.roboo.like.google.utils.MD5Utils;
/**
 * Scrapes a CSDN news listing page and converts each entry into a {@link NewsItem}.
 *
 * <p>Every element carrying the CSS class {@code unit} on the fetched page is treated as one
 * news entry. Title, time label, link, thumbnail and summary are read from the first
 * {@code h1}, {@code .ago}, {@code a}, {@code img} and {@code dd} descendants respectively.
 */
public class CSDNNewsListUtils extends BaseNewsListUtils
{
    /** Value stored as the source of every item produced by this class. */
    private static final String CSDN_SOURCE = "CSDN";

    /** Marker found in relative time labels ("N hours ago"). */
    private static final String HOURS_AGO_MARKER = "小时前";

    /**
     * Downloads the listing page at {@code csdnUrl + pageNo} and parses its entries.
     *
     * <p>Fields are collected per entry: when an entry lacks one of the expected elements the
     * corresponding field of its {@link NewsItem} is {@code null} rather than a value left over
     * from the previous entry.
     *
     * <p>The md5 stored on each item is derived from the listing page URL, so all items of one
     * page share it. NOTE(review): confirm this is the intended key and not the article URL.
     *
     * @param csdnUrl listing URL prefix; the page number is appended to it verbatim
     * @param pageNo  page number to fetch
     * @return the parsed items in page order, or {@code null} when the page contains no
     *         element with class {@code unit}
     * @throws IOException if the page cannot be fetched
     */
    public static LinkedList<NewsItem> getCSDNNewsList(String csdnUrl, int pageNo) throws IOException
    {
        // Example: http://mobile.csdn.net/mobile/2 (mobile development channel)
        String url = csdnUrl + pageNo;
        Document document = Jsoup.connect(url).get();
        Elements units = document.getElementsByClass("unit");
        String md5 = MD5Utils.generate(url);
        if (units.isEmpty())
        {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }

        // Formatted once per call; SimpleDateFormat is not thread-safe, so it stays local.
        String today = new SimpleDateFormat("MM月dd日").format(new Date(System.currentTimeMillis()));

        LinkedList<NewsItem> data = new LinkedList<NewsItem>();
        for (Element unit : units)
        {
            // Declared inside the loop so nothing leaks from one entry into the next.
            String title = null;
            String time = null;
            String newsUrl = null;
            String src = null;
            String subTitle = null;

            Element heading = firstOrNull(unit.getElementsByTag("h1"));
            if (heading != null)
            {
                title = heading.text();
                System.out.println("title = " + title);
            }

            Element ago = firstOrNull(unit.getElementsByClass("ago"));
            if (ago != null)
            {
                String rawTime = ago.text();
                System.out.println("time = " + rawTime);
                time = toDisplayTime(rawTime, today);
            }

            Element link = firstOrNull(unit.getElementsByTag("a"));
            if (link != null)
            {
                newsUrl = link.attr("href");
            }

            Element image = firstOrNull(unit.getElementsByTag("img"));
            if (image != null)
            {
                src = image.attr("src");
                System.out.println("img = " + src);
            }

            Element summary = firstOrNull(unit.getElementsByTag("dd"));
            if (summary != null)
            {
                subTitle = summary.text();
                System.out.println("subTitle = " + subTitle);
                System.out.println("\n");
            }

            NewsItem item = new NewsItem();
            item.setSrc(src);
            item.setTime(time);
            item.setMd5(md5);
            item.setUrl(newsUrl);
            item.setTitle(title);
            item.setSource(CSDN_SOURCE);
            item.setSubTitle(subTitle);
            data.add(item);
        }
        return data;
    }

    /** Returns the first element of {@code found}, or {@code null} when it is empty. */
    private static Element firstOrNull(Elements found)
    {
        return found.isEmpty() ? null : found.get(0);
    }

    /**
     * Converts the raw time label of an entry into the label stored on the item.
     *
     * <ul>
     * <li>A label containing the "hours ago" marker becomes {@code today}.</li>
     * <li>A label of the form {@code "a-b-c <rest>"} becomes {@code "b月c日"}.</li>
     * <li>Any other label is returned unchanged.</li>
     * </ul>
     *
     * @param rawTime text of the entry's {@code .ago} element
     * @param today   today's date already formatted as {@code MM月dd日}
     * @return the label to store on the item
     */
    private static String toDisplayTime(String rawTime, String today)
    {
        if (rawTime.contains(HOURS_AGO_MARKER))
        {
            return today;
        }
        int space = rawTime.indexOf(' ');
        if (space < 0)
        {
            return rawTime;
        }
        // Text before the first space; substring cannot fail on whitespace-only input the
        // way split(" ")[0] can.
        String date = rawTime.substring(0, space);
        String[] parts = date.split("-");
        String result = rawTime;
        if (parts.length > 2)
        {
            result = parts[1] + "月" + parts[2] + "日";
        }
        System.out.println("date = " + date);
        return result;
    }

    /**
     * Instance entry point; delegates to {@link #getCSDNNewsList(String, int)}.
     *
     * @param baseUrl listing URL prefix; the page number is appended to it verbatim
     * @param pageNo  page number to fetch
     * @return the parsed items, or {@code null} when the page contains no entries
     * @throws Exception if the page cannot be fetched
     */
    @Override
    public LinkedList<NewsItem> getNewsList(String baseUrl, int pageNo) throws Exception
    {
        return getCSDNNewsList(baseUrl, pageNo);
    }
}