package edu.fudan.weixin.crawler.actions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import net.sf.ehcache.Cache;
import net.sf.ehcache.CacheManager;
import net.sf.ehcache.Element;
import org.apache.struts2.ServletActionContext;
import org.apache.struts2.convention.annotation.Action;
import org.apache.struts2.convention.annotation.Namespace;
import org.apache.struts2.convention.annotation.ParentPackage;
import org.apache.struts2.convention.annotation.Result;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import edu.fudan.eservice.common.utils.CommonUtil;
import edu.fudan.eservice.common.utils.EncodeHelper;
import edu.fudan.eservice.common.utils.MongoUtil;
import edu.fudan.weixin.entity.News;
/**
 * Crawler actions for the campus event calendar published under {@link #RD}.
 *
 * <p>Two actions are exposed under the {@code /crawler} namespace:
 * <ul>
 * <li>{@code eventlist} &ndash; fetches the recent-events page, cuts out the
 * fragment between the two {@code class="clear"} marker nodes, rewrites the
 * links in it and writes the resulting HTML straight to the response;</li>
 * <li>{@code eventcontent} &ndash; fetches a single event, fills the inherited
 * {@code en} bean and lets the {@code json} result serialize it.</li>
 * </ul>
 *
 * <p>The fields {@code page}, {@code list}, {@code newsid} and {@code en} and
 * the {@code fetch} method are not declared here; they are inherited from the
 * superclass chain.
 */
@ParentPackage("servicebase")
@Namespace("/crawler")
public class CampusEventAction extends CrawlerBase {

    private static final long serialVersionUID = 8934539069991515679L;

    /** Base URL of the remote site; request URLs and image paths are appended to it. */
    protected static final String RD="http://news.fudan.edu.cn";

    /** Name of the Ehcache region used by both actions. */
    private static final String CACHE_NAME = "News";

    /** Character encoding used for parsing and for the list response. */
    private static final String CHARSET = "utf-8";

    /**
     * Number of characters skipped after the end of the first marker node.
     * NOTE(review): presumably the length of a closing {@code </div>} that
     * follows the marker's opening tag -- confirm against the remote markup.
     */
    private static final int MARKER_SKIP = 6;

    /**
     * Writes the recent-events HTML fragment to the servlet response.
     *
     * <p>The rendered fragment is cached under {@code "eventlist" + page}; a
     * cached fragment is replayed without contacting the remote site. Nothing
     * is written when the remote page cannot be fetched or does not contain
     * exactly two {@code class="clear"} marker nodes.
     *
     * @return {@code NONE}, because the response body is written directly
     * @throws IOException if the response writer cannot be obtained
     */
    @Action(value = "eventlist")
    public String list() throws IOException {
        Cache c = CacheManager.getInstance().getCache(CACHE_NAME);
        String ckey = "eventlist"+page ;
        Element ele = c.get(ckey);
        if (ele != null && !CommonUtil.isEmpty(ele)) {
            Object cached = ele.getObjectValue();
            // Only a previously rendered fragment can be replayed; any other
            // value falls through to a fresh fetch.
            if (cached instanceof String) {
                writeHtml((String) cached);
                return NONE;
            }
        }

        StringBuffer retstr = fetch(RD+"/calendar/?a=list&&m=recent&range=30&_="+System.currentTimeMillis()+"&type=0&place=0&type="+page );
        list = new ArrayList<News>();
        if (retstr == null) {
            return NONE;
        }
        try {
            String html = renderEventList(retstr);
            if (html != null) {
                writeHtml(html);
                if (html.length() > 0) {
                    c.put(new Element(ckey, html));
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        return NONE;
    }

    /**
     * Loads a single event into the inherited {@code en} bean.
     *
     * <p>The title is taken from {@code h1} elements and the content from
     * {@code table} elements of the fetched page. When the page contains an
     * {@code imageurl} marker followed by a single-quoted path, the image is
     * registered in the {@code CrawlerImages} collection and its id is stored
     * through {@code setPubdate}. The bean is cached under
     * {@code "eventcontent" + newsid} once it has non-empty content.
     *
     * @return {@code SUCCESS}; the {@code json} result serializes {@code en}
     */
    @Action(value = "eventcontent", results = { @Result(type = "json", params = {
            "root", "en" }) })
    public String content() {
        Cache c = CacheManager.getInstance().getCache(CACHE_NAME);
        String ckey = "eventcontent" + newsid;
        Element ele = c.get(ckey);
        if (ele != null && !CommonUtil.isEmpty(ele)) {
            en = (News) ele.getObjectValue();
            return SUCCESS;
        }

        StringBuffer retstr = fetch(RD+"/calendar/?a=one&evid="
                + newsid+"&_="+System.currentTimeMillis());
        if (retstr == null) {
            return SUCCESS;
        }
        // One string is used for both searching and slicing so that the
        // offsets always refer to the text they were computed on.
        String html = retstr.toString();
        Parser p = Parser.createParser(html, CHARSET);
        try {
            NodeList nl = p.extractAllNodesThatMatch(new OrFilter(
                    new TagNameFilter("h1"), new TagNameFilter("table")));
            en = new News();
            en.setId(newsid);
            SimpleNodeIterator i = nl.elements();
            while (i.hasMoreNodes()) {
                Node n = i.nextNode();
                if (!(n instanceof TagNode)) {
                    continue;
                }
                TagNode tn = (TagNode) n;
                if (tn.getTagName().equalsIgnoreCase("h1")) {
                    en.setTitle(tn.toPlainTextString());
                }
                if (tn.getTagName().equalsIgnoreCase("table")) {
                    en.setContent(tn.toHtml());
                }
            }
            String imgpath = extractImagePath(html);
            if (imgpath != null) {
                // The image id is carried in the pubdate property, as before.
                en.setPubdate(registerImage(RD + imgpath));
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        if (!CommonUtil.isEmpty(en) && !CommonUtil.isEmpty(en.getContent())) {
            c.put(new Element(ckey, en));
        }
        return SUCCESS;
    }

    /**
     * Cuts the fragment between the two {@code class="clear"} marker nodes out
     * of the fetched page and replaces the children of every link that has a
     * {@code title} attribute with that title as plain text.
     *
     * @param raw the fetched page
     * @return the rewritten fragment, or {@code null} when the page does not
     *         contain exactly two markers or the markers do not enclose a
     *         valid range
     * @throws ParserException if the page or the fragment cannot be parsed
     */
    private String renderEventList(StringBuffer raw) throws ParserException {
        Parser p = Parser.createParser(raw.toString(), CHARSET);
        NodeList markers = p.extractAllNodesThatMatch(new HasAttributeFilter("class","clear"));
        if (markers.size() != 2) {
            return null;
        }
        int from = markers.elementAt(0).getEndPosition() + MARKER_SKIP;
        int to = markers.elementAt(1).getStartPosition();
        if (from < 0 || from > to || to > raw.length()) {
            return null;
        }

        Parser fragment = Parser.createParser(raw.substring(from, to), CHARSET);
        NodeList nl = fragment.parse(null);
        NodeList links = nl.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class), true);
        SimpleNodeIterator i = links.elements();
        while (i.hasMoreNodes()) {
            LinkTag lt = (LinkTag) i.nextNode();
            String title = lt.getAttribute("title");
            if (title == null) {
                // No title to substitute: keep the link exactly as fetched.
                continue;
            }
            NodeList ll = new NodeList();
            ll.add(new TextNode(title));
            lt.setChildren(ll);
            lt.removeAttribute("title");
        }
        return nl.toHtml();
    }

    /**
     * Writes {@code html} to the current servlet response as UTF-8 text.
     *
     * @param html the text to write
     * @throws IOException if the response writer cannot be obtained
     */
    private void writeHtml(String html) throws IOException {
        // The encoding has to be set before the writer is obtained.
        ServletActionContext.getResponse().setCharacterEncoding(CHARSET);
        ServletActionContext.getResponse().getWriter().print(html);
    }

    /**
     * Returns the text between the first pair of single quotes that follows
     * the first occurrence of {@code imageurl} in {@code html}.
     *
     * @param html the fetched event page
     * @return the quoted text, or {@code null} when the marker or either quote
     *         is missing or the quoted text is empty
     */
    private static String extractImagePath(String html) {
        int marker = html.indexOf("imageurl");
        if (marker < 0) {
            return null;
        }
        int open = html.indexOf('\'', marker);
        if (open < 0) {
            return null;
        }
        int close = html.indexOf('\'', open + 1);
        if (close < 0 || close == open + 1) {
            return null;
        }
        return html.substring(open + 1, close);
    }

    /**
     * Ensures the {@code CrawlerImages} collection holds a document for
     * {@code imgurl}, keyed by the MD5 digest of the URL.
     *
     * @param imgurl absolute image URL
     * @return the digest used as the image id
     */
    private static String registerImage(String imgurl) {
        String imgid = EncodeHelper.digest(imgurl, "MD5");
        BasicDBObject obj = new BasicDBObject("id", imgid);
        DBCollection col = MongoUtil.getInstance().getDB()
                .getCollection("CrawlerImages");
        if (col.findOne(obj) == null) {
            col.save(obj.append("url", imgurl));
        }
        return imgid;
    }
}