/**
* @Title: BstSearchHelper.java
* @Description: TODO
* @author: Calvinyang
* @date: Dec 22, 2014 4:40:33 PM
* Copyright: Copyright (c) 2013
* @version: 1.0
*/
package edu.fudan.weixin.utils;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.fudan.eservice.common.utils.CommonUtil;
import edu.fudan.weixin.model.message.NewsJSONMessageBuilder;
import edu.fudan.weixin.model.message.NewsMessageBuilder;
/**
* @author: Calvinyang
* @Description: TODO
* @date: Dec 22, 2014 4:40:33 PM
*/
public class BstSearchHelper {
private static Log log = LogFactory.getLog(BstSearchHelper.class);
/**
*
* @Title: getNewsForKeywords
* @Description: 根据指定关键词搜索百事通 并将结果解析为图文列表数据
* @param keywords
* @return
*/
public static NewsMessageBuilder getNewsForKeywords(String keywords) {
String urlstr = null;
try {
urlstr = "http://baishitong.fudan.edu.cn/index.php?limit=10&fulltext=Search&search=" + URLEncoder.encode(keywords, "utf-8");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
log.error(e.getMessage());
return null;
}
StringBuffer ret = null;
try {
ret = CommonUtil.getWebContent(urlstr);
} catch (Exception e) {
e.printStackTrace();
log.error(e.getMessage());
return null;
}
int pos = ret.indexOf("mw-search-result-heading");
NewsMessageBuilder mb = new NewsJSONMessageBuilder();
int atcount = 0;
while (pos > 0 && atcount < 10) {
pos = ret.indexOf("href=", pos);
String ttitle = "", turl = "", tcontent = "";
int pos2 = -1;
if (pos > 0) {
pos = ret.indexOf("\"", pos + 1);
pos2 = ret.indexOf("\"", pos + 1);
turl = "http://baishitong.fudan.edu.cn" + ret.substring(pos + 1, pos2);
}
pos = ret.indexOf("title=", pos2);
if (pos > 0) {
pos = ret.indexOf("\"", pos + 1);
pos2 = ret.indexOf("\"", pos + 1);
ttitle = ret.substring(pos + 1, pos2);
}
pos = ret.indexOf("searchresult", pos2);
if (pos > 0) {
pos = ret.indexOf(">", pos + 1);
pos2 = ret.indexOf("</div>", pos + 1);
tcontent = ret.substring(pos + 1, pos2);
tcontent = tcontent.replaceAll("(<.+>|\\[|\\])", "");
}
atcount++;
mb.addArticle(ttitle, tcontent, turl, "");
pos = ret.indexOf("mw-search-result-heading", pos2);
}
if (atcount == 0)
mb.addArticle("没有查询到相关信息", "Nothing was found", "", "");
mb.setContent(null);
System.gc();
return mb;
}
}