/**
* @Title: BstProxyAction.java
* @Description: TODO
* @author: Calvinyang
* @date: Nov 11, 2014 9:09:49 AM
* Copyright: Copyright (c) 2013
* @version: 1.0
*/
package edu.fudan.weixin.crawler.actions;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.net.URLEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.struts2.ServletActionContext;
import org.apache.struts2.convention.annotation.Action;
import org.apache.struts2.convention.annotation.Namespace;
import org.apache.struts2.convention.annotation.ParentPackage;
import org.apache.struts2.convention.annotation.Result;
import edu.fudan.eservice.common.utils.Config;
/**
* @author: Calvinyang
* @Description: TODO
* @date: Nov 11, 2014 9:09:49 AM
*/
@ParentPackage("servicebase")
@Namespace("/crawler")
@SuppressWarnings("serial")
public class BstProxyAction extends CrawlerBase {
private InputStream inputStream;
private String contentType;
@Override
@Action(value = "bst", results = { @Result(type = "stream") })
public String execute() throws Exception {
String targetUrl = ServletActionContext.getRequest().getParameter("targetUrl");
if (targetUrl == null) {
targetUrl = "http://baishitong.fudan.edu.cn/index.php?title=%E9%A6%96%E9%A1%B5";
} else {
targetUrl = "http://baishitong.fudan.edu.cn/index.php?title=" + targetUrl;
}
targetUrl += "&mobileaction=toggle_view_mobile";
StringBuffer retstr = fetch(targetUrl);
// 内链处理
String html = retstr.toString();
String html2 = retstr.toString();
Pattern p = Pattern.compile("(?<=href=\").*?(?=\")");
Matcher m = p.matcher(html);
while (m.find()) {
String link = m.group();
if (link.startsWith("/wiki")) {
html2 = html2.replace(link, Config.getInstance().get("bstProxy.baseUrl") + URLEncoder.encode(link.substring(6), "UTF-8"));
}
}
p = Pattern.compile("(?<=action=\").*?(?=\")");
m = p.matcher(html);
while(m.find()) {
String link = m.group();
if (link.equals("/index.php")) {
html2 = html2.replace(link, Config.getInstance().get("bstProxy.searchUrl"));
}
}
inputStream = new ByteArrayInputStream(html2.getBytes("UTF-8"));
System.gc();
return SUCCESS;
}
@Action(value = "search", results = { @Result(type = "stream") })
public String search() throws Exception {
String query = ServletActionContext.getRequest().getParameter("search");
//query = new String(query.getBytes("ISO-8859-1"),"UTF-8");
String targetUrl = "http://baishitong.fudan.edu.cn/index.php?title=%E7%89%B9%E6%AE%8A:%E6%90%9C%E7%B4%A2&search=" + URLEncoder.encode(query, "utf-8") + "&fulltext=search&mobileaction=toggle_view_mobile";
StringBuffer retstr = fetch(targetUrl);
// 内链处理
String html = retstr.toString();
String html2 = retstr.toString();
Pattern p = Pattern.compile("(?<=href=\").*?(?=\")");
Matcher m = p.matcher(html);
while (m.find()) {
String link = m.group();
if (link.startsWith("/wiki")) {
html2 = html2.replace(link, Config.getInstance().get("bstProxy.baseUrl") + URLEncoder.encode(link.substring(6), "UTF-8"));
}
}
inputStream = new ByteArrayInputStream(html2.getBytes("UTF-8"));
System.gc();
return SUCCESS;
}
/**
* @return the inputStream
*/
public InputStream getInputStream() {
return inputStream;
}
/**
* @param inputStream
* the inputStream to set
*/
public void setInputStream(InputStream inputStream) {
this.inputStream = inputStream;
}
/**
* @return the contentType
*/
public String getContentType() {
return "text/html; charset=UTF-8";
}
/**
* @param contentType
* the contentType to set
*/
public void setContentType(String contentType) {
this.contentType = contentType;
}
}