package org.infoglue.common.webappintegrator;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.log4j.Logger;
import org.infoglue.cms.controllers.kernel.impl.simple.InterceptionPointController;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Fetches a remote web page and rewrites a selected fragment of it so it can be
 * embedded in another page: anchors and form actions are redirected back through
 * the embedding page (via a <code>proxyUrl</code> query parameter), while
 * <code>src</code> and <code>link[href]</code> attributes are made absolute.
 *
 * <p>Configure the instance through the setters, then call
 * {@link #integrate(Map, Map, Map, List, String, String, String)}.</p>
 */
public class WebappIntegrator
{
    private final static Logger logger = Logger.getLogger(WebappIntegrator.class.getName());

    /** Name of the query parameter that carries the original (remote) URL. */
    private static final String PROXY_URL_PARAMETER = "proxyUrl";

    //Proxy-part
    private String proxyHost = null;
    private Integer proxyPort = null;

    private String urlToIntegrate = null;
    private Connection.Method method = Connection.Method.GET;

    // NOTE(review): referrer, userAgent and timeout are stored by their setters but are
    // not passed on to the fetch in integrate() - confirm whether PageFetcher should get them.
    private String referrer = null;
    private String userAgent = null;
    private Integer timeout = null;

    private Map<String,String> requestProperties = new HashMap<String,String>();
    private Map<String,String> requestParameters = new HashMap<String,String>();
    private Map<String,String> cookies = new HashMap<String,String>();

    private String currentBaseUrl = null;
    private String elementSelector = null;

    public WebappIntegrator()
    {
    }

    /**
     * @param urlToIntegrate the remote URL to fetch; also used as target for forms without an action
     */
    public void setUrlToIntegrate(String urlToIntegrate)
    {
        this.urlToIntegrate = urlToIntegrate;
    }

    /**
     * @param currentBaseUrl the URL of the embedding page; rewritten links and form actions
     *        point here with the original URL appended as the <code>proxyUrl</code> parameter
     */
    public void setCurrentBaseUrl(String currentBaseUrl)
    {
        this.currentBaseUrl = currentBaseUrl;
    }

    /**
     * @param elementSelector jsoup selector for the element whose inner HTML should be returned
     */
    public void setElementSelector(String elementSelector)
    {
        this.elementSelector = elementSelector;
    }

    public void setProxyHost(String proxyHost)
    {
        this.proxyHost = proxyHost;
    }

    public void setProxyPort(Integer proxyPort)
    {
        this.proxyPort = proxyPort;
    }

    /**
     * Fetches {@link #setUrlToIntegrate(String) the configured URL}, picks the element to embed
     * and rewrites its URLs.
     *
     * <p>Element selection order: the configured element selector, then <code>#pageContent</code>,
     * then the document body.</p>
     *
     * @param returnCookies passed to <code>PageFetcher.fetchPage</code> (presumably filled with response cookies - verify against PageFetcher)
     * @param returnHeaders passed to <code>PageFetcher.fetchPage</code> (presumably filled with response headers - verify against PageFetcher)
     * @param statusData passed to <code>PageFetcher.fetchPage</code>
     * @param blockedParameters passed to <code>PageFetcher.fetchPage</code>
     * @param hrefExclusionRegexp anchors whose raw <code>href</code> fully matches this are left untouched; <code>null</code> excludes nothing
     * @param linkExclusionRegexp <code>link</code> elements whose absolute <code>href</code> fully matches this are left untouched; <code>null</code> excludes nothing
     * @param srcExclusionRegexp elements whose absolute <code>src</code> fully matches this are left untouched; <code>null</code> excludes nothing
     * @return the inner HTML of the selected element after rewriting
     * @throws Exception if fetching, parsing or URL encoding fails
     */
    public String integrate(Map<String,String> returnCookies, Map<String,String> returnHeaders, Map<String,String> statusData, List<String> blockedParameters, String hrefExclusionRegexp, String linkExclusionRegexp, String srcExclusionRegexp) throws Exception
    {
        String responseBody = new PageFetcher().fetchPage(this.urlToIntegrate, method.name(), this.proxyHost, this.proxyPort, cookies, requestProperties, requestParameters, returnCookies, returnHeaders, statusData, blockedParameters);

        // Relative URLs are resolved against the fetched URL without its query string.
        String baseURI = this.urlToIntegrate;
        int queryStart = baseURI.indexOf("?");
        if(queryStart > -1)
            baseURI = baseURI.substring(0, queryStart);

        Document doc = Jsoup.parse(responseBody, baseURI);

        String title = doc.title();
        logger.info("title:" + title);
        logger.info("elementSelector:" + elementSelector);

        // A null/blank selector would make jsoup throw, so go straight to the fallbacks instead.
        Element sourceElement = null;
        if(elementSelector != null && elementSelector.trim().length() > 0)
            sourceElement = doc.select(elementSelector).first();
        if(sourceElement == null)
            sourceElement = doc.select("#pageContent").first();
        if(sourceElement == null)
            sourceElement = doc.body();
        if(sourceElement == null)
        {
            // No body at all: nothing to rewrite, return the parsed document as-is.
            return doc.html();
        }

        Elements links = sourceElement.select("a[href]");
        Elements forms = sourceElement.select("form");
        Elements media = doc.select("[src]");
        Elements imports = doc.select("link[href]");

        // Anchors are routed back through the embedding page.
        for (Element link : links)
        {
            String href = link.attr("href");
            String oldUrl = link.attr("abs:href");
            // abs:href is "" (never null) when the URL cannot be made absolute; leave such links alone.
            if(!isExcluded(href, hrefExclusionRegexp) && href.indexOf("javascript:") == -1 && oldUrl.length() > 0)
            {
                link.attr("href", buildProxyUrl(oldUrl));
            }
        }

        // Media sources are only made absolute; they are still loaded directly from the remote host.
        for (Element src : media)
        {
            String oldSrc = src.attr("abs:src");
            if(oldSrc.length() > 0 && !isExcluded(oldSrc, srcExclusionRegexp))
            {
                logger.info("Changing to oldSrc:" + oldSrc);
                src.attr("src", oldSrc);
            }
        }

        // Same for stylesheet and other link imports.
        for (Element link : imports)
        {
            String oldHref = link.attr("abs:href");
            if(oldHref.length() > 0 && !isExcluded(oldHref, linkExclusionRegexp))
            {
                logger.info("Changing to oldHref:" + oldHref);
                link.attr("href", oldHref);
            }
        }

        // Forms post back through the embedding page; a missing action means "post to the page itself".
        for (Element form : forms)
        {
            String oldAction = form.attr("abs:action");
            logger.info("oldAction:" + oldAction);
            if(oldAction.equals(""))
            {
                oldAction = this.urlToIntegrate;
                logger.info("oldAction:" + oldAction);
            }
            form.attr("action", buildProxyUrl(oldAction));
        }

        return sourceElement.html();
    }

    /**
     * Builds the URL of the embedding page with the given remote URL attached,
     * URL-encoded, as the <code>proxyUrl</code> parameter.
     */
    private String buildProxyUrl(String targetUrl) throws java.io.UnsupportedEncodingException
    {
        if(currentBaseUrl == null)
            throw new IllegalStateException("currentBaseUrl must be set before links or forms can be rewritten");

        String separator = (currentBaseUrl.indexOf("?") > -1 ? "&" : "?");
        return currentBaseUrl + separator + PROXY_URL_PARAMETER + "=" + URLEncoder.encode(targetUrl, "utf-8");
    }

    /**
     * Tells whether the value fully matches the exclusion expression. A null expression excludes nothing.
     */
    private static boolean isExcluded(String value, String exclusionRegexp)
    {
        return exclusionRegexp != null && value.matches(exclusionRegexp);
    }

    /**
     * Selects the HTTP method used for the fetch. Only "post" and "get" (case-insensitive)
     * are recognised; null or any other value leaves the current method unchanged.
     *
     * @param method the HTTP method name
     */
    public void setMethod(String method)
    {
        if(method == null)
            return;

        if(method.equalsIgnoreCase("post"))
            this.method = Connection.Method.POST;
        else if(method.equalsIgnoreCase("get"))
            this.method = Connection.Method.GET; // allows switching back on a reused instance
    }

    public void setReferrer(String referrer)
    {
        this.referrer = referrer;
    }

    public void setUserAgent(String userAgent)
    {
        this.userAgent = userAgent;
    }

    /**
     * @param requestProperties passed to <code>PageFetcher.fetchPage</code> on the next integrate call
     */
    public void setRequestProperties(Map<String, String> requestProperties)
    {
        this.requestProperties = requestProperties;
    }

    /**
     * @param requestParameters passed to <code>PageFetcher.fetchPage</code> on the next integrate call
     */
    public void setRequestParameters(Map<String, String> requestParameters)
    {
        this.requestParameters = requestParameters;
    }

    /**
     * @param cookies passed to <code>PageFetcher.fetchPage</code> on the next integrate call
     */
    public void setCookies(Map<String, String> cookies)
    {
        this.cookies = cookies;
    }

    public void setTimeout(Integer timeout)
    {
        this.timeout = timeout;
    }

    /**
     * Manual smoke test: integrates a hard-coded forum URL and prints any failure.
     *
     * @param args not used
     */
    public static void main(String[] args)
    {
        WebappIntegrator wi = new WebappIntegrator();
        wi.setCurrentBaseUrl("http://localhost:8080/infoglueDeliverWorking/ViewPage.action?siteNodeId=3");
        wi.setUrlToIntegrate("https://forum.tewss.telia.se");
        wi.setElementSelector("#ipbwrapper");
        try
        {
            wi.integrate(new HashMap<String,String>(), new HashMap<String,String>(), new HashMap<String,String>(), new ArrayList<String>(), "", "", "");
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}