package controllers;
import java.io.Closeable;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import models.User;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import play.Logger;
import play.Play;
import play.mvc.Controller;
import play.mvc.Result;
import play.mvc.Security;
import uk.bl.exception.ActException;
/**
 * Controller that proxies requests through to the configured Wayback service
 * and offers helpers that count crawl results via Wayback's XML query API.
 */
public class WaybackController extends Controller {

    /**
     * Returns the Wayback base URL from {@code application.wayback.url},
     * always terminated with a {@code /}.
     *
     * @throws IllegalStateException if the configuration key is not set
     */
    public static String getWaybackEndpoint() {
        return configuredPrefix("application.wayback.url");
    }

    /**
     * Returns the access resolver base URL from
     * {@code application.access.resolver.url}, always terminated with a {@code /}.
     *
     * @throws IllegalStateException if the configuration key is not set
     */
    public static String getAccessResolverEndpoint() {
        return configuredPrefix("application.access.resolver.url");
    }

    /**
     * Returns the raw value of {@code application.wayback.query.path}
     * (may be {@code null} when the key is not configured).
     */
    public static String getWaybackQueryEndpoint() {
        return Play.application().configuration().getString("application.wayback.query.path");
    }

    /**
     * Proxies a request for {@code url} to the Wayback service on behalf of the
     * logged-in user.
     *
     * <p>The incoming raw query string, if any, is appended unchanged. Redirect
     * handling is disabled on the outgoing client: when the upstream response
     * carries a {@code Location} header, a redirect to that location is issued
     * instead of following it. Otherwise the upstream status code and body are
     * returned, with the upstream {@code Content-Type} when present.
     *
     * @param url path appended to the Wayback endpoint
     * @return 401 when the session user is unknown or is not an LDL member,
     *         otherwise the redirect or proxied response described above
     * @throws IOException if the upstream request fails
     */
    @Security.Authenticated(SecuredController.class)
    public static Result wayback(String url) throws ActException, ClientProtocolException, IOException {
        User user = User.findByEmail(session().get("email"));
        // A session e-mail that no longer resolves to a user is treated like a non-member.
        if (user == null || !user.isLDLMember()) {
            return unauthorized(
                "unauthorized - you must be a member of a Legal Deposit library organisation to view the crawled resources"
            );
        }

        // Build up the wayback query, passing the raw query string through untouched.
        String wayback = getWaybackEndpoint() + url;
        String q = ctx()._requestHeader().rawQueryString();
        if (q != null && q.length() > 0) {
            Logger.info("Passing through raw Query String: " + q);
            wayback += "?" + q;
        }
        Logger.info("Using URL: " + wayback);

        CloseableHttpClient httpclient = HttpClientBuilder.create()
            .disableRedirectHandling()
            .build();
        CloseableHttpResponse response = null;
        // Once the body stream has been handed to Play, the stream wrapper owns the cleanup.
        boolean handedOff = false;
        try {
            response = httpclient.execute(new HttpGet(wayback));

            // If this looks like a redirect, issue the redirect directly.
            Header locationHeader = response.getFirstHeader(LOCATION);
            if (locationHeader != null) {
                String location = locationHeader.getValue();
                Logger.info("Got LOCATION: " + location);
                return redirect(location);
            }

            // Upstream headers other than Content-Type are deliberately not copied:
            // setting them on response() here overrides/breaks Play's own response handling.
            int statusCode = response.getStatusLine().getStatusCode();
            Header contentType = response.getFirstHeader(CONTENT_TYPE);
            Logger.debug("Response content type: " + contentType);

            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // Body-less upstream response: nothing to stream.
                return status(statusCode);
            }

            InputStream body = closingStream(entity.getContent(), response, httpclient);
            Result result;
            if (contentType != null) {
                result = status(statusCode, body).as(contentType.getValue());
            } else {
                result = status(statusCode, body);
            }
            handedOff = true;
            return result;
        } finally {
            if (!handedOff) {
                closeQuietly(response);
                closeQuietly(httpclient);
            }
        }
    }

    /**
     * Proxies a request for the Wayback root; equivalent to {@code wayback("")}.
     */
    @Security.Authenticated(SecuredController.class)
    public static Result waybackRoot() throws ActException, ClientProtocolException, IOException {
        return wayback("");
    }

    /**
     * Fetches the number of crawled URLs under the given URL prefix.
     *
     * @param url URL prefix to query
     * @return number of {@code result} elements returned by a Wayback
     *         {@code prefixquery}; 0 on a non-200 response or any failure
     */
    public static int getTotalCrawledUrls(String url) {
        return countResults("getTotalCrawledUrls", "prefixquery", url);
    }

    /**
     * Fetches the number of times the specific URL has been crawled.
     *
     * @param url URL to query
     * @return number of {@code result} elements returned by a Wayback
     *         {@code urlquery}; 0 on a non-200 response or any failure
     */
    public static int getTotalCrawledInstances(String url) {
        return countResults("getTotalCrawledInstances", "urlquery", url);
    }

    /** Reads a configured URL prefix and guarantees it ends with a slash. */
    private static String configuredPrefix(String key) {
        String prefix = Play.application().configuration().getString(key);
        if (prefix == null) {
            throw new IllegalStateException("Missing configuration value: " + key);
        }
        return prefix.endsWith("/") ? prefix : prefix + "/";
    }

    /**
     * Runs an {@code xmlquery.jsp} query of the given type against Wayback and
     * counts the {@code result} elements in the XML response.
     *
     * @param caller    public method name, used only as a log prefix
     * @param queryType value of the {@code type} query parameter
     * @param url       URL to query; it is percent-encoded before being sent
     */
    private static int countResults(String caller, String queryType, String url) {
        Logger.debug(caller + " url:" + url);
        HttpURLConnection http = null;
        try {
            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            // Encode the target so characters such as '&' or '#' cannot corrupt the query.
            URL queryUrl = new URL(getWaybackEndpoint()
                + "xmlquery.jsp?type=" + queryType
                + "&url=" + URLEncoder.encode(url, "UTF-8"));
            http = (HttpURLConnection) queryUrl.openConnection();
            http.setRequestMethod("GET");
            http.connect();
            int statusCode = http.getResponseCode();
            Logger.debug(caller + " statusCode:" + statusCode);
            if (statusCode == 200) {
                Logger.debug(caller + " parsing XML...");
                InputStream in = http.getInputStream();
                try {
                    Document doc = db.parse(in);
                    Logger.debug(caller + " getting values from XML...");
                    NodeList results = doc.getElementsByTagName("result");
                    Logger.debug(caller + " = " + results.getLength());
                    return results.getLength();
                } finally {
                    closeQuietly(in);
                }
            }
        } catch (Exception e) {
            // Best effort: any failure is logged and reported as "no results".
            Logger.warn("Exception while lookup up " + caller, e);
        } finally {
            if (http != null) {
                http.disconnect();
            }
        }
        return 0;
    }

    /**
     * Wraps a stream so that closing it also closes the given resources,
     * in the order supplied.
     */
    private static InputStream closingStream(InputStream in, final Closeable... resources) {
        return new FilterInputStream(in) {
            @Override
            public void close() throws IOException {
                try {
                    super.close();
                } finally {
                    for (Closeable resource : resources) {
                        closeQuietly(resource);
                    }
                }
            }
        };
    }

    /** Closes a resource, logging rather than propagating any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            Logger.debug("Failed to close resource: " + e);
        }
    }
}