package org.nextprot.api.core.app.dbxrefanalyser;
import com.google.common.base.Preconditions;
import org.nextprot.api.core.domain.DbXref;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.*;
import java.util.*;
import java.util.logging.FileHandler;
import java.util.logging.Logger;
import java.util.logging.SimpleFormatter;
/**
 * Visits the url and the resolved url of DbXrefs, checks their http statuses and
 * reports them.
 *
 * <p>One tab-separated row per visited xref is written to the output file given at
 * construction. Every request is logged through a {@link FileHandler} attached to the
 * class logger, and a summary of all non-200 statuses is logged when the visitor is
 * closed.
 *
 * <p>Only the first xref seen for a given "database name + link url" combination is
 * requested; later xrefs sharing that combination are skipped.
 */
class DbXrefUrlVisitor implements Closeable, Flushable {

    private static final Logger LOGGER = Logger.getLogger(DbXrefUrlVisitor.class.getSimpleName());

    /** Initial connect/read timeout in milliseconds; doubled before each retry. */
    private static final int TIMEOUT = 5000;

    /** Maximum number of additional attempts after a timed-out request. */
    private static final int MAX_RETRIES = 3;

    /** Status reported for a successful request. */
    private static final String OK_STATUS = "200";

    /** Status reported by {@link #getResponseCode} when the connection or read timed out. */
    private static final String SOCKET_TIMEOUT_STATUS = "SOCKET TIMEOUT EXCEPTION";

    private final PrintWriter pw;
    private final FileHandler fileHandler;
    private final Set<String> visitedTemplateURLs;
    private final Map<String, Set<String>> dbxrefNon200HttpStatusMap;

    /**
     * Opens the report file and the log file, then writes the report header line.
     *
     * @param outName path of the tab-separated report file
     * @param logName file name pattern handed to {@link FileHandler}
     * @throws IOException if the report file or the log file cannot be opened
     */
    DbXrefUrlVisitor(String outName, String logName) throws IOException {
        Preconditions.checkNotNull(outName);
        Preconditions.checkNotNull(logName);

        PrintWriter writer = new PrintWriter(outName);
        FileHandler handler;
        try {
            handler = new FileHandler(logName);
        } catch (IOException | RuntimeException e) {
            // do not leak the already opened report file when the log file cannot be created
            writer.close();
            throw e;
        }
        handler.setFormatter(new SimpleFormatter());
        LOGGER.addHandler(handler);

        pw = writer;
        fileHandler = handler;
        visitedTemplateURLs = new HashSet<>();
        dbxrefNon200HttpStatusMap = new TreeMap<>(DbXrefUrlVisitor::compareStatuses);

        pw.write("accession\tdb\txref ac\turl\thttp status\tresolved url\thttp status\n");
    }

    /**
     * Orders status keys: numeric statuses first in ascending numeric order, then the
     * non-numeric ones in {@link String#compareTo} order.
     */
    private static int compareStatuses(String status1, String status2) {
        boolean isStatus1Integer = status1.matches("\\d+");
        boolean isStatus2Integer = status2.matches("\\d+");

        if (isStatus1Integer && isStatus2Integer) {
            return Integer.compare(Integer.parseInt(status1), Integer.parseInt(status2));
        }
        if (isStatus1Integer) {
            return -1;
        }
        if (isStatus2Integer) {
            return 1;
        }
        return status1.compareTo(status2);
    }

    /**
     * Visit all xrefs and report statuses into outName file.
     *
     * <p>Each xref gets {@code accession} set as its protein accession referer. When the
     * resolved url request times out, both requests are repeated up to
     * {@value #MAX_RETRIES} times, doubling the timeout before each new attempt.
     *
     * @param accession the accession
     * @param xrefs xrefs that belong to accession; {@code null} is treated as empty
     * @throws IOException kept in the signature for existing callers; request failures
     *         are reported as statuses instead of being thrown
     */
    void visit(String accession, List<DbXref> xrefs) throws IOException {
        if (xrefs == null) {
            return;
        }
        for (DbXref xref : xrefs) {
            xref.setProteinAccessionReferer(accession);
            String resolvedUrl = xref.getResolvedUrl();

            // url template: one request per database name / link url combination
            String dbName = xref.getDatabaseName();
            String templateURL = dbName + "^" + xref.getLinkUrl();
            if (visitedTemplateURLs.contains(templateURL)) {
                continue;
            }

            int currentTimeOut = TIMEOUT;
            Response response = requestUrls(xref, currentTimeOut);
            for (int attempt = 0; attempt < MAX_RETRIES && response.isResolvedUrlTimedOut(); attempt++) {
                currentTimeOut *= 2;
                response = requestUrls(xref, currentTimeOut);
            }

            String xrefAcc = xref.getAccession();
            String url = xref.getUrl();

            writeRow(accession, dbName, xrefAcc, url, response.getUrlHttpStatus(),
                    resolvedUrl, response.getResolvedUrlHttpStatus());

            if (!response.isUrlOK()) {
                recordNon200Status(response.getUrlHttpStatus(), dbName + " => " + url);
            }
            if (!response.isResolvedUrlOK()) {
                recordNon200Status(response.getResolvedUrlHttpStatus(), dbName + " => " + resolvedUrl);
            }
            visitedTemplateURLs.add(templateURL);
        }
    }

    /**
     * Writes one tab-separated line to the report. A {@code null} cell is written as an
     * empty string because {@link PrintWriter#write(String)} rejects {@code null}.
     */
    private void writeRow(String... cells) {
        for (int i = 0; i < cells.length; i++) {
            if (i > 0) {
                pw.write("\t");
            }
            String cell = cells[i];
            pw.write(cell == null ? "" : cell);
        }
        pw.write("\n");
    }

    /** Registers {@code entry} under the given non-200 status for the closing summary. */
    private void recordNon200Status(String status, String entry) {
        dbxrefNon200HttpStatusMap.computeIfAbsent(status, key -> new HashSet<>()).add(entry);
    }

    /** Flushes the report writer. */
    @Override
    public void flush() {
        pw.flush();
    }

    /**
     * Logs the summary of unsuccessful requests, clears the internal state, closes the
     * report writer and finally detaches and closes the log file handler.
     */
    @Override
    public void close() {
        try {
            if (!dbxrefNon200HttpStatusMap.isEmpty()) {
                StringBuilder sb = new StringBuilder("\n\nUnsuccessful requests\n---------------------\n");
                for (Map.Entry<String, Set<String>> entry : dbxrefNon200HttpStatusMap.entrySet()) {
                    sb.append(entry.getKey()).append(":\n");
                    for (String url : entry.getValue()) {
                        sb.append("\t").append(url).append("\n");
                    }
                }
                LOGGER.info(sb.toString());
            }
            dbxrefNon200HttpStatusMap.clear();
            visitedTemplateURLs.clear();
            pw.close();
        } finally {
            // the logger is static: without this every instance would leave its handler
            // (and the open log file) attached to it
            LOGGER.removeHandler(fileHandler);
            fileHandler.close();
        }
    }

    /** Requests the url and the resolved url of {@code xref} with the same timeout. */
    private Response requestUrls(DbXref xref, int timeOut) {
        String urlHttpStatus = getResponseCode(xref, xref.getUrl(), timeOut);
        String resolvedUrlHttpStatus = getResponseCode(xref, xref.getResolvedUrl(), timeOut);
        return new Response(urlHttpStatus, resolvedUrlHttpStatus);
    }

    /**
     * Sends a HEAD request to {@code url} and returns its status.
     *
     * @param xref the xref the url belongs to (used for log messages only)
     * @param url the url to request
     * @param timeOut connect and read timeout in milliseconds
     * @return the http response code as a string, {@code "UNDEFINED URL"} when the url is
     *         {@code null}, empty or "none", or a label naming the exception that
     *         prevented the request from completing
     */
    private String getResponseCode(DbXref xref, String url, int timeOut) {
        String status = "-1";
        String response;
        HttpURLConnection con = null;
        String headerMessage = "xref=" + xref.getAccession() + ";db=" + xref.getDatabaseName() + ";url=" + url;

        if (url == null || url.equalsIgnoreCase("none") || url.isEmpty()) {
            LOGGER.info(headerMessage + "; Cannot execute request\n");
            return "UNDEFINED URL";
        }

        try {
            URL obj = new URL(url);
            con = (HttpURLConnection) obj.openConnection();
            con.setRequestMethod("HEAD");
            con.setRequestProperty("User-Agent", "Mozilla/5.0");
            con.setConnectTimeout(timeOut);
            con.setReadTimeout(timeOut);
            con.connect();
            status = String.valueOf(con.getResponseCode());
            LOGGER.info(headerMessage + ";status=" + status);
            response = status;
        } catch (SocketTimeoutException e) {
            response = SOCKET_TIMEOUT_STATUS;
            LOGGER.warning(buildErrorMessage(e, status, response, headerMessage));
        } catch (ProtocolException e) {
            response = "PROTOCOL EXCEPTION";
            LOGGER.warning(buildErrorMessage(e, status, response, headerMessage));
        } catch (MalformedURLException e) {
            response = "MALFORMEDURL EXCEPTION";
            LOGGER.warning(buildErrorMessage(e, status, response, headerMessage));
        } catch (IOException e) {
            response = "IO EXCEPTION";
            LOGGER.warning(buildErrorMessage(e, status, response, headerMessage));
        } catch (IllegalArgumentException e) {
            response = "ILLEGAL ARGUMENT EXCEPTION";
            LOGGER.warning(buildErrorMessage(e, status, response, headerMessage));
        } finally {
            // also runs when an unexpected runtime exception escapes the try block
            if (con != null) {
                con.disconnect();
            }
        }
        return response;
    }

    /** Builds the warning message logged when a request fails. */
    private String buildErrorMessage(Exception e, String status, String response, String headerMessage) {
        return headerMessage + ";status=" + status + ";response=" + response + ";message=" + e.getMessage();
    }

    /** Statuses obtained for the url and the resolved url of one xref. */
    private static class Response {

        private final String urlHttpStatus;
        private final String resolvedUrlHttpStatus;

        private Response(String urlHttpStatus, String resolvedUrlHttpStatus) {
            this.urlHttpStatus = urlHttpStatus;
            this.resolvedUrlHttpStatus = resolvedUrlHttpStatus;
        }

        String getUrlHttpStatus() {
            return urlHttpStatus;
        }

        String getResolvedUrlHttpStatus() {
            return resolvedUrlHttpStatus;
        }

        boolean isUrlOK() {
            return OK_STATUS.equals(urlHttpStatus);
        }

        boolean isResolvedUrlOK() {
            return OK_STATUS.equals(resolvedUrlHttpStatus);
        }

        /** Tells whether the resolved url request ended with a socket timeout. */
        boolean isResolvedUrlTimedOut() {
            return SOCKET_TIMEOUT_STATUS.equals(resolvedUrlHttpStatus);
        }
    }
}