package org.genedb.db.loading.auxiliary;
import org.gmod.schema.feature.Polypeptide;
import org.gmod.schema.mapped.DbXRef;
import org.gmod.schema.mapped.FeatureDbXRef;
import org.apache.log4j.Logger;
import org.hibernate.Session;
import org.hibernate.Transaction;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.Proxy;
import java.net.SocketAddress;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Loader that fetches gene records from the OPI web application and attaches
 * an "OPI" database cross-reference to the polypeptide of each listed gene.
 *
 * <p>The data does not come from a local file (see {@link #loadsFromFile()}):
 * it is requested over HTTP, through the configured proxy, by POSTing
 * <code>act=saveCSV</code> to the OPI URL and parsing the comma-separated
 * response line by line. All lines are read into memory first, and the HTTP
 * response is closed before any database work starts.
 *
 * <p>Side effect: {@link #getOPIConnection()} replaces the JVM-wide default
 * {@link CookieHandler}.
 */
public class OPIReferenceLoader extends Loader {
    private static final Logger logger = Logger.getLogger(OPIReferenceLoader.class);

    /** URL of the HTTP proxy used for every request; <code>null</code> would mean "no proxy". */
    private static final String proxyURL = "http://wwwcache.sanger.ac.uk:3128";

    /** URL that is first fetched with GET (session cookie) and then POSTed to for the data. */
    private static final String opiURL = "http://chemlims.com/OPI/MServlet.ChemInfo?module=Gene&DataSet=1&saveAll=YES";

    /** Number of fields (regex matches) every data line must yield. */
    private static final int FIELD_COUNT = 4;

    /**
     * Matches one field, anchored at the end of the previous match: either a
     * double-quoted value without embedded quotes (group 1, quotes stripped)
     * or a run of non-comma characters (group 2), followed by a comma or the
     * end of the input.
     */
    private static final Pattern csvFieldPattern = Pattern.compile("\\G(?:\"([^\"]*)\"|([^,]*))(?:,|\\z)");

    /**
     * Builds the proxy to use for outgoing connections.
     *
     * @return an HTTP proxy for the host and port of {@link #proxyURL}, or
     *         {@link Proxy#NO_PROXY} if no proxy URL is configured
     * @throws MalformedURLException if the proxy URL cannot be parsed
     */
    private Proxy getProxy() throws MalformedURLException {
        if (proxyURL == null) {
            return Proxy.NO_PROXY;
        }
        URL url = new URL(proxyURL);
        SocketAddress proxyAddress = new InetSocketAddress(url.getHost(), url.getPort());
        return new Proxy(Proxy.Type.HTTP, proxyAddress);
    }

    /**
     * Opens the connection whose response carries the OPI data.
     *
     * <p>Installs a fresh accept-all {@link CookieManager} as the JVM default
     * cookie handler, performs a GET so that the session cookie is set, and
     * then issues a POST with the body <code>act=saveCSV</code>.
     *
     * @return the connection the POST was sent on; its input stream has not
     *         been read yet
     * @throws MalformedURLException if the OPI or proxy URL cannot be parsed
     * @throws IOException if either request fails
     */
    private URLConnection getOPIConnection() throws MalformedURLException, IOException {
        CookieManager cookieManager = new CookieManager();
        cookieManager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
        CookieHandler.setDefault(cookieManager);

        // Set session cookie. The response body itself is not needed, so
        // release it straight away instead of leaving the stream open.
        Object content = new URL(opiURL).openConnection(getProxy()).getContent();
        if (content instanceof InputStream) {
            ((InputStream) content).close();
        }

        /* Issue POST request */
        HttpURLConnection opiConnection = (HttpURLConnection) new URL(opiURL).openConnection(getProxy());
        opiConnection.setDoOutput(true);
        // Explicit charset so the request bytes do not depend on the platform default.
        Writer writer = new OutputStreamWriter(opiConnection.getOutputStream(), "UTF-8");
        try {
            writer.write("act=saveCSV");
        } finally {
            writer.close();
        }
        return opiConnection;
    }

    /**
     * Opens the OPI response as a buffered character stream.
     *
     * <p>NOTE(review): the response is decoded with the platform default
     * charset; confirm which encoding the server actually sends.
     *
     * @return a reader over the response body; the caller must close it
     * @throws MalformedURLException if the OPI or proxy URL cannot be parsed
     * @throws IOException if the connection cannot be established
     */
    private BufferedReader getOPIBufferedReader() throws MalformedURLException, IOException {
        InputStream inputStream = getOPIConnection().getInputStream();
        InputStreamReader inputStreamReader = new InputStreamReader(inputStream);
        return new BufferedReader(inputStreamReader);
    }

    /**
     * @return <code>false</code>: this loader obtains its data over HTTP
     */
    @Override
    public boolean loadsFromFile() {
        return false;
    }

    /**
     * Downloads all OPI records, then loads them one at a time, each in its
     * own transaction. The session is cleared after every 100 records.
     *
     * <p>NOTE(review): no rollback is attempted here if loading a record
     * fails; the exception simply propagates to the caller.
     *
     * @param inputStream expected to be <code>null</code>, since nothing is read from a file
     * @param session the Hibernate session whose transaction is used
     * @throws MalformedURLException if the OPI or proxy URL cannot be parsed
     * @throws IOException if the download fails
     * @throws IllegalArgumentException if a data line does not have the expected number of fields
     */
    @Override
    protected void doLoad(InputStream inputStream, Session session)
            throws MalformedURLException, IOException {

        assert inputStream == null;

        List<String[]> fieldsList = readRecords();

        Transaction transaction = session.getTransaction();
        int n = 1;
        for (String[] fields : fieldsList) {
            String gene = fields[1];
            String description = fields[2];
            logger.debug(String.format("[%d/%d] Loading OPI reference for %s (%s)", n++, fieldsList.size(), gene, description));
            transaction.begin();
            loadReference(gene, description);
            transaction.commit();
            // n has already been incremented, so this fires after records 100, 200, ...
            if (n % 100 == 1) {
                logger.info("Clearing session");
                session.clear();
            }
        }
    }

    /**
     * Reads the whole OPI response and parses every data line.
     *
     * <p>The first line is treated as a header and skipped; empty lines are
     * skipped with a warning. The reader is closed even if reading or
     * parsing fails.
     *
     * @return the parsed fields of each data line, in response order
     * @throws IOException if the download or a read fails
     * @throws IllegalArgumentException if a line has the wrong number of fields
     */
    private List<String[]> readRecords() throws IOException {
        List<String[]> fieldsList = new ArrayList<String[]>();
        BufferedReader br = getOPIBufferedReader();
        try {
            boolean firstLine = true;
            String line;
            while (null != (line = br.readLine())) {
                if (firstLine) {
                    // ignore header line
                    firstLine = false;
                    continue;
                }
                if (line.length() == 0) {
                    logger.warn("Ignoring empty line");
                    continue;
                }
                fieldsList.add(parseLine(line));
            }
        } finally {
            br.close();
        }
        return fieldsList;
    }

    /**
     * Attaches an OPI cross-reference to the polypeptide of the given gene.
     * Nothing is persisted when no polypeptide is found for the gene.
     *
     * @param gene the gene identifier, taken from the second field of a record
     * @param description the description, taken from the third field of a record
     */
    private void loadReference(String gene, String description) {
        Polypeptide polypeptide = this.getPolypeptideForGene(gene);
        if (polypeptide != null) {
            DbXRef dbXRef = objectManager.getDbXRef("OPI", gene, description);
            // NOTE(review): the meaning of the third constructor argument (true) is not visible here.
            FeatureDbXRef featureDbXRef = new FeatureDbXRef(dbXRef, polypeptide, true);
            sequenceDao.persist(featureDbXRef);
        }
    }

    /**
     * Splits one line of the response into its fields using
     * {@link #csvFieldPattern}.
     *
     * <p>NOTE(review): the pattern can also match the empty string at the
     * very end of the line, and that match is counted as a field. Check
     * against real data before tightening the pattern or the field count.
     *
     * @param line a non-empty data line
     * @return an array of exactly {@link #FIELD_COUNT} field values
     * @throws IllegalArgumentException if the line yields more or fewer fields
     */
    private String[] parseLine(String line) {
        String[] ret = new String[FIELD_COUNT];
        Matcher matcher = csvFieldPattern.matcher(line);
        int i = 0;
        while (matcher.find()) {
            if (i >= FIELD_COUNT) {
                throw new IllegalArgumentException(String.format("Failed to parse line '%s': found too many fields",
                    line));
            }
            String quotedField = matcher.group(1);
            String unquotedField = matcher.group(2);
            if (quotedField != null) {
                ret[i] = quotedField;
            }
            else if (unquotedField != null) {
                ret[i] = unquotedField;
            }
            logger.trace(String.format("Field %d is '%s'", i, ret[i]));
            i++;
        }
        if (i != FIELD_COUNT) {
            throw new IllegalArgumentException(String.format("Failed to parse line '%s': found too few fields",
                line));
        }
        return ret;
    }
}