/*
* Copyright (c) 2006-2014 by Public Library of Science
*
* http://plos.org
* http://ambraproject.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* You may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ambraproject.service.crossref;
import com.google.gson.Gson;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import org.ambraproject.filestore.FileStoreException;
import org.ambraproject.filestore.FileStoreService;
import org.ambraproject.service.hibernate.HibernateServiceImpl;
import org.ambraproject.service.xml.XMLServiceImpl;
import org.ambraproject.util.XPathUtil;
import org.ambraproject.views.CrossRefSearch;
import org.ambraproject.xml.transform.cache.CachedSource;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.RequestEntity;
import org.hibernate.HibernateException;
import org.hibernate.Query;
import org.hibernate.Session;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.orm.hibernate3.HibernateCallback;
import org.springframework.transaction.annotation.Transactional;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.EntityResolver;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
/**
* Queries CrossRef for the DOIs of the articles cited by an article and stores the DOIs that are found
*
* @author Joe Osowski
*/
public class CrossRefLookupServiceImpl extends HibernateServiceImpl implements CrossRefLookupService {
// Class-wide SLF4J logger
private static final Logger log = LoggerFactory.getLogger(CrossRefLookupServiceImpl.class);
// URL the CrossRef query is POSTed to; injected through setCrossRefUrl
private String crossRefUrl;
// HTTP client used to execute the CrossRef POST; injected through setHttpClient
private HttpClient httpClient;
// File store the article XML is read from; injected through setFileStoreService
private FileStoreService fileStoreService;
/**
 * Store the harvested citation DOI.
 * <p/>
 * Looks up the articleID of the article with the given DOI, then sets the doi column (and
 * lastModified) of the citedArticle row identified by that articleID and the key column.
 * When the update touches no rows the problem is logged, not raised.
 * <p/>
 * NOTE(review): proxy-based Spring transaction advice is normally not applied to private
 * methods, so this annotation may have no effect - confirm against the Spring configuration.
 *
 * @param articleDOI  the DOI of the citing article
 * @param keyColumn   the keyColumn value of the citedArticle row to update
 * @param citationDOI the DOI found for the cited article
 */
@Transactional
private void setCitationDoi(final String articleDOI, final long keyColumn, final String citationDOI) {
  hibernateTemplate.execute(new HibernateCallback<Object>() {
    @Override
    public Object doInHibernate(Session session) throws HibernateException, SQLException {
      Object idResult = session.createSQLQuery("select articleID from article where doi = :doi")
        .setString("doi", articleDOI)
        .uniqueResult();

      //Fail with a descriptive error instead of a NullPointerException when the article is unknown
      if (idResult == null) {
        throw new HibernateException("No article found for DOI: " + articleDOI);
      }

      //Cast to Number so the lookup does not depend on the exact numeric type the driver returns
      long articleID = ((Number) idResult).longValue();

      Query query = session.createSQLQuery("update citedArticle set doi = :doi, lastModified = NOW()" +
        " where articleID = :articleID and keyColumn = :keyColumn")
        .setString("doi", citationDOI)
        .setLong("articleID", articleID)
        .setLong("keyColumn", keyColumn);

      if (query.executeUpdate() == 0) {
        //A missing citedArticle row is only logged so the remaining citations can still be processed
        log.error("Error setting articleID: {}, Key: {} to value: {}", new Object[] { articleID, keyColumn, citationDOI });
      } else {
        log.debug("Set articleID: {}, Key: {} to value: {}", new Object[] { articleID, keyColumn, citationDOI });
      }

      return null;
    }
  });
}
/**
 * Read the article XML from the file store and parse it into a DOM.
 * <p/>
 * The parser is namespace aware, non-validating, and resolves entities through the
 * resolver obtained from CachedSource for the NLM DTD URL.
 *
 * @param doi the DOI of the article to load
 *
 * @return the parsed document, or null when the XML could not be parsed (the error is logged)
 *
 * @throws FileStoreException declared by the file store calls used to locate and open the XML
 */
private Document getArticle(String doi) throws FileStoreException {
  String fsid = fileStoreService.objectIDMapper().doiTofsid(doi, "XML");
  InputStream is = fileStoreService.getFileInStream(fsid);

  try {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setValidating(false);

    DocumentBuilder builder = factory.newDocumentBuilder();
    EntityResolver resolver = CachedSource.getResolver(XMLServiceImpl.NLM_DTD_URL);
    builder.setEntityResolver(resolver);

    return builder.parse(is);
  } catch (Exception e) {
    log.error("Error parsing the article xml for article " + doi, e);
    return null;
  } finally {
    //Always release the file store stream, whether or not parsing succeeded
    if (is != null) {
      try {
        is.close();
      } catch (IOException e) {
        log.warn("Error closing the article xml stream for article " + doi, e);
      }
    }
  }
}
/**
 * {@inheritDoc}
 * <p/>
 * Loads the article XML, builds one CrossRef search per journal citation, and for every
 * search that yields a DOI stores that DOI against the matching cited article.
 */
@Override
@Transactional
public void refreshCitedArticles(String articleDOI) throws Exception {
  log.info("refreshArticleCitation for article DOI: {}", articleDOI);

  Document article = getArticle(articleDOI);

  for (CrossRefSearch crossRefSearch : getCrossRefSearchTerms(article)) {
    String searchTerms = crossRefSearch.buildQuery();

    if (searchTerms.length() == 0) {
      log.info("No data for citation, not searching for DOI");
      continue;
    }

    String crossrefDoi = findDoi(searchTerms);

    if (crossrefDoi == null || crossrefDoi.isEmpty()) {
      log.info("refreshArticleCitation nothing found");
      continue;
    }

    //A fix for FEND-1077. crossref seems to append a URL to the DOI
    crossrefDoi = crossrefDoi.replace("http://dx.doi.org/","");

    long keyColumn = resolveKeyColumn(crossRefSearch);

    log.info("refreshArticleCitation doi found: {}", crossrefDoi);
    setCitationDoi(articleDOI, keyColumn, crossrefDoi);
  }
}

/**
 * Determine the key column value for a citation: the numeric label when there is one,
 * otherwise a guess based on the position of the reference in the XML.
 *
 * @param crossRefSearch the citation search terms
 *
 * @return the key column value to use for the citation
 */
private long resolveKeyColumn(CrossRefSearch crossRefSearch) {
  String label = crossRefSearch.getLabel();

  if (label != null) {
    try {
      return Long.parseLong(label.trim());
    } catch (NumberFormatException ex) {
      //A non numeric label must not abort the refresh of the remaining citations
      log.warn("Citation label '{}' is not numeric, falling back to the element order", label);
    }
  }

  //Not able to determine value for key column, take a guess here
  //Based on the order of the element found in the XML
  //Some articles do not contain well structured XML
  return crossRefSearch.getOriginalOrder() + 1;
}
/**
 * Build the CrossRefSearch pojos used to look up DOIs for the journal citations of an article.
 * <p/>
 * Every ref element under back/ref-list is inspected; only those containing an element
 * with publication-type 'journal' produce a search.
 *
 * @param article the article DOM
 *
 * @return the searches, in document order, each carrying the index of its ref element
 *
 * @throws Exception if the article is null
 */
protected CrossRefSearch[] getCrossRefSearchTerms(Document article) throws Exception {
  if (article == null) {
    throw new Exception("Article can not be null");
  }

  XPathUtil xpath = new XPathUtil();
  NodeList references = xpath.selectNodes(article, ".//back/ref-list/ref");
  List<CrossRefSearch> searches = new ArrayList<CrossRefSearch>(references.getLength());

  int position = 0;
  while (position < references.getLength()) {
    Node reference = references.item(position);
    Node journalMarker = xpath.selectNode(reference, ".//*[@publication-type='journal']");

    if (journalMarker != null) {
      //The position within the ref-list is recorded alongside the reference node
      searches.add(new CrossRefSearch(reference, position));
    }

    position++;
  }

  CrossRefSearch[] asArray = new CrossRefSearch[searches.size()];
  return searches.toArray(asArray);
}
/**
 * {@inheritDoc}
 * <p/>
 * Returns the DOI of the first result CrossRef reports for the search string, or null
 * when the query produced no response or no results.
 */
@Override
@Transactional(readOnly = true)
public String findDoi(String searchString) throws Exception {
  final CrossRefResponse crossRefResponse = queryCrossRef(searchString);

  if (crossRefResponse == null || crossRefResponse.results.length == 0) {
    return null;
  }

  return crossRefResponse.results[0].doi;
}
/**
 * POST the search string to CrossRef and parse the JSON answer.
 *
 * @param searchString the citation search terms
 *
 * @return the parsed response, or null when the status code is not 200, the body is
 *         missing, or any exception occurs (each case is logged)
 */
private CrossRefResponse queryCrossRef(String searchString)
{
  PostMethod request = createCrossRefPost(searchString);

  try {
    long startedAt = System.currentTimeMillis();
    int statusCode = httpClient.executeMethod(request);
    log.debug("Http post finished in {} ms", System.currentTimeMillis() - startedAt);

    if (statusCode != 200) {
      log.error("Received response code {} when executing query {}", statusCode, crossRefUrl);
      return null;
    }

    String body = request.getResponseBodyAsString();

    if (body == null) {
      log.error("Received empty response, response code {}, when executing query {}", statusCode, crossRefUrl);
      return null;
    }

    log.trace("JSON response received: {}", body);
    return parseJSON(body);
  } catch (Exception ex) {
    log.error(ex.getMessage(), ex);
    return null;
  } finally {
    // hand the connection back to the connection manager on every path
    request.releaseConnection();
  }
}
/**
 * Parse the JSON into native types.
 * <p/>
 * Members that are absent, JSON null, or not primitives are left unset instead of raising,
 * so one malformed result does not discard the whole response.  Results without a DOI
 * are dropped.
 *
 * @param json the JSON string to convert to a java native type
 *
 * @return a CrossRefResponse object whose results array is never null
 */
private CrossRefResponse parseJSON(final String json) {
  JsonObject responseObject = new JsonParser().parse(json).getAsJsonObject();
  CrossRefResponse response = new CrossRefResponse();

  JsonElement queryOk = primitiveMember(responseObject, "query_ok");
  if (queryOk != null) {
    response.queryOK = queryOk.getAsBoolean();
  }

  List<CrossRefResult> validResults = new ArrayList<CrossRefResult>();
  JsonElement resultsElement = responseObject.get("results");

  if (resultsElement != null && resultsElement.isJsonArray()) {
    for (JsonElement resultElement : resultsElement.getAsJsonArray()) {
      if (!resultElement.isJsonObject()) {
        continue;
      }

      JsonObject resultObj = resultElement.getAsJsonObject();
      CrossRefResult res = new CrossRefResult();

      JsonElement text = primitiveMember(resultObj, "text");
      if (text != null) {
        res.text = text.getAsString();
      }

      JsonElement match = primitiveMember(resultObj, "match");
      if (match != null) {
        res.match = match.getAsBoolean();
      }

      JsonElement doi = primitiveMember(resultObj, "doi");
      if (doi != null) {
        res.doi = doi.getAsString();
      }

      JsonElement score = primitiveMember(resultObj, "score");
      if (score != null) {
        res.score = score.getAsString();
      }

      //Some results aren't actually valid
      if (res.doi != null) {
        validResults.add(res);
      }
    }
  }

  response.results = validResults.toArray(new CrossRefResult[validResults.size()]);
  return response;
}

/**
 * Fetch a member of a JSON object only when it is present and a primitive value.
 *
 * @param object the JSON object to read from
 * @param name   the member name
 *
 * @return the member, or null when it is absent, JSON null, an array or an object
 */
private static JsonElement primitiveMember(JsonObject object, String name) {
  JsonElement member = object.get(name);
  return (member != null && member.isJsonPrimitive()) ? member : null;
}
/**
 * Build the POST request that submits one search string to CrossRef.
 * <p/>
 * The body is a JSON array holding the single, JSON-escaped search string, for example:
 * ["Young GC,Analytical methods in palaeobiogeography, and the role of early vertebrate studies;Palaeoworld;19;160-173"]
 *
 * @param searchString the citation search terms
 *
 * @return the request, ready to be executed
 *
 * @throws RuntimeException if no CrossRef URL has been configured
 */
private PostMethod createCrossRefPost(String searchString)
{
  if (this.crossRefUrl == null) {
    throw new RuntimeException("ambra.services.crossref.query.url value not found in configuration.");
  }

  //Use toJSON to encode strings with proper escaping.  The bytes are produced once, as UTF-8,
  //so the declared content length and the written body always agree and never depend on the
  //platform default charset
  final byte[] payload = ("[" + new Gson().toJson(searchString) + "]")
    .getBytes(java.nio.charset.Charset.forName("UTF-8"));

  PostMethod post = new PostMethod(this.crossRefUrl);
  post.addRequestHeader("Content-Type","application/json");
  post.setRequestEntity(new RequestEntity() {
    @Override
    public boolean isRepeatable() {
      return false;
    }

    @Override
    public void writeRequest(OutputStream outputStream) throws IOException {
      outputStream.write(payload);
    }

    @Override
    public long getContentLength() {
      return payload.length;
    }

    @Override
    public String getContentType() {
      return "application/json";
    }
  });

  return post;
}
/* utility class for internally tracking data */
private class CrossRefResult {
public String text;
public Boolean match;
public String doi;
public String score;
}
/* utility class for internally tracking data */
private class CrossRefResponse {
public CrossRefResult[] results;
public Boolean queryOK;
}
/**
 * Inject the HTTP client used to execute the CrossRef query.
 *
 * @param httpClient the client to use
 */
@Required
public void setHttpClient(final HttpClient httpClient) {
  this.httpClient = httpClient;
}
/**
 * Inject the URL the CrossRef query is posted to.
 *
 * @param crossRefUrl the CrossRef query URL
 */
@Required
public void setCrossRefUrl(final String crossRefUrl) {
  this.crossRefUrl = crossRefUrl;
}
/**
 * Inject the file store the article XML is read from.
 *
 * @param fileStoreService the file store service to use
 */
@Required
public void setFileStoreService(final FileStoreService fileStoreService) {
  this.fileStoreService = fileStoreService;
}
}