/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.importer.external.pubmed.service; import org.apache.axiom.om.OMElement; import org.apache.axiom.om.OMXMLBuilderFactory; import org.apache.axiom.om.OMXMLParserWrapper; import org.apache.axiom.om.xpath.AXIOMXPath; import org.dspace.content.Item; import org.dspace.importer.external.exception.MetadataSourceException; import org.dspace.importer.external.datamodel.Query; import org.dspace.importer.external.datamodel.ImportRecord; import org.dspace.importer.external.service.AbstractImportMetadataSourceService; import org.jaxen.JaxenException; import javax.ws.rs.client.Client; import javax.ws.rs.client.ClientBuilder; import javax.ws.rs.client.Invocation; import javax.ws.rs.client.WebTarget; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.StringReader; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.concurrent.Callable; /** * Implements a data source for querying PubMed Central * * @author Roeland Dillen (roeland at atmire dot com) */ public class PubmedImportMetadataSourceServiceImpl extends AbstractImportMetadataSourceService<OMElement> { private String baseAddress; private WebTarget pubmedWebTarget; /** * Find the number of records matching a query; * * @param query a query string to base the search on. * @return the sum of the matching records over this import source * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public int getNbRecords(String query) throws MetadataSourceException { return retry(new GetNbRecords(query)); } /** * Find the number of records matching a query; * * @param query a query object to base the search on. * @return the sum of the matching records over this import source * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public int getNbRecords(Query query) throws MetadataSourceException { return retry(new GetNbRecords(query)); } /** * Find the number of records matching a string query. Supports pagination * * @param query a query string to base the search on. * @param start offset to start at * @param count number of records to retrieve. * @return a set of records. Fully transformed. * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public Collection<ImportRecord> getRecords(String query, int start, int count) throws MetadataSourceException { return retry(new GetRecords(query, start, count)); } /** * Find records based on a object query. * * @param query a query object to base the search on. * @return a set of records. Fully transformed. * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public Collection<ImportRecord> getRecords(Query query) throws MetadataSourceException { return retry(new GetRecords(query)); } /** * Get a single record from the source. * * @param id identifier for the record * @return the first matching record * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public ImportRecord getRecord(String id) throws MetadataSourceException { return retry(new GetRecord(id)); } /** * Get a single record from the source. * * @param query a query matching a single record * @return the first matching record * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public ImportRecord getRecord(Query query) throws MetadataSourceException { return retry(new GetRecord(query)); } /** * The string that identifies this import implementation. Preferable a URI * * @return the identifying uri */ @Override public String getImportSource() { return "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/"; } /** * Finds records based on an item * * @param item an item to base the search on * @return a collection of import records. Only the identifier of the found records may be put in the record. * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public Collection<ImportRecord> findMatchingRecords(Item item) throws MetadataSourceException { return retry(new FindMatchingRecords(item)); } /** * Finds records based on query object. * Delegates to one or more MetadataSource implementations based on the uri. Results will be aggregated. * * @param query a query object to base the search on. * @return a collection of import records. Only the identifier of the found records may be put in the record. * @throws MetadataSourceException if the underlying methods throw any exception. */ @Override public Collection<ImportRecord> findMatchingRecords(Query query) throws MetadataSourceException { return retry(new FindMatchingRecords(query)); } /** * Initialize the class * * @throws Exception on generic exception */ @Override public void init() throws Exception { Client client = ClientBuilder.newClient(); WebTarget webTarget = client.target(baseAddress); pubmedWebTarget = webTarget.queryParam("db", "pubmed"); } /** * Return the baseAddress set to this object * * @return The String object that represents the baseAddress of this object */ public String getBaseAddress() { return baseAddress; } /** * Set the baseAddress to this object * * @param baseAddress The String object that represents the baseAddress of this object */ public void setBaseAddress(String baseAddress) { this.baseAddress = baseAddress; } private class GetNbRecords implements Callable<Integer> { private GetNbRecords(String queryString) { query = new Query(); query.addParameter("query",queryString); } private Query query; public GetNbRecords(Query query) { this.query = query; } @Override public Integer call() throws Exception { WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", query.getParameterAsClass("query", String.class)); getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); String responseString = response.readEntity(String.class); String count = getSingleElementValue(responseString, "Count"); return Integer.parseInt(count); } } private String getSingleElementValue(String src, String elementName){ OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(src)); OMElement element = records.getDocumentElement(); AXIOMXPath xpath = null; String value = null; try { xpath = new AXIOMXPath("//" + elementName); List<OMElement> recordsList = xpath.selectNodes(element); if(!recordsList.isEmpty()) { value = recordsList.get(0).getText(); } } catch (JaxenException e) { value = null; } return value; } private class GetRecords implements Callable<Collection<ImportRecord>> { private Query query; private GetRecords(String queryString, int start, int count) { query = new Query(); query.addParameter("query",queryString); query.addParameter("start",start); query.addParameter("count",count); } private GetRecords(Query q) { this.query = q; } @Override public Collection<ImportRecord> call() throws Exception { String queryString = query.getParameterAsClass("query",String.class); Integer start = query.getParameterAsClass("start",Integer.class); Integer count = query.getParameterAsClass("count",Integer.class); if(count==null || count < 0){ count = 10; } if(start==null || start < 0){ start = 0; } List<ImportRecord> records = new LinkedList<ImportRecord>(); WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", queryString); getRecordIdsTarget = getRecordIdsTarget.queryParam("retstart", start); getRecordIdsTarget = getRecordIdsTarget.queryParam("retmax", count); getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); String responseString = response.readEntity(String.class); String queryKey = getSingleElementValue(responseString, "QueryKey"); String webEnv = getSingleElementValue(responseString, "WebEnv"); WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv); getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey); getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml"); getRecordsTarget = getRecordsTarget.path("efetch.fcgi"); getRecordsTarget = getRecordsTarget.queryParam("retmax", count); getRecordsTarget = getRecordsTarget.queryParam("retstart", start); invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE); response = invocationBuilder.get(); List<OMElement> omElements = splitToRecords(response.readEntity(String.class)); for (OMElement record : omElements) { records.add(transformSourceRecords(record)); } return records; } } private List<OMElement> splitToRecords(String recordsSrc) { OMXMLParserWrapper records = OMXMLBuilderFactory.createOMBuilder(new StringReader(recordsSrc)); OMElement element = records.getDocumentElement(); AXIOMXPath xpath = null; try { xpath = new AXIOMXPath("//PubmedArticle"); List<OMElement> recordsList = xpath.selectNodes(element); return recordsList; } catch (JaxenException e) { return null; } } private class GetRecord implements Callable<ImportRecord> { private Query query; private GetRecord(String id) { query = new Query(); query.addParameter("id",id); } public GetRecord(Query q) { query = q; } @Override public ImportRecord call() throws Exception { String id = query.getParameterAsClass("id", String.class); WebTarget getRecordTarget = pubmedWebTarget.queryParam("id", id); getRecordTarget = getRecordTarget.queryParam("retmode", "xml"); getRecordTarget = getRecordTarget.path("efetch.fcgi"); Invocation.Builder invocationBuilder = getRecordTarget.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); List<OMElement> omElements = splitToRecords(response.readEntity(String.class)); if(omElements.size()==0) { return null; } return transformSourceRecords(omElements.get(0)); } } private class FindMatchingRecords implements Callable<Collection<ImportRecord>> { private Query query; private FindMatchingRecords(Item item) throws MetadataSourceException { query = getGenerateQueryForItem().generateQueryForItem(item); } public FindMatchingRecords(Query q) { query = q; } @Override public Collection<ImportRecord> call() throws Exception { List<ImportRecord> records = new LinkedList<ImportRecord>(); WebTarget getRecordIdsTarget = pubmedWebTarget.queryParam("term", query.getParameterAsClass("term", String.class)); getRecordIdsTarget = getRecordIdsTarget.queryParam("field", query.getParameterAsClass("field",String.class)); getRecordIdsTarget = getRecordIdsTarget.queryParam("usehistory", "y"); getRecordIdsTarget = getRecordIdsTarget.path("esearch.fcgi"); Invocation.Builder invocationBuilder = getRecordIdsTarget.request(MediaType.TEXT_PLAIN_TYPE); Response response = invocationBuilder.get(); String responseString = response.readEntity(String.class); String queryKey = getSingleElementValue(responseString, "QueryKey"); String webEnv = getSingleElementValue(responseString, "WebEnv"); WebTarget getRecordsTarget = pubmedWebTarget.queryParam("WebEnv", webEnv); getRecordsTarget = getRecordsTarget.queryParam("query_key", queryKey); getRecordsTarget = getRecordsTarget.queryParam("retmode", "xml"); getRecordsTarget = getRecordsTarget.path("efetch.fcgi"); invocationBuilder = getRecordsTarget.request(MediaType.TEXT_PLAIN_TYPE); response = invocationBuilder.get(); List<OMElement> omElements = splitToRecords(response.readEntity(String.class)); for (OMElement record : omElements) { records.add(transformSourceRecords(record)); } return records; } } }