/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.submit.lookup; import gr.ekt.bte.core.Record; import java.io.IOException; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.List; import java.util.Set; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.lang.StringUtils; import org.apache.http.HttpException; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.StatusLine; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.URIBuilder; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.CoreConnectionPNames; import org.apache.http.params.HttpParams; import org.dspace.app.util.XMLUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; /** * @author Andrea Bollini * @author Kostas Stamatis * @author Luigi Andrea Pascarelli * @author Panagiotis Koutsourakis */ public class ArXivService { private int timeout = 1000; /** * How long to wait for a connection to be established. * * @param timeout milliseconds */ public void setTimeout(int timeout) { this.timeout = timeout; } public List<Record> getByDOIs(Set<String> dois) throws HttpException, IOException { if (dois != null && dois.size() > 0) { String doisQuery = StringUtils.join(dois.iterator(), " OR "); return search(doisQuery, null, 100); } return null; } public List<Record> searchByTerm(String title, String author, int year) throws HttpException, IOException { StringBuffer query = new StringBuffer(); if (StringUtils.isNotBlank(title)) { query.append("ti:\"").append(title).append("\""); } if (StringUtils.isNotBlank(author)) { // [FAU] if (query.length() > 0) query.append(" AND "); query.append("au:\"").append(author).append("\""); } return search(query.toString(), "", 10); } protected List<Record> search(String query, String arxivid, int max_result) throws IOException, HttpException { List<Record> results = new ArrayList<Record>(); HttpGet method = null; try { HttpClient client = new DefaultHttpClient(); HttpParams params = client.getParams(); params.setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); try { URIBuilder uriBuilder = new URIBuilder("http://export.arxiv.org/api/query"); uriBuilder.addParameter("id_list", arxivid); uriBuilder.addParameter("search_query", query); uriBuilder.addParameter("max_results", String.valueOf(max_result)); method = new HttpGet(uriBuilder.build()); } catch (URISyntaxException ex) { throw new HttpException(ex.getMessage()); } // Execute the method. HttpResponse response = client.execute(method); StatusLine responseStatus = response.getStatusLine(); int statusCode = responseStatus.getStatusCode(); if (statusCode != HttpStatus.SC_OK) { if (statusCode == HttpStatus.SC_BAD_REQUEST) throw new RuntimeException("arXiv query is not valid"); else throw new RuntimeException("Http call failed: " + responseStatus); } try { DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); DocumentBuilder db = factory.newDocumentBuilder(); Document inDoc = db.parse(response.getEntity().getContent()); Element xmlRoot = inDoc.getDocumentElement(); List<Element> dataRoots = XMLUtils.getElementList(xmlRoot, "entry"); for (Element dataRoot : dataRoots) { Record crossitem = ArxivUtils .convertArxixDomToRecord(dataRoot); if (crossitem != null) { results.add(crossitem); } } } catch (Exception e) { throw new RuntimeException( "ArXiv identifier is not valid or not exist"); } } finally { if (method != null) { method.releaseConnection(); } } return results; } public Record getByArXivIDs(String raw) throws HttpException, IOException { if (StringUtils.isNotBlank(raw)) { raw = raw.trim(); if (raw.startsWith("http://arxiv.org/abs/")) { raw = raw.substring("http://arxiv.org/abs/".length()); } else if (raw.toLowerCase().startsWith("arxiv:")) { raw = raw.substring("arxiv:".length()); } List<Record> result = search("", raw, 1); if (result != null && result.size() > 0) { return result.get(0); } } return null; } }