/** * The contents of this file are subject to the license and copyright * detailed in the LICENSE and NOTICE files at the root of the source * tree and available online at * * http://www.dspace.org/license/ */ package org.dspace.submit.lookup; import gr.ekt.bte.core.Record; import java.io.IOException; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.http.HttpException; import org.apache.http.HttpResponse; import org.apache.http.HttpStatus; import org.apache.http.StatusLine; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.params.CoreConnectionPNames; import org.apache.log4j.Logger; import org.dspace.app.util.XMLUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; /** * @author Keiji Suzuki */ public class CiNiiService { /** log4j category */ private static final Logger log = Logger.getLogger(CiNiiService.class); protected int timeout = 1000; public void setTimeout(int timeout) { this.timeout = timeout; } public Record getByCiNiiID(String id, String appId) throws HttpException, IOException { return search(id, appId); } public List<Record> searchByTerm(String title, String author, int year, int maxResults, String appId) throws HttpException, IOException { List<Record> records = new ArrayList<Record>(); List<String> ids = getCiNiiIDs(title, author, year, maxResults, appId); if (ids != null && ids.size() > 0) { for (String id : ids) { Record record = search(id, appId); if (record != null) { records.add(record); } } } return records; } /** * Get metadata by searching CiNii RDF API with CiNii NAID * * @param id * CiNii NAID to search by * @param appId * registered application identifier for the API * @return record metadata * @throws IOException * A general class of exceptions produced by failed or interrupted I/O operations. * @throws HttpException * Represents a XML/HTTP fault and provides access to the HTTP status code. */ protected Record search(String id, String appId) throws IOException, HttpException { HttpGet method = null; try { HttpClient client = new DefaultHttpClient(); client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); method = new HttpGet("http://ci.nii.ac.jp/naid/"+id+".rdf?appid="+appId); // Execute the method. HttpResponse response = client.execute(method); StatusLine statusLine = response.getStatusLine(); int statusCode = statusLine.getStatusCode(); if (statusCode != HttpStatus.SC_OK) { if (statusCode == HttpStatus.SC_BAD_REQUEST) throw new RuntimeException("CiNii RDF is not valid"); else throw new RuntimeException("CiNii RDF Http call failed: " + statusLine); } try { DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); DocumentBuilder db = factory.newDocumentBuilder(); Document inDoc = db.parse(response.getEntity().getContent()); Element xmlRoot = inDoc.getDocumentElement(); return CiNiiUtils.convertCiNiiDomToRecord(xmlRoot); } catch (Exception e) { throw new RuntimeException( "CiNii RDF identifier is not valid or not exist"); } } finally { if (method != null) { method.releaseConnection(); } } } /** * Get CiNii NAIDs by searching CiNii OpenURL API with title, author and year * * @param title * record title * @param author * record author * @param year * record year * @param maxResults * maximun number of results returned * @param appId * registered application identifier for the API * @return matching NAIDs * @throws IOException * A general class of exceptions produced by failed or interrupted I/O operations. * @throws HttpException * Represents a XML/HTTP fault and provides access to the HTTP status code. */ protected List<String> getCiNiiIDs(String title, String author, int year, int maxResults, String appId) throws IOException, HttpException { // Need at least one query term if (title == null && author == null && year == -1) { return null; } HttpGet method = null; List<String> ids = new ArrayList<String>(); try { HttpClient client = new DefaultHttpClient(); client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, timeout); StringBuilder query = new StringBuilder(); query.append("format=rss&appid=").append(appId) .append("&count=").append(maxResults); if (title != null) { query.append("&title=").append(URLEncoder.encode(title, "UTF-8")); } if (author != null) { query.append("&author=").append(URLEncoder.encode(author, "UTF-8")); } if (year != -1) { query.append("&year_from=").append(String.valueOf(year)); query.append("&year_to=").append(String.valueOf(year)); } method = new HttpGet("http://ci.nii.ac.jp/opensearch/search?"+query.toString()); // Execute the method. HttpResponse response = client.execute(method); StatusLine statusLine = response.getStatusLine(); int statusCode = statusLine.getStatusCode(); if (statusCode != HttpStatus.SC_OK) { if (statusCode == HttpStatus.SC_BAD_REQUEST) throw new RuntimeException("CiNii OpenSearch query is not valid"); else throw new RuntimeException("CiNii OpenSearch call failed: " + statusLine); } try { DocumentBuilderFactory factory = DocumentBuilderFactory .newInstance(); factory.setValidating(false); factory.setIgnoringComments(true); factory.setIgnoringElementContentWhitespace(true); DocumentBuilder db = factory.newDocumentBuilder(); Document inDoc = db.parse(response.getEntity().getContent()); Element xmlRoot = inDoc.getDocumentElement(); List<Element> items = XMLUtils.getElementList(xmlRoot, "item"); int url_len = "http://ci.nii.ac.jp/naid/".length(); for (Element item : items) { String about = item.getAttribute("rdf:about"); if (about.length() > url_len) { ids.add(about.substring(url_len)); } } return ids; } catch (Exception e) { throw new RuntimeException( "CiNii OpenSearch results is not valid or not exist"); } } finally { if (method != null) { method.releaseConnection(); } } } }