package org.datacite.conres.service.impl; import com.google.common.base.Charsets; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.sun.jersey.api.client.Client; import com.sun.jersey.api.client.WebResource; import nu.xom.*; import org.datacite.conres.Configuration; import org.datacite.conres.model.Model; import org.datacite.conres.service.SearchService; import org.datacite.conres.view.Representation; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; import javax.ws.rs.core.MediaType; import javax.xml.bind.DatatypeConverter; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; import java.net.URLEncoder; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; public class SearchServiceImpl implements SearchService { private static Client client = Client.create(); private static DateTimeFormatter dateTimeFormatter = ISODateTimeFormat.dateTimeParser(); public static LoadingCache<String, String> solrResponsesCache = CacheBuilder.newBuilder() .maximumSize(Configuration.SOLR_CACHE_SIZE) .expireAfterWrite(60, TimeUnit.SECONDS) .recordStats() .build(new CacheLoader<String, String>() { public String load(String key) { return getRawMetadata(key); } }); private Document document; private static String getUrl(String doi) throws UnsupportedEncodingException { return Configuration.SOLR_API_URL + "?q=doi:%22"+ URLEncoder.encode(doi, Charsets.UTF_8.name()) + "%22&fl=allocator,datacentre,media,xml,uploaded&wt=xml"; } private static String getRawMetadata(String doi) { String result; WebResource r; String url; try { url = getUrl(doi); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } try { r = client.resource(url); } catch (Exception e) { throw new RuntimeException("could not reach search service"); } result = r.get(String.class); return result; } private MediaType getMediaType(String mediaTypeStr){ String[] subtypes = mediaTypeStr.split("/"); return new MediaType(subtypes[0],subtypes[1]); } private void registerMedia(Map<MediaType, URI> result, String media) { int firstComma = media.indexOf(":"); URI uri = null; try { String uriStr = media.substring(firstComma+1); uri = new URI(uriStr); } catch (URISyntaxException e) { // TODO log rubbish URI } if (uri != null) { String mediaStr = media.substring(0, firstComma); result.put(getMediaType(mediaStr), uri); } } @Override public Model getMetadata(String doi, String contextPath, String acceptHeader) { String rawMetadata = null; try { rawMetadata = solrResponsesCache.get(doi); } catch (ExecutionException e) { throw new RuntimeException(e); } if (rawMetadata != null && !"".equals(rawMetadata)){ Builder parser = new Builder(); try { document = parser.build(rawMetadata, null); } catch (Exception e) { throw new RuntimeException(e); } } else return null; if (extractNumFound() == 0) return null; String allocatorName = ""; String datacentreName = ""; byte[] xml = null; Map<MediaType, URI> userMedia = new HashMap<MediaType, URI>(); Nodes nodes = document.query("//*[local-name() = 'str']"); for(int i = 0; i < nodes.size(); i++){ Node node = nodes.get(i); Element el = (Element) node; Attribute attr = el.getAttribute("name"); if(attr == null){ // media type registerMedia(userMedia, el.getValue()); } else if (attr.getValue().equals("allocator")){ allocatorName = el.getValue().substring(el.getValue().indexOf("-") + 1).trim(); } else if (attr.getValue().equals("datacentre")){ datacentreName = el.getValue().substring(el.getValue().indexOf("-") + 1).trim(); } else if (attr.getValue().equals("xml")){ xml = DatatypeConverter.parseBase64Binary(el.getValue()); } } Nodes dates = document.query("//*[local-name() = 'date']"); Date uploaded = null; for(int i = 0; i < dates.size(); i++){ Node node = dates.get(i); Element el = (Element) node; Attribute attr = el.getAttribute("name"); if (attr.getValue().equals("uploaded")){ uploaded = dateTimeFormatter.parseDateTime(el.getValue()).toDate(); } } return new Model(doi, xml, userMedia, contextPath, allocatorName, datacentreName, extractBiblioAttr(acceptHeader, "style"), extractBiblioAttr(acceptHeader, "locale"), uploaded); } private int extractNumFound() { Nodes nodes = document.query("//*[local-name() = 'result']"); if (nodes.size() == 1){ Element el = (Element) nodes.get(0); Attribute attr = el.getAttribute("numFound"); return Integer.valueOf(attr.getValue()); } else return 0; } private String extractBiblioAttr(String header, String attr) { if (header == null || attr == null) { return ""; } String result = ""; for(String h : header.split(",")){ if (h.trim().startsWith(Representation.TEXT_BIBLIOGRAPHY.toString())){ for(String s : h.split(";")) { if (s.trim().startsWith(attr)){ String[] l = s.split("="); result = l.length == 2 ? l[1].trim() : ""; break; } } break; } } return result; } }