package uk.bl.odin.orcid.doi;
import java.io.IOException;
import java.util.List;
import javax.inject.Inject;
import javax.inject.Singleton;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
/**
* Not ideal but reasonable first pass implementation of name->prefix mappings
* provider
*
* TODO: for datacite we can get http://search.datacite.org/list/datacentres for
* list of names get http://search.datacite.org/list/prefixes for a list of
* prefixes get
* http://search.datacite.org/list/prefixes?fq=datacentre_symbol:TIB
* .GFZ&facet.mincount=1 to match them up.
*
* Or parse whole dump from: curl
* "http://search.datacite.org/api?q=prefix:*&fl=prefix,datacentre&wt=csv&csv.header=false&rows=99999999"
* which is datacite-all.json TODO: for crossref we can parse
* http://www.crossref.org/xref/xml/mddb.xml
*
* @author tom
*
*/
@Singleton
public class DOIPrefixMapper {
// name -> doi list
private final ImmutableMultimap<String, String> publisherMap;
private final ImmutableMultimap<String, String> datacentreMap;
@Inject
public DOIPrefixMapper() {
publisherMap = loadPublisherMap("doi-prefix-publishers.csv");
datacentreMap = loadBasicDatacentreMap("datacentre-prefixes.json");
}
private ImmutableMultimap<String, String> loadBasicDatacentreMap(String file) {
Multimap<String, String> m = LinkedHashMultimap.create();
ObjectMapper mapper = new ObjectMapper();
try {
List<DatacentrePrefixMapping> prefixes = mapper.readValue(getClass().getResourceAsStream(file),
new TypeReference<List<DatacentrePrefixMapping>>() {
});
for (DatacentrePrefixMapping mapping : prefixes) {
m.putAll(mapping.datacentre, mapping.prefixes);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return ImmutableMultimap.copyOf(m);
}
private ImmutableMultimap<String, String> loadPublisherMap(String file) {
// todo make sortedsetmultimap
Multimap<String, String> temp = LinkedHashMultimap.create();
CsvMapper mapper = new CsvMapper();
mapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
try {
MappingIterator<Object[]> it = mapper.reader(Object[].class).readValues(
getClass().getResourceAsStream(file));
while (it.hasNext()) {
Object[] row = it.next();
if (row.length > 1 && (row[0] != null && row[1] != null)
&& (!row[0].toString().isEmpty() && !row[1].toString().isEmpty())) {
temp.put(row[1].toString().trim(), row[0].toString().trim());
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
return ImmutableMultimap.copyOf(temp);
}
public ImmutableMultimap<String, String> getDatacentreMap() {
return datacentreMap;
}
/**
* A map of Publisher name -> DOI prefixes
*
* @return sorted by publisher name
*/
public ImmutableMultimap<String, String> getPublisherMap() {
return publisherMap;
}
public static class DatacentrePrefixMapping {
public String datacentre;
public List<String> prefixes;
}
}