/**
* =============================================================================
*
* ORCID (R) Open Source
* http://orcid.org
*
* Copyright (c) 2012-2014 ORCID, Inc.
* Licensed under an MIT-Style License (MIT)
* http://orcid.org/open-source-license
*
* This copyright and license information (including a link to the full license)
* shall be included in its entirety in all copies or substantial portion of
* the software.
*
* =============================================================================
*/
package org.orcid.listener.solr;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import org.apache.commons.lang.StringUtils;
import org.orcid.jaxb.model.record.summary_v2.EducationSummary;
import org.orcid.jaxb.model.record.summary_v2.EmploymentSummary;
import org.orcid.jaxb.model.record.summary_v2.WorkGroup;
import org.orcid.jaxb.model.record.summary_v2.WorkSummary;
import org.orcid.jaxb.model.record_rc1.WorkExternalIdentifierType;
import org.orcid.jaxb.model.record_v2.ExternalID;
import org.orcid.jaxb.model.record_v2.Funding;
import org.orcid.jaxb.model.record_v2.PersonExternalIdentifier;
import org.orcid.jaxb.model.record_v2.Record;
import org.orcid.jaxb.model.record_v2.Relationship;
import org.orcid.utils.NullUtils;
import org.orcid.utils.solr.entities.OrcidSolrDocument;
import org.orcid.utils.solr.entities.SolrConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Converts a v2 {@link Record} (plus its fundings) into the flat
 * {@link OrcidSolrDocument} that is handed to solr.
 *
 * Instances hold a {@link JAXBContext} for {@link Record}, which is only used
 * when the converter was created with {@code indexProfile == true}.
 */
public class OrcidRecordToSolrDocument {

    private static final Logger LOG = LoggerFactory.getLogger(OrcidRecordToSolrDocument.class);

    /** External identifier type that marks a funding identifier as a grant number. */
    private static final String GRANT_NUMBER_TYPE = "grant_number";

    private final boolean indexProfile;
    private final JAXBContext jaxbContext_2_0_api;

    /**
     * @param indexProfile
     *            when true, the record is also marshalled to XML and stored
     *            on the document as the public profile message
     * @throws RuntimeException
     *             if the JAXB context for {@link Record} cannot be created
     */
    public OrcidRecordToSolrDocument(boolean indexProfile) {
        this.indexProfile = indexProfile;
        try {
            this.jaxbContext_2_0_api = JAXBContext.newInstance(Record.class);
        } catch (JAXBException e) {
            throw new RuntimeException("Unable to create JAXBContext for " + Record.class.getName(), e);
        }
    }

    /**
     * Builds the solr document for a record.
     *
     * The orcid, history dates and primary record (for deprecated records) are
     * always copied. Everything else (names, emails, external identifiers,
     * keywords, works, fundings and affiliations) is only indexed when the
     * record has a person.
     *
     * @param record
     *            the record to index; must not be null and must carry an
     *            orcid identifier
     * @param fundings
     *            the fundings belonging to the record; null is treated as
     *            "no fundings"
     * @return the populated document, never null
     */
    public OrcidSolrDocument convert(Record record, List<Funding> fundings) {
        OrcidSolrDocument profileIndexDocument = new OrcidSolrDocument();
        profileIndexDocument.setOrcid(record.getOrcidIdentifier().getPath());
        indexHistory(record, profileIndexDocument);

        if (record.getDeprecated() != null) {
            profileIndexDocument.setPrimaryRecord(record.getDeprecated().getPrimaryRecord() != null
                    ? record.getDeprecated().getPrimaryRecord().getOrcidIdentifier().getPath()
                    : null);
        }

        // NOTE(review): works, fundings and affiliations are skipped entirely
        // for records without a person. This mirrors the existing behaviour;
        // confirm whether it is intended before changing it.
        if (record.getPerson() != null) {
            indexNames(record, profileIndexDocument);
            indexEmails(record, profileIndexDocument);
            indexPersonExternalIdentifiers(record, profileIndexDocument);
            indexKeywords(record, profileIndexDocument);
            indexWorks(record, profileIndexDocument);

            // Both maps always contain their keys, even when no values are found.
            Map<String, List<String>> organisationIds = new HashMap<String, List<String>>();
            organisationIds.put(SolrConstants.FUNDREF_ORGANISATION_ID, new ArrayList<String>());
            organisationIds.put(SolrConstants.RINGGOLD_ORGANISATION_ID, new ArrayList<String>());
            Map<String, List<String>> organisationNames = new HashMap<String, List<String>>();
            organisationNames.put(SolrConstants.AFFILIATION_ORGANISATION_NAME, new ArrayList<String>());
            organisationNames.put(SolrConstants.FUNDING_ORGANISATION_NAME, new ArrayList<String>());

            indexFundings(fundings, profileIndexDocument, organisationIds, organisationNames);
            indexAffiliations(record, organisationIds, organisationNames);

            profileIndexDocument.setOrganisationIds(organisationIds);
            profileIndexDocument.setOrganisationNames(organisationNames);
        }

        if (indexProfile) {
            indexPublicProfileMessage(record, profileIndexDocument);
        }

        // Guarded so the document is not stringified when debug logging is off.
        if (LOG.isDebugEnabled()) {
            LOG.debug(profileIndexDocument.toString());
        }
        return profileIndexDocument;
    }

    /** Copies the last modified and submission dates, when the record has them. */
    private void indexHistory(Record record, OrcidSolrDocument doc) {
        if (record.getHistory() == null) {
            return;
        }
        if (record.getHistory().getLastModifiedDate() != null) {
            doc.setProfileLastModifiedDate(record.getHistory().getLastModifiedDate().getValue().toGregorianCalendar().getTime());
        }
        if (record.getHistory().getSubmissionDate() != null) {
            doc.setProfileSubmissionDate(record.getHistory().getSubmissionDate().getValue().toGregorianCalendar().getTime());
        }
    }

    /** Copies family name, given names, credit name and other names. Requires a non-null person. */
    private void indexNames(Record record, OrcidSolrDocument doc) {
        if (record.getPerson().getName() != null) {
            doc.setFamilyName(record.getPerson().getName().getFamilyName() != null
                    ? record.getPerson().getName().getFamilyName().getContent()
                    : null);
            doc.setGivenNames(record.getPerson().getName().getGivenNames() != null
                    ? record.getPerson().getName().getGivenNames().getContent()
                    : null);
            doc.setCreditName(record.getPerson().getName().getCreditName() != null
                    ? record.getPerson().getName().getCreditName().getContent()
                    : null);
        }

        // Other names are only set when there is at least one of them.
        if (record.getPerson().getOtherNames() != null
                && record.getPerson().getOtherNames().getOtherNames() != null
                && !record.getPerson().getOtherNames().getOtherNames().isEmpty()) {
            List<String> names = new ArrayList<String>();
            for (org.orcid.jaxb.model.record_v2.OtherName otherName : record.getPerson().getOtherNames().getOtherNames()) {
                names.add(otherName.getContent());
            }
            doc.setOtherNames(names);
        }
    }

    /** Adds every email address of the person. Requires a non-null person. */
    private void indexEmails(Record record, OrcidSolrDocument doc) {
        if (record.getPerson().getEmails() == null || record.getPerson().getEmails().getEmails() == null) {
            return;
        }
        for (org.orcid.jaxb.model.record_v2.Email email : record.getPerson().getEmails().getEmails()) {
            doc.addEmailAddress(email.getEmail());
        }
    }

    /**
     * Indexes the person's external identifiers as three lists: source paths,
     * values, and "sourcePath=value" pairs. The identifier type is not
     * indexed. Each list is only set when it is non-empty. Requires a
     * non-null person.
     */
    private void indexPersonExternalIdentifiers(Record record, OrcidSolrDocument doc) {
        if (record.getPerson().getExternalIdentifiers() == null
                || record.getPerson().getExternalIdentifiers().getExternalIdentifiers() == null) {
            return;
        }
        List<String> extIdOrcids = new ArrayList<String>();
        List<String> extIdRefs = new ArrayList<String>();
        List<String> extIdOrcidsAndRefs = new ArrayList<String>();
        for (PersonExternalIdentifier externalIdentifier : record.getPerson().getExternalIdentifiers().getExternalIdentifiers()) {
            String sourcePath = null;
            if (externalIdentifier.getSource() != null && externalIdentifier.getSource().retrieveSourcePath() != null) {
                sourcePath = externalIdentifier.getSource().retrieveSourcePath();
                extIdOrcids.add(sourcePath);
            }
            if (externalIdentifier.getValue() != null) {
                extIdRefs.add(externalIdentifier.getValue());
            }
            if (NullUtils.noneNull(sourcePath, externalIdentifier.getValue())) {
                extIdOrcidsAndRefs.add(sourcePath + "=" + externalIdentifier.getValue());
            }
        }
        if (!extIdOrcids.isEmpty()) {
            doc.setExternalIdSources(extIdOrcids);
        }
        if (!extIdRefs.isEmpty()) {
            doc.setExternalIdReferences(extIdRefs);
        }
        if (!extIdOrcidsAndRefs.isEmpty()) {
            doc.setExternalIdSourcesAndReferences(extIdOrcidsAndRefs);
        }
    }

    /**
     * Indexes the person's keywords. Requires a non-null person.
     *
     * NOTE(review): keywords are only indexed when the record also has an
     * activities summary. This is existing behaviour that looks accidental;
     * confirm before changing it.
     */
    private void indexKeywords(Record record, OrcidSolrDocument doc) {
        if (record.getActivitiesSummary() == null) {
            return;
        }
        if (record.getPerson().getKeywords() == null || record.getPerson().getKeywords().getKeywords() == null) {
            return;
        }
        List<String> keywordValues = new ArrayList<String>();
        for (org.orcid.jaxb.model.record_v2.Keyword keyword : record.getPerson().getKeywords().getKeywords()) {
            keywordValues.add(keyword.getContent());
        }
        doc.setKeywords(keywordValues);
    }

    /**
     * Indexes work external identifiers and work titles.
     *
     * Identifiers are indexed twice: the "old way" (one dedicated field per
     * identifier type, regardless of relationship) and the "new way" (maps
     * keyed by type plus a self / part-of suffix). Titles, subtitles and
     * translated titles are collected into one de-duplicated list.
     */
    private void indexWorks(Record record, OrcidSolrDocument doc) {
        if (record.getActivitiesSummary() == null
                || record.getActivitiesSummary().getWorks() == null
                || record.getActivitiesSummary().getWorks().getWorkGroup() == null) {
            return;
        }
        Map<String, List<String>> allExternalIdentifiers = new HashMap<String, List<String>>();
        Map<String, List<String>> partOf = new HashMap<String, List<String>>();
        Map<String, List<String>> self = new HashMap<String, List<String>>();
        Set<String> workTitles = new HashSet<String>();

        for (WorkGroup workGroup : record.getActivitiesSummary().getWorks().getWorkGroup()) {
            if (workGroup.getWorkSummary() == null) {
                continue;
            }
            // Summaries are used because the group itself does not include part-of ids.
            for (WorkSummary work : workGroup.getWorkSummary()) {
                if (work.getExternalIdentifiers() != null && work.getExternalIdentifiers().getExternalIdentifier() != null) {
                    for (ExternalID id : work.getExternalIdentifiers().getExternalIdentifier()) {
                        // old way
                        addUnique(allExternalIdentifiers, id.getType(), id.getValue());
                        // new way
                        if (Relationship.SELF.equals(id.getRelationship())) {
                            addUnique(self, id.getType() + SolrConstants.DYNAMIC_SELF, id.getValue());
                        }
                        if (Relationship.PART_OF.equals(id.getRelationship())) {
                            addUnique(partOf, id.getType() + SolrConstants.DYNAMIC_PART_OF, id.getValue());
                        }
                    }
                }
                if (work.getTitle() != null) {
                    if (work.getTitle().getTitle() != null && StringUtils.isNotEmpty(work.getTitle().getTitle().getContent())) {
                        workTitles.add(work.getTitle().getTitle().getContent());
                    }
                    if (work.getTitle().getSubtitle() != null && StringUtils.isNotEmpty(work.getTitle().getSubtitle().getContent())) {
                        workTitles.add(work.getTitle().getSubtitle().getContent());
                    }
                    if (work.getTitle().getTranslatedTitle() != null
                            && StringUtils.isNotEmpty(work.getTitle().getTranslatedTitle().getContent())) {
                        workTitles.add(work.getTitle().getTranslatedTitle().getContent());
                    }
                }
            }
        }

        doc.setSelfIds(self);
        doc.setPartOfIds(partOf);
        // now add them to the doc, the old way
        addExternalIdentifiersToIndexDocument(doc, allExternalIdentifiers);
        doc.setWorkTitles(new ArrayList<String>(workTitles));
    }

    /**
     * Indexes funding titles and grant numbers, and records each funding
     * organisation's name and disambiguated identifier in the shared
     * organisation maps. Does nothing when there are no fundings.
     */
    private void indexFundings(List<Funding> fundings, OrcidSolrDocument doc, Map<String, List<String>> organisationIds,
            Map<String, List<String>> organisationNames) {
        if (fundings == null || fundings.isEmpty()) {
            return;
        }
        Set<String> fundingTitles = new HashSet<String>();
        Set<String> fundingGrantNumbers = new HashSet<String>();
        for (Funding funding : fundings) {
            if (funding.getTitle() != null) {
                if (funding.getTitle().getTitle() != null && StringUtils.isNotEmpty(funding.getTitle().getTitle().getContent())) {
                    fundingTitles.add(funding.getTitle().getTitle().getContent());
                }
                if (funding.getTitle().getTranslatedTitle() != null
                        && StringUtils.isNotEmpty(funding.getTitle().getTranslatedTitle().getContent())) {
                    fundingTitles.add(funding.getTitle().getTranslatedTitle().getContent());
                }
            }
            if (funding.getExternalIdentifiers() != null && funding.getExternalIdentifiers().getExternalIdentifier() != null) {
                for (ExternalID id : funding.getExternalIdentifiers().getExternalIdentifier()) {
                    // Constant first, so an identifier without a type is skipped instead of throwing.
                    if (GRANT_NUMBER_TYPE.equals(id.getType())) {
                        fundingGrantNumbers.add(id.getValue());
                    }
                }
            }
            if (funding.getOrganization() != null) {
                organisationNames.get(SolrConstants.FUNDING_ORGANISATION_NAME).add(funding.getOrganization().getName());
                if (funding.getOrganization().getDisambiguatedOrganization() != null) {
                    organisationIds.get(SolrConstants.FUNDREF_ORGANISATION_ID)
                            .add(funding.getOrganization().getDisambiguatedOrganization().getDisambiguatedOrganizationIdentifier());
                }
            }
        }
        doc.setFundingTitles(new ArrayList<String>(fundingTitles));
        doc.setGrantNumbers(new ArrayList<String>(fundingGrantNumbers));
    }

    /**
     * Records the organisation name and disambiguated identifier of every
     * education and employment summary in the shared organisation maps.
     */
    private void indexAffiliations(Record record, Map<String, List<String>> organisationIds,
            Map<String, List<String>> organisationNames) {
        if (record.getActivitiesSummary() == null) {
            return;
        }
        if (record.getActivitiesSummary().getEducations() != null
                && record.getActivitiesSummary().getEducations().getSummaries() != null) {
            for (EducationSummary education : record.getActivitiesSummary().getEducations().getSummaries()) {
                if (education.getOrganization() != null) {
                    organisationNames.get(SolrConstants.AFFILIATION_ORGANISATION_NAME).add(education.getOrganization().getName());
                    if (education.getOrganization().getDisambiguatedOrganization() != null) {
                        organisationIds.get(SolrConstants.RINGGOLD_ORGANISATION_ID)
                                .add(education.getOrganization().getDisambiguatedOrganization().getDisambiguatedOrganizationIdentifier());
                    }
                }
            }
        }
        if (record.getActivitiesSummary().getEmployments() != null
                && record.getActivitiesSummary().getEmployments().getSummaries() != null) {
            for (EmploymentSummary employment : record.getActivitiesSummary().getEmployments().getSummaries()) {
                if (employment.getOrganization() != null) {
                    organisationNames.get(SolrConstants.AFFILIATION_ORGANISATION_NAME).add(employment.getOrganization().getName());
                    if (employment.getOrganization().getDisambiguatedOrganization() != null) {
                        organisationIds.get(SolrConstants.RINGGOLD_ORGANISATION_ID)
                                .add(employment.getOrganization().getDisambiguatedOrganization().getDisambiguatedOrganizationIdentifier());
                    }
                }
            }
        }
    }

    /**
     * Marshals the record to XML and stores it on the document. A marshalling
     * failure is logged and otherwise ignored, so the rest of the document is
     * still returned.
     */
    private void indexPublicProfileMessage(Record record, OrcidSolrDocument doc) {
        try {
            StringWriter sw = new StringWriter();
            jaxbContext_2_0_api.createMarshaller().marshal(record, sw);
            doc.setPublicProfileMessage(sw.getBuffer().toString());
        } catch (JAXBException e) {
            LOG.error("problem marshalling xml", e);
        }
    }

    /**
     * Appends {@code value} to the list stored under {@code key}, creating the
     * list on first use and skipping values the list already contains.
     */
    private static void addUnique(Map<String, List<String>> map, String key, String value) {
        List<String> values = map.get(key);
        if (values == null) {
            values = new ArrayList<String>();
            map.put(key, values);
        }
        if (!values.contains(value)) {
            values.add(value);
        }
    }

    /**
     * Fill all the different external identifiers in the profile index
     * document.
     *
     * Entries with a null key, or a null or empty value list, are skipped.
     *
     * NOTE(review): the key is resolved with
     * {@code WorkExternalIdentifierType.fromValue}; how that call reacts to a
     * type it does not know is not visible here - confirm that unknown types
     * cannot abort indexing of the whole record.
     *
     * @param profileIndexDocument
     *            The document that will be indexed by solr
     * @param externalIdentifiers
     *            The external identifier values, keyed by identifier type
     */
    private void addExternalIdentifiersToIndexDocument(OrcidSolrDocument profileIndexDocument, Map<String, List<String>> externalIdentifiers) {
        for (Entry<String, List<String>> entry : externalIdentifiers.entrySet()) {
            if (entry.getKey() == null || entry.getValue() == null || entry.getValue().isEmpty()) {
                continue;
            }
            switch (WorkExternalIdentifierType.fromValue(entry.getKey())) {
            case AGR:
                profileIndexDocument.setAgr(entry.getValue());
                break;
            case ARXIV:
                profileIndexDocument.setArxiv(entry.getValue());
                break;
            case ASIN:
                profileIndexDocument.setAsin(entry.getValue());
                break;
            case ASIN_TLD:
                profileIndexDocument.setAsintld(entry.getValue());
                break;
            case BIBCODE:
                profileIndexDocument.setBibcode(entry.getValue());
                break;
            case CBA:
                profileIndexDocument.setCba(entry.getValue());
                break;
            case CIT:
                profileIndexDocument.setCit(entry.getValue());
                break;
            case CTX:
                profileIndexDocument.setCtx(entry.getValue());
                break;
            case DOI:
                profileIndexDocument.setDigitalObjectIds(entry.getValue());
                break;
            case EID:
                profileIndexDocument.setEid(entry.getValue());
                break;
            case ETHOS:
                profileIndexDocument.setEthos(entry.getValue());
                break;
            case HANDLE:
                profileIndexDocument.setHandle(entry.getValue());
                break;
            case HIR:
                profileIndexDocument.setHir(entry.getValue());
                break;
            case ISBN:
                profileIndexDocument.setIsbn(entry.getValue());
                break;
            case ISSN:
                profileIndexDocument.setIssn(entry.getValue());
                break;
            case JFM:
                profileIndexDocument.setJfm(entry.getValue());
                break;
            case JSTOR:
                profileIndexDocument.setJstor(entry.getValue());
                break;
            case LCCN:
                profileIndexDocument.setLccn(entry.getValue());
                break;
            case MR:
                profileIndexDocument.setMr(entry.getValue());
                break;
            case OCLC:
                profileIndexDocument.setOclc(entry.getValue());
                break;
            case OL:
                profileIndexDocument.setOl(entry.getValue());
                break;
            case OSTI:
                profileIndexDocument.setOsti(entry.getValue());
                break;
            case OTHER_ID:
                profileIndexDocument.setOtherIdentifierType(entry.getValue());
                break;
            case PAT:
                profileIndexDocument.setPat(entry.getValue());
                break;
            case PMC:
                profileIndexDocument.setPmc(entry.getValue());
                break;
            case PMID:
                profileIndexDocument.setPmid(entry.getValue());
                break;
            case RFC:
                profileIndexDocument.setRfc(entry.getValue());
                break;
            case SOURCE_WORK_ID:
                profileIndexDocument.setSourceWorkId(entry.getValue());
                break;
            case SSRN:
                profileIndexDocument.setSsrn(entry.getValue());
                break;
            case URI:
                profileIndexDocument.setUri(entry.getValue());
                break;
            case URN:
                profileIndexDocument.setUrn(entry.getValue());
                break;
            case WOSUID:
                profileIndexDocument.setWosuid(entry.getValue());
                // This break was missing, so WOSUID values were also written to the zbl field.
                break;
            case ZBL:
                profileIndexDocument.setZbl(entry.getValue());
                break;
            default:
                // Types without a dedicated field are not indexed the old way.
                break;
            }
        }
    }
}