/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.authority.orcid.xml;
import org.dspace.authority.orcid.model.Bio;
import org.dspace.authority.orcid.model.BioExternalIdentifier;
import org.dspace.authority.orcid.model.BioName;
import org.dspace.authority.orcid.model.BioResearcherUrl;
import org.dspace.authority.util.XMLUtils;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.xpath.XPathExpressionException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
*
* @author Antoine Snyers (antoine at atmire.com)
* @author Kevin Van de Velde (kevin at atmire dot com)
* @author Ben Bosman (ben at atmire dot com)
* @author Mark Diggory (markd at atmire dot com)
*/
public class XMLtoBio extends Converter {
/**
* log4j logger
*/
private static Logger log = Logger.getLogger(XMLtoBio.class);
/**
* orcid-message XPATHs
*/
protected String ORCID_BIO = "//orcid-bio";
// protected String ORCID = "parent::*/orcid";
protected String ORCID = "parent::*/orcid-identifier/path";
protected String PERSONAL_DETAILS = "personal-details";
protected String GIVEN_NAMES = PERSONAL_DETAILS + "/given-names";
protected String FAMILY_NAME = PERSONAL_DETAILS + "/family-name";
protected String CREDIT_NAME = PERSONAL_DETAILS + "/credit-name";
protected String OTHER_NAMES = PERSONAL_DETAILS + "/other-names";
protected String OTHER_NAME = OTHER_NAMES + "/other-name";
protected String CONTACT_DETAILS = "contact-details";
protected String COUNTRY = CONTACT_DETAILS + "/address/country";
protected String KEYWORDS = "keywords";
protected String KEYWORD = KEYWORDS + "/keyword";
protected String EXTERNAL_IDENTIFIERS = "external-identifiers";
protected String EXTERNAL_IDENTIFIER = EXTERNAL_IDENTIFIERS + "/external-identifier";
protected String EXTERNAL_ID_ORCID = "external-id-orcid";
protected String EXTERNAL_ID_COMMNON_NAME = "external-id-common-name";
protected String EXTERNAL_ID_REFERENCE = "external-id-reference";
protected String EXTERNAL_ID_URL = "external-id-url";
protected String RESEARCHER_URLS = "researcher-urls";
protected String RESEARCHER_URL = "researcher-urls/researcher-url";
protected String URL_NAME = "url-name";
protected String URL = "url";
protected String BIOGRAPHY = ORCID_BIO + "/biography";
protected String AFFILIATIONS = ORCID_BIO + "/affiliation";
/**
* Regex
*/
protected String ORCID_NOT_FOUND = "ORCID [\\d-]* not found";
public List<Bio> convert(Document xml) {
List<Bio> result = new ArrayList<Bio>();
if (XMLErrors.check(xml)) {
try {
Iterator<Node> iterator = XMLUtils.getNodeListIterator(xml, ORCID_BIO);
while (iterator.hasNext()) {
Bio bio = convertBio(iterator.next());
result.add(bio);
}
} catch (XPathExpressionException e) {
log.error("Error in xpath syntax", e);
}
} else {
processError(xml);
}
return result;
}
private Bio convertBio(Node node) {
Bio bio = new Bio();
setOrcid(node,bio);
setPersonalDetails(node, bio);
setContactDetails(node, bio);
setKeywords(node, bio);
setExternalIdentifiers(node, bio);
setResearcherUrls(node, bio);
setBiography(node, bio);
return bio;
}
protected void processError(Document xml) {
String errorMessage = XMLErrors.getErrorMessage(xml);
if(errorMessage.matches(ORCID_NOT_FOUND))
{
// do something?
}
log.error("The orcid-message reports an error: " + errorMessage);
}
private void setOrcid(Node node, Bio bio) {
try {
String orcid = XMLUtils.getTextContent(node, ORCID);
bio.setOrcid(orcid);
} catch (XPathExpressionException e) {
log.debug("Error in finding the biography in bio xml.", e);
}
}
protected void setBiography(Node xml, Bio bio) {
try {
String biography = XMLUtils.getTextContent(xml, BIOGRAPHY);
bio.setBiography(biography);
} catch (XPathExpressionException e) {
log.error("Error in finding the biography in bio xml.", e);
}
}
protected void setResearcherUrls(Node xml, Bio bio) {
try {
NodeList researcher_urls = XMLUtils.getNodeList(xml, RESEARCHER_URL);
if (researcher_urls != null) {
for (int i = 0; i < researcher_urls.getLength(); i++) {
Node researcher_url = researcher_urls.item(i);
if (researcher_url.getNodeType() != Node.TEXT_NODE) {
String url_name = XMLUtils.getTextContent(researcher_url, URL_NAME);
String url = XMLUtils.getTextContent(researcher_url, URL);
BioResearcherUrl researcherUrl = new BioResearcherUrl(url_name, url);
bio.addResearcherUrl(researcherUrl);
}
}
}
} catch (XPathExpressionException e) {
log.error("Error in finding the researcher url in bio xml.", e);
}
}
protected void setExternalIdentifiers(Node xml, Bio bio) {
try {
Iterator<Node> iterator = XMLUtils.getNodeListIterator(xml, EXTERNAL_IDENTIFIER);
while (iterator.hasNext()) {
Node external_identifier = iterator.next();
String id_orcid = XMLUtils.getTextContent(external_identifier, EXTERNAL_ID_ORCID);
String id_common_name = XMLUtils.getTextContent(external_identifier, EXTERNAL_ID_COMMNON_NAME);
String id_reference = XMLUtils.getTextContent(external_identifier, EXTERNAL_ID_REFERENCE);
String id_url = XMLUtils.getTextContent(external_identifier, EXTERNAL_ID_URL);
BioExternalIdentifier externalIdentifier = new BioExternalIdentifier(id_orcid, id_common_name, id_reference, id_url);
bio.addExternalIdentifier(externalIdentifier);
}
} catch (XPathExpressionException e) {
log.error("Error in finding the external identifier in bio xml.", e);
}
}
protected void setKeywords(Node xml, Bio bio) {
try {
NodeList keywords = XMLUtils.getNodeList(xml, KEYWORD);
if (keywords != null) {
for (int i = 0; i < keywords.getLength(); i++) {
String keyword = keywords.item(i).getTextContent();
String[] split = keyword.split(",");
for (String k : split) {
bio.addKeyword(k.trim());
}
}
}
} catch (XPathExpressionException e) {
log.error("Error in finding the keywords in bio xml.", e);
}
}
protected void setContactDetails(Node xml, Bio bio) {
try {
String country = XMLUtils.getTextContent(xml, COUNTRY);
bio.setCountry(country);
} catch (XPathExpressionException e) {
log.error("Error in finding the country in bio xml.", e);
}
}
protected void setPersonalDetails(Node xml, Bio bio) {
BioName name = bio.getName();
try {
String givenNames = XMLUtils.getTextContent(xml, GIVEN_NAMES);
name.setGivenNames(givenNames);
} catch (XPathExpressionException e) {
log.error("Error in finding the given names in bio xml.", e);
}
try {
String familyName = XMLUtils.getTextContent(xml, FAMILY_NAME);
name.setFamilyName(familyName);
} catch (XPathExpressionException e) {
log.error("Error in finding the family name in bio xml.", e);
}
try {
String creditName = XMLUtils.getTextContent(xml, CREDIT_NAME);
name.setCreditName(creditName);
} catch (XPathExpressionException e) {
log.error("Error in finding the credit name in bio xml.", e);
}
try {
Iterator<Node> iterator = XMLUtils.getNodeListIterator(xml, OTHER_NAME);
while (iterator.hasNext()) {
Node otherName = iterator.next();
String textContent = otherName.getTextContent();
name.getOtherNames().add(textContent.trim());
}
} catch (XPathExpressionException e) {
log.error("Error in finding the other names in bio xml.", e);
}
}
}