/**
* Copyright 2008 The University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.unc.lib.dl.data.ingest.solr.indexing;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.unc.lib.dl.acl.service.AccessControlService;
import edu.unc.lib.dl.acl.util.ObjectAccessControlsBean;
import edu.unc.lib.dl.data.ingest.solr.exception.IndexingException;
import edu.unc.lib.dl.data.ingest.solr.exception.OrphanedObjectException;
import edu.unc.lib.dl.fedora.AccessClient;
import edu.unc.lib.dl.fedora.FedoraException;
import edu.unc.lib.dl.fedora.ManagementClient;
import edu.unc.lib.dl.fedora.NotFoundException;
import edu.unc.lib.dl.fedora.PID;
import edu.unc.lib.dl.fedora.ServiceException;
import edu.unc.lib.dl.fedora.types.MIMETypedStream;
import edu.unc.lib.dl.search.solr.model.IndexDocumentBean;
import edu.unc.lib.dl.util.ContentModelHelper;
import edu.unc.lib.dl.util.ContentModelHelper.CDRProperty;
import edu.unc.lib.dl.util.ContentModelHelper.Datastream;
import edu.unc.lib.dl.util.ContentModelHelper.Relationship;
import edu.unc.lib.dl.util.TripleStoreQueryService;
import edu.unc.lib.dl.xml.FOXMLJDOMUtil;
import edu.unc.lib.dl.xml.JDOMNamespaceUtil;
import edu.unc.lib.dl.xml.NamespaceConstants;
/**
 * Loads data to populate fields in a DocumentIndexingPackage.
 *
 * Data is pulled from the package's own FOXML when it is present, and
 * otherwise from the configured management client, access client, triple
 * store query service and access control service. All collaborators are
 * supplied through setters.
 *
 * @author bbpennel
 * @date Jun 22, 2015
 */
public class DocumentIndexingPackageDataLoader {
    private static final Logger log = LoggerFactory.getLogger(DocumentIndexingPackageDataLoader.class);

    private static final String OBJECT_STATE_RELATION = ContentModelHelper.FedoraProperty.state.toString();

    private ManagementClient managementClient;
    private AccessClient accessClient;
    private TripleStoreQueryService tsqs;
    private AccessControlService accessControlService;
    private DocumentIndexingPackageFactory factory;

    /** Number of attempts made when retrieving data that may be transiently unavailable. */
    private int maxRetries = 2;
    /** Delay in milliseconds between FOXML retrieval attempts. */
    private long retryDelay = 1000L;

    /**
     * Retrieves the FOXML document for the package's object from the management
     * client. At least one attempt is always made; up to {@code maxRetries}
     * attempts are made in total, sleeping {@code retryDelay} milliseconds
     * before each retry.
     *
     * @param dip package identifying the object to retrieve
     * @return the FOXML document, never null
     * @throws IndexingException if no document could be retrieved, or if the
     *         thread was interrupted while waiting to retry
     */
    public Document loadFOXML(DocumentIndexingPackage dip) throws IndexingException {
        PID pid = dip.getPid();

        try {
            log.debug("Retrieving FOXML for {}", pid.getPid());

            int tries = maxRetries;
            do {
                // Only pause before retries, not before the first attempt
                if (tries < maxRetries) {
                    Thread.sleep(retryDelay);
                    log.debug("Retrieving FOXML for DIP, tries remaining: {}", tries);
                }

                try {
                    Document foxml = managementClient.getObjectXML(pid);
                    if (foxml != null) {
                        return foxml;
                    }
                } catch (ServiceException | NotFoundException e) {
                    // If there are retries left, retry on service exception
                    if (tries > 1) {
                        log.warn("Failed to retrieve FOXML for " + pid.getPid() + ", retrying.", e);
                    } else {
                        throw new IndexingException("Failed to retrieve FOXML for " + pid.getPid() + " after " + maxRetries
                                + " tries.", e);
                    }
                }
            } while (--tries > 0);

            throw new IndexingException("Failed to retrieve FOXML for " + pid.getPid());
        } catch (FedoraException e) {
            throw new IndexingException("Failed to retrieve FOXML for " + pid.getPid(), e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up the stack can observe it
            Thread.currentThread().interrupt();
            throw new IndexingException("Interrupted while waiting to retry FOXML retrieval for " + pid.getPid(), e);
        }
    }

    /**
     * Builds the access control bean for the package's object. When the package
     * has a parent document that already carries an ACL bean, the bean is
     * derived from the parent's bean and this object's triples; otherwise the
     * access control service is queried.
     *
     * @param dip package to load access controls for
     * @return access control bean for the object
     * @throws IndexingException declared for callers; not thrown directly here
     */
    public ObjectAccessControlsBean loadAccessControlBean(DocumentIndexingPackage dip) throws IndexingException {
        if (!dip.hasParentDocument() || !dip.getParentDocument().hasAclBean()) {
            // No parent object, ask fedora for access control
            return accessControlService.getObjectAccessControls(dip.getPid());
        }

        return new ObjectAccessControlsBean(dip.getParentDocument().getAclBean(), dip.getPid(), dip.getTriples());
    }

    /**
     * Lists the children of the package's object, taken from the values of its
     * "contains" relationship triples.
     *
     * @param dip package whose triples are inspected
     * @return pids of the children, or an empty list if there is no contains relation
     * @throws IndexingException declared for callers; not thrown directly here
     */
    public List<PID> loadChildren(DocumentIndexingPackage dip) throws IndexingException {
        List<String> childrenRelations = dip.getTriples().get(Relationship.contains.toString());
        if (childrenRelations == null) {
            return Collections.<PID>emptyList();
        }

        List<PID> children = new ArrayList<PID>(childrenRelations.size());
        for (String childRelation : childrenRelations) {
            children.add(new PID(childRelation));
        }
        return children;
    }

    /**
     * Loads the triples for the package's object. If the package already has
     * FOXML the triples are extracted from it, otherwise they are fetched from
     * the triple store query service.
     *
     * @param dip package to load triples for
     * @return map of predicate to list of values
     * @throws IndexingException if extraction from FOXML fails
     */
    public Map<String, List<String>> loadTriples(DocumentIndexingPackage dip) throws IndexingException {
        if (dip.hasFoxml()) {
            return this.extractTriples(dip);
        }

        return tsqs.fetchAllTriples(dip.getPid());
    }

    /**
     * Builds a predicate to values map from the package's FOXML, combining the
     * RELS-EXT description children, the object properties, and a
     * "disseminates" entry for each datastream in the most recent datastream map.
     */
    private Map<String, List<String>> extractTriples(DocumentIndexingPackage dip) throws IndexingException {
        PID pid = dip.getPid();

        Element objectProperties = FOXMLJDOMUtil.getObjectProperties(dip.getFoxml());
        Element relsExt = getDatastream(dip, Datastream.RELS_EXT);
        Map<String, Element> datastreams = FOXMLJDOMUtil.getMostRecentDatastreamMap(dip.getFoxml());

        Map<String, List<String>> triples = new HashMap<String, List<String>>();

        if (relsExt != null) {
            // getChild returns null when there is no rdf:Description, so guard before iterating
            Element description = relsExt.getChild("Description", JDOMNamespaceUtil.RDF_NS);
            if (description != null) {
                for (Element tripleEl : description.getChildren()) {
                    String predicate = tripleEl.getNamespaceURI() + tripleEl.getName();
                    // Prefer the rdf:resource attribute, falling back to the element text
                    String value = tripleEl.getAttributeValue("resource", JDOMNamespaceUtil.RDF_NS);
                    if (value == null) {
                        value = tripleEl.getText();
                    }
                    addTriple(triples, predicate, value);
                }
            }
        }

        if (objectProperties != null) {
            for (Element propertyEl : objectProperties.getChildren()) {
                String predicate = propertyEl.getAttributeValue("NAME");
                String value = propertyEl.getAttributeValue("VALUE");
                // Fedora prefixes the state value into a URI in the triple store, so add in prefix
                if (OBJECT_STATE_RELATION.equals(predicate)) {
                    value = NamespaceConstants.FEDORA_MODEL_URI + value;
                }
                addTriple(triples, predicate, value);
            }
        }

        if (!datastreams.isEmpty()) {
            List<String> disseminated = new ArrayList<String>(datastreams.size());
            triples.put(ContentModelHelper.FedoraProperty.disseminates.toString(), disseminated);
            for (String datastream : datastreams.keySet()) {
                disseminated.add(pid.getURI() + "/" + datastream);
            }
        }

        return triples;
    }

    /** Appends value to the list stored under predicate, creating the list on first use. */
    private static void addTriple(Map<String, List<String>> triples, String predicate, String value) {
        List<String> values = triples.get(predicate);
        if (values == null) {
            values = new ArrayList<String>();
            triples.put(predicate, values);
        }
        values.add(value);
    }

    /**
     * Creates an indexing package for the parent of the package's object.
     *
     * @param dip package whose parent pid is used
     * @return package created by the factory for the parent pid
     * @throws IndexingException if the parent package cannot be created
     */
    public DocumentIndexingPackage loadParentDip(DocumentIndexingPackage dip) throws IndexingException {
        return factory.createDip(dip.getParentPid());
    }

    /**
     * Determines the pid of the parent of the package's object. The last entry
     * of the document's ancestor path is used when available, taking the text
     * between its first and second comma (or everything after the first comma
     * if there is no second one). Otherwise the triple store is queried for the
     * object which contains this one.
     *
     * @param dip package to find the parent of
     * @return pid of the parent object
     * @throws OrphanedObjectException if the triple store reports no containing object
     * @throws IndexingException declared for callers
     */
    public PID loadParentPid(DocumentIndexingPackage dip) throws IndexingException {
        IndexDocumentBean idb = dip.getDocument();

        // Try to get the parent pid from the items ancestors if available.
        List<String> ancestorPath = idb.getAncestorPath();
        if (ancestorPath != null && !ancestorPath.isEmpty()) {
            String ancestor = ancestorPath.get(ancestorPath.size() - 1);
            int start = ancestor.indexOf(',') + 1;
            int end = ancestor.indexOf(',', start);
            // Tolerate entries with no trailing field instead of failing on substring(0, -1)
            String parentId = end < 0 ? ancestor.substring(start) : ancestor.substring(start, end);
            return new PID(parentId);
        }

        log.debug("Retrieving parent pid for {}", dip.getPid().getPid());
        List<PID> containers = tsqs.fetchByPredicateAndLiteral(
                ContentModelHelper.Relationship.contains.toString(), dip.getPid());
        if (containers == null || containers.isEmpty()) {
            throw new OrphanedObjectException("Could not retrieve parent pid for " + dip.getPid().getPid());
        }
        return containers.get(0);
    }

    /**
     * Creates an indexing package for the object named by the first
     * defaultWebObject triple of the package's object.
     *
     * @param dip package whose triples are inspected
     * @return package for the default web object, or null if none is set
     * @throws IndexingException if the package cannot be created
     */
    public DocumentIndexingPackage loadDefaultWebObject(DocumentIndexingPackage dip) throws IndexingException {
        List<String> defaultWebObject = dip.getTriples().get(CDRProperty.defaultWebObject.getURI().toString());
        if (defaultWebObject == null || defaultWebObject.isEmpty()) {
            return null;
        }

        return factory.createDip(new PID(defaultWebObject.get(0)));
    }

    /**
     * Returns the first defaultWebData triple for the package's object, falling
     * back to that of its default web object when the object has none itself.
     *
     * @param dip package to inspect
     * @return the defaultWebData value, or null if neither object provides one
     * @throws IndexingException if the default web object cannot be loaded
     */
    public String loadDefaultWebData(DocumentIndexingPackage dip) throws IndexingException {
        String defaultWebData = dip.getFirstTriple(CDRProperty.defaultWebData.toString());

        // If this object does not have a defaultWebData but its defaultWebObject does, then use that instead.
        if (defaultWebData == null && dip.getDefaultWebObject() != null) {
            defaultWebData = dip.getDefaultWebObject().getFirstTriple(CDRProperty.defaultWebData.toString());
        }

        return defaultWebData;
    }

    /**
     * Loads the root element of the MD_DESCRIPTIVE datastream, checking the
     * package's FOXML first.
     *
     * @param dip package to load the datastream for
     * @return root element of the datastream, or null if it was not found
     * @throws IndexingException if the datastream cannot be retrieved or parsed
     */
    public Element loadMDDescriptive(DocumentIndexingPackage dip) throws IndexingException {
        return loadDatastream(dip, Datastream.MD_DESCRIPTIVE, true);
    }

    /**
     * Loads the root element of the MD_CONTENTS datastream, checking the
     * package's FOXML first.
     *
     * @param dip package to load the datastream for
     * @return root element of the datastream, or null if it was not found
     * @throws IndexingException if the datastream cannot be retrieved or parsed
     */
    public Element loadMDContents(DocumentIndexingPackage dip) throws IndexingException {
        return loadDatastream(dip, Datastream.MD_CONTENTS, true);
    }

    /**
     * Returns the content of the given datastream from the package's FOXML, or
     * null when the package has no FOXML.
     */
    private Element getDatastream(DocumentIndexingPackage dip, Datastream ds) throws IndexingException {
        if (!dip.hasFoxml()) {
            return null;
        }
        return FOXMLJDOMUtil.getDatastreamContent(ds, dip.getFoxml());
    }

    /**
     * Loads the root element of a datastream, optionally from the package's
     * FOXML and otherwise from the repository clients. If the dissemination is
     * reported missing while the management client still lists the datastream,
     * the lookup is retried immediately, for at most {@code maxRetries}
     * attempts (at least one).
     *
     * @return root element of the datastream, or null if the datastream does not exist
     * @throws IndexingException if retrieval fails or no dissemination could be
     *         obtained within the allowed attempts
     */
    private Element loadDatastream(DocumentIndexingPackage dip, Datastream ds, boolean checkFoxml) throws IndexingException {
        PID pid = dip.getPid();
        String datastreamName = ds.getName();

        log.debug("Attempting to get datastream {} for object {}", datastreamName, pid);

        if (checkFoxml) {
            Element dsEl = getDatastream(dip, ds);
            if (dsEl != null) {
                return dsEl;
            }
        }

        // Bound the retries so a persistently missing dissemination cannot spin forever
        int attempts = Math.max(1, maxRetries);
        try {
            for (int attempt = 1; attempt <= attempts; attempt++) {
                edu.unc.lib.dl.fedora.types.Datastream datastream = managementClient.getDatastream(pid, datastreamName);
                if (datastream == null) {
                    return null;
                }

                try {
                    MIMETypedStream mts = accessClient.getDatastreamDissemination(pid, datastreamName, null);

                    try (ByteArrayInputStream bais = new ByteArrayInputStream(mts.getStream())) {
                        Document dsDoc = new SAXBuilder().build(bais);
                        return dsDoc.detachRootElement();
                    } catch (JDOMException | IOException e) {
                        throw new ServiceException("Failed to parse datastream " + datastreamName + " for object " + pid, e);
                    }
                } catch (NotFoundException e) {
                    log.debug("No dissemination version for datastream {} on object {} found, retrying",
                            datastreamName, pid);
                }
            }
        } catch (NotFoundException e) {
            return null;
        } catch (FedoraException e) {
            throw new IndexingException("Failed to get datastream " + datastreamName + " for object " + pid, e);
        }

        throw new IndexingException("Failed to get datastream " + datastreamName + " for object " + pid
                + " after " + attempts + " tries.");
    }

    /**
     * @param managementClient client used to retrieve FOXML and datastream information
     */
    public void setManagementClient(ManagementClient managementClient) {
        this.managementClient = managementClient;
    }

    /**
     * @param accessClient client used to retrieve datastream disseminations
     */
    public void setAccessClient(AccessClient accessClient) {
        this.accessClient = accessClient;
    }

    /**
     * @param tsqs triple store query service used for triples and parent lookups
     */
    public void setTsqs(TripleStoreQueryService tsqs) {
        this.tsqs = tsqs;
    }

    /**
     * @param accessControlService service queried for access controls when no parent ACL bean is available
     */
    public void setAccessControlService(AccessControlService accessControlService) {
        this.accessControlService = accessControlService;
    }

    /**
     * @param maxRetries maximum number of attempts for retried retrievals
     */
    public void setMaxRetries(int maxRetries) {
        this.maxRetries = maxRetries;
    }

    /**
     * @param retryDelay delay in milliseconds between FOXML retrieval attempts
     */
    public void setRetryDelay(long retryDelay) {
        this.retryDelay = retryDelay;
    }

    /**
     * @param factory factory used to create packages for parent and default web objects
     */
    public void setFactory(DocumentIndexingPackageFactory factory) {
        this.factory = factory;
    }
}