/* * Licensed to DuraSpace under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional information * regarding copyright ownership. * * DuraSpace licenses this file to you under the Apache License, * Version 2.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fcrepo.kernel.modeshape.utils.iterators; import static org.apache.jena.rdf.model.ModelFactory.createDefaultModel; import static org.apache.jena.rdf.model.ResourceFactory.createResource; import static org.apache.jena.vocabulary.RDF.type; import static java.lang.String.join; import static org.fcrepo.kernel.modeshape.rdf.ManagedRdf.isManagedMixin; import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.getJcrNode; import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary; import static org.fcrepo.kernel.api.FedoraTypes.FCR_METADATA; import static org.slf4j.LoggerFactory.getLogger; import org.fcrepo.kernel.api.models.FedoraResource; import org.fcrepo.kernel.api.exception.ConstraintViolationException; import org.fcrepo.kernel.api.exception.IncorrectTripleSubjectException; import org.fcrepo.kernel.api.exception.MalformedRdfException; import org.fcrepo.kernel.api.exception.ServerManagedTypeException; import org.fcrepo.kernel.api.exception.OutOfDomainSubjectException; import org.fcrepo.kernel.api.exception.RepositoryRuntimeException; import org.fcrepo.kernel.api.identifiers.IdentifierConverter; import javax.jcr.RepositoryException; import javax.jcr.Session; import 
org.fcrepo.kernel.api.RdfStream;
import org.fcrepo.kernel.modeshape.rdf.JcrRdfTools;
import org.fcrepo.kernel.api.rdf.DefaultRdfStream;
import org.slf4j.Logger;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;

import java.util.ArrayList;
import java.util.List;
import java.util.function.Predicate;

/**
 * Base class for consumers that walk an {@link RdfStream} and hand every triple to one of two
 * subclass hooks: {@link #operateOnMixin(Resource, FedoraResource)} for user-managed
 * {@code rdf:type} assertions and {@link #operateOnProperty(Statement, FedoraResource)} for
 * everything else.
 * <p>
 * The incoming stream is wrapped so that each triple's subject is checked against the stream's
 * topic before the triple is processed; see the constructor for the accepted subject forms.
 *
 * @author ajs6f
 * @since Oct 24, 2013
 */
public abstract class PersistingRdfStreamConsumer implements RdfStreamConsumer {

    /** The subject-checked view of the stream handed to the constructor. */
    private final RdfStream stream;

    private final IdentifierConverter<Resource, FedoraResource> idTranslator;

    // if it's not about a Fedora resource, we don't care.
    protected final Predicate<Triple> isFedoraSubjectTriple;

    private final JcrRdfTools jcrRdfTools;

    /** Scratch model used only to wrap graph-level nodes and triples as model-level objects. */
    private static final Model m = createDefaultModel();

    private static final Logger LOGGER = getLogger(PersistingRdfStreamConsumer.class);

    /** Messages of the {@link MalformedRdfException}s collected while consuming the stream. */
    private final List<String> exceptions;

    /**
     * Ordinary constructor.
     * <p>
     * Wraps {@code stream} in a {@link DefaultRdfStream} with the same topic, filtered through
     * {@link #isFedoraSubjectTriple}. That predicate accepts a triple whose subject is a blank
     * node, the topic itself, a hash URI whose base is the topic, or the resource whose
     * {@code fcr:metadata} child is the topic (provided that resource is a Fedora binary). Any
     * other subject makes the predicate throw rather than return {@code false}.
     *
     * @param idTranslator the id translator
     * @param session the session
     * @param stream the rdf stream
     */
    public PersistingRdfStreamConsumer(final IdentifierConverter<Resource, FedoraResource> idTranslator,
            final Session session, final RdfStream stream) {
        this.idTranslator = idTranslator;
        this.jcrRdfTools = new JcrRdfTools(idTranslator, session);
        this.isFedoraSubjectTriple = this::isFedoraSubject;
        this.stream = new DefaultRdfStream(stream.topic(), stream.filter(isFedoraSubjectTriple));
        this.exceptions = new ArrayList<>();
    }

    /**
     * Decides whether the subject of a triple is acceptable for the topic of this stream.
     *
     * @param t the triple to check
     * @return {@code true} for every acceptable subject; this method never returns {@code false}
     * @throws IncorrectTripleSubjectException if the subject is in the repository domain but does
     *         not belong to the topic
     * @throws OutOfDomainSubjectException if the subject is not in the repository domain
     */
    private boolean isFedoraSubject(final Triple t) {
        final Node subject = t.getSubject();
        final Node topic = stream().topic();

        // blank nodes are okay
        if (subject.isBlank()) {
            return true;
        }

        final String subjectURI = subject.getURI();
        final int hashIndex = subjectURI.lastIndexOf("#");

        // a hash URI with the same base as the topic is okay, as is the topic itself
        final boolean hashUriOfTopic =
                hashIndex > 0 && topic.getURI().equals(subjectURI.substring(0, hashIndex));
        if (hashUriOfTopic || topic.equals(subject)) {
            LOGGER.debug("Discovered a Fedora-relevant subject in triple: {}.", t);
            return true;
        }

        // the topic is the fcr:metadata description and the subject is the binary it describes
        if (topic.getURI().equals(subjectURI + "/" + FCR_METADATA)
                && isFedoraBinary.test(getJcrNode(translator().convert(createResource(subjectURI))))) {
            LOGGER.debug("Discovered a NonRDFSource subject in triple: {}.", t);
            return true;
        }

        // the subject was inappropriate in one of two ways
        if (translator().inDomain(m.asRDFNode(subject).asResource())) {
            // it was in-domain, but not within this resource
            LOGGER.error("{} is not in the topic of this RDF, which is {}.", subject, topic);
            throw new IncorrectTripleSubjectException(subject
                    + " is not in the topic of this RDF, which is " + topic);
        }

        // it wasn't even in-domain!
        LOGGER.error("subject {} is not in repository domain.", subject);
        throw new OutOfDomainSubjectException(subject);
    }

    /**
     * Runs {@link #operateOnTriple(Statement)} over every triple of the stream.
     * <p>
     * A {@link ConstraintViolationException} aborts processing immediately. The message of each
     * {@link MalformedRdfException} is recorded instead, so that the remaining triples are still
     * processed; once the stream is exhausted the recorded messages are reported together.
     *
     * @throws MalformedRdfException if at least one triple was rejected; the message is the
     *         newline-joined list of the individual messages
     */
    @Override
    public void consume() throws MalformedRdfException {
        stream.forEach(t -> {
            final Statement s = m.asStatement(t);
            LOGGER.debug("Operating on triple {}.", s);

            try {
                operateOnTriple(s);
            } catch (final ConstraintViolationException e) {
                throw e;
            } catch (final MalformedRdfException e) {
                exceptions.add(e.getMessage());
            }
        });

        if (!exceptions.isEmpty()) {
            throw new MalformedRdfException(join("\n", exceptions));
        }
    }

    /**
     * Skolemizes a single statement, resolves its subject to a {@link FedoraResource} and
     * dispatches it to the matching subclass hook.
     *
     * @param input the statement to process
     * @throws MalformedRdfException if a {@link RepositoryException} or
     *         {@link RepositoryRuntimeException} (other than a
     *         {@link ConstraintViolationException}, which is rethrown unchanged) is raised while
     *         handling the statement
     */
    protected void operateOnTriple(final Statement input) throws MalformedRdfException {
        try {
            final Statement t = jcrRdfTools.skolemize(idTranslator, input, stream().topic().toString());

            final Resource subject = t.getSubject();
            final FedoraResource subjectNode = translator().convert(subject);

            // if this is a user-managed RDF type assertion, update the node's
            // mixins. If it isn't, treat it as a "data" property.
            if (t.getPredicate().equals(type) && t.getObject().isResource()) {
                final Resource mixinResource = t.getObject().asResource();
                if (!isManagedMixin.test(mixinResource)) {
                    LOGGER.debug("Operating on node: {} with mixin: {}.", subjectNode, mixinResource);
                    operateOnMixin(mixinResource, subjectNode);
                } else {
                    LOGGER.error("Found repository-managed mixin {} in triple {} on which we will not operate.",
                            mixinResource, t);
                    throw new ServerManagedTypeException(String.format(
                            "The repository type (%s) of this resource is system managed.", mixinResource));
                }
            } else {
                LOGGER.debug("Operating on node: {} from triple: {}.", subjectNode, t);
                operateOnProperty(t, subjectNode);
            }
        } catch (final ConstraintViolationException e) {
            throw e;
        } catch (final RepositoryException | RepositoryRuntimeException e) {
            throw new MalformedRdfException(e.getMessage(), e);
        }
    }

    /**
     * Hook invoked for every statement that is not a user-managed {@code rdf:type} assertion.
     *
     * @param t the (skolemized) statement
     * @param subjectNode the resource the statement's subject resolves to
     * @throws RepositoryException if the underlying repository operation fails
     */
    protected abstract void operateOnProperty(final Statement t,
        final FedoraResource subjectNode) throws RepositoryException;

    /**
     * Hook invoked for every {@code rdf:type} assertion whose object is a resource that is not a
     * repository-managed mixin.
     *
     * @param mixinResource the type being asserted
     * @param subjectNode the resource the statement's subject resolves to
     * @throws RepositoryException if the underlying repository operation fails
     */
    protected abstract void operateOnMixin(final Resource mixinResource,
        final FedoraResource subjectNode) throws RepositoryException;

    /**
     * Runs {@link #consume()} on the calling thread and reports the outcome through a future that
     * is already completed when it is returned.
     *
     * @return a future that holds {@code true} when consumption succeeded, or that fails with the
     *         {@link MalformedRdfException} raised by {@link #consume()}
     */
    @Override
    public ListenableFuture<Boolean> consumeAsync() {
        // TODO make this actually asynch
        final SettableFuture<Boolean> result = SettableFuture.create();
        try {
            consume();
            result.set(true);
        } catch (final MalformedRdfException e) {
            LOGGER.warn("Got exception consuming RDF stream", e);
            // A SettableFuture can be completed only once, so failing it is the whole outcome;
            // a subsequent set(false) would be silently ignored.
            result.setException(e);
        }
        return result;
    }

    /**
     * @return the stream
     */
    public RdfStream stream() {
        return stream;
    }

    /**
     * @return the idTranslator
     */
    public IdentifierConverter<Resource, FedoraResource> translator() {
        return idTranslator;
    }

    /**
     * @return the jcrRdfTools
     */
    public JcrRdfTools jcrRdfTools() {
        return jcrRdfTools;
    }

}