/*
* Licensed to DuraSpace under one or more contributor license agreements.
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership.
*
* DuraSpace licenses this file to you under the Apache License,
* Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fcrepo.kernel.modeshape;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Timer;
import org.apache.jena.rdf.model.Resource;
import org.fcrepo.kernel.api.exception.InvalidChecksumException;
import org.fcrepo.kernel.api.exception.PathNotFoundRuntimeException;
import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
import org.fcrepo.kernel.api.identifiers.IdentifierConverter;
import org.fcrepo.kernel.api.models.NonRdfSourceDescription;
import org.fcrepo.kernel.api.models.FedoraBinary;
import org.fcrepo.kernel.api.models.FedoraResource;
import org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint;
import org.fcrepo.kernel.api.RdfStream;
import org.fcrepo.kernel.api.utils.CacheEntry;
import org.fcrepo.kernel.api.utils.ContentDigest;
import org.fcrepo.kernel.api.utils.FixityResult;
import org.fcrepo.kernel.modeshape.rdf.impl.FixityRdfContext;
import org.fcrepo.kernel.modeshape.utils.impl.CacheEntryFactory;
import org.fcrepo.metrics.RegistryService;
import org.modeshape.jcr.api.Binary;
import org.modeshape.jcr.api.ValueFactory;
import org.slf4j.Logger;
import javax.jcr.Node;
import javax.jcr.PathNotFoundException;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import java.io.InputStream;
import java.net.URI;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import static com.codahale.metrics.MetricRegistry.name;
import static org.apache.jena.datatypes.xsd.XSDDatatype.XSDstring;
import static org.fcrepo.kernel.api.utils.ContentDigest.DIGEST_ALGORITHM.SHA1;
import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.FIELD_DELIMITER;
import static org.fcrepo.kernel.modeshape.services.functions.JcrPropertyFunctions.property2values;
import static org.fcrepo.kernel.modeshape.utils.FedoraTypesUtils.isFedoraBinary;
import static org.modeshape.jcr.api.JcrConstants.JCR_CONTENT;
import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
import static org.slf4j.LoggerFactory.getLogger;
/**
 * {@link FedoraBinary} implementation backed by a JCR {@link Node}. The binary content is read from
 * and written to the node's {@code JCR_DATA} property, and the description of the binary is modelled
 * by the node's parent (see {@link #getDescription()}).
 *
 * @author cabeer
 * @since 9/19/14
 */
public class FedoraBinaryImpl extends FedoraResourceImpl implements FedoraBinary {

    private static final Logger LOGGER = getLogger(FedoraBinaryImpl.class);

    static final RegistryService registryService = RegistryService.getInstance();

    /** Incremented once per call to {@link #getFixity(IdentifierConverter, URI, long)}. */
    static final Counter fixityCheckCounter
            = registryService.getMetrics().counter(name(FedoraBinary.class, "fixity-check-counter"));

    /** Times each fixity check performed by {@link #getFixity(IdentifierConverter, URI, long)}. */
    static final Timer timer = registryService.getMetrics().timer(
            name(NonRdfSourceDescription.class, "fixity-check-time"));

    /** Records the length of the data property each time a content node is decorated. */
    static final Histogram contentSizeHistogram =
            registryService.getMetrics().histogram(name(FedoraBinary.class, "content-size"));

    /**
     * Wrap an existing Node as a Fedora Binary. If the node is new, it is immediately decorated
     * with the binary mixin and, when content is already present, digest and size properties.
     *
     * @param node the node
     */
    public FedoraBinaryImpl(final Node node) {
        super(node);

        if (node.isNew()) {
            initializeNewBinaryProperties();
        }
    }

    /**
     * Decorate the wrapped node with the FedoraBinary properties. Failures are logged and
     * otherwise ignored so that wrapping a node never fails because of decoration.
     */
    private void initializeNewBinaryProperties() {
        try {
            decorateContentNode(node, new HashSet<>());
        } catch (final RepositoryException e) {
            // Pass the exception as the trailing argument so the stack trace is logged as well.
            LOGGER.warn("Could not decorate {} with FedoraBinary properties: {}", node, e.getMessage(), e);
        }
    }

    /**
     * Get the description of this binary, which wraps the parent of this binary's node.
     *
     * @return the description resource
     * @throws RepositoryRuntimeException if the parent node cannot be retrieved
     */
    @Override
    public FedoraResource getDescription() {
        try {
            return new NonRdfSourceDescriptionImpl(getNode().getParent());
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContent()
     */
    @Override
    public InputStream getContent() {
        try {
            return getBinaryContent().getStream();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Retrieve the JCR Binary object
     *
     * @return a JCR-wrapped Binary object
     * @throws PathNotFoundRuntimeException if a PathNotFoundException is raised while reading the data property
     * @throws RepositoryRuntimeException on any other repository failure
     */
    private javax.jcr.Binary getBinaryContent() {
        try {
            return getProperty(JCR_DATA).getBinary();
        } catch (final PathNotFoundException e) {
            throw new PathNotFoundRuntimeException(e);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#setContent(java.io.InputStream,
     * java.lang.String, java.net.URI, java.lang.String,
     * org.fcrepo.kernel.api.services.policy.StoragePolicyDecisionPoint)
     */
    @Override
    public void setContent(final InputStream content, final String contentType,
                           final Collection<URI> checksums, final String originalFileName,
                           final StoragePolicyDecisionPoint storagePolicyDecisionPoint)
            throws InvalidChecksumException {

        try {
            final Node contentNode = getNode();

            if (contentNode.canAddMixin(FEDORA_BINARY)) {
                contentNode.addMixin(FEDORA_BINARY);
            }

            if (contentType != null) {
                contentNode.setProperty(HAS_MIME_TYPE, contentType);
            }

            if (originalFileName != null) {
                contentNode.setProperty(FILENAME, originalFileName);
            }

            LOGGER.debug("Created content node at path: {}", contentNode.getPath());

            // The storage hint is optional; a null hint is passed through when no policy point is given.
            String hint = null;
            if (storagePolicyDecisionPoint != null) {
                hint = storagePolicyDecisionPoint.evaluatePolicies(this);
            }

            final ValueFactory modevf =
                    (ValueFactory) node.getSession().getValueFactory();
            final Binary binary = modevf.createBinary(content, hint);

            /*
             * This next line of code deserves explanation. If we chose for the
             * simpler line: Property dataProperty =
             * contentNode.setProperty(JCR_DATA, requestBodyStream); then the JCR
             * would not block on the stream's completion, and we would return to
             * the requester before the mutation to the repo had actually completed.
             * So instead we use createBinary(requestBodyStream), because its
             * contract specifies: "The passed InputStream is closed before this
             * method returns either normally or because of an exception." which
             * lets us block and not return until the job is done! The simpler code
             * may still be useful to us for an asynchronous method that we develop
             * later.
             */
            final Property dataProperty = contentNode.setProperty(JCR_DATA, binary);

            // Ensure provided checksums are valid
            final Collection<URI> nonNullChecksums = (null == checksums) ? new HashSet<>() : checksums;
            verifyChecksums(nonNullChecksums, dataProperty);

            decorateContentNode(contentNode, nonNullChecksums);

            // Update both this binary and its description.
            touch();
            ((FedoraResourceImpl) getDescription()).touch();

            LOGGER.debug("Created data property at path: {}", dataProperty.getPath());

        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * This method ensures that the arg checksums are valid against the binary associated with the arg dataProperty.
     * If one or more of the checksums are invalid, an InvalidChecksumException is thrown.
     *
     * @param checksums that the user provided
     * @param dataProperty containing the binary against which the checksums will be verified
     * @throws InvalidChecksumException if at least one provided checksum does not match the computed value;
     *         the message lists every mismatching pair
     * @throws RepositoryException if the binary or its fixity information cannot be read
     */
    private void verifyChecksums(final Collection<URI> checksums, final Property dataProperty)
            throws InvalidChecksumException, RepositoryException {

        // Maps each failing provided checksum to the value that was actually computed
        final Map<URI, URI> checksumErrors = new HashMap<>();

        // Loop through provided checksums validating against computed values
        for (final URI checksum : checksums) {
            final String algorithm = ContentDigest.getAlgorithm(checksum);

            if (algorithm.equals(SHA1.algorithm)) {
                // The case internally supported by ModeShape
                final String dsSHA1 = ((Binary) dataProperty.getBinary()).getHexHash();
                final URI dsSHA1Uri = ContentDigest.asURI(SHA1.algorithm, dsSHA1);

                if (!dsSHA1Uri.equals(checksum)) {
                    LOGGER.debug("Failed checksum test");
                    checksumErrors.put(checksum, dsSHA1Uri);
                }
            } else {
                // The case that requires re-computing the checksum
                final CacheEntry cacheEntry = CacheEntryFactory.forProperty(dataProperty);
                final Optional<FixityResult> firstResult = cacheEntry.checkFixity(algorithm).stream().findFirst();

                if (firstResult.isPresent() && !firstResult.get().matches(checksum)) {
                    LOGGER.debug("Failed checksum test");
                    checksumErrors.put(checksum, firstResult.get().getComputedChecksum());
                }
            }
        }

        // Throw an exception if any checksum errors occurred
        if (!checksumErrors.isEmpty()) {
            final String template = "Checksum Mismatch of %1$s and %2$s\n";
            final StringBuilder error = new StringBuilder();
            checksumErrors.forEach((key, value) -> error.append(String.format(template, key, value)));
            throw new InvalidChecksumException(error.toString());
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentSize()
     */
    @Override
    public long getContentSize() {
        try {
            if (hasProperty(CONTENT_SIZE)) {
                return getProperty(CONTENT_SIZE).getLong();
            }
        } catch (final RepositoryException e) {
            // Best effort: an unreadable size is reported the same way as a missing one.
            LOGGER.info("Could not get contentSize(): {}", e.getMessage());
        }

        return -1L;
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getContentDigest()
     */
    @Override
    public URI getContentDigest() {
        try {
            // Determine which digest algorithm to use; fall back to the default when the
            // property is absent or carries no value.
            String algorithm = ContentDigest.DEFAULT_ALGORITHM;
            if (hasProperty(DEFAULT_DIGEST_ALGORITHM)) {
                final Optional<Value> configured =
                        property2values.apply(getProperty(DEFAULT_DIGEST_ALGORITHM)).findFirst();
                if (configured.isPresent()) {
                    algorithm = configured.get().getString();
                }
            }
            final String algorithmWithoutStringType = algorithm.replace(FIELD_DELIMITER + XSDstring.getURI(), "");

            if (hasProperty(CONTENT_DIGEST)) {
                // Select the stored digest that matches the digest algorithm
                final Optional<Value> digestValue =
                        property2values.apply(getProperty(CONTENT_DIGEST)).filter(digest -> {
                            try {
                                final URI digestUri = URI.create(digest.getString());
                                return algorithmWithoutStringType.equalsIgnoreCase(
                                        ContentDigest.getAlgorithm(digestUri));
                            } catch (final RepositoryException e) {
                                LOGGER.warn("Exception thrown when getting digest property {}, {}",
                                        digest, e.getMessage());
                                return false;
                            }
                        }).findFirst();

                // Success, return the digest value
                if (digestValue.isPresent()) {
                    return URI.create(digestValue.get().getString());
                }
            }
            LOGGER.warn("No digest value was found to match the algorithm: {}", algorithmWithoutStringType);
        } catch (final RepositoryException e) {
            LOGGER.warn("Could not get content digest: {}", e.getMessage());
        }

        return ContentDigest.missingChecksum();
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getMimeType()
     */
    @Override
    public String getMimeType() {
        try {
            if (hasProperty(HAS_MIME_TYPE)) {
                // Strip the string datatype suffix if it was stored with the value
                return getProperty(HAS_MIME_TYPE).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
            }
            return "application/octet-stream";
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /*
     * (non-Javadoc)
     * @see org.fcrepo.kernel.api.models.FedoraBinary#getFilename()
     */
    @Override
    public String getFilename() {
        try {
            if (hasProperty(FILENAME)) {
                // Strip the string datatype suffix if it was stored with the value
                return getProperty(FILENAME).getString().replace(FIELD_DELIMITER + XSDstring.getURI(), "");
            }
            // No explicit filename: use the name of the parent node instead
            return node.getParent().getName();
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * Run a fixity check using this binary's stored content digest and content size.
     *
     * @param idTranslator converter between RDF resources and Fedora resources
     * @return the fixity results as an RDF stream
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator) {
        return getFixity(idTranslator, getContentDigest(), getContentSize());
    }

    /**
     * Run a fixity check against the given digest and size. The check is counted and timed
     * with the class-level metrics.
     *
     * @param idTranslator converter between RDF resources and Fedora resources
     * @param digestUri the expected digest; its algorithm is the one used for the check
     * @param size the expected size; a negative value means the size of the stored binary is used
     * @return the fixity results as an RDF stream
     * @throws RepositoryRuntimeException if the repository cannot be read
     */
    @Override
    public RdfStream getFixity(final IdentifierConverter<Resource, FedoraResource> idTranslator,
                               final URI digestUri,
                               final long size) {

        fixityCheckCounter.inc();

        try (final Timer.Context context = timer.time()) {

            LOGGER.debug("Checking resource: {}", getPath());

            final String algorithm = ContentDigest.getAlgorithm(digestUri);

            final long contentSize = size < 0 ? getBinaryContent().getSize() : size;

            final Collection<FixityResult> fixityResults
                    = CacheEntryFactory.forProperty(getProperty(JCR_DATA)).checkFixity(algorithm);

            return new FixityRdfContext(this, idTranslator, fixityResults, digestUri, contentSize);
        } catch (final RepositoryException e) {
            throw new RepositoryRuntimeException(e);
        }
    }

    /**
     * When deleting the binary, we also need to clean up the description document.
     */
    @Override
    public void delete() {
        // Resolve the description before this binary is deleted.
        final FedoraResource description = getDescription();

        super.delete();

        description.delete();
    }

    /**
     * Get the base version, as reported by this binary's description.
     *
     * @return the base version of the description
     */
    @Override
    public FedoraResource getBaseVersion() {
        return getDescription().getBaseVersion();
    }

    /**
     * Add the binary mixin to the content node and, if it has a data property, record the content
     * digests and content size on it. The SHA-1 digest reported by the binary itself is always
     * stored in addition to the supplied checksums. The supplied collection is not modified.
     *
     * @param contentNode the node to decorate; a null node is logged and ignored
     * @param checksums additional digests to store alongside the computed SHA-1 digest
     * @throws RepositoryException if the node cannot be read or updated
     */
    private static void decorateContentNode(final Node contentNode, final Collection<URI> checksums)
            throws RepositoryException {
        if (contentNode == null) {
            LOGGER.warn("{} node appears to be null!", JCR_CONTENT);
            return;
        }
        if (contentNode.canAddMixin(FEDORA_BINARY)) {
            contentNode.addMixin(FEDORA_BINARY);
        }

        if (contentNode.hasProperty(JCR_DATA)) {
            final Property dataProperty = contentNode.getProperty(JCR_DATA);
            final Binary binary = (Binary) dataProperty.getBinary();
            final String dsChecksum = binary.getHexHash();
            final long dataLength = dataProperty.getLength();

            contentSizeHistogram.update(dataLength);

            // De-duplicate into a private set BEFORE sizing the array: sizing from the raw
            // collection leaves unfilled null slots whenever a digest occurs more than once,
            // and adding to the caller's collection would fail for immutable collections.
            final Collection<String> digests = checksums.stream()
                    .map(Object::toString)
                    .collect(Collectors.toCollection(HashSet::new));
            digests.add(ContentDigest.asURI(SHA1.algorithm, dsChecksum).toString());

            final String[] checksumArray = digests.toArray(new String[0]);

            contentNode.setProperty(CONTENT_DIGEST, checksumArray);
            contentNode.setProperty(CONTENT_SIZE, dataLength);

            LOGGER.debug("Decorated data property at path: {}", dataProperty.getPath());
        }
    }

    /**
     * Report whether this binary is versioned, as determined by its description.
     *
     * @return whether the description is versioned
     */
    @Override
    public boolean isVersioned() {
        return getDescription().isVersioned();
    }

    /**
     * Enable versioning on both this binary and its description.
     */
    @Override
    public void enableVersioning() {
        super.enableVersioning();
        getDescription().enableVersioning();
    }

    /**
     * Disable versioning on both this binary and its description.
     */
    @Override
    public void disableVersioning() {
        super.disableVersioning();
        getDescription().disableVersioning();
    }

    /**
     * Check if the given node is a Fedora binary
     *
     * @param node the given node
     * @return whether the given node is a Fedora binary
     */
    public static boolean hasMixin(final Node node) {
        return isFedoraBinary.test(node);
    }
}