package dk.kb.yggdrasil.preservation;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.io.StringReader;
import java.nio.charset.Charset;
import java.util.List;
import java.util.UUID;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.TransformerException;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import dk.kb.yggdrasil.HttpPayload;
import dk.kb.yggdrasil.config.Models;
import dk.kb.yggdrasil.config.RequestHandlerContext;
import dk.kb.yggdrasil.db.PreservationRequestState;
import dk.kb.yggdrasil.exceptions.ArgumentCheck;
import dk.kb.yggdrasil.exceptions.PreservationException;
import dk.kb.yggdrasil.exceptions.YggdrasilException;
import dk.kb.yggdrasil.json.JSONMessaging;
import dk.kb.yggdrasil.json.preservation.PreservationRequest;
import dk.kb.yggdrasil.messaging.MessageRequestHandler;
import dk.kb.yggdrasil.xslt.XmlErrorHandler;
import dk.kb.yggdrasil.xslt.XmlValidationResult;
import dk.kb.yggdrasil.xslt.XmlValidator;
import dk.kb.yggdrasil.xslt.XslErrorListener;
import dk.kb.yggdrasil.xslt.XslTransformer;
import dk.kb.yggdrasil.xslt.XslUriResolver;
/**
 * The handler class for preservation requests.
 *
 * A valid request is checked against the known collections, its content is downloaded when a
 * Content_URI is given, its metadata is transformed with the xslt script mapped to the request's
 * model, and the result is handed to the {@link PreservationPackagingManager}.
 */
public class PreservationRequestHandler extends MessageRequestHandler<PreservationRequest> {
    /** Logging mechanism. */
    private final Logger logger = LoggerFactory.getLogger(this.getClass().getName());
    /** The class reading the mapping between models and xslt scripts used for the metadata transformation. */
    private final Models metadataModel;
    /** Context for this preservation. */
    private final RequestHandlerContext context;
    /** Preservation packaging manager. */
    private final PreservationPackagingManager preservationManager;

    /**
     * Constructor.
     * @param context The context for the preservation.
     * @param models The metadatamodel mapper.
     */
    public PreservationRequestHandler(RequestHandlerContext context, Models models) {
        ArgumentCheck.checkNotNull(context, "PreservationContext context");
        ArgumentCheck.checkNotNull(models, "Models models");
        this.metadataModel = models;
        this.context = context;
        this.preservationManager = new PreservationPackagingManager(context);
    }

    @Override
    public PreservationRequest extractRequest(byte[] b) throws YggdrasilException {
        return JSONMessaging.getRequest(new PushbackInputStream(new ByteArrayInputStream(b), PUSHBACKBUFFERSIZE),
                PreservationRequest.class);
    }

    /**
     * Handles the PreservationRequest.
     * Invalid messages are logged and skipped. A PreservationException raised while handling the
     * request is reported to the remote state updater, the request is removed from the state
     * database, and the failure is rethrown as a YggdrasilException.
     * @param request The preservation request to handle.
     * @throws YggdrasilException if anything goes wrong.
     */
    public void handleRequest(PreservationRequest request) throws YggdrasilException {
        logger.info("Preservation request received.");
        if (!request.isMessageValid()) {
            logger.error("Skipping invalid message");
            return;
        }
        PreservationRequestState prs = new PreservationRequestState(request,
                PreservationState.PRESERVATION_REQUEST_RECEIVED, request.UUID);
        try {
            if (validateMessage(prs)) {
                performPreservation(prs);
            }
        } catch (PreservationException e) {
            // Fault barrier to ensure, that failures will send update and remove stuff.
            logger.warn("Preservation message handling fault barrier caught exception.", e);
            context.getRemotePreservationStateUpdater().sendPreservationResponseWithSpecificDetails(prs,
                    e.getState(), e.getMessage());
            context.getStateDatabase().delete(prs.getUUID());
            throw new YggdrasilException(e.getMessage(), e);
        }
        logger.info("Finished processing the preservation request");
    }

    /**
     * Validates whether the content of the request is valid.
     * The preservation profile must be one of the collections known to the bitrepository. A valid
     * request is acknowledged to the remote state updater and stored in the state database; an
     * invalid one is reported as failed and not stored.
     * @param prs The preservation request state.
     * @return Whether or not it is valid.
     * @throws YggdrasilException If reporting the state or storing the record fails.
     */
    private boolean validateMessage(PreservationRequestState prs) throws YggdrasilException {
        // Add check about whether the profile is a known collectionID or not known
        String preservationProfile = prs.getRequest().Preservation_profile;
        List<String> possibleCollections = context.getBitrepository().getKnownCollections();
        if (!possibleCollections.contains(preservationProfile)) {
            String errMsg = "The given preservation profile '" + preservationProfile
                    + "' does not match a known collection ID. Expected one of: " + possibleCollections;
            logger.error(errMsg);
            context.getRemotePreservationStateUpdater().sendPreservationResponseWithSpecificDetails(prs,
                    PreservationState.PRESERVATION_REQUEST_FAILED, errMsg);
            return false;
        }
        context.getRemotePreservationStateUpdater().sendPreservationResponse(prs,
                PreservationState.PRESERVATION_REQUEST_RECEIVED);
        context.getStateDatabase().putPreservationRecord(prs.getUUID(), prs);
        return true;
    }

    /**
     * Performs the preservation, thus transforming the metadata, retrieving the file, packaging all of it in a
     * Warc file, and sending the WARC file to the Bitrepository.
     * @param prs The preservation request to be preserved.
     * @throws PreservationException If fetching the content or transforming the metadata fails.
     * @throws YggdrasilException If a state update or the packaging fails.
     */
    private void performPreservation(PreservationRequestState prs) throws PreservationException, YggdrasilException {
        if (prs.getRequest().Content_URI != null) {
            logger.info("Fetching content for preseravtion request with UUID '" + prs.getUUID() + "'");
            try {
                fetchContent(prs);
            } catch (IOException e) {
                String reason = "An issue occured when fetching the content for preservation request '"
                        + prs.getUUID() + "'";
                throw new PreservationException(PreservationState.PRESERVATION_REQUEST_FAILED, reason, e);
            }
        }
        transformMetadata(prs);
        context.getStateDatabase().putPreservationRecord(prs.getUUID(), prs);
        preservationManager.addToWarcFile(prs.getRequest().Preservation_profile, prs);
        logger.info("Finished handling the preservation request with UUID '" + prs.getUUID() + "' successfully");
    }

    /**
     * Transform the metadata included with the request to the proper METS preservation format.
     * The transformed output is written to a new file in the temporary directory and validated.
     * On success the file becomes the metadata payload of the request; on any failure the file
     * is removed again so it does not accumulate in the temporary directory.
     * @param prs The current request
     * @throws PreservationException If the model, the xslt directory or the xslt script is unknown/missing,
     * or if the transformation fails or produces invalid output.
     * @throws YggdrasilException Failure to report the new state.
     */
    private void transformMetadata(PreservationRequestState prs) throws PreservationException, YggdrasilException {
        String theMetadata = prs.getRequest().metadata;
        String modelToUse = prs.getRequest().Model.toLowerCase();
        if (!metadataModel.getMapper().containsKey(modelToUse)) {
            final String errMsg = "The given metadata-model'" + modelToUse
                    + "' is unknown. Expected one of: " + metadataModel.getMapper().keySet();
            throw new PreservationException(PreservationState.PRESERVATION_REQUEST_FAILED, errMsg);
        }
        File xsltDir = new File(context.getConfig().getConfigDir(), "xslt");
        if (!xsltDir.isDirectory()) {
            final String errMsg = "The xslt directory '" + xsltDir.getAbsolutePath()
                    + "' does not exist!";
            throw new PreservationException(PreservationState.PRESERVATION_REQUEST_FAILED, errMsg);
        }
        File xslFile = new File(xsltDir, metadataModel.getMapper().get(modelToUse));
        if (!xslFile.isFile()) {
            final String errMsg = "The needed xslt-script '" + xslFile.getAbsolutePath()
                    + "' does not exist!";
            throw new PreservationException(PreservationState.PRESERVATION_REQUEST_FAILED, errMsg);
        }

        File outputFile = null;
        // Set once the output file has been attached to the request; from then on it must be kept.
        boolean outputHandedOver = false;
        try {
            XslTransformer xsltransform = XslTransformer.getTransformer(xslFile);
            XslUriResolver uriResolver = new XslUriResolver();
            XslErrorListener errorListener = new XslErrorListener();
            // Feed the parser characters rather than bytes, so the result does not depend on the
            // platform default charset.
            Source xmlSource = new StreamSource(new StringReader(theMetadata));
            outputFile = new File(context.getConfig().getTemporaryDir(), UUID.randomUUID().toString());
            Result outputTarget = new StreamResult(outputFile);
            xsltransform.transform(xmlSource, uriResolver, errorListener, outputTarget);

            EntityResolver entityResolver = null;
            XmlErrorHandler errorHandler = new XmlErrorHandler();
            XmlValidationResult result = new XmlValidationResult();
            boolean bValid;
            FileInputStream xmlFileStream = new FileInputStream(outputFile);
            try {
                bValid = new XmlValidator().testDefinedValidity(xmlFileStream, entityResolver,
                        errorHandler, result);
            } finally {
                // Close before the file is read again or deleted.
                xmlFileStream.close();
            }
            if (!bValid) {
                throw new PreservationException(PreservationState.PRESERVATION_METADATA_PACKAGED_FAILURE,
                        buildInvalidMetadataMessage(outputFile, errorHandler));
            }
            prs.setMetadataPayload(outputFile);
            outputHandedOver = true;
            context.getRemotePreservationStateUpdater().sendPreservationResponse(prs,
                    PreservationState.PRESERVATION_METADATA_PACKAGED_SUCCESSFULLY);
        } catch (TransformerException e) {
            final String errMsg = "Error occurred during transformation of metadata for uuid '"
                    + prs.getUUID() + "'";
            throw new PreservationException(PreservationState.PRESERVATION_METADATA_PACKAGED_FAILURE, errMsg, e);
        } catch (IOException e) {
            final String errMsg = "Error occurred during transformation of metadata for uuid '"
                    + prs.getUUID() + "'";
            throw new PreservationException(PreservationState.PRESERVATION_METADATA_PACKAGED_FAILURE, errMsg, e);
        } finally {
            if (!outputHandedOver && outputFile != null) {
                discardTemporaryFile(outputFile);
            }
        }
    }

    /**
     * Builds the failure message for invalid transformation output: the output itself followed by
     * the errors, fatal errors and warnings collected by the error handler, so Valhal gets to see them.
     * @param outputFile The file containing the invalid output.
     * @param errorHandler The handler holding the validation messages.
     * @return The complete failure message.
     */
    private String buildInvalidMetadataMessage(File outputFile, XmlErrorHandler errorHandler) {
        StringBuilder errMsg = new StringBuilder();
        errMsg.append("The output metadata is invalid: ");
        try {
            errMsg.append(FileUtils.readFileToString(outputFile));
        } catch (IOException e) {
            // Best effort: the message is still useful without the file content.
            logger.warn("Exception while reading output file:", e);
        }
        if (errorHandler.hasErrors()) {
            if (!errorHandler.errors.isEmpty()) {
                errMsg.append("Errors: \n");
                for (String error : errorHandler.errors) {
                    errMsg.append(error).append("\n");
                }
                errMsg.append("\n");
            }
            if (!errorHandler.fatalErrors.isEmpty()) {
                errMsg.append("Fatal errors: \n");
                for (String fatalerror : errorHandler.fatalErrors) {
                    errMsg.append(fatalerror).append("\n");
                }
            }
            if (!errorHandler.warnings.isEmpty()) {
                errMsg.append("Warnings: \n");
                for (String warning : errorHandler.warnings) {
                    errMsg.append(warning).append("\n");
                }
            }
        }
        return errMsg.toString();
    }

    /**
     * Removes a temporary file that is no longer needed, logging a warning if it cannot be deleted.
     * @param file The file to remove.
     */
    private void discardTemporaryFile(File file) {
        if (file.exists() && !file.delete()) {
            logger.warn("Could not delete temporary file '" + file.getAbsolutePath() + "'");
        }
    }

    /**
     * Try and download the content using the Content_URI.
     * On success the downloaded file becomes the content payload of the request, the new state
     * is reported and the request is stored in the state database.
     * @param prs The current request
     * @throws PreservationException If no payload could be retrieved from the Content_URI.
     * @throws YggdrasilException If reporting the state or storing the record fails.
     * @throws IOException If the download or writing the payload to a file fails.
     */
    private void fetchContent(PreservationRequestState prs) throws PreservationException,
            YggdrasilException, IOException {
        // Try to download resource from Content_URI
        PreservationRequest pr = prs.getRequest();
        logger.info("Attempting to download resource from '"
                + pr.Content_URI + "'");
        HttpPayload payload = context.getHttpCommunication().get(pr.Content_URI);
        if (payload == null) {
            throw new PreservationException(PreservationState.PRESERVATION_RESOURCES_DOWNLOAD_FAILURE,
                    "Failed to download resource.");
        }
        File tmpFile = payload.writeToFile();
        prs.setContentPayload(tmpFile);
        context.getRemotePreservationStateUpdater().sendPreservationResponse(prs,
                PreservationState.PRESERVATION_RESOURCES_DOWNLOAD_SUCCESS);
        context.getStateDatabase().putPreservationRecord(prs.getUUID(), prs);
    }
}