package dk.statsbiblioteket.medieplatform.autonomous;
import com.sun.jersey.api.client.Client;
import com.sun.jersey.api.client.filter.HTTPBasicAuthFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import dk.statsbiblioteket.doms.central.connectors.BackendInvalidCredsException;
import dk.statsbiblioteket.doms.central.connectors.BackendInvalidResourceException;
import dk.statsbiblioteket.doms.central.connectors.BackendMethodFailedException;
import dk.statsbiblioteket.doms.central.connectors.EnhancedFedora;
import dk.statsbiblioteket.doms.central.connectors.EnhancedFedoraImpl;
import dk.statsbiblioteket.doms.central.connectors.fedora.pidGenerator.PIDGeneratorException;
import dk.statsbiblioteket.sbutil.webservices.authentication.Credentials;
import dk.statsbiblioteket.medieplatform.autonomous.iterator.common.TreeIterator;
import dk.statsbiblioteket.medieplatform.autonomous.iterator.fedora3.ConfigurableFilter;
import dk.statsbiblioteket.medieplatform.autonomous.iterator.fedora3.IteratorForFedora3;
import dk.statsbiblioteket.medieplatform.autonomous.iterator.filesystem.transforming.TransformingIteratorForFileSystems;
import dk.statsbiblioteket.util.Streams;
import javax.xml.bind.JAXBException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
/**
 * An abstract runnable component that uses SBOI for event triggers and DOMS for event storage,
 * and provides access to a tree iterator.
 *
 * <p>Besides creating tree iterators, this class can store and retrieve the "batch structure" document of a
 * batch, either as a file on the file system or as the {@code BATCHSTRUCTURE} datastream on the round trip
 * object in DOMS. Which backend is used is decided by the property named by
 * {@link ConfigConstants#ITERATOR_USE_FILESYSTEM} (defaults to {@code true}).
 */
@SuppressWarnings("deprecation")//Credentials
public abstract class TreeProcessorAbstractRunnableComponent extends AbstractRunnableComponent<Batch> {

    /** Name of the datastream on the round trip object that holds the batch structure. */
    private static final String BATCH_STRUCTURE = "BATCHSTRUCTURE";

    /** Character encoding used for the batch structure in both directions. */
    private static final String UTF_8 = "UTF-8";

    /** Lazily created fedora client; only created and handed out by {@link #getEnhancedFedora()}. */
    private EnhancedFedora fedora;

    /** Logger named after the concrete subclass. */
    private final Logger log = LoggerFactory.getLogger(getClass());

    /**
     * Create the component.
     *
     * @param properties the configuration this component reads all of its settings from
     */
    protected TreeProcessorAbstractRunnableComponent(Properties properties) {
        super(properties);
    }

    /**
     * Create a tree iterator for the given batch. The following properties (keys are the constants in
     * {@link ConfigConstants}) are read:
     * <ol>
     * <li>{@code ITERATOR_USE_FILESYSTEM}: boolean, default "true". Determines if the batch is read from the
     * file system (items 2-6 are then used) or from DOMS (items 7-11 are then used)</li>
     * <li>{@code ITERATOR_FILESYSTEM_BATCHES_FOLDER}: path of the folder in which the batch folder resides</li>
     * <li>{@code ITERATOR_FILESYSTEM_GROUPINGCHAR}: the character that separates the prefix and the postfix.
     * Default ".". The value is regex-quoted before it is handed to the iterator</li>
     * <li>{@code ITERATOR_DATAFILEPATTERN}: the regular expression identifying data files. Default
     * {@code TransformingIteratorForFileSystems.DATA_FILE_PATTERN_JP2_VALUE}. Used by both backends</li>
     * <li>{@code ITERATOR_FILESYSTEM_CHECKSUMPOSTFIX}: the postfix to append to file names to get their checksum
     * files. Default {@code TransformingIteratorForFileSystems.CHECKSUM_POSTFIX_DEFAULT_VALUE}</li>
     * <li>{@code ITERATOR_FILESYSTEM_IGNOREDFILES}: comma separated list of file names, each entry is trimmed.
     * Default {@code TransformingIteratorForFileSystems.IGNORED_FILES_DEFAULT_VALUE}</li>
     * <li>{@code DOMS_USERNAME}: the username for communicating with fedora</li>
     * <li>{@code DOMS_PASSWORD}: the password for communicating with fedora</li>
     * <li>{@code DOMS_URL}: the fedora server used</li>
     * <li>{@code ITERATOR_DOMS_ATTRIBUTENAMES}: comma separated datastream names used for attributes</li>
     * <li>{@code ITERATOR_DOMS_PREDICATENAMES}: comma separated predicates of relations used for generating
     * the tree</li>
     * </ol>
     *
     * @param batch the batch
     *
     * @return a tree iterator
     * @throws InitialisationException if the DOMS backend could not be contacted to look up the round trip
     *                                 object
     */
    protected TreeIterator createIterator(Batch batch) {
        String dataFilePattern = getProperties().getProperty(
                ConfigConstants.ITERATOR_DATAFILEPATTERN,
                TransformingIteratorForFileSystems.DATA_FILE_PATTERN_JP2_VALUE);

        if (isUsingFileSystem()) {
            File scratchDir = new File(getProperties().getProperty(ConfigConstants.ITERATOR_FILESYSTEM_BATCHES_FOLDER));
            File batchDir = new File(scratchDir, batch.getFullID());
            // The grouping character is used as a regular expression downstream, so it is quoted here.
            String groupingChar = Pattern.quote(
                    getProperties().getProperty(ConfigConstants.ITERATOR_FILESYSTEM_GROUPINGCHAR, "."));
            String checksumPostFix = getProperties().getProperty(
                    ConfigConstants.ITERATOR_FILESYSTEM_CHECKSUMPOSTFIX,
                    TransformingIteratorForFileSystems.CHECKSUM_POSTFIX_DEFAULT_VALUE);
            String[] ignoredFiles = getProperties().getProperty(
                    ConfigConstants.ITERATOR_FILESYSTEM_IGNOREDFILES,
                    TransformingIteratorForFileSystems.IGNORED_FILES_DEFAULT_VALUE).split(",");
            for (int i = 0; i < ignoredFiles.length; i++) {
                ignoredFiles[i] = ignoredFiles[i].trim();
            }
            return new TransformingIteratorForFileSystems(
                    batchDir, groupingChar, dataFilePattern, checksumPostFix, Arrays.asList(ignoredFiles));
        }

        Client client = Client.create();
        client.addFilter(
                new HTTPBasicAuthFilter(
                        getProperties().getProperty(ConfigConstants.DOMS_USERNAME),
                        getProperties().getProperty(ConfigConstants.DOMS_PASSWORD)));
        String pid;
        try {
            // NOTE(review): pid is null when no round trip object exists; it is passed on unchanged.
            pid = getRoundTripObject(batch, getEnhancedFedora());
        } catch (MalformedURLException | PIDGeneratorException | BackendMethodFailedException | JAXBException |
                BackendInvalidCredsException e) {
            log.error("Unable to initialise iterator", e);
            throw new InitialisationException("Unable to initialise iterator", e);
        }
        List<String> attributeNames = Arrays.asList(
                getProperties().getProperty(ConfigConstants.ITERATOR_DOMS_ATTRIBUTENAMES).split(","));
        List<String> predicateNames = Arrays.asList(
                getProperties().getProperty(ConfigConstants.ITERATOR_DOMS_PREDICATENAMES).split(","));
        return new IteratorForFedora3(
                pid,
                client,
                getProperties().getProperty(ConfigConstants.DOMS_URL),
                new ConfigurableFilter(attributeNames, predicateNames),
                dataFilePattern);
    }

    /**
     * Retrieve the batch structure from DOMS or from the file system.
     * If the property named by {@link ConfigConstants#ITERATOR_USE_FILESYSTEM} is true (default), the structure
     * is read from the file {@code <batch full id>.batchStructure.xml} in the directory named by the property
     * {@link ConfigConstants#AUTONOMOUS_BATCH_STRUCTURE_STORAGE_DIR}; otherwise it is read from the datastream
     * named BATCHSTRUCTURE on the round trip object and returned as UTF-8 bytes.
     *
     * @param batch the batch in question
     *
     * @return the input stream to the batch structure; the caller is responsible for closing it
     * @throws IOException             if an inputstream could not be opened
     * @throws InitialisationException if the DOMS backend failed
     */
    public InputStream retrieveBatchStructure(Batch batch) throws IOException {
        if (isUsingFileSystem()) {
            return new FileInputStream(getBatchStructureFile(batch));
        }
        try {
            EnhancedFedora enhancedFedora = getEnhancedFedora();
            // NOTE(review): pid is null when no round trip object exists; it is passed on unchanged.
            String pid = getRoundTripObject(batch, enhancedFedora);
            String batchStructure = enhancedFedora.getXMLDatastreamContents(pid, BATCH_STRUCTURE);
            return new ByteArrayInputStream(batchStructure.getBytes(UTF_8));
        } catch (BackendInvalidResourceException | MalformedURLException | PIDGeneratorException |
                BackendMethodFailedException | JAXBException |
                BackendInvalidCredsException e) {
            log.error("Unable to retrieve batch structure", e);
            throw new InitialisationException("Unable to retrieve batch structure", e);
        }
    }

    /**
     * Utility method to get the batch structure file.
     *
     * @param batch the batch in question
     *
     * @return a file object denoting the path to the structure file (which might not exist)
     */
    private File getBatchStructureFile(Batch batch) {
        File storageDir = new File(
                getProperties().getProperty(ConfigConstants.AUTONOMOUS_BATCH_STRUCTURE_STORAGE_DIR));
        return new File(storageDir, batch.getFullID() + ".batchStructure.xml");
    }

    /**
     * Store the batch structure, either in DOMS or on the filesystem.
     * If the property named by {@link ConfigConstants#ITERATOR_USE_FILESYSTEM} is true (default), the structure
     * is written to the file {@code <batch full id>.batchStructure.xml} in the directory named by the property
     * {@link ConfigConstants#AUTONOMOUS_BATCH_STRUCTURE_STORAGE_DIR}; otherwise it is stored, encoded as UTF-8,
     * in the datastream named BATCHSTRUCTURE on the round trip object.
     *
     * @param batch          the batch in question
     * @param batchStructure the batch structure as an UTF-8 inputstream
     *
     * @throws IOException             if the storing failed
     * @throws InitialisationException if the DOMS backend failed
     */
    public void storeBatchStructure(Batch batch, InputStream batchStructure) throws IOException {
        if (isUsingFileSystem()) {
            File batchStructureFile = getBatchStructureFile(batch);
            // try-with-resources guarantees the file handle is released even if the copy fails,
            // independently of what Streams.pipe does with its arguments.
            try (FileOutputStream output = new FileOutputStream(batchStructureFile)) {
                Streams.pipe(batchStructure, output);
            }
            return;
        }
        try {
            EnhancedFedora enhancedFedora = getEnhancedFedora();
            // NOTE(review): pid is null when no round trip object exists; it is passed on unchanged.
            String pid = getRoundTripObject(batch, enhancedFedora);
            // Encode explicitly as UTF-8: the platform default charset would corrupt non-ASCII content
            // on systems where it is not UTF-8, and retrieveBatchStructure reads the data back as UTF-8.
            byte[] contents = toString(batchStructure).getBytes(UTF_8);
            enhancedFedora.modifyDatastreamByValue(
                    pid, BATCH_STRUCTURE, null, null, contents, null, "text/xml", "Updating batch structure", null);
        } catch (BackendInvalidResourceException | MalformedURLException | PIDGeneratorException |
                BackendMethodFailedException | JAXBException |
                BackendInvalidCredsException e) {
            log.error("Unable to store batch structure", e);
            throw new InitialisationException("Unable to store batch structure", e);
        }
    }

    /**
     * Tell which backend is configured for iterators and batch structures.
     *
     * @return true if the property named by {@link ConfigConstants#ITERATOR_USE_FILESYSTEM} is "true"
     *         (ignoring case) or is not set; false otherwise
     */
    private boolean isUsingFileSystem() {
        return Boolean.parseBoolean(
                getProperties().getProperty(ConfigConstants.ITERATOR_USE_FILESYSTEM, "true"));
    }

    /**
     * Utility method to get the round trip object for a given batch. The object is looked up by the
     * DC identifier {@code path:<batch full id>}. If several objects match, a warning is logged and the first
     * one is used.
     *
     * @param batch  the batch in question
     * @param fedora the enhanced fedora interface
     *
     * @return the found pid or null
     * @throws BackendInvalidCredsException if the credentials are insufficient
     * @throws BackendMethodFailedException if something failed in the backend
     */
    private String getRoundTripObject(Batch batch, EnhancedFedora fedora) throws
                                                                          BackendInvalidCredsException,
                                                                          BackendMethodFailedException {
        List<String> pids = fedora.findObjectFromDCIdentifier("path:" + batch.getFullID());
        if (pids.isEmpty()) {
            return null;
        }
        if (pids.size() > 1) {
            log.warn("Apparently found more than one round trip for this round trip '{}'", batch.getFullID());
        }
        return pids.get(0);
    }

    /**
     * Utility method to initialise an enhanced fedora object. The object is created on the first call and
     * reused afterwards; the method is synchronized so the creation happens at most once. The number of
     * retries and the delay between retries are read from the properties named by
     * {@link ConfigConstants#FEDORA_RETRIES} (default "1") and
     * {@link ConfigConstants#FEDORA_DELAY_BETWEEN_RETRIES} (default "100").
     *
     * @return the enhanced fedora object
     * @throws MalformedURLException if the URL in the property named by {@link ConfigConstants#DOMS_URL} is
     *                               invalid
     * @throws PIDGeneratorException if the pid generator webservice choked again. Should not be possible
     * @throws JAXBException         if jaxb fails to understand the wsdl
     */
    @SuppressWarnings("deprecation")//Credentials
    private synchronized EnhancedFedora getEnhancedFedora() throws
                                                            MalformedURLException,
                                                            PIDGeneratorException,
                                                            JAXBException {
        if (fedora == null) {
            // Only parse the retry settings when a client actually has to be created.
            int fedoraRetries = Integer.parseInt(
                    getProperties().getProperty(ConfigConstants.FEDORA_RETRIES, "1"));
            int fedoraDelayBetweenRetries = Integer.parseInt(
                    getProperties().getProperty(ConfigConstants.FEDORA_DELAY_BETWEEN_RETRIES, "100"));
            fedora = new EnhancedFedoraImpl(
                    new Credentials(
                            getProperties().getProperty(ConfigConstants.DOMS_USERNAME),
                            getProperties().getProperty(ConfigConstants.DOMS_PASSWORD)),
                    getProperties().getProperty(ConfigConstants.DOMS_URL),
                    null,
                    null,
                    fedoraRetries, fedoraDelayBetweenRetries);
        }
        return fedora;
    }

    /**
     * Utility method to read an inputstream to a string. The bytes are decoded as UTF-8.
     *
     * @param stream the stream to read
     *
     * @return the stream as a string
     * @throws IOException if the stream could not be read
     */
    private String toString(InputStream stream) throws IOException {
        ByteArrayOutputStream temp = new ByteArrayOutputStream();
        Streams.pipe(stream, temp);
        return new String(temp.toByteArray(), UTF_8);
    }
}