/**
* Copyright 2008 The University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.unc.lib.dl.admin.collect;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.StandardWatchEventKinds;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipFile;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.map.JsonMappingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.type.CollectionType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import edu.unc.lib.dl.fedora.PID;
import edu.unc.lib.dl.util.PackagingType;
import edu.unc.lib.dl.util.TripleStoreQueryService;
import gov.loc.repository.bagit.Bag;
import gov.loc.repository.bagit.BagFactory;
import gov.loc.repository.bagit.BagFile;
import gov.loc.repository.bagit.BagHelper;
/**
* Loads and manages ingest sources, which are preconfigured locations to find packages for deposit.
*
* @author bbpennel
* @date Oct 22, 2015
*/
public class IngestSourceManager {
    private static final Logger log = LoggerFactory.getLogger(IngestSourceManager.class);

    // The list is never mutated in place; the watcher thread swaps in a freshly parsed list on
    // reload. Volatile guarantees that threads reading the field observe the replacement.
    private volatile List<IngestSourceConfiguration> configs;
    private TripleStoreQueryService tripleService;
    private String configPath;

    /**
     * Loads the ingest source configuration from the file at {@code configPath} and starts a
     * background daemon thread which reloads the configuration whenever that file is modified.
     *
     * @throws JsonParseException if the configuration file is not well formed JSON
     * @throws JsonMappingException if the JSON cannot be mapped to a list of ingest source configurations
     * @throws IOException if the configuration file cannot be read
     */
    public void init() throws JsonParseException, JsonMappingException, IOException {
        final ObjectMapper mapper = new ObjectMapper();
        final CollectionType type = mapper.getTypeFactory()
                .constructCollectionType(List.class, IngestSourceConfiguration.class);
        // Resolve to an absolute file so that a parent directory is available for the watcher
        // even when configPath is a bare relative file name
        final File configFile = new File(configPath).getAbsoluteFile();
        final Path configDir = configFile.toPath().getParent();

        configs = mapper.readValue(configFile, type);

        // Start separate thread for reloading configuration when it changes
        Thread watchThread = new Thread(new Runnable() {
            @Override
            public void run() {
                // Monitor config file for changes to allow for reloading without restarts
                try (final WatchService watchService = FileSystems.getDefault().newWatchService()) {
                    // Register watcher on parent directory of config to detect file modifications
                    configDir.register(watchService, StandardWatchEventKinds.ENTRY_MODIFY);

                    while (true) {
                        final WatchKey wk = watchService.take();

                        for (WatchEvent<?> event : wk.pollEvents()) {
                            // Overflow events carry no context and indicate that events may have
                            // been lost, so treat them as a possible change to the config file
                            boolean possiblyChanged = event.kind() == StandardWatchEventKinds.OVERFLOW;
                            if (!possiblyChanged) {
                                final Path changed = (Path) event.context();
                                possiblyChanged = changed != null
                                        && changed.toString().equals(configFile.getName());
                            }

                            if (possiblyChanged) {
                                log.warn("Ingest source configuration has changed, reloading: {}",
                                        configFile.getAbsolutePath());
                                reloadConfigs(mapper, type, configFile);
                            }
                        }

                        // reset the key so that we can continue to monitor for future events
                        if (!wk.reset()) {
                            break;
                        }
                    }
                } catch (InterruptedException e) {
                    log.info("Interrupted watcher for updates to ingest source configuration");
                    // Restore the interrupt flag for whoever owns this thread
                    Thread.currentThread().interrupt();
                } catch (IOException e) {
                    log.error("Failed to establish watcher for ingest source configuration", e);
                }
            }
        }, "IngestSourceConfigWatcher");
        // Daemon so that the watcher never prevents the JVM or container from shutting down
        watchThread.setDaemon(true);
        watchThread.start();
    }

    /**
     * Re-reads the configuration file and replaces the current configuration. If the file cannot be
     * read or parsed (for instance because it was observed while partially written), the failure is
     * logged and the previously loaded configuration is retained.
     *
     * @param mapper mapper used to deserialize the configuration
     * @param type collection type describing a list of ingest source configurations
     * @param configFile file to read the configuration from
     */
    private void reloadConfigs(ObjectMapper mapper, CollectionType type, File configFile) {
        try {
            List<IngestSourceConfiguration> reloaded = mapper.readValue(configFile, type);
            configs = reloaded;
        } catch (IOException e) {
            log.error("Failed to reload ingest source configuration {}, retaining previous configuration",
                    configFile.getAbsolutePath(), e);
        }
    }

    /**
     * Retrieves a list of ingest sources which contain or match the destination object provided.
     * Each applicable source appears at most once in the result.
     *
     * @param destination PID of the object packages would be deposited into
     * @return sources having a container which is either the destination itself or one of the
     *         containers above it; empty if none apply
     */
    public List<IngestSourceConfiguration> listSources(PID destination) {
        List<PID> ancestors = tripleService.lookupAllContainersAbove(destination);

        // Determine which sources apply to the selected destination
        List<IngestSourceConfiguration> applicableSources = new ArrayList<>();
        for (IngestSourceConfiguration source : configs) {
            for (String container : source.getContainers()) {
                PID containerPID = new PID(container);
                if (containerPID.equals(destination)
                        || (ancestors != null && ancestors.contains(containerPID))) {
                    applicableSources.add(source);
                    // One matching container is sufficient, stop so the source is not added again
                    break;
                }
            }
        }

        return applicableSources;
    }

    /**
     * Retrieves a list of candidate file information for ingestable packages from sources which are
     * applicable to the destination provided.
     *
     * @param destination PID of the object packages would be deposited into
     * @return one map of candidate details per matching path, see
     *         {@link #addCandidate(List, Path, IngestSourceConfiguration, String)} for the keys
     */
    public List<Map<String, Object>> listCandidates(PID destination) {
        List<IngestSourceConfiguration> applicableSources = listSources(destination);

        final List<Map<String, Object>> candidates = new ArrayList<>();
        for (final IngestSourceConfiguration source : applicableSources) {
            final String base = source.getBase();

            // Gathering candidates per pattern within a particular base directory
            for (String pattern : source.getPatterns()) {
                final PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + base + pattern);
                try {
                    Files.walkFileTree(Paths.get(base), new SimpleFileVisitor<Path>() {
                        @Override
                        public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
                                throws IOException {
                            if (matcher.matches(dir)) {
                                log.debug("Matched dir {} for source {}", dir, source.getId());
                                addCandidate(candidates, dir, source, base);
                                // The directory itself is the candidate, so do not descend into it
                                return FileVisitResult.SKIP_SUBTREE;
                            }
                            return FileVisitResult.CONTINUE;
                        }

                        @Override
                        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                            if (matcher.matches(file)) {
                                log.debug("Matched file {} for source {}", file, source.getId());
                                addCandidate(candidates, file, source, base);
                            }
                            return FileVisitResult.CONTINUE;
                        }
                    });
                } catch (IOException e) {
                    log.error("Failed to gather candidate files for source {}", source.getId(), e);
                }
            }
        }

        return candidates;
    }

    /**
     * Adds information about applicable packages to the list of candidates. Paths which are not
     * directories are ignored. The candidate map receives the keys "sourceId", "base",
     * "patternMatched" (the path relative to the base), "version" and "packagingType", plus "files"
     * and "size" when the directory is a bag.
     *
     * @param candidates list the candidate details are appended to
     * @param filePath path which matched one of the source's patterns
     * @param source ingest source the path was found in
     * @param base base directory of the source
     * @throws IOException
     */
    private void addCandidate(List<Map<String, Object>> candidates, Path filePath,
            IngestSourceConfiguration source, String base) throws IOException {
        File file = filePath.toFile();
        // Only directory bags are candidates currently
        if (!file.isDirectory()) {
            return;
        }

        String version = BagHelper.getVersion(file);

        Map<String, Object> candidate = new HashMap<>();
        candidate.put("sourceId", source.getId());
        candidate.put("base", base);
        candidate.put("patternMatched", Paths.get(base).relativize(filePath).toString());
        candidate.put("version", version);

        if (version != null) {
            // Add payload stats for bags
            addBagInfo(candidate, filePath);
        } else if (file.isDirectory()) {
            candidate.put("packagingType", PackagingType.DIRECTORY.getUri());
        } else {
            // NOTE(review): unreachable while the directory guard above is in place; retained for
            // when non-directory packages become candidates
            // Add stats for a non-bag zip file
            if (file.getName().endsWith(".zip")) {
                try (ZipFile zip = new ZipFile(file)) {
                    candidate.put("files", zip.size());
                }
            }

            candidate.put("size", file.length());
        }

        candidates.add(candidate);
    }

    /**
     * Adds the number of payload files ("files"), the sum of their sizes ("size") and the BagIt
     * packaging type ("packagingType") for the bag located at the given path.
     *
     * @param fileInfo candidate details to add the bag information to
     * @param filePath path of the bag
     */
    private void addBagInfo(Map<String, Object> fileInfo, Path filePath) {
        BagFactory bagFactory = new BagFactory();
        Bag bagFile = bagFactory.createBag(filePath.toFile());

        fileInfo.put("files", bagFile.getPayload().size());

        long size = 0;
        Iterator<BagFile> bagIt = bagFile.getPayload().iterator();
        while (bagIt.hasNext()) {
            size += bagIt.next().getSize();
        }

        fileInfo.put("size", size);
        fileInfo.put("packagingType", PackagingType.BAGIT.getUri());
    }

    /**
     * Returns true if the given path is valid for the given source and is present on disk. A path
     * is valid when it stays within the base directory of the source and matches at least one of
     * the source's patterns.
     *
     * @param pathString path of the package, relative to the base of the source
     * @param sourceId identifier of the ingest source
     * @return true if the source exists and the path is contained in it, matches one of its
     *         patterns and exists; false otherwise, including when the path is null or malformed
     */
    public boolean isPathValid(String pathString, String sourceId) {
        if (pathString == null) {
            return false;
        }

        IngestSourceConfiguration source = getSourceConfiguration(sourceId);
        if (source == null) {
            return false;
        }

        final Path base;
        final Path path;
        try {
            base = Paths.get(source.getBase());
            path = Paths.get(source.getBase(), pathString);
        } catch (java.nio.file.InvalidPathException e) {
            log.debug("Rejecting malformed path {} for source {}", pathString, sourceId, e);
            return false;
        }

        // Reject paths which use relative segments such as ".." to escape the base directory,
        // since those could otherwise still satisfy a wildcard pattern
        if (!path.normalize().startsWith(base.normalize())) {
            return false;
        }

        if (!isPathValidForSource(path, source)) {
            return false;
        }

        return path.toFile().exists();
    }

    /**
     * Returns true if the given path matches any of the patterns specified for the given source
     *
     * @param path full path, including the base of the source
     * @param source ingest source whose patterns are tested
     * @return true if at least one pattern, prefixed with the base of the source, matches the path
     */
    private boolean isPathValidForSource(Path path, IngestSourceConfiguration source) {
        for (String pattern : source.getPatterns()) {
            PathMatcher matcher = FileSystems.getDefault().getPathMatcher("glob:" + source.getBase() + pattern);
            if (matcher.matches(path)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Retrieves the configuration of the ingest source with the given identifier.
     *
     * @param id identifier of the ingest source
     * @return the matching configuration, or null if no source has that identifier
     */
    public IngestSourceConfiguration getSourceConfiguration(String id) {
        for (IngestSourceConfiguration source : configs) {
            if (source.getId().equals(id)) {
                return source;
            }
        }

        return null;
    }

    public void setConfigs(List<IngestSourceConfiguration> configs) {
        this.configs = configs;
    }

    public void setTripleService(TripleStoreQueryService tripleService) {
        this.tripleService = tripleService;
    }

    public void setConfigPath(String configPath) {
        this.configPath = configPath;
    }
}