package org.archive.wayback.resourceindex;
import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE;
import static java.nio.file.StandardWatchEventKinds.ENTRY_DELETE;
import static com.sun.nio.file.SensitivityWatchEventModifier.HIGH;
import static java.nio.file.FileVisitResult.CONTINUE;
import static java.nio.file.FileVisitResult.SKIP_SUBTREE;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitOption;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.WatchEvent;
import java.nio.file.WatchKey;
import java.nio.file.WatchService;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.wayback.resourceindex.cdx.CDXIndex;
/**
* SearchResultSource that watches a single directory for new
* SearchResultSources.
*
* <pre>
* {@code
* <property name="source">
* <bean class="org.archive.wayback.resourceindex.WatchedCDXSource">
* <property name="recursive" value="false" />
* <property name="filters">
* <list>
* <value>^.+\.cdx$</value>
* </list>
* </property>
* <property name="path" value="/wayback/cdx-index/" />
* </bean>
* </property>
* }
* </pre>
*
* @author rcoram
*
*/
public class WatchedCDXSource extends CompositeSearchResultSource {
    private static final Logger LOGGER = Logger
            .getLogger(WatchedCDXSource.class.getName());

    /** Filter used until {@link #setFilters(List)} is called. */
    private static final String DEFAULT_FILTER = "^.+\\.cdx$";

    private Thread watcherThread;
    private Path path;
    private boolean recursive = false;
    private List<String> filters;
    // Replaced wholesale (never mutated in place) so the watcher thread always
    // iterates over a stable list, even if filters are set after the path.
    private volatile List<Pattern> includePatterns;
    private final Set<FileVisitOption> visitOptions = EnumSet
            .of(FileVisitOption.FOLLOW_LINKS);

    /**
     * Creates a source that follows symbolic links and, until configured
     * otherwise, accepts any path ending in <code>.cdx</code>.
     */
    public WatchedCDXSource() {
        this.filters = Arrays.asList(DEFAULT_FILTER);
        this.includePatterns = compileFilters(this.filters);
    }

    /** Compiles every filter expression into a fresh list of patterns. */
    private static List<Pattern> compileFilters(List<String> filters) {
        List<Pattern> compiled = new ArrayList<Pattern>(filters.size());
        for (String filter : filters) {
            compiled.add(Pattern.compile(filter));
        }
        return compiled;
    }

    /**
     * Replaces the include filters. A file is added as a source when its full
     * path matches at least one of the given regular expressions.
     *
     * @param filters
     *            regular expressions matched against the whole path string
     * @throws java.util.regex.PatternSyntaxException
     *             if any expression is invalid; the previously configured
     *             filters are then left untouched
     */
    public void setFilters(List<String> filters) {
        // Compile everything first so a bad expression cannot leave the
        // filter set half-updated.
        this.includePatterns = compileFilters(filters);
        this.filters = filters;
    }

    /**
     * @return the filter expressions currently in effect
     */
    public List<String> getFilters() {
        return this.filters;
    }

    /**
     * Selects whether subdirectories are watched too. The value is read when
     * the watcher is started by {@link #setPath(String)}; changing it
     * afterwards has no effect on a running watcher.
     *
     * @param recursive
     *            true to descend into subdirectories
     */
    public void setRecursive(boolean recursive) {
        this.recursive = recursive;
    }

    /**
     * @return true if subdirectories are to be watched
     */
    public boolean getRecursive() {
        return this.recursive;
    }

    /**
     * Sets the directory to watch and, if no watcher is running yet, scans it
     * for existing CDX files and starts the watcher thread. If the directory
     * cannot be scanned or watched the failure is logged and no thread is
     * started, so a later call may try again.
     *
     * @param path
     *            directory containing the CDX files
     */
    public void setPath(String path) {
        this.path = Paths.get(path);
        if (watcherThread == null) {
            try {
                Thread thread = new WatcherThread(this.path, this.recursive);
                thread.start();
                // Only remember the thread once it is actually running.
                watcherThread = thread;
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE,
                        "Could not watch CDX directory: " + e.getMessage(), e);
            }
        }
    }

    /**
     * @return the watched directory, or null if none has been set
     */
    public String getPath() {
        return this.path == null ? null : this.path.toString();
    }

    /**
     * Removes a SearchResultSource from the list of sources.
     *
     * @param deleted
     *            the index to remove
     * @return true if the index was present and has been removed
     */
    public boolean removeSource(CDXIndex deleted) {
        // NOTE(review): this is called from the watcher thread; confirm that
        // the inherited 'sources' list tolerates concurrent access.
        return sources.remove(deleted);
    }

    /**
     * Monitors a directory for ENTRY_CREATE/ENTRY_DELETE events, creating
     * and removing SearchResultSources accordingly.
     *
     * @author rcoram
     *
     */
    private class WatcherThread extends Thread {
        private final WatchService watcher;
        private final HashMap<WatchKey, Path> keys = new HashMap<WatchKey, Path>();
        private final int depth;
        private final boolean watchRecursively;
        private final FileVisitor<Path> visitor = new CDXFileVisitor();

        /**
         * Registers the directory (and, when recursive, its subdirectories)
         * with a new watch service and adds every matching file that already
         * exists.
         *
         * @param path
         *            root directory to watch
         * @param recursive
         *            whether to descend into subdirectories
         * @throws IOException
         *             if the watch service cannot be created or the tree
         *             cannot be walked
         */
        public WatcherThread(Path path, boolean recursive) throws IOException {
            this.watchRecursively = recursive;
            if (recursive) {
                LOGGER.finest("Watching recursively.");
                this.depth = Integer.MAX_VALUE;
            } else {
                this.depth = 1;
            }
            this.watcher = FileSystems.getDefault().newWatchService();
            boolean walked = false;
            try {
                Files.walkFileTree(path, visitOptions, depth, visitor);
                walked = true;
            } finally {
                if (!walked) {
                    // Construction failed: do not leak the watch service.
                    closeWatcher();
                }
            }
        }

        /**
         * Processes watch events until the thread is interrupted or no
         * watched directory remains accessible.
         */
        @Override
        public void run() {
            try {
                while (true) {
                    WatchKey key;
                    try {
                        key = watcher.take();
                    } catch (InterruptedException x) {
                        // Preserve the interrupt status for whoever owns us.
                        Thread.currentThread().interrupt();
                        return;
                    }
                    Path dir = keys.get(key);
                    for (WatchEvent<?> event : key.pollEvents()) {
                        WatchEvent.Kind<?> kind = event.kind();
                        if (kind == ENTRY_CREATE) {
                            // The context of these event kinds is the
                            // entry's path relative to the watched directory.
                            handleCreated(dir.resolve((Path) event.context()));
                        } else if (kind == ENTRY_DELETE) {
                            handleDeleted(dir.resolve((Path) event.context()));
                        }
                    }
                    // An invalid key means that one directory is no longer
                    // accessible; keep going while others are still watched.
                    if (!key.reset()) {
                        keys.remove(key);
                        if (keys.isEmpty()) {
                            break;
                        }
                    }
                }
            } finally {
                closeWatcher();
            }
        }

        /** Walks a newly created entry, adding matching files as sources. */
        private void handleCreated(Path created) {
            try {
                Files.walkFileTree(created, visitOptions, depth, visitor);
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "Problem walking: "
                        + created.toString(), e);
            }
        }

        /** Drops the source that corresponds to a deleted entry, if any. */
        private void handleDeleted(Path deleted) {
            CDXIndex index = new CDXIndex();
            index.setPath(deleted.toString());
            if (!removeSource(index)) {
                LOGGER.info("CDX " + deleted
                        + " not found in list of sources.");
            } else {
                LOGGER.info("Removed " + deleted);
            }
        }

        /** Closes the watch service, logging rather than propagating errors. */
        private void closeWatcher() {
            try {
                watcher.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "Problem closing watch service", e);
            }
        }

        /**
         * Handles traversal of CDX (sub)directories.
         *
         * @author rcoram
         *
         */
        public class CDXFileVisitor extends SimpleFileVisitor<Path> {
            /**
             * Adds a regular file as a CDX source when its path matches any
             * include pattern and it is not already present.
             */
            @Override
            public FileVisitResult visitFile(Path path,
                    BasicFileAttributes attrs) {
                if (attrs.isRegularFile()) {
                    String spath = path.toString();
                    for (Pattern pattern : includePatterns) {
                        if (pattern.matcher(spath).matches()) {
                            CDXIndex index = new CDXIndex();
                            index.setPath(spath);
                            if (!sources.contains(index)) {
                                LOGGER.info("Adding CDX: " + index.getPath());
                                addSource(index);
                            }
                            // One match is enough; keep testing the
                            // remaining patterns only when this one failed.
                            break;
                        }
                    }
                }
                return CONTINUE;
            }

            /**
             * Registers a directory with the watch service. When not
             * watching recursively only the first directory visited (the
             * root) is registered; any other directory is skipped.
             */
            @SuppressWarnings("restriction")
            @Override
            public FileVisitResult preVisitDirectory(Path dir,
                    BasicFileAttributes attrs) throws IOException {
                if (!watchRecursively && !keys.isEmpty()) {
                    return SKIP_SUBTREE;
                }
                WatchKey key = dir.register(watcher,
                        new WatchEvent.Kind<?>[] { ENTRY_CREATE, ENTRY_DELETE },
                        HIGH);
                LOGGER.info("Watching: " + dir.toString());
                keys.put(key, dir);
                return CONTINUE;
            }
        }
    }
}