package org.archive.wayback.webapp;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.archive.util.iterator.CloseableIterator;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.resourceindex.filters.FileRegexFilter;
import org.archive.wayback.util.ObjectFilter;
import org.archive.wayback.util.flatfile.FlatFile;

/**
 * Capture filter that matches the file name of a {@link CaptureSearchResult}
 * against the inherited regex patterns, extracts one capture group from the
 * first pattern that is found in the name, and checks whether that value is
 * listed in a parameter file.
 *
 * <p>Depending on {@link #isExclusion()}, a listed value either excludes the
 * capture (exclusion mode, the default) or is required for the capture to be
 * included.
 *
 * <p>The same instance acts as both the filter and its factory: see
 * {@link #get(AccessPoint)}.
 */
public class FileRegexParamFilterAndFactory extends FileRegexFilter implements CustomResultFilterFactory {

    private static final Logger LOGGER =
            Logger.getLogger(FileRegexParamFilterAndFactory.class.getName());

    /** Path of the flat file listing the parameter values, one entry per line. */
    protected String paramFile;

    /** Index of the regex capture group whose value is looked up in {@link #paramSet}. */
    protected int paramIndex = 1;

    /** When true a listed value excludes the capture; when false it is required for inclusion. */
    protected boolean isExclusion = true;

    /** Delimiter ending the first word of a parameter-file line; the rest of the line is ignored. */
    protected char delim = '\t';

    /** Optional file-name prefix; when set, only files starting with it are tested against the patterns. */
    protected String prefixMatch;

    /** Values loaded by {@link #loadParamFile()}; {@code null} until that method has run. */
    protected Set<String> paramSet = null;

    /**
     * Loads the parameter file into {@link #paramSet}.
     *
     * <p>This method should be set as the init-method in the spring config
     * ({@code init-method="loadParamFile"}) when using this filter.
     *
     * <p>Each line is trimmed; empty lines and lines starting with {@code #}
     * are skipped. Only the text before the first {@link #delim} is kept.
     *
     * <p>If the file cannot be opened the failure is logged as a warning and
     * {@link #paramSet} is left as an empty set, so later filtering sees no
     * listed values instead of failing.
     */
    public void loadParamFile() {
        // Fill a local set and publish it only once complete, so filterObject
        // never observes a half-populated set during a (re)load.
        Set<String> loaded = new HashSet<String>();

        FlatFile ff = new FlatFile(paramFile);
        try {
            CloseableIterator<String> itr = ff.getSequentialIterator();
            try {
                while (itr.hasNext()) {
                    String param = itr.next().trim();

                    if (param.isEmpty() || param.startsWith("#")) {
                        continue;
                    }

                    // Use only the first word, ignore the rest
                    int wordEnd = param.indexOf(delim);
                    if (wordEnd > 0) {
                        param = param.substring(0, wordEnd);
                    }

                    loaded.add(param);
                }
            } finally {
                // Always release the underlying file, even if reading fails.
                itr.close();
            }
        } catch (IOException io) {
            LOGGER.warning(io.toString());
        }

        paramSet = loaded;
    }

    /**
     * Returns this object as the filter.
     *
     * <p>Filter and factory are the same object to avoid creating a new object
     * that is unmodified during the filtering process.
     *
     * @param ap the access point requesting a filter (not used)
     * @return this instance
     */
    @Override
    public ObjectFilter<CaptureSearchResult> get(AccessPoint ap) {
        return this;
    }

    /**
     * Decides whether a capture passes the filter, based on its file name.
     *
     * <p>If {@link #prefixMatch} is set and the file name does not start with
     * it, the capture is treated as unmatched without consulting the patterns.
     * Otherwise the first pattern found in the file name supplies capture
     * group {@link #paramIndex}; the capture is "matched" when that value is
     * contained in {@link #paramSet}.
     *
     * @param o the capture to test
     * @return in exclusion mode {@code FILTER_EXCLUDE} when matched and
     *         {@code FILTER_INCLUDE} otherwise; in inclusion mode the reverse
     */
    @Override
    public int filterObject(CaptureSearchResult o) {
        final String file = o.getFile();

        boolean matched = false;

        if (prefixMatch != null) {
            if (!file.startsWith(prefixMatch)) {
                // Outside the configured prefix: same outcome as "not matched".
                return (isExclusion ? FILTER_INCLUDE : FILTER_EXCLUDE);
            }
        }

        for (Pattern pattern : patterns) {
            Matcher matcher = pattern.matcher(file);
            if (matcher.find()) {
                String param = matcher.group(paramIndex);

                if (paramSet.contains(param)) {
                    // NOTE(review): the message says "Excluding" even when
                    // isExclusion is false and the match leads to inclusion.
                    if (LOGGER.isLoggable(Level.FINE)) {
                        LOGGER.fine("Excluding (w)arc: " + file);
                    }
                    matched = true;
                    break;
                }
            }
        }

        if (isExclusion) {
            return (matched ? FILTER_EXCLUDE : FILTER_INCLUDE);
        } else {
            return (matched ? FILTER_INCLUDE : FILTER_EXCLUDE);
        }
    }

    // Getters/Setters

    /** @return the path of the parameter file */
    public String getParamFile() {
        return paramFile;
    }

    /** @param paramFile the path of the parameter file read by {@link #loadParamFile()} */
    public void setParamFile(String paramFile) {
        this.paramFile = paramFile;
    }

    /** @return the index of the regex group used as the lookup value */
    public int getParamIndex() {
        return paramIndex;
    }

    /** @param paramIndex the index of the regex group used as the lookup value */
    public void setParamIndex(int paramIndex) {
        this.paramIndex = paramIndex;
    }

    /** @return true if a listed value excludes the capture, false if it is required for inclusion */
    public boolean isExclusion() {
        return isExclusion;
    }

    /** @param isExclusion true to exclude captures with a listed value, false to include only those */
    public void setExclusion(boolean isExclusion) {
        this.isExclusion = isExclusion;
    }

    /** @return the delimiter that ends the first word of a parameter-file line */
    public char getDelim() {
        return delim;
    }

    /** @param delim the delimiter that ends the first word of a parameter-file line */
    public void setDelim(char delim) {
        this.delim = delim;
    }

    /** @return the required file-name prefix, or {@code null} if none is set */
    public String getPrefixMatch() {
        return prefixMatch;
    }

    /** @param prefixMatch the required file-name prefix, or {@code null} to test every file */
    public void setPrefixMatch(String prefixMatch) {
        this.prefixMatch = prefixMatch;
    }
}