package org.archive.wayback.accesscontrol.staticmap; import java.io.File; import java.io.IOException; import java.util.TreeSet; import java.util.logging.Logger; import org.archive.util.SURT; import org.archive.util.iterator.CloseableIterator; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.accesscontrol.ExclusionFilterFactory; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.util.flatfile.FlatFile; import org.archive.wayback.util.url.AggressiveUrlCanonicalizer; public class StaticListExclusionFilterFactory implements ExclusionFilterFactory { private static final Logger LOGGER = Logger.getLogger(StaticMapExclusionFilterFactory.class.getName()); private int checkInterval = 0; private TreeSet<String> excludes = null; private File file = null; long lastUpdated = 0; UrlCanonicalizer canonicalizer = new AggressiveUrlCanonicalizer(); /** * Thread object of update thread -- also is flag indicating if the thread * has already been started -- static, and access to it is synchronized. */ private static Thread updateThread = null; /** * load exclusion file and startup polling thread to check for updates * @throws IOException if the exclusion file could not be read. */ public void init() throws IOException { reloadFile(); if(checkInterval > 0) { startUpdateThread(); } } protected void reloadFile() throws IOException { long currentMod = file.lastModified(); if(currentMod == lastUpdated) { if(currentMod == 0) { LOGGER.severe("No exclude file at " + file.getAbsolutePath()); } return; } LOGGER.info("Reloading exclusion file " + file.getAbsolutePath()); try { excludes = loadFile(file.getAbsolutePath()); lastUpdated = currentMod; LOGGER.info("Reload " + file.getAbsolutePath() + " OK"); } catch(IOException e) { lastUpdated = -1; excludes = null; e.printStackTrace(); LOGGER.severe("Reload " + file.getAbsolutePath() + " FAILED:" + e.getLocalizedMessage()); } } protected TreeSet<String> loadFile(String path) throws IOException { TreeSet<String> excludes = new TreeSet<String>(); FlatFile ff = new FlatFile(path); CloseableIterator<String> itr = ff.getSequentialIterator(); while(itr.hasNext()) { String line = (String) itr.next(); line = line.trim(); if(line.length() == 0) { continue; } line = canonicalizer.urlStringToKey(line); String surt = line.startsWith("(") ? line : SURT.fromPlain(line); // SURTTokenizer.prefixKey(line); LOGGER.fine("EXCLUSION-MAP: adding " + surt); excludes.add(surt); } itr.close(); return excludes; } /** * @return ObjectFilter which blocks CaptureSearchResults in the * exclusion file. */ public ExclusionFilter get() { if(excludes == null) { return null; } return new StaticListExclusionFilter(excludes, canonicalizer); } private synchronized void startUpdateThread() { if (updateThread != null) { return; } updateThread = new CacheUpdaterThread(this,checkInterval); updateThread.start(); } private synchronized void stopUpdateThread() { if (updateThread == null) { return; } updateThread.interrupt(); } private class CacheUpdaterThread extends Thread { /** * object which merges CDX files with the BDBResourceIndex */ private StaticListExclusionFilterFactory service = null; private int runInterval; /** * @param service ExclusionFactory which will be reloaded * @param runInterval int number of seconds between reloads */ public CacheUpdaterThread(StaticListExclusionFilterFactory service, int runInterval) { super("CacheUpdaterThread"); super.setDaemon(true); this.service = service; this.runInterval = runInterval; LOGGER.info("CacheUpdaterThread is alive."); } public void run() { int sleepInterval = runInterval; while (true) { try { try { service.reloadFile(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } Thread.sleep(sleepInterval * 1000); } catch (InterruptedException e) { e.printStackTrace(); return; } } } } /** * @return the checkInterval in seconds */ public int getCheckInterval() { return checkInterval; } /** * @param checkInterval the checkInterval in seconds to set */ public void setCheckInterval(int checkInterval) { this.checkInterval = checkInterval; } /** * @return the path */ public String getFile() { return file.getAbsolutePath(); } /** * @param path the file to set */ public void setFile(String path) { this.file = new File(path); } /* (non-Javadoc) * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown() */ public void shutdown() { stopUpdateThread(); } }