/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.accesscontrol.staticmap; import java.util.Map; import java.util.logging.Logger; import org.apache.commons.httpclient.URIException; import org.archive.wayback.UrlCanonicalizer; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.resourceindex.filterfactory.ExclusionCaptureFilterGroup; import org.archive.wayback.resourceindex.filters.ExclusionFilter; import org.archive.wayback.surt.SURTTokenizer; import org.archive.wayback.util.ObjectFilter; /** * * * @author brad * @version $Date$, $Revision$ */ public class StaticMapExclusionFilter extends ExclusionFilter { private static final Logger LOGGER = Logger.getLogger( StaticMapExclusionFilter.class.getName()); private String lastChecked = null; private boolean lastCheckedExcluded = false; private boolean notifiedSeen = false; private boolean notifiedPassed = false; Map<String,Object> exclusionMap = null; UrlCanonicalizer canonicalizer = null; /** * @param map where each String key is a SURT that is blocked. */ public StaticMapExclusionFilter(Map<String,Object> map, UrlCanonicalizer canonicalizer) { exclusionMap = map; this.canonicalizer = canonicalizer; } // Set the canonicalizer from the filter, as it may be different from the default @Override public void setFilterGroup(ExclusionCaptureFilterGroup filterGroup) { super.setFilterGroup(filterGroup); if ((filterGroup != null) && (filterGroup.getCaptureFilterGroupCanonicalizer() != null)) { this.canonicalizer = filterGroup.getCaptureFilterGroupCanonicalizer(); } } protected boolean isExcluded(String url) { try { SURTTokenizer st = new SURTTokenizer(url, canonicalizer.isSurtForm()); while(true) { String nextSearch = st.nextSearch(); if(nextSearch == null) { break; } LOGGER.fine("EXCLUSION-MAP:Checking " + nextSearch); if(exclusionMap.containsKey(nextSearch)) { LOGGER.info("EXCLUSION-MAP: EXCLUDED: \"" + nextSearch + "\" (" + url +")"); return true; } } } catch (URIException e) { LOGGER.warning(e.toString()); return true; } return false; } /* (non-Javadoc) * @see org.archive.wayback.resourceindex.SearchResultFilter#filterSearchResult(org.archive.wayback.core.SearchResult) */ public int filterObject(CaptureSearchResult r) { if(!notifiedSeen) { if(filterGroup != null) { filterGroup.setSawAdministrative(); } notifiedSeen = true; } String url = r.getUrlKey(); if(lastChecked != null) { if(lastChecked.equals(url)) { if(lastCheckedExcluded) { return ObjectFilter.FILTER_EXCLUDE; } else { // don't need to: already did last time... //filterGroup.setPassedAdministrative(); return ObjectFilter.FILTER_INCLUDE; } } } lastChecked = url; lastCheckedExcluded = isExcluded(url); if(lastCheckedExcluded) { return ObjectFilter.FILTER_EXCLUDE; } else { if(!notifiedPassed) { if(filterGroup != null) { filterGroup.setPassedAdministrative(); } notifiedPassed = true; } return ObjectFilter.FILTER_INCLUDE; } } }