package org.archive.wayback.accesscontrol.oracleclient; import java.util.Date; import java.util.logging.Logger; import org.archive.accesscontrol.AccessControlClient; import org.archive.accesscontrol.RobotsUnavailableException; import org.archive.accesscontrol.RuleOracleUnavailableException; import org.archive.util.ArchiveUtils; import org.archive.wayback.accesspoint.AccessPointAdapter; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.util.ObjectFilter; /** * Oracle Filter Implementation that supports custom policies in addition to * allow, block, block-message and robots * * The policy is stored in the CaptureSearchResult * * <p> * Note: this class is being re-designed to allow for run-time customization * (i.e. with Spring config): * <ul> * <li>Redefine {@code Policy} as an interface + abstract implementation.</li> * <li>Define concrete instances for well-known policies like {@code block}, * {@code allow} and {@code robots}.</li> * <li>Add a property for configurable list of {@code Policy}s, in a class * instantiating this object (factory?)</li> * </ul> * {@code Policy} enum below is re-designed toward in this direction. * The second argument of {@link Policy#apply(CaptureSearchResult, OracleExclusionFilter)} * is very likely to be changed to more abstract interface. * </p> * @see CustomPolicyOracleFilterFactory * @see AccessPointAdapter */ public class CustomPolicyOracleFilter extends OracleExclusionFilter { private static final Logger LOGGER = Logger .getLogger(CustomPolicyOracleFilter.class.getName()); // TODO: redefine this enum as ordinary base class with well-known // instances to make CustomPolicyOracleFilter runtime-configurable. enum Policy { ALLOW("allow"), BLOCK_HIDDEN("block") { @Override int apply(CaptureSearchResult capture, OracleExclusionFilter filter) { // mark capture blocked, and include in the result (see ARI-3879). // no message is given to user. capture.setRobotFlag(CaptureSearchResult.CAPTURE_ROBOT_BLOCKED); //return FILTER_EXCLUDE; return FILTER_INCLUDE; } }, BLOCK_MESSAGE("block-message") { @Override int apply(CaptureSearchResult capture, OracleExclusionFilter filter) { return filter.handleBlock(); } }, ROBOTS("robots") { @Override int apply(CaptureSearchResult capture, OracleExclusionFilter filter) { return filter.handleRobots(); } } ; Policy(String policy) { this.policy = policy; } boolean matches(String other) { return (other.equals(this.policy)); } final String policy; /** * Apply policy. Bare minimum required is to return one of {@link ObjectFilter} * result code. It may call {@code handle*} methods on {@code filter} for * common policy handling, and/or modify {@code capture}. * <p>TODO: define abstract interface for allow/block notifications defined * in {@code OracleExclusionFilter}.</p> * @param capture CaptureSearchResult * @param filter OracleExclusionFilter object calling this method. * @return one of {@link ObjectFilter} result codes. */ int apply(CaptureSearchResult capture, OracleExclusionFilter filter) { return filter.handleAllow(); } } protected int defaultFilter = FILTER_INCLUDE; public CustomPolicyOracleFilter(String oracleUrl, String accessGroup, String proxyHostPort) { super(oracleUrl, accessGroup, proxyHostPort); } public CustomPolicyOracleFilter(AccessControlClient client, String accessGroup) { super(client, accessGroup); } protected String getRawPolicy(CaptureSearchResult capture) throws RobotsUnavailableException, RuleOracleUnavailableException { String url = capture.getOriginalUrl(); Date captureDate = capture.getCaptureDate(); Date retrievalDate = new Date(); return client.getPolicy(ArchiveUtils.addImpliedHttpIfNecessary(url), captureDate, retrievalDate, accessGroup); } @Override public int filterObject(CaptureSearchResult o) { try { String policy = getRawPolicy(o); // Setting policy to CaptureSearchResult has no effect with new approach // in which CustomPolicyOracle is called through CDXAccessFilter, because // o is just a transient wrapper around CDXLine object; CDXLine has no place // to store extra information. Wayback makes separate call to getRewriteDirective() // above. This line will be removed when migration completes. o.setOraclePolicy(policy); if (policy == null) { return defaultFilter; } for (Policy handler : Policy.values()) { if (handler.matches(policy)) { return handler.apply(o, this); } } // unhandled policy is okay. Oracle also returns rewrite directives // as policy. Just ignore them. } catch (RobotsUnavailableException e) { e.printStackTrace(); } catch (RuleOracleUnavailableException e) { LOGGER.warning( "Oracle Unavailable/not running, default to allow all until it responds. Details: " + e.toString()); } return defaultFilter; } }