/**
*
*/
package org.archive.wayback.accesscontrol.oracleclient;
import java.util.Date;
import java.util.logging.Logger;
import org.archive.accesscontrol.AccessControlClient;
import org.archive.accesscontrol.AccessControlException;
import org.archive.accesscontrol.RobotsUnavailableException;
import org.archive.accesscontrol.RuleOracleUnavailableException;
import org.archive.util.ArchiveUtils;
import org.archive.wayback.accesscontrol.ContextExclusionFilterFactory;
import org.archive.wayback.accesscontrol.CollectionContext;
import org.archive.wayback.accesscontrol.oracleclient.CustomPolicyOracleFilter.Policy;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.replay.html.RewriteDirector;
import org.archive.wayback.resourceindex.filters.ExclusionFilter;
/**
 * Implementation of {@link ContextExclusionFilterFactory} and {@link RewriteDirector}
 * on top of {@link AccessControlClient}.
 * <p>
 * Typical setup: configure {@link #setOracleUrl(String)} (and optionally
 * {@link #setProxyHostPort(String)}), then call {@link #init()}. Alternatively,
 * inject an externally built client with {@link #setClient(AccessControlClient)}.
 * </p>
 */
public class OraclePolicyService implements ContextExclusionFilterFactory, RewriteDirector {
    private static final Logger LOGGER = Logger.getLogger(OraclePolicyService.class.getName());

    private String oracleUrl;
    private String proxyHostPort;
    private String fallbackAccessGroup;

    // One AccessControlClient is shared by every ExclusionFilter created by this
    // service and by getRewriteDirective().
    // TODO: confirm that AccessControlClient is really thread-safe.
    private AccessControlClient client;

    /**
     * Sets the URL handed to the {@link AccessControlClient} constructor by
     * {@link #initializeClient()}.
     * @param oracleUrl URL of the access-control oracle
     */
    public void setOracleUrl(String oracleUrl) {
        this.oracleUrl = oracleUrl;
    }

    /**
     * Sets the proxy passed to {@code AccessControlClient.setRobotProxy} by
     * {@link #initializeClient()}.
     * @param proxyHostPort proxy in {@code HOST:PORT} form; {@code null} or blank
     *        means no proxy is configured
     */
    public void setProxyHostPort(String proxyHostPort) {
        this.proxyHostPort = proxyHostPort;
    }

    /**
     * Fallback accessGroup used when ExclusionFilter is created with {@link #get()}.
     * @param fallbackAccessGroup access group passed to filters created by {@link #get()}
     */
    public void setFallbackAccessGroup(String fallbackAccessGroup) {
        this.fallbackAccessGroup = fallbackAccessGroup;
    }

    /**
     * Inject {@link AccessControlClient}.
     * @param client AccessControlClient initialized externally.
     */
    public void setClient(AccessControlClient client) {
        this.client = client;
    }

    /**
     * Call this method after initializing properties. Builds the client via
     * {@link #initializeClient()} unless one has already been injected with
     * {@link #setClient(AccessControlClient)}.
     */
    public void init() {
        if (client == null) {
            initializeClient();
        }
    }

    /**
     * Creates the {@link AccessControlClient} for {@code oracleUrl} and, when
     * {@code proxyHostPort} is set, configures its robot proxy.
     * <p>
     * A {@code null} or blank {@code proxyHostPort} is skipped silently. A value
     * that is not of the form {@code HOST:PORT} (no colon, or nothing before the
     * colon) is ignored with a warning rather than being dropped without a trace.
     * Whitespace around the host and the port is tolerated.
     * </p>
     * @throws NumberFormatException if the port part is not a valid integer
     */
    protected void initializeClient() {
        client = new AccessControlClient(oracleUrl);
        if (proxyHostPort == null) {
            return;
        }
        String spec = proxyHostPort.trim();
        if (spec.isEmpty()) {
            return;
        }
        int colonIdx = spec.indexOf(':');
        if (colonIdx <= 0) {
            // Previously this case was ignored silently, hiding configuration mistakes.
            LOGGER.warning("Ignoring proxyHostPort \"" + proxyHostPort
                    + "\": expected HOST:PORT");
            return;
        }
        String host = spec.substring(0, colonIdx).trim();
        // parseInt avoids the needless Integer boxing of Integer.valueOf.
        int port = Integer.parseInt(spec.substring(colonIdx + 1).trim());
        client.setRobotProxy(host, port);
    }

    /**
     * Asks the oracle for the policy that applies to {@code capture} for
     * {@code accessGroup}, using the current time as the retrieval date.
     * @param accessGroup access group to evaluate the policy for
     * @param capture capture whose original URL and capture date are looked up
     * @return policy string exactly as returned by {@code AccessControlClient.getPolicy}
     * @throws RobotsUnavailableException propagated from the client
     * @throws RuleOracleUnavailableException propagated from the client
     * @throws IllegalStateException if no client has been created or injected yet
     */
    protected String getRawPolicy(String accessGroup,
            CaptureSearchResult capture) throws RobotsUnavailableException,
            RuleOracleUnavailableException {
        if (client == null) {
            // Fail with an actionable message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "AccessControlClient is not set: call init() or setClient() first");
        }
        String url = capture.getOriginalUrl();
        Date captureDate = capture.getCaptureDate();
        Date retrievalDate = new Date();
        return client.getPolicy(ArchiveUtils.addImpliedHttpIfNecessary(url),
                captureDate, retrievalDate, accessGroup);
    }

    /**
     * Creates an exclusion filter for the configured fallback access group.
     * @return new filter bound to {@code fallbackAccessGroup}
     * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#get()
     */
    @Override
    public ExclusionFilter get() {
        return getExclusionFilter(fallbackAccessGroup);
    }

    /**
     * Does nothing.
     * @see org.archive.wayback.accesscontrol.ExclusionFilterFactory#shutdown()
     */
    @Override
    public void shutdown() {
    }

    /**
     * Creates an exclusion filter whose access group is the collection context name.
     * @param context collection context supplying the access group name
     * @return new filter bound to {@code context.getCollectionContextName()}
     * @see ContextExclusionFilterFactory#getExclusionFilter(CollectionContext)
     */
    @Override
    public ExclusionFilter getExclusionFilter(CollectionContext context) {
        return getExclusionFilter(context.getCollectionContextName());
    }

    /**
     * Creates a {@link CustomPolicyOracleFilter} that shares this service's client.
     * @param accessGroup access group the filter evaluates policies for
     * @return new filter instance
     */
    protected ExclusionFilter getExclusionFilter(String accessGroup) {
        return new CustomPolicyOracleFilter(client, accessGroup);
    }

    /**
     * Returns the rewrite directive for {@code capture}, i.e. the raw policy
     * unless it is an exclusion policy.
     * @param context collection context supplying the access group name
     * @param capture capture being replayed
     * @return the policy string, or {@code null} when the oracle returns no policy,
     *         when the policy matches one of the {@link Policy} values, or when
     *         the lookup fails with an {@link AccessControlException}
     */
    @Override
    public String getRewriteDirective(CollectionContext context, CaptureSearchResult capture) {
        String accessGroup = context.getCollectionContextName();
        try {
            String policy = getRawPolicy(accessGroup, capture);
            if (policy == null) {
                // No policy means no directive; also keeps null away from Policy.matches().
                return null;
            }
            // exclusion policies are not rewrite directives. map them to null.
            // (Danger: assumes Policy enum has exclusion values only).
            for (Policy handler : Policy.values()) {
                if (handler.matches(policy)) {
                    return null;
                }
            }
            return policy;
        } catch (AccessControlException ex) {
            // TODO: If retrieval of the rewrite directive fails due to an error in
            // the underlying service, replay can suffer. It would be better to let
            // the user know of this transient problem.
            LOGGER.warning(
                "Oracle Unavailable/not running, default to allow all until it responds. Details: " +
                ex.toString());
            return null;
        }
    }

    // NOTE(review): leftover from what looks like an earlier factory-style API;
    // appears obsolete - confirm and remove.
    // @Override
    // public RewriteDirector getRewriteDirector(ExclusionContext context) {
    // return new ContextRewriteDirector(context.getExclusionContextName());
    // }
}