package org.archive.wayback.replay;
import org.archive.wayback.ReplayDispatcher;
import org.archive.wayback.archivalurl.requestparser.DatelessReplayRequestParser;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.util.Timestamp;
/**
* Default implementation of {@link ReplayCaptureSelector}.
* <p>
* This is what used to be embedded in the {@code AccessPoint handleReplay} method.
* It compares previous and next capture of the current one, picks closer capture
* time-wise. Captures are removed as it traverses the captures list, so that
* the same capture will not be returned twice.
* </p>
* <p>
* It also favors successful (200) captures over closer redirects and errors for
* embeds and <em>bestLatestReplayRequest</em> (see {@link DatelessReplayRequestParser}).
* </p>
* <p>
* New feature 2014-00-18: It also skips captures with any of ROBOT_FLAGS_SKIPPED
* in {@code robotflags} field. This is for new <em>soft-blocked captures</em>
* feature.
* </p>
* <p>
* For backward compatibility, this implementation delegates closest-selection (i.e.
* selecting the first capture to return) to {@link ReplayDispatcher} passed to
* its constructor. {@code ReplayCaptureSelector} is not ready for factory-based
* customization because of this. When {@code ReplayCaptureSelector}
* takes over closest-selection functionality from {@code ReplayDispatcher},
* dependency on {@code replayDispatcher} will be removed.
* </p>
*/
public class DefaultReplayCaptureSelector implements ReplayCaptureSelector {
    /**
     * Captures with any of these flags in their {@code robotflags} field are
     * skipped by {@link #next()}.
     * (TODO: make this customizable?)
     */
    public static final String ROBOT_FLAGS_SKIPPED = "X";

    /** Request being replayed; set through {@link #setRequest(WaybackRequest)}. */
    private WaybackRequest wbRequest;

    /** Capture list to pick from; set through {@link #setCaptures(CaptureSearchResults)}. */
    private CaptureSearchResults captures;

    /**
     * Requested replay time as epoch milliseconds, computed from the request's
     * replay timestamp in {@link #setRequest(WaybackRequest)}.
     */
    private long requestMS;

    // Current implementation delegates closest-selection part to
    // ReplayDispatcher. It will eventually be removed.
    private ReplayDispatcher replayDispatcher;

    /**
     * Capture picked by the most recent {@link #next()} call. It is
     * {@code null} before the first pick, after
     * {@link #setCaptures(CaptureSearchResults)}, and when {@code next()}
     * found nothing to return.
     */
    private CaptureSearchResult currentClosest;

    /**
     * Initialize object with {@link ReplayDispatcher}, to which
     * closest-selection is delegated.
     * @param replayDispatcher {@code ReplayDispatcher}, cannot be null.
     */
    public DefaultReplayCaptureSelector(ReplayDispatcher replayDispatcher) {
        this.replayDispatcher = replayDispatcher;
    }

    /**
     * Stores the request and caches its replay timestamp as milliseconds for
     * the distance comparisons done in {@link #findNextClosest()}.
     * @param wbRequest request whose replay timestamp is parsed
     */
    @Override
    public void setRequest(WaybackRequest wbRequest) {
        this.wbRequest = wbRequest;
        requestMS = Timestamp
            .parseBefore(wbRequest.getReplayTimestamp()).getDate()
            .getTime();
    }

    /**
     * Stores the capture list and resets the traversal, so that the following
     * {@link #next()} call asks the {@code ReplayDispatcher} for the closest
     * capture again.
     * @param captures captures to select from
     */
    @Override
    public void setCaptures(CaptureSearchResults captures) {
        this.captures = captures;
        currentClosest = null;
    }

    /**
     * set {@link ReplayDispatcher} for selecting the best capture.
     * @param replayDispatcher dispatcher used by {@link #next()} to pick the
     *        first capture
     */
    public void setReplayDispatcher(ReplayDispatcher replayDispatcher) {
        this.replayDispatcher = replayDispatcher;
    }

    /**
     * Tests whether {@code capture} has at least one of the robot flags listed
     * in {@code flags}.
     * @param capture capture to inspect
     * @param flags flag characters to look for, one flag per character
     * @return {@code true} if any character of {@code flags} is set on
     *         {@code capture}; {@code false} otherwise, including when the
     *         capture has no robot flags at all
     */
    protected static boolean hasAnyRobotFlags(CaptureSearchResult capture, String flags) {
        // most capture have no robot flag - do a shortcut.
        if (capture.getRobotFlags() != null) {
            for (int i = 0; i < flags.length(); i++) {
                if (capture.isRobotFlagSet(flags.charAt(i))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the next capture to try.
     * <p>
     * When there is no current capture, the first candidate comes from
     * {@code replayDispatcher.getClosest}; otherwise it comes from
     * {@link #findNextClosest()}. Candidates carrying any of
     * {@link #ROBOT_FLAGS_SKIPPED} are skipped. For embedded context requests
     * whose candidate is an HTTP error, and for best-latest requests whose
     * candidate is not a success, the neighbouring captures are searched for a
     * success, keeping a redirect as a fallback.
     * </p>
     * @return the selected capture, or {@code null} if none is left
     */
    @Override
    public CaptureSearchResult next() {
        if (currentClosest == null) {
            currentClosest = replayDispatcher.getClosest(wbRequest, captures);
        } else {
            currentClosest = findNextClosest();
        }
        while (currentClosest != null) {
            // Soft-blocked captures must never be returned: move on to the
            // next closest capture and re-evaluate it from scratch.
            if (hasAnyRobotFlags(currentClosest, ROBOT_FLAGS_SKIPPED)) {
                currentClosest = findNextClosest();
                continue;
            }
            // Attempt to resolve any not-found embedded content with next-best.
            // For "best last" capture, skip not-founds and redirects, hoping to
            // find the best 200 response.
            boolean wantsBetterCapture =
                (wbRequest.isAnyEmbeddedContext() && currentClosest.isHttpError())
                || (wbRequest.isBestLatestReplayRequest() && !currentClosest.isHttpSuccess());
            if (wantsBetterCapture) {
                // NOTE(review): a capture that is neither a redirect nor a
                // success does not advance currentClosest, so how far this
                // search reaches depends on what removeFromList() does to the
                // removed capture's prev/next links - confirm against
                // CaptureSearchResult.
                CaptureSearchResult capture;
                while ((capture = findNextClosest()) != null) {
                    if (capture.isHttpRedirect()) {
                        // save redirects, but keep looking; it'll be used if no
                        // better capture is found (caveat: picks the last, i.e. farthest,
                        // redirect capture.)
                        currentClosest = capture;
                    } else if (capture.isHttpSuccess()) {
                        currentClosest = capture;
                        break;
                    }
                }
                // The fallback search does not look at robot flags, so the
                // capture it settled on may itself be soft-blocked.
                if (hasAnyRobotFlags(currentClosest, ROBOT_FLAGS_SKIPPED)) {
                    currentClosest = findNextClosest();
                    continue;
                }
            }
            break;
        }
        return currentClosest;
    }

    /**
     * Removes the current capture from the list and chooses between its
     * previous and next neighbours.
     * <p>
     * Preference order: the only existing neighbour; a neighbour captured
     * exactly at the requested time (previous first); the neighbour sharing
     * the current capture's digest when only one does; the neighbour with
     * HTTP code {@code "200"} when only one has it; finally the neighbour
     * closer in time to the request, with ties going to the next capture.
     * </p>
     * <p>
     * Must only be called while {@code currentClosest} is non-null.
     * </p>
     * @return the chosen neighbour, or {@code null} if the current capture
     *         has neither a previous nor a next capture
     */
    protected CaptureSearchResult findNextClosest() {
        CaptureSearchResult prev = currentClosest.getPrevResult();
        CaptureSearchResult next = currentClosest.getNextResult();
        // Neighbours are read first; the current capture is then dropped so
        // that it cannot be handed out a second time.
        currentClosest.removeFromList();
        if (prev == null) {
            return next;
        } else if (next == null) {
            return prev;
        }

        long prevMS = prev.getCaptureDate().getTime();
        long nextMS = next.getCaptureDate().getTime();
        long prevDiff = Math.abs(prevMS - requestMS);
        long nextDiff = Math.abs(requestMS - nextMS);
        if (prevDiff == 0) {
            return prev;
        } else if (nextDiff == 0) {
            return next;
        }

        // Null-safe digest comparison: a neighbour without a digest simply
        // does not count as "same content" instead of raising an NPE.
        String currHash = currentClosest.getDigest();
        String prevHash = prev.getDigest();
        String nextHash = next.getDigest();
        boolean prevSameHash = (prevHash != null) && prevHash.equals(currHash);
        boolean nextSameHash = (nextHash != null) && nextHash.equals(currHash);
        if (prevSameHash != nextSameHash) {
            return prevSameHash ? prev : next;
        }

        String prevStatus = prev.getHttpCode();
        String nextStatus = next.getHttpCode();
        boolean prev200 = (prevStatus != null) && prevStatus.equals("200");
        boolean next200 = (nextStatus != null) && nextStatus.equals("200");
        // If only one is a 200, prefer the entry with the 200
        if (prev200 != next200) {
            return (prev200 ? prev : next);
        }

        if (prevDiff < nextDiff) {
            return prev;
        } else {
            return next;
        }
    }
}