package org.archive.wayback.resourceindex.filters;
import org.apache.commons.lang.math.NumberUtils;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.util.ObjectFilter;
/**
 * Stateful filter that drops consecutive captures whose timestamps share the
 * same leading {@code timestampDedupLength} characters.
 *
 * <p>The first capture seen for a given timestamp prefix is always included.
 * A following capture with the same prefix is included only when its HTTP
 * code is numerically lower than the lowest code already seen for that
 * prefix; otherwise it is excluded. The filter remembers the previous prefix
 * and the lowest code between calls, so the outcome depends on the order in
 * which results are passed in.
 */
public class DuplicateTimestampFilter implements ObjectFilter<CaptureSearchResult> {

    /**
     * Stand-in value used when a capture's HTTP code cannot be read as an
     * integer; also the initial value of {@link #bestHttpCode}.
     */
    static final int WORST_HTTP_CODE = 9999;

    /** Truncated timestamp of the most recently examined capture, or null before the first one. */
    protected String lastTimestamp;

    /** Lowest HTTP code seen so far among captures sharing {@link #lastTimestamp}. */
    protected int bestHttpCode = WORST_HTTP_CODE;

    /** Number of leading timestamp characters compared; values of zero or less disable filtering. */
    protected int timestampDedupLength;

    /**
     * Creates a filter comparing the given number of leading timestamp characters.
     *
     * @param timestampDedupLength how many leading characters of each capture
     *        timestamp are compared; when zero or negative every result is included
     */
    public DuplicateTimestampFilter(int timestampDedupLength) {
        this.timestampDedupLength = timestampDedupLength;
    }

    /**
     * Decides whether the given capture repeats the timestamp prefix of the
     * previously examined capture without offering a lower HTTP code.
     *
     * @param o the capture to examine
     * @return {@code FILTER_EXCLUDE} when the capture has the same timestamp
     *         prefix as the previous one and its HTTP code is not lower than
     *         the lowest already seen for that prefix; {@code FILTER_INCLUDE}
     *         otherwise
     */
    @Override
    public int filterObject(CaptureSearchResult o) {
        // De-duplication is switched off for non-positive lengths.
        if (timestampDedupLength <= 0) {
            return FILTER_INCLUDE;
        }

        final String fullTimestamp = o.getCaptureTimestamp();
        final int prefixLength = Math.min(timestampDedupLength, fullTimestamp.length());
        final String prefix = fullTimestamp.substring(0, prefixLength);
        final int code = NumberUtils.toInt(o.getHttpCode(), WORST_HTTP_CODE);

        // equals(null) is false, so the very first capture never counts as a repeat.
        final boolean samePrefixAsPrevious = prefix.equals(lastTimestamp);
        lastTimestamp = prefix;

        if (samePrefixAsPrevious && code >= bestHttpCode) {
            return FILTER_EXCLUDE;
        }

        // Either a new prefix started or this capture improved on the best code so far.
        bestHttpCode = code;
        return FILTER_INCLUDE;
    }
}