/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.resourceindex.filterfactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.URIException;
import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.SearchResults;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.BadQueryException;
import org.archive.wayback.resourceindex.filters.DateRangeFilter;
import org.archive.wayback.resourceindex.filters.EndDateFilter;
import org.archive.wayback.resourceindex.filters.HostMatchFilter;
import org.archive.wayback.resourceindex.filters.SchemeMatchFilter;
import org.archive.wayback.resourceindex.filters.SelfRedirectFilter;
import org.archive.wayback.resourceindex.filters.UrlMatchFilter;
import org.archive.wayback.resourceindex.filters.UrlPrefixMatchFilter;
import org.archive.wayback.util.ObjectFilter;
import org.archive.wayback.util.ObjectFilterChain;
import org.archive.wayback.util.Timestamp;
import org.archive.wayback.util.url.UrlOperations;
/**
 * {@link CaptureFilterGroup} that derives URL, date, host and scheme filters
 * from a {@link WaybackRequest}, and afterwards copies the effective query
 * parameters (plus any collected "close match" URLs) onto the
 * {@link SearchResults}.
 */
public class QueryCaptureFilterGroup implements CaptureFilterGroup {
    /** Filters assembled by the constructor, in the order they were added. */
    private final ObjectFilterChain<CaptureSearchResult> chain =
            new ObjectFilterChain<CaptureSearchResult>();
    /** Value of {@link WaybackRequest#REQUEST_TYPE} on the request. */
    private String requestType = null;
    /** Canonicalized key form of the request URL. */
    private String keyUrl = null;
    /** Start of the requested range; earliest timestamp if unspecified. */
    private String startDate;
    /** End of the requested range; latest timestamp if unspecified. */
    private String endDate;
    /** Replay target timestamp; stays null unless this is a replay request. */
    private String exactDate;
    /**
     * URL Strings that are "close" to the current request, but not
     * included in the current CaptureSearchResults, keyed by host. Only one
     * close match is kept per host: a later entry replaces an earlier one.
     */
    private final Map<String,String> closeMatches =
            new HashMap<String,String>();

    /**
     * Builds the filter chain appropriate for the type of the given request.
     *
     * @param request the request whose URL, dates and flags select the
     *        filters
     * @param canonicalizer used to turn the request URL into its key form,
     *        and handed to the {@link SelfRedirectFilter} for replay requests
     * @throws BadQueryException if the request URL cannot be canonicalized
     */
    public QueryCaptureFilterGroup(WaybackRequest request,
            UrlCanonicalizer canonicalizer)
            throws BadQueryException {

        requestType = request.get(WaybackRequest.REQUEST_TYPE);

        // URL-Filters:
        try {
            keyUrl = canonicalizer.urlStringToKey(request.getRequestUrl());
        } catch (URIException e) {
            // NOTE(review): the URIException is not chained as the cause;
            // BadQueryException's constructors are not visible from here.
            throw new BadQueryException("Bad request URL(" +
                    request.getRequestUrl() +")");
        }

        // Date-Filters: fall back to the widest possible range when the
        // request leaves either bound unspecified.
        startDate = request.getStartTimestamp();
        if(startDate == null) {
            startDate = Timestamp.earliestTimestamp().getDateStr();
        }
        endDate = request.getEndTimestamp();
        if(endDate == null) {
            endDate = Timestamp.latestTimestamp().getDateStr();
        }

        if(request.isReplayRequest()) {
            exactDate = request.getReplayTimestamp();
            if(exactDate == null) {
                exactDate = Timestamp.latestTimestamp().getDateStr();
            }
            // Replay requests are filtered by exact URL only; no date
            // filter is added to the chain in this branch.
            chain.addFilter(new UrlMatchFilter(keyUrl));
            chain.addFilter(new SelfRedirectFilter(canonicalizer));

        } else if(request.isCaptureQueryRequest()) {
            chain.addFilter(new UrlMatchFilter(keyUrl));
            // OPTIMIZE: EndDateFilter is a hard stop: ABORT
            // DateRangeFilter is an INCLUDE/EXCLUDE
            // one class which EXCLUDEs before startDate, and ABORTs
            // after endDate would save a compare..
            chain.addFilter(new EndDateFilter(endDate));
            chain.addFilter(new DateRangeFilter(startDate, endDate));

        } else if(request.isUrlQueryRequest()) {
            chain.addFilter(new UrlPrefixMatchFilter(keyUrl));
            chain.addFilter(new DateRangeFilter(startDate, endDate));
        }

        // Other Filters:
        // Both filters below receive a reference to this group; presumably
        // they report near-miss URLs through addCloseMatch() -- confirm
        // against the filter implementations.
        if(request.isExactHost()) {
            chain.addFilter(
                    new HostMatchFilter(
                            UrlOperations.urlToHost(request.getRequestUrl()),
                            this)
            );
        }
        if(request.isExactScheme()) {
            chain.addFilter(new SchemeMatchFilter(
                    UrlOperations.urlToScheme(request.getRequestUrl()),this));
        }
    }

    /**
     * @return the filters assembled by the constructor, as exposed by the
     *         underlying {@link ObjectFilterChain}
     */
    public List<ObjectFilter<CaptureSearchResult>> getFilters() {
        return chain.getFilters();
    }

    /**
     * Records the effective query parameters on the results: the key URL,
     * start and end dates, request type, the exact date when one was set,
     * and the collected close matches when there are any.
     *
     * @param results the results to annotate
     */
    public void annotateResults(SearchResults results) {
        // set the filter properties on the results:
        results.putFilter(WaybackRequest.REQUEST_URL, keyUrl);
        results.putFilter(WaybackRequest.REQUEST_START_DATE, startDate);
        results.putFilter(WaybackRequest.REQUEST_END_DATE, endDate);
        if(exactDate != null) {
            results.putFilter(WaybackRequest.REQUEST_EXACT_DATE, exactDate);
        }
        results.putFilter(WaybackRequest.REQUEST_TYPE, requestType);
        if(!closeMatches.isEmpty()) {
            // hand over a copy so the results do not share this group's map
            results.setCloseMatches(
                    new ArrayList<String>(closeMatches.values()));
        }
    }

    /**
     * Remembers a close match for the given host, replacing any close match
     * previously stored for that same host.
     *
     * @param host key under which the close match is stored
     * @param closeMatch the close-match URL String
     */
    public void addCloseMatch(String host, String closeMatch) {
        closeMatches.put(host, closeMatch);
    }
}