/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.replay;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.util.url.UrlOperations;
/**
* {@link HttpHeaderProcessor} that preserves all headers by prepending a prefix,
* translates URL in resource location headers and pass-through certain headers.
* <p>Headers rewritten:
* <ul>
* <li>{@code Location}</li>
* <li>{@code Content-Location}</li>
* <li>{@code Content-Base}</li>
* </ul>
* Headers passed-through:
* <ul>
* <li>{@code Content-Type}</li>
* <li>{@code Content-Disposition}</li>
* </ul>
* </p>
* <p>If {@code prefix} property is {@code null} (default), all headers but {@code Content-Length}
* are copied as they are. With non-{@code null} prefix, all headers, including
* {@code Length} are preserved by prepending header name with {@code prefix}.</p>
* <p>Caveat: if {@code prefix} is an empty string, all headers including {@code Content-Length}
* are copied as they are. This is presumably a bug.</p>
* <p>Intended for archival-URL and domain-prefix mode.</p>
*
* @author brad
*/
public class RedirectRewritingHttpHeaderProcessor extends PreservingHttpHeaderProcessor {
private Set<String> passThroughHeaders = null;
private Set<String> rewriteHeaders = null;
private Set<String> dropHeaders;
public RedirectRewritingHttpHeaderProcessor() {
passThroughHeaders = new HashSet<String>();
passThroughHeaders.add(HTTP_CONTENT_TYPE_HEADER_UP);
passThroughHeaders.add(HTTP_CONTENT_DISP_HEADER_UP);
passThroughHeaders.add(HTTP_CONTENT_RANGE_HEADER_UP);
rewriteHeaders = new HashSet<String>();
rewriteHeaders.add(HTTP_LOCATION_HEADER_UP);
rewriteHeaders.add(HTTP_CONTENT_LOCATION_HEADER_UP);
rewriteHeaders.add(HTTP_CONTENT_BASE_HEADER_UP);
dropHeaders = new HashSet<String>();
dropHeaders.add(HTTP_LENGTH_HEADER_UP);
dropHeaders.add(HTTP_TRANSFER_ENCODING_HEADER_UP);
}
/* (non-Javadoc)
* @see org.archive.wayback.replay.HttpHeaderProcessor#filter(java.util.Map, java.lang.String, java.lang.String, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.CaptureSearchResult)
*/
public void filter(Map<String, String> output, String key, String value,
ResultURIConverter uriConverter, CaptureSearchResult result) {
String keyUp = key.toUpperCase();
// first stick it in as-is, or with prefix, then maybe we'll overwrite
// with the later logic.
if (dropHeaders.contains(keyUp))
preserve(output, key, value);
else
preserveAlways(output, key, value);
// rewrite Location header URLs
if(rewriteHeaders.contains(keyUp)) {
String baseUrl = result.getOriginalUrl();
String cd = result.getCaptureTimestamp();
// by the spec, these should be absolute already, but just in case:
String u = UrlOperations.resolveUrl(baseUrl, value);
output.put(key, uriConverter.makeReplayURI(cd,u));
} else if(passThroughHeaders.contains(keyUp)) {
// let's leave this one as-is:
output.put(key,value);
}
}
}