/* * This file is part of the Wayback archival access software * (http://archive-access.sourceforge.net/projects/wayback/). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.wayback.replay; import java.io.IOException; import java.util.List; import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.archive.wayback.ReplayRenderer; import org.archive.wayback.ResultURIConverter; import org.archive.wayback.core.CaptureSearchResult; import org.archive.wayback.core.CaptureSearchResults; import org.archive.wayback.core.Resource; import org.archive.wayback.core.WaybackRequest; import org.archive.wayback.exception.BadContentException; import org.archive.wayback.replay.charset.CharsetDetector; import org.archive.wayback.replay.charset.StandardCharsetDetector; /** * * * @author brad * @version $Date$, $Revision$ */ public abstract class TextReplayRenderer implements ReplayRenderer { public static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset"; private String guessedCharsetHeader = GUESSED_CHARSET_HEADER; private List<String> jspInserts = null; private HttpHeaderProcessor httpHeaderProcessor; private CharsetDetector charsetDetector = new StandardCharsetDetector(); public TextReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) { this.httpHeaderProcessor = httpHeaderProcessor; } protected abstract void updatePage(TextDocument page, HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, CaptureSearchResult result, Resource resource, ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException; /* (non-Javadoc) * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults) */ public void renderResource(HttpServletRequest httpRequest, HttpServletResponse httpResponse, WaybackRequest wbRequest, CaptureSearchResult result, Resource resource, ResultURIConverter uriConverter, CaptureSearchResults results) throws ServletException, IOException, BadContentException { HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse); Map<String,String> headers = HttpHeaderOperation.processHeaders( resource, result, uriConverter, httpHeaderProcessor); // Decode resource (such as if gzip encoded) resource = decodeResource(resource); String charSet = charsetDetector.getCharset(resource, wbRequest); // Load content into an HTML page, and resolve load-time URLs: TextDocument page = new TextDocument(resource,result,uriConverter); page.readFully(charSet); updatePage(page,httpRequest,httpResponse,wbRequest,result,resource, uriConverter,results); // set the corrected length: int bytes = page.getBytes().length; headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER, String.valueOf(bytes)); if(guessedCharsetHeader != null) { headers.put(guessedCharsetHeader, page.getCharSet()); } // send back the headers: HttpHeaderOperation.sendHeaders(headers, httpResponse); // Tomcat will always send a charset... It's trying to be smarter than // we are. If the original page didn't include a "charset" as part of // the "Content-Type" HTTP header, then Tomcat will use the default.. // who knows what that is, or what that will do to the page.. // let's try explicitly setting it to what we used: httpResponse.setCharacterEncoding(page.getCharSet()); page.writeToOutputStream(httpResponse.getOutputStream()); } /** * @return the jspInserts */ public List<String> getJspInserts() { return jspInserts; } /** * @param jspInserts the jspInserts to set */ public void setJspInserts(List<String> jspInserts) { this.jspInserts = jspInserts; } /** * @return the charsetDetector */ public CharsetDetector getCharsetDetector() { return charsetDetector; } /** * @param charsetDetector the charsetDetector to set */ public void setCharsetDetector(CharsetDetector charsetDetector) { this.charsetDetector = charsetDetector; } /** * @return the String HTTP Header used to indicate what Wayback determined * was the pages original charset */ public String getGuessedCharsetHeader() { return guessedCharsetHeader; } /** * @param guessedCharsetHeader the String HTTP Header value used to indicate * to clients what Wayback determined was the pages original charset. If set * to null, the header will be omitted. */ public void setGuessedCharsetHeader(String guessedCharsetHeader) { this.guessedCharsetHeader = guessedCharsetHeader; } public static Resource decodeResource(Resource resource) throws IOException { Map<String, String> headers = resource.getHttpHeaders(); if (headers != null) { String encoding = headers.get(HttpHeaderOperation.HTTP_CONTENT_ENCODING); if (encoding != null) { if (encoding.toLowerCase().equals(GzipDecodingResource.GZIP)) { return new GzipDecodingResource(resource); } //TODO: check for other encodings? } } return resource; } }