TextReplayRenderer.java example

Explorer
wayback-machine-master
/*
 *  This file is part of the Wayback archival access software
 *   (http://archive-access.sourceforge.net/projects/wayback/).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.archive.wayback.replay;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.archive.wayback.ReplayRenderer;
import org.archive.wayback.ResultURIConverter;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.Resource;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.BadContentException;
import org.archive.wayback.replay.charset.CharsetDetector;
import org.archive.wayback.replay.charset.StandardCharsetDetector;

/**
 *
 *
 * @author brad
 * @version $Date$, $Revision$
 */
public abstract class TextReplayRenderer implements ReplayRenderer {

	public static String GUESSED_CHARSET_HEADER = "X-Archive-Guessed-Charset";
	
	private String guessedCharsetHeader = GUESSED_CHARSET_HEADER;
	private List<String> jspInserts = null;
	private HttpHeaderProcessor httpHeaderProcessor;
	private CharsetDetector charsetDetector = new StandardCharsetDetector();

	public TextReplayRenderer(HttpHeaderProcessor httpHeaderProcessor) {
		this.httpHeaderProcessor = httpHeaderProcessor;
	}
	
	protected abstract void updatePage(TextDocument page, 
			HttpServletRequest httpRequest,
			HttpServletResponse httpResponse, WaybackRequest wbRequest,
			CaptureSearchResult result, Resource resource,
			ResultURIConverter uriConverter, CaptureSearchResults results)
		throws ServletException, IOException;

	/* (non-Javadoc)
	 * @see org.archive.wayback.ReplayRenderer#renderResource(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse, org.archive.wayback.core.WaybackRequest, org.archive.wayback.core.SearchResult, org.archive.wayback.core.Resource, org.archive.wayback.ResultURIConverter, org.archive.wayback.core.SearchResults)
	 */
	public void renderResource(HttpServletRequest httpRequest,
			HttpServletResponse httpResponse, WaybackRequest wbRequest,
			CaptureSearchResult result, Resource resource,
			ResultURIConverter uriConverter, CaptureSearchResults results)
			throws ServletException, IOException, BadContentException {

		HttpHeaderOperation.copyHTTPMessageHeader(resource, httpResponse);

		Map<String,String> headers = HttpHeaderOperation.processHeaders(
				resource, result, uriConverter, httpHeaderProcessor);
		
		// Decode resource (such as if gzip encoded)
		resource = decodeResource(resource);
	
		String charSet = charsetDetector.getCharset(resource, wbRequest);
		// Load content into an HTML page, and resolve load-time URLs:
		TextDocument page = new TextDocument(resource,result,uriConverter);
		page.readFully(charSet);
		
		updatePage(page,httpRequest,httpResponse,wbRequest,result,resource,
				uriConverter,results);

		// set the corrected length:
		int bytes = page.getBytes().length;

		headers.put(HttpHeaderOperation.HTTP_LENGTH_HEADER, String.valueOf(bytes));
		if(guessedCharsetHeader != null) {
			headers.put(guessedCharsetHeader, page.getCharSet());
		}

		// send back the headers:
		HttpHeaderOperation.sendHeaders(headers, httpResponse);

		// Tomcat will always send a charset... It's trying to be smarter than
		// we are. If the original page didn't include a "charset" as part of
		// the "Content-Type" HTTP header, then Tomcat will use the default..
		// who knows what that is, or what that will do to the page..
		// let's try explicitly setting it to what we used:
		httpResponse.setCharacterEncoding(page.getCharSet());

		page.writeToOutputStream(httpResponse.getOutputStream());
	}

	/**
	 * @return the jspInserts
	 */
	public List<String> getJspInserts() {
		return jspInserts;
	}

	/**
	 * @param jspInserts the jspInserts to set
	 */
	public void setJspInserts(List<String> jspInserts) {
		this.jspInserts = jspInserts;
	}

	/**
	 * @return the charsetDetector
	 */
	public CharsetDetector getCharsetDetector() {
		return charsetDetector;
	}

	/**
	 * @param charsetDetector the charsetDetector to set
	 */
	public void setCharsetDetector(CharsetDetector charsetDetector) {
		this.charsetDetector = charsetDetector;
	}

	/**
	 * @return the String HTTP Header used to indicate what Wayback determined
	 * was the pages original charset 
	 */
	public String getGuessedCharsetHeader() {
		return guessedCharsetHeader;
	}

	/**
	 * @param guessedCharsetHeader the String HTTP Header value used to indicate
	 * to clients what Wayback determined was the pages original charset. If set
	 * to null, the header will be omitted.
	 */
	public void setGuessedCharsetHeader(String guessedCharsetHeader) {
		this.guessedCharsetHeader = guessedCharsetHeader;
	}
	
	public static Resource decodeResource(Resource resource) throws IOException
	{
		Map<String, String> headers = resource.getHttpHeaders();
		
		if (headers != null) {
			String encoding =  headers.get(HttpHeaderOperation.HTTP_CONTENT_ENCODING);
			if (encoding != null) {
				if (encoding.toLowerCase().equals(GzipDecodingResource.GZIP)) {
					return new GzipDecodingResource(resource);
				}
				
				//TODO: check for other encodings?
			}
		}
		
		return resource;
	}
}