/*
* This file is part of the Wayback archival access software
* (http://archive-access.sourceforge.net/projects/wayback/).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.wayback.core;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import org.apache.commons.httpclient.ChunkedInputStream;
/**
* Abstraction on top of a document stored in a WaybackCollection. Currently
* implemented subclasses include ArcResource and WarcResource.
*
* This implementation needs some pretty drastic refactoring.. May have to wait
* for 2.0. This should be a byte-oriented record, and allow wrapping the
* interior byte-stream in on the more full featured HTTP libraries
* (jetty/apache-http-client/w3c-http-reference).
*
* For now, it is a system-wide assumption that all resources are HTTP based.
*
* @author Brad Tofel
* @version $Date$, $Revision$
*/
public abstract class Resource extends InputStream {
private InputStream is;
public abstract void close() throws IOException;
/**
* Assumes an HTTP resource - return the HTTP response code
* @return the HTTP response code from the HTTP message
*/
public abstract int getStatusCode();
/**
* @return the size in bytes of the record payload, including HTTP header
*/
public abstract long getRecordLength();
/**
* Assumes an HTTP response - return the HTTP headers, not including the
* HTTP Message header
* @return key-value Map of HTTP headers
*/
public abstract Map<String,String> getHttpHeaders();
private void validate() throws IOException {
if(is == null) {
throw new IOException("No InputStream");
}
}
protected void setInputStream(InputStream is) {
if(is.markSupported()) {
this.is = is;
} else {
this.is = new BufferedInputStream(is);
}
}
/**
* indicate that there is a Transfer-Encoding: chunked header, so the input
* data should be dechunked as it is read. This method actually peeks
* ahead to verify that there is a hex-encoded chunk length before
* assuming the data is chunked.
* @throws IOException for usual reasons
*/
public void setChunkedEncoding() throws IOException {
validate();
// peek ahead and make sure we have a line with hex numbers:
int max = 50;
is.mark(max+2);
int cur = 0;
int hexFound = 0;
boolean isChunked = false;
while(cur < max) {
int nextC = is.read();
// allow CRLF and plain ole LF:
if((nextC == 13) || (nextC == 10)) {
// must have read at least 1 hex char:
if(hexFound > 0) {
if(nextC == 10) {
isChunked = true;
break;
}
nextC = is.read();
if(nextC == 10) {
isChunked = true;
break;
}
}
// keep looking to allow some blank lines.
} else {
// better be a hex character:
if(isHex(nextC)) {
hexFound++;
} else if(nextC != ' ') {
// allow whitespace before or after chunk...
// not a hex digit: not a chunked stream.
break;
}
}
cur++;
}
is.reset();
if(isChunked) {
setInputStream(new ChunkedInputStream(is));
}
}
private boolean isHex(int c) {
if((c >= '0') && (c <= '9')) {
return true;
}
if((c >= 'a') && (c <= 'f')) {
return true;
}
if((c >= 'A') && (c <= 'F')) {
return true;
}
return false;
}
public int available() throws IOException {
validate();
return is.available();
}
public void mark(int readlimit) {
if(is != null) {
is.mark(readlimit);
}
}
public boolean markSupported() {
if(is == null) {
return false;
}
return is.markSupported();
}
public int read() throws IOException {
validate();
return is.read();
}
public int read(byte[] b, int off, int len) throws IOException {
validate();
return is.read(b, off, len);
}
public int read(byte[] b) throws IOException {
validate();
return is.read(b);
}
public void reset() throws IOException {
validate();
is.reset();
}
public long skip(long n) throws IOException {
validate();
return is.skip(n);
}
}