/**
*
*/
package org.archive.wayback.resourcestore.resourcefile;
import junit.framework.TestCase;
import org.archive.io.arc.ARCRecord;
import org.archive.io.arc.TestARCReader;
import org.archive.io.warc.TestWARCRecordInfo;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.wayback.core.Resource;
import org.archive.wayback.replay.GzipDecodingResource;
import org.archive.wayback.replay.TextReplayRenderer;
/**
* @author kenji
*
*/
public class ArcResourceTest extends TestCase {
/* (non-Javadoc)
* @see junit.framework.TestCase#setUp()
*/
protected void setUp() throws Exception {
super.setUp();
}
/**
* plain HTTP response, without transfer/content-encoding.
* @throws Exception
*/
public void testPlainHttpRecord() throws Exception {
String payload = "hogehogehogehogehoge";
WARCRecordInfo recinfo = TestWARCRecordInfo.createHttpResponse(payload);
recinfo.setMimetype("text/plain");
TestARCReader ar = new TestARCReader(recinfo);
ARCRecord rec = ar.get(0);
ArcResource res = new ArcResource(rec, ar);
res.parseHeaders();
assertEquals("statusCode", 200, res.getStatusCode());
assertEquals("content-type", "text/plain", res.getHeader("Content-Type"));
byte[] buf = new byte[payload.getBytes().length + 1];
int n = res.read(buf);
assertEquals("content length", buf.length - 1, n);
res.close();
}
/**
* uncompressed, but chunked-encoded HTTP response.
* @throws Exception
*/
public void testPlainChunkedHttpRecord() throws Exception {
String payload = "hogehogehogehogehoge";
WARCRecordInfo recinfo = new TestWARCRecordInfo(
TestWARCRecordInfo.buildHttpResponseBlock("200 OK",
"text/plain", payload.getBytes("UTF-8"), true));
recinfo.setMimetype("text/plain");
TestARCReader ar = new TestARCReader(recinfo);
ARCRecord rec = ar.get(0);
ArcResource res = new ArcResource(rec, ar);
res.parseHeaders();
assertEquals("statusCode", 200, res.getStatusCode());
assertEquals("content-type", "text/plain", res.getHeader("Content-Type"));
byte[] buf = new byte[payload.getBytes().length + 1];
int n = res.read(buf);
assertEquals("content length", buf.length - 1, n);
res.close();
}
/**
* gzip-compressed HTTP response.
* @throws Exception
*/
public void testCompressedHttpRecord() throws Exception {
String payload = "hogehogehogehogehoge";
String ctype = "text/plain";
WARCRecordInfo recinfo = new TestWARCRecordInfo(
TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype,
payload.getBytes()));
recinfo.setMimetype(ctype);
TestARCReader ar = new TestARCReader(recinfo);
ARCRecord rec = ar.get(0);
ArcResource res = new ArcResource(rec, ar);
res.parseHeaders();
assertEquals("statusCode", 200, res.getStatusCode());
assertEquals("content-type", ctype, res.getHeader("Content-Type"));
Resource zres = TextReplayRenderer.decodeResource(res);
assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
byte[] buf = new byte[payload.getBytes().length + 1];
int n = zres.read(buf);
assertEquals("content length", buf.length - 1, n);
res.close();
}
/**
* gzip-compressed and chunk-encoded HTTP response.
* @throws Exception
*/
public void testCompressedChunkedHttpRecord() throws Exception {
String payload = "hogehogehogehogehoge";
String ctype = "text/plain";
WARCRecordInfo recinfo = new TestWARCRecordInfo(
TestWARCRecordInfo.buildCompressedHttpResponseBlock(ctype,
payload.getBytes(), true));
recinfo.setMimetype(ctype);
TestARCReader ar = new TestARCReader(recinfo);
ARCRecord rec = ar.get(0);
ArcResource res = new ArcResource(rec, ar);
res.parseHeaders();
assertEquals("statusCode", 200, res.getStatusCode());
assertEquals("content-type", ctype, res.getHeader("Content-Type"));
Resource zres = TextReplayRenderer.decodeResource(res);
assertTrue("wrapped with GzipDecodingResource", (zres instanceof GzipDecodingResource));
byte[] buf = new byte[payload.getBytes().length + 1];
int n = zres.read(buf);
assertEquals("content length", buf.length - 1, n);
res.close();
}
// TODO: add more tests on various Transfer-Encoding and Content-Encoding.
// TODO: add more tests on corner cases.
// NOTE: ARC revisit records have zero-length content, and ArcResource never gets created for them.
// thus we don't need a test case for revisit ARCRecord.
// /**
// * new, current revisit record, which has just HTTP response line and
// * headers part of the capture.
// * <p>Expectations:
// * TextReplayRender receives revisit WarcResource as {@code httpHeaderResource},
// * and calls following methods on it:</p>
// * <ul>
// * <li>{@link WarcResource#getStatusCode()}</li>
// * <li>{@link WarcResource#getHttpHeaders()} (ok to return null)</li>
// * </ul>
// * @throws Exception
// */
// public void testRevisitRecord() throws Exception {
// final String ct = "text/html";
// WARCRecordInfo recinfo = TestWARCRecordInfo.createRevisitHttpResponse(ct, 1345, false);
// recinfo.setMimetype(ct);
// TestARCReader ar = new TestARCReader(recinfo);
// ARCRecord rec = ar.get(0);
// ArcResource res = new ArcResource(rec, ar);
// res.parseHeaders();
//
// // these are from this record.
// assertEquals("statusCode", 200, res.getStatusCode());
// assertEquals("content-type", ct, res.getHeader("Content-Type"));
//
// StandardCharsetDetector csd = new StandardCharsetDetector();
// // assuming WaybackRequest (3rd parameter) is not used in getCharset()
// csd.getCharset(res, res, null);
//
// res.close();
// }
// disabled because it seems to be a general assumption that there exists no
// ARC records for FTP (or non-HTTP).
// /**
// * ARC record for ftp fetches.
// * @throws Exception
// */
// public void testResourceRecord() throws Exception {
// final String ct = "text/plain";
// final byte[] block = "blahblahblah\n".getBytes();
// WARCRecordInfo recinfo = new TestWARCRecordInfo(block);
// //recinfo.setType(WARCRecordType.resource);
// recinfo.setUrl("ftp://ftp.example.com/afile.txt");
// recinfo.setMimetype(ct);
// TestARCReader ar = new TestARCReader(recinfo);
// ARCRecord rec = (ARCRecord)ar.get(0);
// ArcResource res = new ArcResource(rec, ar);
// res.parseHeaders();
//
// int scode = res.getStatusCode();
// assertEquals("statusCode", 200, scode);
//
// Map<String, String> headers = res.getHttpHeaders();
// assertNotNull("headers", headers);
//
// assertEquals("content-type", ct, res.getHeader("Content-Type"));
//
// // must have Date header, in HTTP Date format.
// String date = res.getHeader("Date");
// assertNotNull("has date header", date);
// new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z").parse(date);
//
// res.close();
// }
}