package org.archive.url;
import org.apache.commons.httpclient.URIException;
import junit.framework.TestCase;
/**
 * Unit tests for the static helpers of {@link URLRegexTransformer} exercised here:
 * {@code stripPathSessionID}, {@code stripQuerySessionID} and {@code hostToSURT}.
 */
public class URLRegexTransformerTest extends TestCase {

    /**
     * Prefix placed in front of the query strings built in
     * {@link #testStripQuerySessionID()}. It is empty, so those inputs begin at the '?'.
     */
    private static final String BASE = "";

    /**
     * Checks that session ids embedded in a path segment are removed and that a path
     * without one is returned unchanged.
     */
    public void testStripPathSessionID() {
        // ASP_SESSIONID2: "(S(<id>))" path segment.
        checkStripPathSessionID(
                "/(S(4hqa0555fwsecu455xqckv45))/mileg.aspx",
                "/mileg.aspx");
        // ASP_SESSIONID2 (again): bare "(<id>)" path segment.
        checkStripPathSessionID(
                "/(4hqa0555fwsecu455xqckv45)/mileg.aspx",
                "/mileg.aspx");
        // ASP_SESSIONID3: several "x(<id>)" groups in one segment; the query string is kept.
        checkStripPathSessionID(
                "/(a(4hqa0555fwsecu455xqckv45)S(4hqa0555fwsecu455xqckv45)f(4hqa0555fwsecu455xqckv45))/mileg.aspx?page=sessionschedules",
                "/mileg.aspx?page=sessionschedules");
        // '@' in path: must come back unchanged.
        checkStripPathSessionID(
                "/photos/36050182@N05/",
                "/photos/36050182@N05/");
    }

    /**
     * Asserts that {@code URLRegexTransformer.stripPathSessionID(orig)} equals {@code want}.
     *
     * @param orig path handed to the transformer
     * @param want expected result
     */
    private static void checkStripPathSessionID(String orig, String want) {
        String got = URLRegexTransformer.stripPathSessionID(orig);
        // assertEquals reports expected and actual values itself on failure.
        assertEquals("stripPathSessionID(" + orig + ")", want, got);
    }

    /**
     * Asserts that {@code URLRegexTransformer.stripQuerySessionID(orig)} equals {@code want}.
     *
     * @param orig query string (including the leading '?') handed to the transformer
     * @param want expected result
     * @throws URIException propagated from the calling test's declared contract
     */
    private static void checkStripQuerySessionID(String orig, String want)
            throws URIException {
        String got = URLRegexTransformer.stripQuerySessionID(orig);
        assertEquals("stripQuerySessionID(" + orig + ")", want, got);
    }

    /**
     * Checks removal of session-id parameters (jsessionid, aspsessionid..., phpsessid,
     * sid, CFID/CFTOKEN) from query strings, alone and next to other parameters.
     *
     * @throws URIException declared by the original test; kept for compatibility
     */
    public void testStripQuerySessionID() throws URIException {
        // 32-character session id value shared by the cases below.
        String str32id = "0123456789abcdefghijklemopqrstuv";

        // jsessionid as the only parameter: only the '?' is left.
        checkStripQuerySessionID(BASE + "?jsessionid=" + str32id, BASE + "?");

        // Test that we don't strip if not 32 chars only.
        String tooLong = BASE + "?jsessionid=" + str32id + "0";
        checkStripQuerySessionID(tooLong, tooLong);

        // Followed by another key/value pair.
        checkStripQuerySessionID(
                BASE + "?jsessionid=" + str32id + "&x=y",
                BASE + "?x=y");

        // Followed by another key/value pair and prefixed by a key/value pair.
        checkStripQuerySessionID(
                BASE + "?one=two&jsessionid=" + str32id + "&x=y",
                BASE + "?one=two&x=y");

        // Prefixed by a key/value pair: the separating '&' stays behind.
        checkStripQuerySessionID(
                BASE + "?one=two&jsessionid=" + str32id,
                BASE + "?one=two&");

        // Test aspsession.
        checkStripQuerySessionID(
                BASE + "?aspsessionidABCDEFGH=" + "ABCDEFGHIJKLMNOPQRSTUVWX" + "&x=y",
                BASE + "?x=y");

        // Test archive phpsession.
        checkStripQuerySessionID(
                BASE + "?phpsessid=" + str32id + "&x=y",
                BASE + "?x=y");

        // With prefix too.
        checkStripQuerySessionID(
                BASE + "?one=two&phpsessid=" + str32id + "&x=y",
                BASE + "?one=two&x=y");

        // With only prefix.
        checkStripQuerySessionID(
                BASE + "?one=two&phpsessid=" + str32id,
                BASE + "?one=two&");

        // Test sid.
        checkStripQuerySessionID(
                BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&x=y",
                BASE + "?x=y");

        // sid followed by jsessionid: both are stripped.
        checkStripQuerySessionID(
                BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&" + "jsessionid=" + str32id,
                BASE + "?");

        // CFID/CFTOKEN pairs: stripped together, any following parameters are kept.
        checkStripQuerySessionID(
                "?CFID=1169580&CFTOKEN=48630702&dtstamp=22%2F08%2F2006%7C06%3A58%3A11",
                "?dtstamp=22%2F08%2F2006%7C06%3A58%3A11");
        checkStripQuerySessionID(
                "?CFID=12412453&CFTOKEN=15501799&dt=19_08_2006_22_39_28",
                "?dt=19_08_2006_22_39_28");
        checkStripQuerySessionID(
                "?CFID=14475712&CFTOKEN=2D89F5AF-3048-2957-DA4EE4B6B13661AB&r=468710288378&m=forgotten",
                "?r=468710288378&m=forgotten");
        checkStripQuerySessionID(
                "?CFID=16603925&CFTOKEN=2AE13EEE-3048-85B0-56CEDAAB0ACA44B8",
                "?");
        checkStripQuerySessionID(
                "?CFID=4308017&CFTOKEN=63914124&requestID=200608200458360%2E39414378",
                "?requestID=200608200458360%2E39414378");
    }

    /** Checks that {@code hostToSURT("www.archive.org")} yields {@code "org,archive,www"}. */
    public void testSURT() {
        assertEquals("org,archive,www", URLRegexTransformer.hostToSURT("www.archive.org"));
    }
}