/* * This file is part of the Heritrix web crawler (crawler.archive.org). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.surt; import junit.framework.TestCase; import org.apache.commons.httpclient.URIException; /** * * * @author brad * @version $Date$, $Revision$ */ public class SURTTokenizerTest extends TestCase { SURTTokenizer tok; /** * Test method for 'org.archive.wayback.accesscontrol.SURTTokenizer.nextSearch()' */ public void testSimple() { tok = toSurtT("http://www.archive.org/foo"); assertEquals("(org,archive,www,)/foo\t",tok.nextSearch()); assertEquals("(org,archive,www,)/foo",tok.nextSearch()); assertEquals("(org,archive,www,",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testSlashPath() { tok = toSurtT("http://www.archive.org/"); assertEquals("(org,archive,www,)/\t",tok.nextSearch()); assertEquals("(org,archive,www,)/",tok.nextSearch()); assertEquals("(org,archive,www,",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testEmptyPath() { tok = toSurtT("http://www.archive.org"); assertEquals("(org,archive,www,)/\t",tok.nextSearch()); assertEquals("(org,archive,www,)/",tok.nextSearch()); assertEquals("(org,archive,www,",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testEmptyPathMore() { tok = toSurtT("http://brad.www.archive.org"); assertEquals("(org,archive,www,brad,)/\t",tok.nextSearch()); assertEquals("(org,archive,www,brad,)/",tok.nextSearch()); assertEquals("(org,archive,www,brad,",tok.nextSearch()); assertEquals("(org,archive,www,brad",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testLongPathMore() { tok = toSurtT("http://brad.www.archive.org/one/two"); assertEquals("(org,archive,www,brad,)/one/two\t",tok.nextSearch()); assertEquals("(org,archive,www,brad,)/one/two",tok.nextSearch()); assertEquals("(org,archive,www,brad,)/one",tok.nextSearch()); assertEquals("(org,archive,www,brad,",tok.nextSearch()); assertEquals("(org,archive,www,brad",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testShortPathHash() { tok = toSurtT("http://www.archive.org/one/two#hash"); assertEquals("(org,archive,www,)/one/two\t",tok.nextSearch()); assertEquals("(org,archive,www,)/one/two",tok.nextSearch()); assertEquals("(org,archive,www,)/one",tok.nextSearch()); assertEquals("(org,archive,www,",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testCGI1() { tok = toSurtT("http://www.archive.org/cgi?foobar"); assertEquals("(org,archive,www,)/cgi?foobar\t",tok.nextSearch()); assertEquals("(org,archive,www,)/cgi?foobar",tok.nextSearch()); assertEquals("(org,archive,www,)/cgi",tok.nextSearch()); assertEquals("(org,archive,www,",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testPort() { tok = toSurtT("http://www.archive.org:8080/cgi?foobar"); assertEquals("(org,archive,www,:8080)/cgi?foobar\t",tok.nextSearch()); assertEquals("(org,archive,www,:8080)/cgi?foobar",tok.nextSearch()); assertEquals("(org,archive,www,:8080)/cgi",tok.nextSearch()); assertEquals("(org,archive,www,:8080",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testLogin() { tok = toSurtT("http://brad@www.archive.org/cgi?foobar"); assertEquals("(org,archive,www,@brad)/cgi?foobar\t",tok.nextSearch()); assertEquals("(org,archive,www,@brad)/cgi?foobar",tok.nextSearch()); assertEquals("(org,archive,www,@brad)/cgi",tok.nextSearch()); assertEquals("(org,archive,www,@brad",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } /** test */ public void testLoginPass() { tok = toSurtT("http://brad:pass@www.archive.org/cgi?foobar"); assertEquals("(org,archive,www,@brad:pass)/cgi?foobar\t",tok.nextSearch()); assertEquals("(org,archive,www,@brad:pass)/cgi?foobar",tok.nextSearch()); assertEquals("(org,archive,www,@brad:pass)/cgi",tok.nextSearch()); assertEquals("(org,archive,www,@brad:pass",tok.nextSearch()); assertEquals("(org,archive,www",tok.nextSearch()); assertEquals("(org,archive",tok.nextSearch()); assertEquals("(org",tok.nextSearch()); assertNull(tok.nextSearch()); } // /** test */ // leave this guy out for now: was a bug in Heritrix thus archive-commons // wait for new jar... // public void testLoginPassPort() { // tok = toSurtT("http://brad:pass@www.archive.org:8080/cgi?foobar"); // assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar\t",tok.nextSearch()); // assertEquals("(org,archive,www,:8080@brad:pass)/cgi?foobar",tok.nextSearch()); // assertEquals("(org,archive,www,:8080@brad:pass)/cgi",tok.nextSearch()); // assertEquals("(org,archive,www,:8080@brad:pass",tok.nextSearch()); // assertEquals("(org,archive,www,:8080",tok.nextSearch()); // assertEquals("(org,archive,www",tok.nextSearch()); // assertEquals("(org,archive",tok.nextSearch()); // assertEquals("(org",tok.nextSearch()); // assertNull(tok.nextSearch()); // } // private SURTTokenizer toSurtT(final String u) { SURTTokenizer tok = null; try { tok = new SURTTokenizer(u); } catch (URIException e) { e.printStackTrace(); assertFalse("URL Exception " + e.getLocalizedMessage(),true); } return tok; } }