/* * This file is part of the Heritrix web crawler (crawler.archive.org). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.modules.canonicalize; import org.apache.commons.httpclient.URIException; import org.archive.util.TmpDirTestCase; /** * Test canonicalization * * @contributor stack */ public class RulesCanonicalizationPolicyTest extends TmpDirTestCase { private RulesCanonicalizationPolicy policy; protected void setUp() throws Exception { super.setUp(); policy = new RulesCanonicalizationPolicy(); // this.rules = new ArrayList<CanonicalizationRule>(); // this.rules.add(new LowercaseRule()); // this.rules.add(new StripUserinfoRule()); // this.rules.add(new StripWWWRule()); // this.rules.add(new StripSessionIDs()); // this.rules.add(new FixupQueryString()); } public void testCanonicalize() throws URIException { final String scheme = "http://"; final String nonQueryStr = "archive.org/index.html"; final String result = scheme + nonQueryStr; assertTrue("Mangled original", result.equals( policy.canonicalize(result))); String tmp = scheme + "www." + nonQueryStr; assertTrue("Mangled www", result.equals( policy.canonicalize(tmp))); tmp = scheme + "www." + nonQueryStr + "?jsessionid=01234567890123456789012345678901"; assertTrue("Mangled sessionid", result.equals( policy.canonicalize(tmp))); tmp = scheme + "www." + nonQueryStr + "?jsessionid=01234567890123456789012345678901"; assertTrue("Mangled sessionid", result.equals( policy.canonicalize(tmp))); } }