/* * This file is part of the Heritrix web crawler (crawler.archive.org). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.net; import java.io.PrintWriter; import java.io.StringWriter; import java.util.ArrayList; import java.util.regex.Matcher; import junit.framework.TestCase; import org.archive.net.PublicSuffixes.Node; /** * Test cases for PublicSuffixes utility. Confirm expected matches/nonmatches * from constructed regex. * * @author gojomo */ public class PublicSuffixesTest extends TestCase { // test of low level implementation private final String NL = System.getProperty("line.separator"); public void testCompare() { Node n = new Node("hoge"); assertTrue(n.compareTo('a') > 0); assertEquals(-1, n.compareTo('*')); assertEquals(-1, n.compareTo('!')); assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(-1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); n = new Node("*,"); assertEquals(1, n.compareTo('a')); assertEquals(0, n.compareTo('*')); assertEquals(1, n.compareTo('!')); assertEquals(0, n.compareTo(new Node("*,"))); assertEquals(1, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); n = new Node("!hoge"); assertEquals(1, n.compareTo('a')); assertEquals(-1, n.compareTo('*')); assertEquals(0, n.compareTo('!')); assertEquals(-1, n.compareTo(new Node("*,"))); assertEquals(0, n.compareTo(new Node("!muga,"))); assertEquals(-1, n.compareTo(new Node(""))); n = new Node(""); assertEquals(1, n.compareTo('a')); assertEquals(1, n.compareTo('*')); assertEquals(1, n.compareTo('!')); assertEquals(0, n.compareTo(new Node(""))); } protected String dump(Node alt) { StringWriter w = new StringWriter(); PublicSuffixes.dump(alt, 0, new PrintWriter(w)); return w.toString(); } public void testTrie1() { Node alt = new Node(null, new ArrayList<Node>()); alt.addBranch("ac,"); // specifically, should not have empty string as match. assertEquals("(null)" + NL + " \"ac,\"" + NL, dump(alt)); alt.addBranch("ac,com,"); assertEquals("(null)" + NL + " \"ac,\"" + NL + " \"com,\"" + NL + " \"\"" + NL, dump(alt)); alt.addBranch("ac,edu,"); assertEquals("(null)" + NL + " \"ac,\"" + NL + " \"com,\"" + NL + " \"edu,\"" + NL + " \"\"" + NL, dump(alt)); } public void testTrie2() { Node alt = new Node(null, new ArrayList<Node>()); alt.addBranch("ac,"); alt.addBranch("*,"); assertEquals("(null)" + NL + " \"ac,\"" + NL + " \"*,\"" + NL, dump(alt)); } public void testTrie3() { Node alt = new Node(null, new ArrayList<Node>()); alt.addBranch("ac,"); alt.addBranch("ac,!hoge,"); alt.addBranch("ac,*,"); // exception goes first. assertEquals("(null)" + NL + " \"ac,\"" + NL + " \"!hoge,\"" + NL + " \"*,\"" + NL + " \"\"" + NL, dump(alt)); } // test of higher-level functionality Matcher m = PublicSuffixes.getTopmostAssignedSurtPrefixPattern() .matcher(""); public void testBasics() { matchPrefix("com,example,www,", "com,example,"); matchPrefix("com,example,", "com,example,"); matchPrefix("org,archive,www,", "org,archive,"); matchPrefix("org,archive,", "org,archive,"); matchPrefix("fr,yahoo,www,", "fr,yahoo,"); matchPrefix("fr,yahoo,", "fr,yahoo,"); matchPrefix("au,com,foobar,www,", "au,com,foobar,"); matchPrefix("au,com,foobar,", "au,com,foobar,"); matchPrefix("uk,co,virgin,www,", "uk,co,virgin,"); matchPrefix("uk,co,virgin,", "uk,co,virgin,"); matchPrefix("au,com,example,www,", "au,com,example,"); matchPrefix("au,com,example,", "au,com,example,"); matchPrefix("jp,yokohama,public,assigned,www,", "jp,yokohama,public,assigned,"); matchPrefix("jp,yokohama,public,assigned,", "jp,yokohama,public,assigned,"); } public void testDomainWithDash() { matchPrefix("de,bad-site,www", "de,bad-site,"); } public void testDomainWithNumbers() { matchPrefix("de,archive4u,www", "de,archive4u,"); } public void testIPV4() { assertEquals("unexpected reduction", "1.2.3.4", PublicSuffixes.reduceSurtToAssignmentLevel("1.2.3.4")); } public void testIPV6() { assertEquals("unexpected reduction", "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]", PublicSuffixes.reduceSurtToAssignmentLevel( "[2001:0db8:85a3:08d3:1319:8a2e:0370:7344]")); } public void testExceptions() { matchPrefix("uk,bl,www,", "uk,bl,"); matchPrefix("uk,bl,", "uk,bl,"); matchPrefix("jp,tokyo,city,subdomain,", "jp,tokyo,city,"); matchPrefix("jp,tokyo,city,", "jp,tokyo,city,"); } public void testFakeTLD() { // we assume any new/unknonwn TLD should be assumed as 2-level; // this is preferable for our grouping purpose but might not be // for a cookie-assigning browser (original purpose of publicsuffixlist) matchPrefix("zzz,example,www,", "zzz,example,"); } public void testUnsegmentedHostname() { m.reset("example"); assertFalse("unexpected match found in 'example'", m.find()); } public void testTopmostAssignedCaching() { assertSame("topmostAssignedSurtPrefixPattern not cached",PublicSuffixes.getTopmostAssignedSurtPrefixPattern(),PublicSuffixes.getTopmostAssignedSurtPrefixPattern()); assertSame("topmostAssignedSurtPrefixRegex not cached",PublicSuffixes.getTopmostAssignedSurtPrefixRegex(),PublicSuffixes.getTopmostAssignedSurtPrefixRegex()); } // TODO: test UTF domains? protected void matchPrefix(String surtDomain, String expectedAssignedPrefix) { m.reset(surtDomain); assertTrue("expected match not found in '" + surtDomain, m.find()); assertEquals("expected match not found", expectedAssignedPrefix, m .group()); } }