/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.ingestion.google.webmaster; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.commons.lang3.tuple.Triple; import org.testng.Assert; import org.testng.annotations.Test; import gobblin.ingestion.google.webmaster.GoogleWebmasterFilter.FilterOperator; @Test(groups = {"gobblin.source.extractor.extract.google.webmaster"}) public class UrlTriePrefixGrouperTest { private String _property = "www.linkedin.com/"; /** * The trie is: * / * 0 * 1* 2* */ @Test public void testGrouping1() { UrlTrie trie = new UrlTrie(_property, Arrays.asList(_property + "01", _property + "02")); UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie, 1); ArrayList<String> chars = new ArrayList<>(); ArrayList<FilterOperator> operators = new ArrayList<>(); while (grouper.hasNext()) { Triple<String, FilterOperator, UrlTrieNode> group = grouper.next(); chars.add(group.getLeft()); operators.add(group.getMiddle()); } Assert.assertEquals(new String[]{_property + "01", _property + "02"}, chars.toArray()); Assert.assertEquals(new FilterOperator[]{FilterOperator.CONTAINS, FilterOperator.CONTAINS}, operators.toArray()); } /** * The trie is: * / * 0* * 1* 2* */ @Test public void testGrouping2() { UrlTrie trie = new UrlTrie(_property, Arrays.asList(_property + "0", _property + "01", _property + "02")); UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie, 1); ArrayList<String> chars = new ArrayList<>(); ArrayList<FilterOperator> operators = new ArrayList<>(); while (grouper.hasNext()) { Triple<String, FilterOperator, UrlTrieNode> group = grouper.next(); chars.add(group.getLeft()); operators.add(group.getMiddle()); } Assert.assertEquals(new String[]{_property + "01", _property + "02", _property + "0"}, chars.toArray()); Assert.assertEquals(new FilterOperator[]{FilterOperator.CONTAINS, FilterOperator.CONTAINS, FilterOperator.EQUALS}, operators.toArray()); } /** * The trie is: * / * 0 1 2 * 3 4 5 6 * 7 */ @Test public void testTrie2GroupingWithSize3() { UrlTrie trie = UrlTriePostOrderIteratorTest.getUrlTrie2(_property); UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie, 3); ArrayList<String> chars = new ArrayList<>(); ArrayList<FilterOperator> operators = new ArrayList<>(); Triple<String, FilterOperator, UrlTrieNode> group = null; while (grouper.hasNext()) { group = grouper.next(); chars.add(group.getLeft()); operators.add(group.getMiddle()); } Assert.assertEquals( new String[]{_property + "0", _property + "1", _property + "25", _property + "26", _property + "2"}, chars.toArray()); Assert.assertEquals( new FilterOperator[]{FilterOperator.CONTAINS, FilterOperator.CONTAINS, FilterOperator.CONTAINS, FilterOperator.CONTAINS, FilterOperator.EQUALS}, operators.toArray()); //The group is at www.linkedin.com/2 in the end with operator EQUALS ArrayList<String> pages = UrlTriePrefixGrouper.groupToPages(group); Assert.assertEquals(pages.toArray(), new String[]{_property + "2"}); } @Test public void testGroupToPagesWithContainsOperator() { List<String> pages = Arrays.asList(_property + "13", _property + "14"); UrlTrie trie = new UrlTrie(_property, pages); ArrayList<String> actual = UrlTriePrefixGrouper.groupToPages(Triple.of(_property, FilterOperator.CONTAINS, trie.getRoot())); Assert.assertEquals(actual.toArray(), pages.toArray()); } @Test public void testGroupToPagesWithContainsOperator2() { List<String> pages = Arrays.asList(_property + "13", _property + "14", _property + "1", _property + "1"); UrlTrie trie = new UrlTrie(_property, pages); ArrayList<String> actual = UrlTriePrefixGrouper.groupToPages(Triple.of(_property, FilterOperator.CONTAINS, trie.getRoot())); Assert.assertEquals(actual.toArray(), new String[]{_property + "13", _property + "14", _property + "1"}); } @Test public void testGroupToPagesWithEqualsOperator() { List<String> pages = Arrays.asList(_property + "13", _property + "14"); UrlTrie trie1 = new UrlTrie(_property, pages); ArrayList<String> actual1 = UrlTriePrefixGrouper.groupToPages(Triple.of(_property, FilterOperator.EQUALS, trie1.getRoot())); Assert.assertEquals(actual1.size(), 0); List<String> pagesWithRoot = new ArrayList<>(); pagesWithRoot.addAll(pages); pagesWithRoot.add(_property); UrlTrie trie2 = new UrlTrie(_property, pagesWithRoot); ArrayList<String> actual2 = UrlTriePrefixGrouper.groupToPages(Triple.of(_property, FilterOperator.EQUALS, trie2.getRoot())); Assert.assertEquals(actual2.toArray(), new String[]{_property}); } @Test public void testWhenTrieSizeLessThanGroupSize1() { List<String> pages = Arrays.asList(_property + "13"); UrlTrie trie1 = new UrlTrie(_property, pages); UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie1, 1); Triple<String, FilterOperator, UrlTrieNode> next = grouper.next(); Assert.assertEquals(next.getLeft(), _property); Assert.assertEquals(next.getMiddle(), FilterOperator.CONTAINS); Assert.assertEquals(next.getRight().getValue(), Character.valueOf('/')); Assert.assertFalse(next.getRight().isExist()); Assert.assertFalse(grouper.hasNext()); } @Test public void testWhenTrieSizeLessThanGroupSize2() { List<String> pages = Arrays.asList(_property + "13"); UrlTrie trie1 = new UrlTrie(_property, pages); UrlTriePrefixGrouper grouper = new UrlTriePrefixGrouper(trie1, 2); Triple<String, FilterOperator, UrlTrieNode> next = grouper.next(); Assert.assertEquals(next.getLeft(), _property); Assert.assertEquals(next.getMiddle(), FilterOperator.CONTAINS); Assert.assertEquals(next.getRight().getValue(), Character.valueOf('/')); Assert.assertFalse(next.getRight().isExist()); Assert.assertFalse(grouper.hasNext()); } // @Test // public void fun() throws FileNotFoundException { // UrlTrie trie = new UrlTrie("https://" + _property, new ArrayList<String>()); // FileReader fileReader = new FileReader(new File("/Users/chguo/projects/seo/src/main/java/test/output2.txt")); // try (BufferedReader br = new BufferedReader(fileReader)) { // String line; // while ((line = br.readLine()) != null) { // trie.add(line); // } // } catch (IOException e) { // e.printStackTrace(); // } // // UrlTriePrefixGrouper _grouper = new UrlTriePrefixGrouper(trie, 3); //// ArrayList<String> chars = new ArrayList<>(); //// ArrayList<FilterOperator> operators = new ArrayList<>(); // while (_grouper.hasNext()) { // Triple<String, FilterOperator, UrlTrieNode> group = _grouper.next(); // System.out.println(group.getLeft() + " " + group.getMiddle() + " " + group.getRight().getSize()); // } // } }