/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.text.preprocessing; import org.junit.Before; import org.junit.Test; /** * Test cases for {@link SuffixSorter}. */ public class SuffixSorterTest extends PreprocessingComponentTestBase { /** Suffix sorter under tests */ private SuffixSorter suffixSorter; /** Other preprocessing components required for the test */ private Tokenizer tokenizer; private CaseNormalizer caseNormalizer; @Before public void setUpPreprocessingComponents() { tokenizer = new Tokenizer(); caseNormalizer = new CaseNormalizer(); suffixSorter = new SuffixSorter(); } @Test public void testEmpty() { // Do not add any documents to the rawDocuments list int [] expectedSuffixOrder = new int [] { 0 }; int [] expectedLcpArray = new int [] { 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testEmptySnippet() { createDocuments((String) null); int [] expectedSuffixOrder = new int [] { 0 }; int [] expectedLcpArray = new int [] { 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testEmptyBody() { createDocuments("a"); int [] expectedSuffixOrder = new int [] { 0, 1 }; int [] expectedLcpArray = new int [] { 0, 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testEmptyTitle() { createDocuments((String) null, "a"); int [] expectedSuffixOrder = new int [] { 0, 1 }; int [] expectedLcpArray = new int [] { 0, 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testOnePhrase() { createDocuments("a b", "a b"); int [] expectedSuffixOrder = new int [] { 1, 4, 0, 3, 2, 5 }; int [] expectedLcpArray = new int [] { 0, 1, 0, 2, 0, 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testPunctuation() { createDocuments("a . b", "a . b"); int [] expectedSuffixOrder = new int [] { 2, 6, 0, 4, 1, 3, 5, 7 }; int [] expectedLcpArray = new int [] { 0, 1, 0, 1, 0, 0, 0, 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } @Test public void testMoreTokens() { createDocuments("a b c d e f g h i j k l m n o p q r", null); int [] expectedSuffixOrder = new int [] { 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 18 }; int [] expectedLcpArray = new int [] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; checkAsserts(expectedSuffixOrder, expectedLcpArray); } private void checkAsserts(int [] expectedSuffixOrder, int [] expectedLcpArray) { tokenizer.tokenize(context); caseNormalizer.normalize(context); suffixSorter.suffixSort(context); assertThat(context.allTokens.suffixOrder).as("allTokens.suffixOrder").isEqualTo( expectedSuffixOrder); assertThat(context.allTokens.lcp).as("allTokens.lcp").isEqualTo(expectedLcpArray); } }