/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.core.test;

import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.carrot2.core.Controller;
import org.carrot2.core.Document;
import org.carrot2.core.IDocumentSource;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.test.assertions.Carrot2CoreAssertions;
import org.carrot2.util.StringUtils;
import org.carrot2.util.tests.UsesExternalServices;
import org.junit.Test;

import com.carrotsearch.randomizedtesting.RandomizedContext;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
import org.carrot2.shaded.guava.common.base.Function;
import org.carrot2.shaded.guava.common.collect.Lists;
import org.carrot2.shaded.guava.common.collect.Maps;

import static org.junit.Assert.*;

/**
 * Common tests for {@link IDocumentSource}s that accept a string query.
 * <p>
 * Subclasses tune the checks through the protected hook methods declared below
 * (query texts, result sizes and feature switches).
 */
public abstract class QueryableDocumentSourceTestBase<T extends IDocumentSource> extends
    DocumentSourceTestBase<T>
{
    /**
     * Runs the query returned by {@link #getNoResultsQueryText()} and fails if any
     * documents come back.
     */
    @UsesExternalServices
    @Test
    public void testNoResultsQuery() throws Exception
    {
        runAndCheckNoResultsQuery();
    }

    /**
     * Requests {@link #getSmallQuerySize()} results for {@link #getSmallQueryText()} and
     * expects at least half of them.
     */
    @UsesExternalServices
    @Test
    public void testSmallQuery() throws Exception
    {
        runAndCheckMinimumResults(getSmallQueryText(), getSmallQuerySize(),
            getSmallQuerySize() / 2);
    }

    /**
     * Queries for a term containing a non-ASCII character. Guarded by
     * {@link #hasUtfResults()}.
     */
    @UsesExternalServices
    @Test
    public void testUtfCharacters() throws Exception
    {
        assumeTrue(hasUtfResults());
        runAndCheckMinimumResults("kaczyński", getSmallQuerySize(),
            getSmallQuerySize() / 2);
    }

    /**
     * Requests {@link #getLargeQuerySize()} results for {@link #getLargeQueryText()} and
     * expects at least half of them.
     */
    @UsesExternalServices
    @Test
    public void testLargeQuery() throws Exception
    {
        runAndCheckMinimumResults(getLargeQueryText(), getLargeQuerySize(),
            getLargeQuerySize() / 2);
    }

    /**
     * Checks that a positive {@link AttributeNames#RESULTS_TOTAL} value is present in the
     * result attributes. Guarded by {@link #hasTotalResultsEstimate()}.
     */
    @UsesExternalServices
    @Test
    public void testResultsTotal() throws Exception
    {
        assumeTrue(hasTotalResultsEstimate());
        runQuery(getSmallQueryText(), getSmallQuerySize());

        assertNotNull(resultAttributes.get(AttributeNames.RESULTS_TOTAL));
        assertTrue((Long) resultAttributes.get(AttributeNames.RESULTS_TOTAL) > 0);
    }

    /**
     * Checks that the {@link Document#CONTENT_URL} field is unique across the documents
     * returned for the large query. Guarded by {@link #mustReturnUniqueUrls()}.
     */
    @UsesExternalServices
    @Test
    public void testURLsUnique() throws Exception
    {
        assumeTrue(mustReturnUniqueUrls());
        runQuery(getLargeQueryText(), getLargeQuerySize());

        assertFieldUnique(getDocuments(), Document.CONTENT_URL);
    }

    /**
     * Checks that no string field of any returned document contains a <code>&lt;</code>
     * character. Guarded by {@link #canReturnEscapedHtml()}.
     */
    @UsesExternalServices
    @Test
    public void testHtmlUnescaping()
    {
        assumeTrue(canReturnEscapedHtml());
        runQuery("test", getSmallQuerySize());

        final List<Document> documents = getDocuments();
        int i = 0;
        for (Document document : documents)
        {
            Carrot2CoreAssertions.assertThat(document)
                .as("doc[" + i++ + "]").stringFieldsDoNotMatchPattern(".*<.*");
        }
    }

    /**
     * Issues the same small query three times concurrently through a caching controller
     * and checks that every call yields an acceptable number of documents and that
     * consecutive results hold the very same {@link Document} instances.
     */
    @UsesExternalServices
    @Test
    @ThreadLeakLingering(linger = 2000)
    @SuppressWarnings("unchecked")
    public void testInCachingController() throws InterruptedException, ExecutionException
    {
        final Map<String, Object> attributes = Maps.newHashMap();
        attributes.put(AttributeNames.QUERY, getSmallQueryText());
        attributes.put(AttributeNames.RESULTS, getSmallQuerySize());

        // Cache results from all DataSources
        final Controller controller = getCachingController(initAttributes,
            IDocumentSource.class);

        final int count = 3;
        final ExecutorService executorService = Executors.newFixedThreadPool(count);
        try
        {
            final List<Callable<ProcessingResult>> callables = Lists.newArrayList();
            for (int i = 0; i < count; i++)
            {
                callables.add(new Callable<ProcessingResult>()
                {
                    @Override
                    public ProcessingResult call() throws Exception
                    {
                        // Each task works on its own copy of the attribute map.
                        final Map<String, Object> localAttributes = Maps
                            .newHashMap(attributes);
                        return controller.process(localAttributes, getComponentClass());
                    }
                });
            }

            final List<Future<ProcessingResult>> results = executorService
                .invokeAll(callables);

            List<Document> documents = null;
            int index = 0;
            for (Future<ProcessingResult> future : results)
            {
                final ProcessingResult processingResult = future.get();
                final List<Document> documentsLocal = (List<Document>) processingResult
                    .getAttributes().get(AttributeNames.DOCUMENTS);
                assertThat(documentsLocal).as("documents at " + index).isNotNull();

                if (!canReturnMoreResultsThanRequested())
                {
                    assertThat(documentsLocal.size()).as("documents.size() at " + index)
                        .isLessThanOrEqualTo(getSmallQuerySize());
                }
                assertThat(documentsLocal.size()).as("documents.size() at " + index)
                    .isGreaterThanOrEqualTo(getSmallQuerySize() / 2);

                // Should have same documents (from the cache)
                if (documents != null)
                {
                    // Compare sizes first: a shorter list would otherwise surface as a
                    // bare IndexOutOfBoundsException and a longer one would go unnoticed.
                    assertThat(documentsLocal.size())
                        .as("documents.size() at " + index + " vs. previous result")
                        .isEqualTo(documents.size());

                    for (int i = 0; i < documents.size(); i++)
                    {
                        assertSame(documents.get(i), documentsLocal.get(i));
                    }
                }
                documents = documentsLocal;
                index++;
            }
        }
        finally
        {
            // Shut the pool down even if disposing of the controller throws.
            try
            {
                controller.dispose();
            }
            finally
            {
                executorService.shutdown();
            }
        }
    }

    /**
     * Override to switch on checking non-English results.
     */
    protected boolean hasUtfResults()
    {
        return false;
    }

    /**
     * Override to customize small query size.
     */
    protected int getSmallQuerySize()
    {
        return 50;
    }

    /**
     * Override to customize small query text.
     */
    protected String getSmallQueryText()
    {
        return "blog";
    }

    /**
     * Override to customize large query size.
     */
    protected int getLargeQuerySize()
    {
        return 300;
    }

    /**
     * Return <code>true</code> if the source can return more results than requested.
     */
    protected boolean canReturnMoreResultsThanRequested()
    {
        return false;
    }

    /**
     * Override to customize large query text.
     */
    protected String getLargeQueryText()
    {
        return "test";
    }

    /**
     * Override to switch checking of total results estimates.
     */
    protected boolean hasTotalResultsEstimate()
    {
        return true;
    }

    /**
     * Override to switch checking of HTML unescaping.
     */
    protected boolean canReturnEscapedHtml()
    {
        return true;
    }

    /**
     * Override to switch checking of URL uniqueness.
     */
    protected boolean mustReturnUniqueUrls()
    {
        return true;
    }

    /**
     * Override to customize no results query. The default delegates to
     * {@link #getNoResultsQuery()}.
     */
    protected String getNoResultsQueryText()
    {
        return getNoResultsQuery();
    }

    /**
     * Generates a random query intended to return no results.
     * <p>
     * The query consists of five words, each built from eight random lower-case letters
     * (<code>a</code> to <code>z</code>) followed by a random non-negative integer
     * smaller than 1000000. Every word, including the last one, is followed by a single
     * space. Randomness is drawn from the current {@link RandomizedContext}.
     *
     * @return the generated query text
     */
    public static String getNoResultsQuery()
    {
        final int words = 5;
        final int chars = 8;
        final Random random = RandomizedContext.current().getRandom();

        final StringBuilder query = new StringBuilder();
        for (int i = 0; i < words; i++)
        {
            for (int j = 0; j < chars; j++)
            {
                // The bound of nextInt() is exclusive, hence "+ 1" to include 'z'.
                query.append((char) ('a' + random.nextInt('z' - 'a' + 1)));
            }
            query.append(random.nextInt(1000000));
            query.append(' ');
        }

        return query.toString();
    }

    /**
     * Runs a query and asserts that it returned at least the expected number of results.
     *
     * @param query the query text to run
     * @param resultsToRequest the number of results to request
     * @param minimumExpectedResults the smallest acceptable number of returned results
     */
    protected void runAndCheckMinimumResults(String query, int resultsToRequest,
        int minimumExpectedResults)
    {
        final int actualResults = runQuery(query, resultsToRequest);
        assertThat(actualResults).isGreaterThanOrEqualTo(minimumExpectedResults);
    }

    /**
     * Runs the no-results check requesting {@link #getSmallQuerySize()} results.
     */
    protected void runAndCheckNoResultsQuery()
    {
        runAndCheckNoResultsQuery(getSmallQuerySize());
    }

    /**
     * Runs the query returned by {@link #getNoResultsQueryText()} and fails, listing the
     * URLs of the returned documents, if the result count is not zero.
     *
     * @param size the number of results to request
     */
    protected void runAndCheckNoResultsQuery(int size)
    {
        final int results = runQuery(getNoResultsQueryText(), size);
        if (results != 0)
        {
            // Collect the offending URLs to make the failure message actionable.
            final List<Document> documents = getDocuments();
            final String urls = StringUtils.toString(
                Lists.transform(documents, new Function<Document, String>()
                {
                    @Override
                    public String apply(Document document)
                    {
                        return document.getField(Document.CONTENT_URL);
                    }
                }), ", ");

            fail("Expected 0 results but found: " + results + " (urls: " + urls + ")");
        }
    }
}