/*
 * Carrot2 project.
 *
 * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
 * All rights reserved.
 *
 * Refer to the full license file "carrot2.LICENSE"
 * in the root folder of the repository checkout or at:
 * http://www.carrot2.org/carrot2.LICENSE
 */

package org.carrot2.text.linguistic;

import static org.junit.Assert.*;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;

import org.carrot2.core.Controller;
import org.carrot2.core.ControllerFactory;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.ProcessingResult;
import org.carrot2.core.attribute.Processing;
import org.carrot2.shaded.guava.common.collect.ImmutableMap;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.text.util.MutableCharArray;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.AttributeUtils;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Output;
import org.carrot2.util.resource.DirLocator;
import org.carrot2.util.resource.IResourceLocator;
import org.carrot2.util.resource.ResourceLookup;
import org.carrot2.util.resource.ResourceLookup.Location;
import org.carrot2.util.tests.CarrotTestCase;
import org.junit.Test;

import com.carrotsearch.randomizedtesting.LifecycleScope;
import com.carrotsearch.randomizedtesting.RandomizedTest;

/**
 * Tests how {@link ILexicalData} obtained through {@link DefaultLexicalDataFactory} is
 * shared, kept separate and reloaded across pooling {@link Controller}s.
 *
 * <p>Every controller created by a test is disposed in a {@code finally} block, so a
 * failing assertion does not leave a controller undisposed.
 */
public class DefaultLexicalDataFactoryTest extends CarrotTestCase {
    /**
     * Binds basic preprocessing pipeline.
     */
    @Bindable
    public static class TestComponent extends ProcessingComponentBase {
        /**
         * Basic preprocessing pipeline.
         */
        public final BasicPreprocessingPipeline preprocessingPipeline = new BasicPreprocessingPipeline();

        /**
         * Expose the lexical data for English.
         */
        @Processing
        @Output
        @Attribute(key = "english")
        public ILexicalData english;

        @Override
        public void process() throws ProcessingException {
            english = preprocessingPipeline.lexicalDataFactory.getLexicalData(LanguageCode.ENGLISH);
        }
    }

    /**
     * Two controllers created with the same {@link DefaultLexicalDataFactory#resourceLookup}
     * should share parsed {@link ILexicalData}.
     */
    @Test
    public void testLexicalDataFromTheSameResourceDirIsShared() throws IOException {
        // First controller, default resource lookup.
        final Controller ctrl1 = ControllerFactory.createPooling();
        try {
            final ILexicalData lexicalData1 = processEnglish(ctrl1);

            // Second, independent controller, also with the default resource lookup.
            final Controller ctrl2 = ControllerFactory.createPooling();
            try {
                final ILexicalData lexicalData2 = processEnglish(ctrl2);
                assertSame(lexicalData1, lexicalData2);
            } finally {
                ctrl2.dispose();
            }
        } finally {
            ctrl1.dispose();
        }
    }

    /**
     * Lexical data from a given location can be reloaded on-demand. This affects all
     * pooled controllers, even if they have initialized earlier (lexical resources are
     * shared).
     */
    @Test
    public void testLexicalDataIsReloadedOnDemand() throws IOException {
        final Path tempDir1 = newResourceDir();
        writeEnglishStopWord(tempDir1, "uniquea");

        final String resourceLookupKey = factoryKey("resourceLookup");
        final String reloadResourcesKey = factoryKey("reloadResources");

        // Create pooling controller, use tempDir1.
        final Controller ctrl1 = ControllerFactory.createPooling();
        try {
            ctrl1.init(ImmutableMap.<String, Object> of(
                resourceLookupKey, resourceLookupFor(tempDir1)));
            final ILexicalData data1 = processEnglish(ctrl1);
            assertTrue(isCommonWord(data1, "uniquea"));

            // Create another pooling controller: same folder, but a separate
            // ResourceLookup instance.
            final Controller ctrl2 = ControllerFactory.createPooling();
            try {
                ctrl2.init(ImmutableMap.<String, Object> of(
                    resourceLookupKey, resourceLookupFor(tempDir1)));
                final ILexicalData data2 = processEnglish(ctrl2);
                assertTrue(isCommonWord(data2, "uniquea"));
                assertSame(data1, data2);

                /*
                 * Now force reloading of resources from that path on ctrl1. The new
                 * stop word resource should contain 'uniqueb'.
                 */
                writeEnglishStopWord(tempDir1, "uniqueb");
                final ILexicalData data3 = ctrl1.process(
                    ImmutableMap.<String, Object> of(reloadResourcesKey, true),
                    TestComponent.class).getAttribute("english");
                assertNotSame(data1, data3);
                assertFalse(isCommonWord(data3, "uniquea"));
                assertTrue(isCommonWord(data3, "uniqueb"));

                /*
                 * But since it's the same location, all other controllers should now
                 * see updated resources (and share the same lexical data).
                 */
                final ILexicalData data4 = processEnglish(ctrl2);
                assertSame(data3, data4);
            } finally {
                ctrl2.dispose();
            }
        } finally {
            ctrl1.dispose();
        }
    }

    /**
     * Two controllers with different {@link DefaultLexicalDataFactory#resourceLookup}
     * should not affect each other's resources.
     */
    @Test
    public void testSeparateLexicalDataForDifferentResourceLookup() throws IOException {
        final Path tempDir1 = newResourceDir();
        writeEnglishStopWord(tempDir1, "uniquea");

        final Path tempDir2 = newResourceDir();
        writeEnglishStopWord(tempDir2, "uniqueb");

        final String resourceLookupKey = factoryKey("resourceLookup");
        final String reloadResourcesKey = factoryKey("reloadResources");

        // Create pooling controller, use tempDir1 (with resource reloading requested at init).
        final Controller ctrl1 = ControllerFactory.createPooling();
        try {
            ctrl1.init(ImmutableMap.<String, Object> of(
                resourceLookupKey, resourceLookupFor(tempDir1),
                reloadResourcesKey, true));
            final ILexicalData first = processEnglish(ctrl1);
            assertTrue(isCommonWord(first, "uniquea"));
            assertFalse(isCommonWord(first, "uniqueb"));

            // Create pooling controller, use tempDir2.
            final Controller ctrl2 = ControllerFactory.createPooling();
            try {
                ctrl2.init(ImmutableMap.<String, Object> of(
                    resourceLookupKey, resourceLookupFor(tempDir2)));
                final ILexicalData second = processEnglish(ctrl2);
                assertFalse(isCommonWord(second, "uniquea"));
                assertTrue(isCommonWord(second, "uniqueb"));

                // Now, reuse the first controller, nothing should change.
                final ILexicalData firstAgain = processEnglish(ctrl1);
                assertTrue(isCommonWord(firstAgain, "uniquea"));
                assertFalse(isCommonWord(firstAgain, "uniqueb"));
            } finally {
                ctrl2.dispose();
            }
        } finally {
            ctrl1.dispose();
        }
    }

    /**
     * Runs {@link TestComponent} on the given controller with no processing-time
     * attributes and returns the value of its {@code "english"} output attribute.
     *
     * @param controller the controller to run the component on
     * @return the lexical data published by {@link TestComponent}
     */
    private static ILexicalData processEnglish(Controller controller) {
        final ProcessingResult result = controller.process(
            Collections.<String, Object> emptyMap(), TestComponent.class);
        final ILexicalData english = result.getAttribute("english");
        return english;
    }

    /**
     * Returns the attribute key of the named field of {@link DefaultLexicalDataFactory}.
     *
     * @param fieldName name of the field declared in {@link DefaultLexicalDataFactory}
     * @return the key as computed by {@link AttributeUtils#getKey(Class, String)}
     */
    private static String factoryKey(String fieldName) {
        return AttributeUtils.getKey(DefaultLexicalDataFactory.class, fieldName);
    }

    /**
     * Builds a new resource lookup that consults the given directory first and the
     * context class loader locator second.
     *
     * @param resourceDir directory to look resources up in
     * @return a fresh {@link ResourceLookup} instance
     */
    private static ResourceLookup resourceLookupFor(Path resourceDir) {
        final IResourceLocator classpathLocator = Location.CONTEXT_CLASS_LOADER.locator;
        return new ResourceLookup(new DirLocator(resourceDir), classpathLocator);
    }

    /**
     * Creates a temporary directory with test-level lifecycle scope.
     *
     * @return path of the new temporary directory
     */
    private static Path newResourceDir() {
        return RandomizedTest.newTempDir(LifecycleScope.TEST);
    }

    /**
     * Writes (or overwrites) the {@code stopwords.en} file in the given directory so
     * that it contains exactly the given text, encoded as UTF-8.
     *
     * @param resourceDir directory to write the file into
     * @param word content of the stop word file
     * @throws IOException if the file cannot be written
     */
    private static void writeEnglishStopWord(Path resourceDir, String word) throws IOException {
        Files.write(resourceDir.resolve("stopwords.en"), word.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Checks whether the lexical data reports the given word as a common word.
     *
     * @param lexicalData lexical data to query
     * @param word the word to check
     * @return the result of {@link ILexicalData#isCommonWord} for the word
     */
    private static boolean isCommonWord(ILexicalData lexicalData, String word) {
        return lexicalData.isCommonWord(new MutableCharArray(word));
    }
}