/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.core; import java.io.Reader; import java.io.StringReader; import java.lang.reflect.Modifier; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.CrankyTokenFilter; import org.apache.lucene.analysis.MockCharFilter; import org.apache.lucene.analysis.MockFixedLengthPayloadFilter; import org.apache.lucene.analysis.MockGraphTokenFilter; import org.apache.lucene.analysis.MockHoleInjectingTokenFilter; import org.apache.lucene.analysis.MockLowerCaseFilter; import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter; import org.apache.lucene.analysis.MockSynonymFilter; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockVariableLengthPayloadFilter; import org.apache.lucene.analysis.SimplePayloadFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ValidatingTokenFilter; import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter; import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer; import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.sr.SerbianNormalizationRegularFilter; import org.apache.lucene.analysis.util.CharFilterFactory; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.StringMockResourceLoader; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.Version; /** * Tests that any newly added Tokenizers/TokenFilters/CharFilters have a * corresponding factory (and that the SPI configuration is correct) */ public class TestAllAnalyzersHaveFactories extends LuceneTestCase { // these are test-only components (e.g. test-framework) private static final Set<Class<?>> testComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>()); static { Collections.<Class<?>>addAll(testComponents, MockTokenizer.class, MockCharFilter.class, MockFixedLengthPayloadFilter.class, MockGraphTokenFilter.class, MockHoleInjectingTokenFilter.class, MockLowerCaseFilter.class, MockRandomLookaheadTokenFilter.class, MockSynonymFilter.class, MockTokenFilter.class, MockVariableLengthPayloadFilter.class, ValidatingTokenFilter.class, CrankyTokenFilter.class, SimplePayloadFilter.class ); } // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for these? private static final Set<Class<?>> crazyComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>()); static { Collections.<Class<?>>addAll(crazyComponents, CachingTokenFilter.class, TeeSinkTokenFilter.class ); } // these are oddly-named (either the actual analyzer, or its factory) // they do actually have factories. // TODO: clean this up! private static final Set<Class<?>> oddlyNamedComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>()); static { Collections.<Class<?>>addAll(oddlyNamedComponents, ReversePathHierarchyTokenizer.class, // this is supported via an option to PathHierarchyTokenizer's factory SnowballFilter.class, // this is called SnowballPorterFilterFactory PatternKeywordMarkerFilter.class, SetKeywordMarkerFilter.class, UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory org.apache.lucene.analysis.StopFilter.class, // class from core, but StopFilterFactory creates one from this module org.apache.lucene.analysis.LowerCaseFilter.class // class from core, but LowerCaseFilterFactory creates one from this module ); } // The following token filters are excused from having their factory. private static final Set<Class<?>> tokenFiltersWithoutFactory = new HashSet<>(); static { tokenFiltersWithoutFactory.add(SerbianNormalizationRegularFilter.class); } private static final ResourceLoader loader = new StringMockResourceLoader(""); public void test() throws Exception { List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis"); for (final Class<?> c : analysisClasses) { final int modifiers = c.getModifiers(); if ( // don't waste time with abstract classes Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers) || c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface() || testComponents.contains(c) || crazyComponents.contains(c) || oddlyNamedComponents.contains(c) || tokenFiltersWithoutFactory.contains(c) || c.isAnnotationPresent(Deprecated.class) // deprecated ones are typically back compat hacks || !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c)) ) { continue; } Map<String,String> args = new HashMap<>(); args.put("luceneMatchVersion", Version.LATEST.toString()); if (Tokenizer.class.isAssignableFrom(c)) { String clazzName = c.getSimpleName(); assertTrue(clazzName.endsWith("Tokenizer")); String simpleName = clazzName.substring(0, clazzName.length() - 9); assertNotNull(TokenizerFactory.lookupClass(simpleName)); TokenizerFactory instance = null; try { instance = TokenizerFactory.forName(simpleName, args); assertNotNull(instance); if (instance instanceof ResourceLoaderAware) { ((ResourceLoaderAware) instance).inform(loader); } assertSame(c, instance.create().getClass()); } catch (IllegalArgumentException e) { // TODO: For now pass because some factories have not yet a default config that always works } } else if (TokenFilter.class.isAssignableFrom(c)) { String clazzName = c.getSimpleName(); assertTrue(clazzName.endsWith("Filter")); String simpleName = clazzName.substring(0, clazzName.length() - (clazzName.endsWith("TokenFilter") ? 11 : 6)); assertNotNull(TokenFilterFactory.lookupClass(simpleName)); TokenFilterFactory instance = null; try { instance = TokenFilterFactory.forName(simpleName, args); assertNotNull(instance); if (instance instanceof ResourceLoaderAware) { ((ResourceLoaderAware) instance).inform(loader); } Class<? extends TokenStream> createdClazz = instance.create(new KeywordTokenizer()).getClass(); // only check instance if factory have wrapped at all! if (KeywordTokenizer.class != createdClazz) { assertSame(c, createdClazz); } } catch (IllegalArgumentException e) { // TODO: For now pass because some factories have not yet a default config that always works } } else if (CharFilter.class.isAssignableFrom(c)) { String clazzName = c.getSimpleName(); assertTrue(clazzName.endsWith("CharFilter")); String simpleName = clazzName.substring(0, clazzName.length() - 10); assertNotNull(CharFilterFactory.lookupClass(simpleName)); CharFilterFactory instance = null; try { instance = CharFilterFactory.forName(simpleName, args); assertNotNull(instance); if (instance instanceof ResourceLoaderAware) { ((ResourceLoaderAware) instance).inform(loader); } Class<? extends Reader> createdClazz = instance.create(new StringReader("")).getClass(); // only check instance if factory have wrapped at all! if (StringReader.class != createdClazz) { assertSame(c, createdClazz); } } catch (IllegalArgumentException e) { // TODO: For now pass because some factories have not yet a default config that always works } } } } }