/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.phonetic; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.commons.codec.language.Caverphone2; import org.apache.commons.codec.language.Metaphone; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.util.ClasspathResourceLoader; public class TestPhoneticFilterFactory extends BaseTokenStreamTestCase { /** * Case: default */ public void testFactoryDefaults() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Metaphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Metaphone); assertTrue(factory.inject); // default } public void testInjectFalse() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Metaphone"); args.put(PhoneticFilterFactory.INJECT, "false"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertFalse(factory.inject); } public void testMaxCodeLength() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Metaphone"); args.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertEquals(2, ((Metaphone) factory.getEncoder()).getMaxCodeLen()); } /** * Case: Failures and Exceptions */ public void testMissingEncoder() throws IOException { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { new PhoneticFilterFactory(new HashMap<String,String>()); }); assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'encoder'")); } public void testUnknownEncoder() throws IOException { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { Map<String,String> args = new HashMap<>(); args.put("encoder", "XXX"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); }); assertTrue(expected.getMessage().contains("Error loading encoder")); } public void testUnknownEncoderReflection() throws IOException { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { Map<String,String> args = new HashMap<>(); args.put("encoder", "org.apache.commons.codec.language.NonExistence"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); }); assertTrue(expected.getMessage().contains("Error loading encoder")); } /** * Case: Reflection */ public void testFactoryReflection() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Metaphone); assertTrue(factory.inject); // default } /** * we use "Caverphone2" as it is registered in the REGISTRY as Caverphone, * so this effectively tests reflection without package name */ public void testFactoryReflectionCaverphone2() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Caverphone2"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Caverphone2); assertTrue(factory.inject); // default } public void testFactoryReflectionCaverphone() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Caverphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Caverphone2); assertTrue(factory.inject); // default } public void testAlgorithms() throws Exception { assertAlgorithm("Metaphone", "true", "aaa bbb ccc easgasg", new String[] { "A", "aaa", "B", "bbb", "KKK", "ccc", "ESKS", "easgasg" }); assertAlgorithm("Metaphone", "false", "aaa bbb ccc easgasg", new String[] { "A", "B", "KKK", "ESKS" }); assertAlgorithm("DoubleMetaphone", "true", "aaa bbb ccc easgasg", new String[] { "A", "aaa", "PP", "bbb", "KK", "ccc", "ASKS", "easgasg" }); assertAlgorithm("DoubleMetaphone", "false", "aaa bbb ccc easgasg", new String[] { "A", "PP", "KK", "ASKS" }); assertAlgorithm("Soundex", "true", "aaa bbb ccc easgasg", new String[] { "A000", "aaa", "B000", "bbb", "C000", "ccc", "E220", "easgasg" }); assertAlgorithm("Soundex", "false", "aaa bbb ccc easgasg", new String[] { "A000", "B000", "C000", "E220" }); assertAlgorithm("RefinedSoundex", "true", "aaa bbb ccc easgasg", new String[] { "A0", "aaa", "B1", "bbb", "C3", "ccc", "E034034", "easgasg" }); assertAlgorithm("RefinedSoundex", "false", "aaa bbb ccc easgasg", new String[] { "A0", "B1", "C3", "E034034" }); assertAlgorithm("Caverphone", "true", "Darda Karleen Datha Carlene", new String[] { "TTA1111111", "Darda", "KLN1111111", "Karleen", "TTA1111111", "Datha", "KLN1111111", "Carlene" }); assertAlgorithm("Caverphone", "false", "Darda Karleen Datha Carlene", new String[] { "TTA1111111", "KLN1111111", "TTA1111111", "KLN1111111" }); assertAlgorithm("ColognePhonetic", "true", "Meier Schmitt Meir Schmidt", new String[] { "67", "Meier", "862", "Schmitt", "67", "Meir", "862", "Schmidt" }); assertAlgorithm("ColognePhonetic", "false", "Meier Schmitt Meir Schmidt", new String[] { "67", "862", "67", "862" }); assertAlgorithm("Nysiis", "true", "Macintosh Knuth Bart Hurd", new String[] { "MCANT", "Macintosh", "NAT", "Knuth", "BAD", "Bart", "HAD", "Hurd" }); assertAlgorithm("Nysiis", "false", "Macintosh Knuth Bart Hurd", new String[] { "MCANT", "NAT", "BAD", "HAD" }); } /** Test that bogus arguments result in exception */ public void testBogusArguments() throws Exception { IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { new PhoneticFilterFactory(new HashMap<String,String>() {{ put("encoder", "Metaphone"); put("bogusArg", "bogusValue"); }}); }); assertTrue(expected.getMessage().contains("Unknown parameters")); } static void assertAlgorithm(String algName, String inject, String input, String[] expected) throws Exception { Tokenizer tokenizer = whitespaceMockTokenizer(input); Map<String,String> args = new HashMap<>(); args.put("encoder", algName); args.put("inject", inject); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); TokenStream stream = factory.create(tokenizer); assertTokenStreamContents(stream, expected); } }