package org.apache.lucene.analysis.core;

import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.solr.analysis.AcronymTokenFilterFactory;

/**
 * Token-stream tests for filters built by {@link AcronymTokenFilterFactory}.
 *
 * <p>Every test configures the factory with the prefix {@code "acr::"} and the
 * token type {@code "ACRONYM"}; only the {@code emitBoth} flag varies.
 */
public class TestAcronymFilter extends BaseTokenStreamTestCase {

  /** Value passed as the factory's {@code prefix} argument. */
  private static final String PREFIX = "acr::";

  /** Value passed as the factory's {@code setType} argument. */
  private static final String ACRONYM_TYPE = "ACRONYM";

  /**
   * With {@code emitBoth=false} the input {@code "mit MIT"} is expected to yield
   * {@code "mit"} unchanged and {@code "acr::MIT"} in place of {@code "MIT"},
   * each with a position increment of 1, the second token typed {@code ACRONYM}.
   */
  public void testReplace() throws Exception {
    AcronymTokenFilterFactory factory = newFactory("false");

    assertTokenStreamContents(
        analyze(factory, "mit MIT"),
        new String[] { "mit", "acr::MIT" },
        new int[] { 1, 1 });

    // A token stream is consumed by the assertion, so build a fresh one for the type check.
    assertTokenStreamContents(
        analyze(factory, "mit MIT"),
        new String[] { "mit", "acr::MIT" },
        new String[] { TypeAttribute.DEFAULT_TYPE, ACRONYM_TYPE });
  }

  /**
   * With {@code emitBoth=true} the input {@code "M MIT"} is expected to yield
   * {@code "M"}, the original {@code "MIT"}, and an extra {@code "acr::MIT"}
   * with position increment 0 and type {@code ACRONYM}.
   */
  public void testAdd() throws Exception {
    AcronymTokenFilterFactory factory = newFactory("true");

    assertTokenStreamContents(
        analyze(factory, "M MIT"),
        new String[] { "M", "MIT", "acr::MIT" },
        new int[] { 1, 1, 0 });

    assertTokenStreamContents(
        analyze(factory, "M MIT"),
        new String[] { "M", "MIT", "acr::MIT" },
        new String[] { TypeAttribute.DEFAULT_TYPE, TypeAttribute.DEFAULT_TYPE, ACRONYM_TYPE });
  }

  /**
   * Mixed-case inputs with {@code emitBoth=true}: {@code "DiRAC"} and
   * {@code "DDDDDiRAc5"} are expected to gain an {@code ACRONYM} token, while
   * {@code "DiRAc"} is expected to pass through alone.
   */
  public void testMixedCases() throws Exception {
    AcronymTokenFilterFactory factory = newFactory("true");

    assertTokenStreamContents(
        analyze(factory, "DiRAC"),
        new String[] { "DiRAC", "acr::DiRAC" },
        new String[] { TypeAttribute.DEFAULT_TYPE, ACRONYM_TYPE });

    assertTokenStreamContents(
        analyze(factory, "DiRAc"),
        new String[] { "DiRAc" },
        new String[] { TypeAttribute.DEFAULT_TYPE });

    assertTokenStreamContents(
        analyze(factory, "DDDDDiRAc5"),
        new String[] { "DDDDDiRAc5", "acr::DDDDDiRAc5" },
        new String[] { TypeAttribute.DEFAULT_TYPE, ACRONYM_TYPE });
  }

  /**
   * Builds a factory with the shared prefix/type arguments and the given
   * {@code emitBoth} setting, then marks the Lucene match version as explicit.
   *
   * @param emitBoth value for the factory's {@code emitBoth} argument
   *     ({@code "true"} or {@code "false"} in these tests)
   * @return the configured factory
   */
  private static AcronymTokenFilterFactory newFactory(String emitBoth) {
    // A plain HashMap instead of double-brace initialization: no anonymous
    // HashMap subclass is generated for each factory.
    Map<String, String> args = new HashMap<String, String>();
    args.put("emitBoth", emitBoth);
    args.put("prefix", PREFIX);
    args.put("setType", ACRONYM_TYPE);

    AcronymTokenFilterFactory factory = new AcronymTokenFilterFactory(args);
    factory.setExplicitLuceneMatchVersion(true);
    return factory;
  }

  /**
   * Wraps {@code text} in a whitespace mock tokenizer and passes it to
   * {@code factory.create}.
   *
   * @param factory factory that creates the filter under test
   * @param text raw input text
   * @return a fresh token stream over {@code text}
   */
  private TokenStream analyze(AcronymTokenFilterFactory factory, String text) throws Exception {
    return factory.create(whitespaceMockTokenizer(new StringReader(text)));
  }
}