package org.apache.lucene.analysis.core; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.nio.CharBuffer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.MockCharFilter; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.charfilter.MappingCharFilter; import org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.ngram.NGramTokenFilter; import org.apache.lucene.analysis.shingle.ShingleFilter; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

/**
 * Regression tests reproducing concrete bugs found by randomized analysis-chain
 * testing. The magic constants and strings throughout this class come from the
 * original failing random seeds and must not be "cleaned up".
 */
@SuppressCodecs("Direct")
public class TestBugInSomething extends BaseTokenStreamTestCase {

  /**
   * Reproduces a failure in the chain
   * MockCharFilter -> MappingCharFilter -> wrapper -> MockTokenizer -> CommonGramsFilter,
   * checked via {@code checkAnalysisConsistency} on a fixed input string.
   */
  public void test() throws Exception {
    // Common-grams word set; initial capacity 3, case-sensitive (ignoreCase=false).
    final CharArraySet cas = new CharArraySet(TEST_VERSION_CURRENT, 3, false);
    cas.add("jjp");
    cas.add("wlmwoknt");
    cas.add("tcgyreo");

    // Char-level rewrites applied by MappingCharFilter before tokenization
    // (note "mtqlpi" maps to the empty string, i.e. is deleted).
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("mtqlpi", "");
    builder.add("mwoknt", "jjp");
    builder.add("tcgyreo", "zpfpajyws");
    final NormalizeCharMap map = builder.build();

    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // -65 is the seed-derived MockTokenizer argument from the original failure.
        Tokenizer t = new MockTokenizer(MockTokenFilter.ENGLISH_STOPSET, false, -65);
        TokenFilter f = new CommonGramsFilter(TEST_VERSION_CURRENT, t, cas);
        return new TokenStreamComponents(t, f);
      }

      @Override
      protected Reader initReader(String fieldName, Reader reader) {
        // Char filters applied innermost-first; the outer wrapper asserts that
        // nothing reads from the reader before it should.
        reader = new MockCharFilter(reader, 0);
        reader = new MappingCharFilter(map, reader);
        reader = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(reader);
        return reader;
      }
    };
    checkAnalysisConsistency(random(), a, false, "wmgddzunizdomqyj");
  }

  // A CharFilter whose every method throws UnsupportedOperationException with a
  // message naming that method. testWrapping() wraps it and calls each wrapper
  // method, then asserts on the message to prove the wrapper delegated the call.
  CharFilter wrappedStream = new CharFilter(new StringReader("bogus")) {
    @Override
    public void mark(int readAheadLimit) {
      throw new UnsupportedOperationException("mark(int)");
    }

    @Override
    public boolean markSupported() {
      throw new UnsupportedOperationException("markSupported()");
    }

    @Override
    public int read() {
      throw new UnsupportedOperationException("read()");
    }

    @Override
    public int read(char[] cbuf) {
      throw new UnsupportedOperationException("read(char[])");
    }

    @Override
    public int read(CharBuffer target) {
      throw new UnsupportedOperationException("read(CharBuffer)");
    }

    @Override
    public boolean ready() {
      throw new UnsupportedOperationException("ready()");
    }

    @Override
    public void reset() {
      throw new UnsupportedOperationException("reset()");
    }

    @Override
    public long skip(long n) {
      throw new UnsupportedOperationException("skip(long)");
    }

    @Override
    public int correct(int currentOff) {
      throw new UnsupportedOperationException("correct(int)");
    }

    @Override
    public void close() {
      throw new UnsupportedOperationException("close()");
    }

    @Override
    public int read(char[] arg0, int arg1, int arg2) {
      throw new UnsupportedOperationException("read(char[], int, int)");
    }
  };

  /**
   * Verifies that CheckThatYouDidntReadAnythingReaderWrapper forwards every
   * Reader/CharFilter method to the wrapped stream: each call must surface the
   * sentinel exception thrown by {@link #wrappedStream}, identified by message.
   * NOTE(review): ready() is overridden in wrappedStream but not exercised here
   * — presumably an intentional gap or the wrapper does not delegate it; confirm.
   */
  public void testWrapping() throws Exception {
    CharFilter cs = new TestRandomChains.CheckThatYouDidntReadAnythingReaderWrapper(wrappedStream);
    try {
      cs.mark(1);
      fail();
    } catch (Exception e) {
      assertEquals("mark(int)", e.getMessage());
    }

    try {
      cs.markSupported();
      fail();
    } catch (Exception e) {
      assertEquals("markSupported()", e.getMessage());
    }

    try {
      cs.read();
      fail();
    } catch (Exception e) {
      assertEquals("read()", e.getMessage());
    }

    try {
      cs.read(new char[0]);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[])", e.getMessage());
    }

    try {
      cs.read(CharBuffer.wrap(new char[0]));
      fail();
    } catch (Exception e) {
      assertEquals("read(CharBuffer)", e.getMessage());
    }

    try {
      cs.reset();
      fail();
    } catch (Exception e) {
      assertEquals("reset()", e.getMessage());
    }

    try {
      cs.skip(1);
      fail();
    } catch (Exception e) {
      assertEquals("skip(long)", e.getMessage());
    }

    try {
      // correctOffset() on the wrapper ends up in the delegate's correct(int).
      cs.correctOffset(1);
      fail();
    } catch (Exception e) {
      assertEquals("correct(int)", e.getMessage());
    }

    try {
      cs.close();
      fail();
    } catch (Exception e) {
      assertEquals("close()", e.getMessage());
    }

    try {
      cs.read(new char[0], 0, 0);
      fail();
    } catch (Exception e) {
      assertEquals("read(char[], int, int)", e.getMessage());
    }
  }

  // todo: test framework?
  // Debug-only pass-through filter: prints each token (and end/close/reset
  // events) with the upstream class name, without altering the stream.
  static final class SopTokenFilter extends TokenFilter {

    SopTokenFilter(TokenStream input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        System.out.println(input.getClass().getSimpleName() + "->" + this.reflectAsString(false));
        return true;
      } else {
        return false;
      }
    }

    @Override
    public void end() throws IOException {
      super.end();
      System.out.println(input.getClass().getSimpleName() + ".end()");
    }

    @Override
    public void close() throws IOException {
      super.close();
      System.out.println(input.getClass().getSimpleName() + ".close()");
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      System.out.println(input.getClass().getSimpleName() + ".reset()");
    }
  }

  // LUCENE-5269
  /**
   * Runs random data through EdgeNGramTokenizer(2,94) -> ShingleFilter(5) ->
   * NGramTokenFilter(55,83); the commented-out SopTokenFilter lines are kept as
   * debug hooks to trace tokens at each stage when re-diagnosing the issue.
   */
  public void testUnicodeShinglesAndNgrams() throws Exception {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new EdgeNGramTokenizer(TEST_VERSION_CURRENT, 2, 94);
        //TokenStream stream = new SopTokenFilter(tokenizer);
        TokenStream stream = new ShingleFilter(tokenizer, 5);
        //stream = new SopTokenFilter(stream);
        stream = new NGramTokenFilter(TEST_VERSION_CURRENT, stream, 55, 83);
        //stream = new SopTokenFilter(stream);
        return new TokenStreamComponents(tokenizer, stream);
      }
    };
    checkRandomData(random(), analyzer, 2000);
  }
}