/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.pattern;

import java.io.StringReader;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;

/**
 * Tests for {@link PatternCaptureGroupTokenFilter}.
 *
 * <p>Every case feeds a string through a whitespace {@link MockTokenizer}, wraps it in the
 * filter with a given set of patterns, and compares the emitted terms, start offsets, end
 * offsets and position values against hard-coded expectations. Most cases are run twice:
 * once with {@code preserveOriginal == false} and once with {@code preserveOriginal == true}.
 */
public class TestPatternCaptureGroupTokenFilter extends BaseTokenStreamTestCase {

  /** With no patterns configured, the tokenizer's tokens are expected unchanged. */
  public void testNoPattern() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A pattern that matches nothing in the input: tokens are expected unchanged. */
  public void testNoMatch() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"xx"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"xx"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"xx"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"xx"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A pattern that matches but has no capture group: tokens are expected unchanged. */
  public void testNoCapture() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {".."},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {".."},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {".."},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {".."},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A capture group that only ever captures the empty string: tokens are expected unchanged. */
  public void testEmptyCapture() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {".(y*)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {".(y*)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {".(y*)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {".(y*)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A group capturing the whole token: a single copy of each token is expected. */
  public void testCaptureAll() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.+)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.+)"},
        new String[] {"foobarbaz"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.+)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.+)"},
        new String[] {"foo","bar","baz"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** A single group anchored at the start of each token. */
  public void testCaptureStart() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)"},
        new String[] {"f"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)"},
        new String[] {"foobarbaz","f"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.)"},
        new String[] {"f","b","b"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.)"},
        new String[] {"foo","f","bar","b","baz","b"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** A single group capturing the second character of each token. */
  public void testCaptureMiddle() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^.(.)."},
        new String[] {"o"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^.(.)."},
        new String[] {"foobarbaz","o"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^.(.)."},
        new String[] {"o","a","a"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^.(.)."},
        new String[] {"foo","o","bar","a","baz","a"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** A single group anchored at the end of each token. */
  public void testCaptureEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.)$"},
        new String[] {"z"},
        new int[] {0},
        new int[] {9},
        new int[] {1},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.)$"},
        new String[] {"foobarbaz","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.)$"},
        new String[] {"o","r","z"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.)$"},
        new String[] {"foo","o","bar","r","baz","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        true
    );
  }

  /** Two adjacent groups at the start of each token. */
  public void testCaptureStartMiddle() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)(.)"},
        new String[] {"f","o"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.)(.)"},
        new String[] {"foobarbaz","f","o"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.)(.)"},
        new String[] {"f","o","b","a","b","a"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.)(.)"},
        new String[] {"foo","f","o","bar","b","a","baz","b","a"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** One group at the start and one at the end of each token. */
  public void testCaptureStartEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"^(.).+(.)$"},
        new String[] {"f","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"^(.).+(.)$"},
        new String[] {"foobarbaz","f","z"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.).+(.)$"},
        new String[] {"f","o","b","r","b","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"^(.).+(.)$"},
        new String[] {"foo","f","o","bar","b","r","baz","b","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Two adjacent groups at the end of each token. */
  public void testCaptureMiddleEnd() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.)(.)$"},
        new String[] {"a","z"},
        new int[] {0,0},
        new int[] {9,9},
        new int[] {1,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.)(.)$"},
        new String[] {"foobarbaz","a","z"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.)(.)$"},
        new String[] {"o","o","a","r","a","z"},
        new int[] {0,0,4,4,8,8},
        new int[] {3,3,7,7,11,11},
        new int[] {1,0,1,0,1,0},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.)(.)$"},
        new String[] {"foo","o","o","bar","a","r","baz","a","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Nested (overlapping) groups, matched repeatedly across the token. */
  public void testMultiCaptureOverlap() throws Exception {
    testPatterns(
        "foobarbaz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,0,0,0,0,0,0},
        new int[] {9,9,9,9,9,9,9,9,9},
        new int[] {1,0,0,0,0,0,0,0,0},
        false
    );
    testPatterns(
        "foobarbaz",
        new String[] {"(.(.(.)))"},
        new String[] {"foobarbaz","foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,0,0,0,0,0,0,0},
        new int[] {9,9,9,9,9,9,9,9,9,9},
        new int[] {1,0,0,0,0,0,0,0,0,0},
        true
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        false
    );

    testPatterns(
        "foo bar baz",
        new String[] {"(.(.(.)))"},
        new String[] {"foo","oo","o","bar","ar","r","baz","az","z"},
        new int[] {0,0,0,4,4,4,8,8,8},
        new int[] {3,3,3,7,7,7,11,11,11},
        new int[] {1,0,0,1,0,0,1,0,0},
        true
    );
  }

  /** Several patterns at once, one of which ("(ccc)") never matches. */
  public void testMultiPattern() throws Exception {
    testPatterns(
        "aaabbbaaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,0,0},
        new int[] {9,9,9},
        new int[] {1,0,0},
        false
    );
    testPatterns(
        "aaabbbaaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaabbbaaa","aaa","bbb","aaa"},
        new int[] {0,0,0,0},
        new int[] {9,9,9,9},
        new int[] {1,0,0,0},
        true
    );

    testPatterns(
        "aaa bbb aaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        false
    );

    testPatterns(
        "aaa bbb aaa",
        new String[] {"(aaa)","(bbb)","(ccc)"},
        new String[] {"aaa","bbb","aaa"},
        new int[] {0,4,8},
        new int[] {3,7,11},
        new int[] {1,1,1},
        true
    );
  }

  /** Splits a camelCase / digit / underscore identifier into its parts using four patterns. */
  public void testCamelCase() throws Exception {
    // The positions arrays below have exactly one entry per expected token; an earlier
    // revision carried one surplus trailing 0 in each, which no token was compared against.
    testPatterns(
        "letsPartyLIKEits1999_dude",
        new String[] {
            "([A-Z]{2,})",
            "(?<![A-Z])([A-Z][a-z]+)",
            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
            "([0-9]+)"
        },
        new String[] {"lets","Party","LIKE","its","1999","dude"},
        new int[] {0,0,0,0,0,0},
        new int[] {25,25,25,25,25,25},
        new int[] {1,0,0,0,0,0},
        false
    );
    testPatterns(
        "letsPartyLIKEits1999_dude",
        new String[] {
            "([A-Z]{2,})",
            "(?<![A-Z])([A-Z][a-z]+)",
            "(?:^|\\b|(?<=[0-9_])|(?<=[A-Z]{2}))([a-z]+)",
            "([0-9]+)"
        },
        new String[] {"letsPartyLIKEits1999_dude","lets","Party","LIKE","its","1999","dude"},
        new int[] {0,0,0,0,0,0,0},
        new int[] {25,25,25,25,25,25,25},
        new int[] {1,0,0,0,0,0,0},
        true
    );
  }

  /** Runs {@code checkRandomData} against an analyzer using the pattern {@code ((..)(..))}. */
  public void testRandomString() throws Exception {
    // try-with-resources so the analyzer is closed even when checkRandomData fails.
    try (Analyzer analyzer = newRandomDataAnalyzer()) {
      checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
    }
  }

  /**
   * Builds the analyzer used by {@link #testRandomString()}: a whitespace {@link MockTokenizer}
   * followed by the filter with {@code preserveOriginal == false} and the pattern
   * {@code ((..)(..))}.
   */
  private static Analyzer newRandomDataAnalyzer() {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer,
            new PatternCaptureGroupTokenFilter(tokenizer, false, Pattern.compile("((..)(..))")));
      }
    };
  }

  /**
   * Tokenizes {@code input} on whitespace, applies the filter with the compiled
   * {@code regexes}, and asserts the resulting stream via {@code assertTokenStreamContents}.
   *
   * @param input text handed to the tokenizer
   * @param regexes regular expressions compiled and passed to the filter, in order
   * @param tokens expected terms, in emission order
   * @param startOffsets expected start offset for each entry of {@code tokens}
   * @param endOffsets expected end offset for each entry of {@code tokens}
   * @param positions expected position value for each entry of {@code tokens}, forwarded as the
   *     last argument of {@code assertTokenStreamContents}
   * @param preserveOriginal value passed to the filter's {@code preserveOriginal} argument
   * @throws IllegalArgumentException if an expectation array's length differs from
   *     {@code tokens.length}
   */
  private void testPatterns(String input, String[] regexes, String[] tokens,
      int[] startOffsets, int[] endOffsets, int[] positions,
      boolean preserveOriginal) throws Exception {
    // Reject malformed fixtures up front so surplus or missing expectations cannot go unnoticed.
    requireSameLength("startOffsets", startOffsets.length, tokens.length);
    requireSameLength("endOffsets", endOffsets.length, tokens.length);
    requireSameLength("positions", positions.length, tokens.length);

    Pattern[] patterns = new Pattern[regexes.length];
    for (int i = 0; i < regexes.length; i++) {
      patterns[i] = Pattern.compile(regexes[i]);
    }

    Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tokenizer.setReader(new StringReader(input));
    TokenStream ts = new PatternCaptureGroupTokenFilter(tokenizer, preserveOriginal, patterns);
    assertTokenStreamContents(ts, tokens, startOffsets, endOffsets, positions);
  }

  /** Throws if a named expectation array does not have one entry per expected token. */
  private static void requireSameLength(String name, int actual, int expected) {
    if (actual != expected) {
      throw new IllegalArgumentException(
          name + " has " + actual + " entries but " + expected + " tokens are expected");
    }
  }
}