/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.schema;

import java.util.Collections;
import java.util.HashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for {@link PreAnalyzedField}: parsing of the "simple" and JSON
 * pre-analyzed formats, and indexing/querying of pre-analyzed field types.
 */
public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {

  private static final String[] valid = {
    "1 one two three", // simple parsing
    "1  one  two   three ", // spurious spaces
    "1 one,s=123,e=128,i=22  two three,s=20,e=22,y=foobar", // attribs
    "1 \\ one\\ \\,,i=22,a=\\, two\\=\n\r\t\\n,\\ =\\  \\", // escape madness
    "1 ,i=22 ,i=33,s=2,e=20 , ", // empty token text, non-empty attribs
    "1 =This is the stored part with \\= \n \\n \t \\t escapes.=one two three  \u0001ąćęłńóśźż", // stored plus token stream
    "1 ==", // empty stored, no token stream
    "1 =this is a test.=", // stored + empty token stream
    "1 one,p=deadbeef two,p=0123456789abcdef three" // payloads
  };

  // expected output of SimplePreAnalyzedParser.toFormattedString(..) for the
  // corresponding entries in 'valid' (offsets are computed during parsing)
  private static final String[] validParsed = {
    "1 one,s=0,e=3 two,s=4,e=7 three,s=8,e=13",
    "1 one,s=1,e=4 two,s=6,e=9 three,s=12,e=17",
    "1 one,i=22,s=123,e=128,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar",
    "1 \\ one\\ \\,,i=22,s=0,e=6 two\\=\\n\\r\\t\\n,i=1,s=7,e=15 \\\\,i=1,s=17,e=18",
    "1 i=22,s=0,e=0 i=33,s=2,e=20 i=1,s=2,e=2",
    "1 =This is the stored part with = \n \\n \t \\t escapes.=one,s=0,e=3 two,s=4,e=7 three,s=8,e=13 \u0001ąćęłńóśźż,s=15,e=25",
    "1 ==",
    "1 =this is a test.=",
    "1 one,p=deadbeef,s=0,e=3 two,p=0123456789abcdef,s=4,e=7 three,s=8,e=13"
  };

  private static final String[] invalidSimple = {
    "one two three", // missing version #
    "2 one two three", // invalid version #
    "1 o,ne two", // missing escape
    "1 one t=wo", // missing escape
    "1 one,, two", // missing attribs, unescaped comma
    "1 one,s ", // missing attrib value
    "1 one,s= val", // missing attrib value, unescaped space
    "1 one,s=,val", // unescaped comma
    "1 =", // unescaped equals
    "1 =stored ", // unterminated stored
    "1 ===" // empty stored (ok), but unescaped = in token stream
  };

  private static final String validJson =
      json("{'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}");

  private static final String[] invalidJson = {
    json("'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]"), // missing enclosing object
    json("{'str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"), // missing version #
    json("{'v':'2','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"), // invalid version #
    json("{'v':'1','str':'stored-value','tokens':[{}]}"), // single token, no attribs
json("{'v':'1','str':'stored-value','tokens':[{'t'}]}"), // missing attrib value }; SchemaField field = null; int props = FieldProperties.INDEXED | FieldProperties.STORED; @BeforeClass public static void beforeClass() throws Exception { initCore("solrconfig-minimal.xml","schema-preanalyzed.xml"); } @Override public void setUp() throws Exception { super.setUp(); field = new SchemaField("content", new TextField(), props, null); } @Test public void testValidSimple() { PreAnalyzedField paf = new PreAnalyzedField(); // use Simple format HashMap<String,String> args = new HashMap<>(); args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName()); paf.init(h.getCore().getLatestSchema(), args); PreAnalyzedParser parser = new SimplePreAnalyzedParser(); for (int i = 0; i < valid.length; i++) { String s = valid[i]; try { Field f = (Field)paf.fromString(field, s); //System.out.println(" - toString: '" + sb.toString() + "'"); assertEquals(validParsed[i], parser.toFormattedString(f)); } catch (Exception e) { e.printStackTrace(); fail("Should pass: '" + s + "', exception: " + e); } } } private String addTwoDocs(int firstId, String field) { return "<add>\n" + doc("id", Integer.toString(firstId), field, json("{'v':'1','str':'document one','tokens':[{'t':'one'},{'t':'two'},{'t':'three','i':100}]}")) + doc("id", Integer.toString(firstId + 1), field, json("{'v':'1','str':'document two','tokens':[{'t':'eleven'},{'t':'twelve'},{'t':'thirteen','i':110}]}")) + "</add>\n"; } @Test public void testIndexAndQueryNoSchemaAnalyzer() throws Exception { assertU(addTwoDocs(1, "pre_no_analyzer")); assertU(commit()); assertQ(req("q", "id:(1 2)", "sort", "id asc") ,"//result[@numFound='2']" ,"//result/doc[1]/str[@name='id'][.='1']" ,"//result/doc[1]/str[@name='pre_no_analyzer'][.='document one']" ,"//result/doc[2]/str[@name='id'][.='2']" ,"//result/doc[2]/str[@name='pre_no_analyzer'][.='document two']" ); assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'two'}]}") ,"//result[@numFound='1']" ); assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'eleven'},{'t':'twelve'}]}") ,"//result[@numFound='1']" ); } @Test public void testIndexAndQueryWithSchemaAnalyzer() { assertU(addTwoDocs(3, "pre_with_analyzer")); assertU(commit()); assertQ(req("q", "id:(3 4)", "sort", "id asc") ,"//result[@numFound='2']" ,"//result/doc[1]/str[@name='id'][.='3']" ,"//result/doc[1]/str[@name='pre_with_analyzer'][.='document one']" ,"//result/doc[2]/str[@name='id'][.='4']" ,"//result/doc[2]/str[@name='pre_with_analyzer'][.='document two']" ); assertQ(req("q", "pre_with_analyzer:(+two +three)"), "//result[@numFound='1']"); assertQ(req("q", "pre_with_analyzer:(+eleven +twelve)"), "//result[@numFound='1']"); } @Test public void testIndexAndQueryWithSchemaQueryAnalyzer() { assertU(addTwoDocs(5, "pre_with_query_analyzer")); assertU(commit()); assertQ(req("q", "id:(5 6)", "sort", "id asc") ,"//result[@numFound='2']" ,"//result/doc[1]/str[@name='id'][.='5']" ,"//result/doc[1]/str[@name='pre_with_query_analyzer'][.='document one']" ,"//result/doc[2]/str[@name='id'][.='6']" ,"//result/doc[2]/str[@name='pre_with_query_analyzer'][.='document two']" ); assertQ(req("q", "pre_with_query_analyzer:one,two"), "//result[@numFound='1']"); assertQ(req("q", "pre_with_query_analyzer:eleven,twelve"), "//result[@numFound='1']"); } @Test public void testInvalidSimple() { PreAnalyzedField paf = new PreAnalyzedField(); paf.init(h.getCore().getLatestSchema(), Collections.<String,String>emptyMap()); for (String s : 
      try {
        paf.fromString(field, s);
        fail("should fail: '" + s + "'");
      } catch (Exception e) {
        // expected
      }
    }
  }

  @Test
  public void testInvalidJson() throws Exception {
    PreAnalyzedField paf = new PreAnalyzedField();
    paf.init(h.getCore().getLatestSchema(), Collections.emptyMap());
    Analyzer preAnalyzer = paf.getIndexAnalyzer();
    for (String s : invalidJson) {
      TokenStream stream = null;
      try {
        stream = preAnalyzer.tokenStream("dummy", s);
        stream.reset(); // exception should be triggered here
        fail("should fail: '" + s + "'");
      } catch (Exception e) {
        // expected
      } finally {
        if (stream != null) {
          stream.close();
        }
      }
    }
    // make sure the analyzer can still handle properly formatted input
    TokenStream stream = preAnalyzer.tokenStream("dummy", validJson);
    CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      assertFalse("zero-length token", termAttr.length() == 0);
    }
    stream.end();
    stream.close();
  }

  // simple-format equivalent:
  // "1 =test ąćęłńóśźż \u0001=one,i=22,s=123,e=128,p=deadbeef,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar"
  private static final String jsonValid = "{\"v\":\"1\",\"str\":\"test ąćęłńóśźż\",\"tokens\":["
      + "{\"e\":128,\"i\":22,\"p\":\"DQ4KDQsODg8=\",\"s\":123,\"t\":\"one\",\"y\":\"word\"},"
      + "{\"e\":8,\"i\":1,\"s\":5,\"t\":\"two\",\"y\":\"word\"},"
      + "{\"e\":22,\"i\":1,\"s\":20,\"t\":\"three\",\"y\":\"foobar\"}"
      + "]}";

  @Test
  public void testParsers() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      Field f = (Field)paf.fromString(field, valid[0]);
    } catch (Exception e) {
      fail("Should pass: '" + valid[0] + "', exception: " + e);
    }
    // use JSON format - simple-format input must now be rejected
    args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      Field f = (Field)paf.fromString(field, valid[0]);
      fail("Should fail JSON parsing: '" + valid[0] + "'");
    } catch (Exception e) {
      // expected
    }
    // the bytes behind the base64 payload "DQ4KDQsODg8=" in jsonValid
    byte[] deadbeef = new byte[]{(byte)0xd, (byte)0xe, (byte)0xa, (byte)0xd, (byte)0xb, (byte)0xe, (byte)0xe, (byte)0xf};
    PreAnalyzedParser parser = new JsonPreAnalyzedParser();
    try {
      Field f = (Field)paf.fromString(field, jsonValid);
      assertEquals(jsonValid, parser.toFormattedString(f));
    } catch (Exception e) {
      fail("Should pass: '" + jsonValid + "', exception: " + e);
    }
  }
}