package org.apache.solr.schema;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Collections;
import java.util.HashMap;

import org.apache.lucene.document.Field;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for {@link PreAnalyzedField}: round-tripping values through the
 * "simple" serialization format and the JSON format, and rejection of
 * malformed input.
 */
public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {

  /**
   * Inputs that the simple-format parser must accept.
   * NOTE: runs of multiple spaces inside these literals are significant —
   * the start/end offsets asserted in {@link #validParsed} depend on the
   * exact separator widths (e.g. entry 1 needs "1  one  two   three " to
   * produce s=1,e=4 / s=6,e=9 / s=12,e=17).
   */
  private static final String[] valid = {
    "1 one two three",            // simple parsing
    "1  one  two   three ",       // spurious spaces
    "1 one,s=123,e=128,i=22  two three,s=20,e=22,y=foobar", // attribs
    "1 \\ one\\ \\,,i=22,a=\\, two\\=\n\r\t\\n,\\ =\\  \\", // escape madness
    "1 ,i=22 ,i=33,s=2,e=20 , ",  // empty token text, non-empty attribs
    "1 =This is the stored part with \\= \n \\n \t \\t escapes.=one two three  \u0001ąćęłńóśźż", // stored plus token stream
    "1 ==",                       // empty stored, no token stream
    "1 =this is a test.=",        // stored + empty token stream
    "1 one,p=deadbeef two,p=0123456789abcdef three" // payloads
  };

  /**
   * Expected simple-format re-serialization of each entry in {@link #valid}:
   * the parser fills in computed offsets, default increments and default
   * token types where the input omitted them.
   */
  private static final String[] validParsed = {
    "1 one,s=0,e=3 two,s=4,e=7 three,s=8,e=13",
    "1 one,s=1,e=4 two,s=6,e=9 three,s=12,e=17",
    "1 one,i=22,s=123,e=128,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar",
    "1 \\ one\\ \\,,i=22,s=0,e=6 two\\=\\n\\r\\t\\n,i=1,s=7,e=15 \\\\,i=1,s=17,e=18",
    "1 i=22,s=0,e=0 i=33,s=2,e=20 i=1,s=2,e=2",
    "1 =This is the stored part with = \n \\n \t \\t escapes.=one,s=0,e=3 two,s=4,e=7 three,s=8,e=13 \u0001ąćęłńóśźż,s=15,e=25",
    "1 ==",
    "1 =this is a test.=",
    "1 one,p=deadbeef,s=0,e=3 two,p=0123456789abcdef,s=4,e=7 three,s=8,e=13"
  };

  /** Inputs that the simple-format parser must reject. */
  private static final String[] invalid = {
    "one two three",    // missing version #
    "2 one two three",  // invalid version #
    "1 o,ne two",       // missing escape
    "1 one t=wo",       // missing escape
    "1 one,, two",      // missing attribs, unescaped comma
    "1 one,s ",         // missing attrib value
    "1 one,s= val",     // missing attrib value, unescaped space
    "1 one,s=,val",     // unescaped comma
    "1 =",              // unescaped equals
    "1 =stored ",       // unterminated stored
    "1 ==="             // empty stored (ok), but unescaped = in token stream
  };

  SchemaField field = null;
  int props = FieldProperties.INDEXED | FieldProperties.STORED;

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml", "schema.xml");
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    // Fresh field per test; the concrete FieldType is irrelevant because the
    // PreAnalyzedField instance under test does the parsing itself.
    field = new SchemaField("content", new TextField(), props, null);
  }

  /** Every entry of {@code valid} parses and re-serializes to {@code validParsed}. */
  @Test
  public void testValidSimple() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    PreAnalyzedParser parser = new SimplePreAnalyzedParser();
    for (int i = 0; i < valid.length; i++) {
      String s = valid[i];
      try {
        Field f = (Field)paf.fromString(field, s, 1.0f);
        assertEquals(validParsed[i], parser.toFormattedString(f));
      } catch (Exception e) {
        fail("Should pass: '" + s + "', exception: " + e);
      }
    }
  }

  /** Every entry of {@code invalid} must be rejected with an exception. */
  @Test
  public void testInvalidSimple() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // default parser impl is the simple format
    paf.init(h.getCore().getLatestSchema(), Collections.<String,String>emptyMap());
    for (String s : invalid) {
      try {
        paf.fromString(field, s, 1.0f);
        fail("should fail: '" + s + "'");
      } catch (Exception expected) {
        // malformed input must be rejected
      }
    }
  }

  // JSON equivalent of:
  // "1 =test ąćęłńóśźż \u0001=one,i=22,s=123,e=128,p=deadbeef,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar"
  // The "p" value is the base64 encoding of the payload bytes
  // 0x0d,0x0e,0x0a,0x0d,0x0b,0x0e,0x0e,0x0f ("deadbeef" nibbles).
  private static final String jsonValid =
      "{\"v\":\"1\",\"str\":\"test ąćęłńóśźż\",\"tokens\":[" +
      "{\"e\":128,\"i\":22,\"p\":\"DQ4KDQsODg8=\",\"s\":123,\"t\":\"one\",\"y\":\"word\"}," +
      "{\"e\":8,\"i\":1,\"s\":5,\"t\":\"two\",\"y\":\"word\"}," +
      "{\"e\":22,\"i\":1,\"s\":20,\"t\":\"three\",\"y\":\"foobar\"}" +
      "]}";

  /**
   * Switching {@link PreAnalyzedField#PARSER_IMPL} selects the parser:
   * simple input must fail under the JSON parser, and JSON input must
   * round-trip through {@link JsonPreAnalyzedParser}.
   */
  @Test
  public void testParsers() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      paf.fromString(field, valid[0], 1.0f);
    } catch (Exception e) {
      fail("Should pass: '" + valid[0] + "', exception: " + e);
    }
    // use JSON format — simple-format input is not valid JSON
    args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      paf.fromString(field, valid[0], 1.0f);
      fail("Should fail JSON parsing: '" + valid[0] + "'");
    } catch (Exception expected) {
      // simple format must be rejected by the JSON parser
    }
    PreAnalyzedParser parser = new JsonPreAnalyzedParser();
    try {
      Field f = (Field)paf.fromString(field, jsonValid, 1.0f);
      assertEquals(jsonValid, parser.toFormattedString(f));
    } catch (Exception e) {
      fail("Should pass: '" + jsonValid + "', exception: " + e);
    }
  }
}