PreAnalyzedFieldTest.java example

Explorer
heliosearch-master
- lucene
- solr
package org.apache.solr.schema;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Collections;
import java.util.HashMap;

import org.apache.lucene.document.Field;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests for {@link PreAnalyzedField}: feeds pre-analyzed field values through
 * {@code fromString} using the {@link SimplePreAnalyzedParser} and
 * {@link JsonPreAnalyzedParser} formats, and checks the result by serializing it
 * back with {@link PreAnalyzedParser#toFormattedString}.
 */
public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {
  
  /** Inputs the Simple format must accept; parallel to {@link #validParsed}. */
  private static final String[] valid = {
    "1 one two three",                       // simple parsing
    "1  one  two   three ",                  // spurious spaces
    "1 one,s=123,e=128,i=22  two three,s=20,e=22,y=foobar",    // attribs
    "1 \\ one\\ \\,,i=22,a=\\, two\\=\n\r\t\\n,\\ =\\   \\", // escape madness
    "1 ,i=22 ,i=33,s=2,e=20 , ", // empty token text, non-empty attribs
    "1 =This is the stored part with \\= \n \\n \t \\t escapes.=one two three  \u0001ąćęłńóśźż", // stored plus token stream
    "1 ==", // empty stored, no token stream
    "1 =this is a test.=", // stored + empty token stream
    "1 one,p=deadbeef two,p=0123456789abcdef three" // payloads
  };
  
  /**
   * Expected Simple-format serialization of the field produced from the entry of
   * {@link #valid} at the same index.
   */
  private static final String[] validParsed = {
    "1 one,s=0,e=3 two,s=4,e=7 three,s=8,e=13",
    "1 one,s=1,e=4 two,s=6,e=9 three,s=12,e=17",
    "1 one,i=22,s=123,e=128,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar",
    "1 \\ one\\ \\,,i=22,s=0,e=6 two\\=\\n\\r\\t\\n,i=1,s=7,e=15 \\\\,i=1,s=17,e=18",
    "1 i=22,s=0,e=0 i=33,s=2,e=20 i=1,s=2,e=2",
    "1 =This is the stored part with = \n \\n \t \\t escapes.=one,s=0,e=3 two,s=4,e=7 three,s=8,e=13 \u0001ąćęłńóśźż,s=15,e=25",
    "1 ==",
    "1 =this is a test.=",
    "1 one,p=deadbeef,s=0,e=3 two,p=0123456789abcdef,s=4,e=7 three,s=8,e=13"
  };

  /** Inputs for which {@code fromString} is expected to throw. */
  private static final String[] invalid = {
    "one two three", // missing version #
    "2 one two three", // invalid version #
    "1 o,ne two", // missing escape
    "1 one t=wo", // missing escape
    "1 one,, two", // missing attribs, unescaped comma
    "1 one,s ",   // missing attrib value
    "1 one,s= val", // missing attrib value, unescaped space
    "1 one,s=,val", // unescaped comma
    "1 =", // unescaped equals
    "1 =stored ", // unterminated stored
    "1 ===" // empty stored (ok), but unescaped = in token stream
  };
  
  /** Schema field handed to {@code fromString}; re-created in {@link #setUp()} before each test. */
  SchemaField field = null;
  /** Property flags used when constructing {@link #field}: indexed and stored. */
  int props = 
    FieldProperties.INDEXED | FieldProperties.STORED;
  
  /** Initializes the Solr test core from {@code solrconfig.xml} and {@code schema.xml}. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml","schema.xml");
  }

  /** Builds a fresh "content" {@link SchemaField} for every test. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    field = new SchemaField("content", new TextField(), props, null);
  }
  
  /**
   * Parses every entry of {@link #valid} with the Simple format and checks that
   * serializing the resulting field yields the matching entry of
   * {@link #validParsed}.
   */
  @Test
  public void testValidSimple() {
    // Guard the parallel-array indexing below so that a fixture mismatch is
    // reported as such rather than as a parsing failure.
    assertEquals("valid and validParsed must have the same length",
        valid.length, validParsed.length);
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    PreAnalyzedParser parser = new SimplePreAnalyzedParser();
    for (int i = 0; i < valid.length; i++) {
      String s = valid[i];
      try {
        Field f = (Field)paf.fromString(field, s, 1.0f);
        assertEquals(validParsed[i], parser.toFormattedString(f));
      } catch (Exception e) {
        // Attach the cause so the original stack trace shows up in the test report.
        throw new AssertionError("Should pass: '" + s + "', exception: " + e, e);
      }
    }
  }
  
  /**
   * Checks that every entry of {@link #invalid} is rejected by {@code fromString}.
   */
  @Test
  public void testInvalidSimple() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // NOTE(review): the empty argument map leaves the parser choice to
    // PreAnalyzedField's default; confirm that default is the Simple format this
    // test's name and fixtures refer to.
    paf.init(h.getCore().getLatestSchema(), Collections.<String,String>emptyMap());
    for (String s : invalid) {
      try {
        paf.fromString(field, s, 1.0f);
        fail("should fail: '" + s + "'");
      } catch (Exception expected) {
        // expected: the input is malformed, so parsing must throw
      }
    }
  }
  
  // "1 =test ąćęłńóśźż \u0001=one,i=22,s=123,e=128,p=deadbeef,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar"
  
  /**
   * JSON-format document expected to survive a parse / serialize round trip
   * unchanged. The {@code "p"} payload of the first token is the Base64 encoding
   * of the bytes 0x0d 0x0e 0x0a 0x0d 0x0b 0x0e 0x0e 0x0f.
   */
  private static final String jsonValid = "{\"v\":\"1\",\"str\":\"test ąćęłńóśźż\",\"tokens\":[" +
      "{\"e\":128,\"i\":22,\"p\":\"DQ4KDQsODg8=\",\"s\":123,\"t\":\"one\",\"y\":\"word\"}," +
      "{\"e\":8,\"i\":1,\"s\":5,\"t\":\"two\",\"y\":\"word\"}," +
      "{\"e\":22,\"i\":1,\"s\":20,\"t\":\"three\",\"y\":\"foobar\"}" +
      "]}";
  
  /**
   * Switches one {@link PreAnalyzedField} between parser implementations: the
   * Simple format must accept {@code valid[0]}, the JSON format must reject that
   * same input, and the JSON format must round-trip {@link #jsonValid}.
   */
  @Test
  public void testParsers() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      paf.fromString(field, valid[0], 1.0f);
    } catch (Exception e) {
      throw new AssertionError("Should pass: '" + valid[0] + "', exception: " + e, e);
    }
    // use JSON format
    args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      paf.fromString(field, valid[0], 1.0f);
      fail("Should fail JSON parsing: '" + valid[0] + "'");
    } catch (Exception expected) {
      // expected: the Simple-format input must be rejected by the JSON parser
    }
    PreAnalyzedParser parser = new JsonPreAnalyzedParser();
    try {
      Field f = (Field)paf.fromString(field, jsonValid, 1.0f);
      assertEquals(jsonValid, parser.toFormattedString(f));
    } catch (Exception e) {
      throw new AssertionError("Should pass: '" + jsonValid + "', exception: " + e, e);
    }
  }
}