PreAnalyzedFieldTest.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.schema;

import java.util.Collections;
import java.util.HashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.junit.BeforeClass;
import org.junit.Test;

public class PreAnalyzedFieldTest extends SolrTestCaseJ4 {
  
  private static final String[] valid = {
    "1 one two three",                       // simple parsing
    "1  one  two   three ",                  // spurious spaces
    "1 one,s=123,e=128,i=22  two three,s=20,e=22,y=foobar",    // attribs
    "1 \\ one\\ \\,,i=22,a=\\, two\\=\n\r\t\\n,\\ =\\   \\", // escape madness
    "1 ,i=22 ,i=33,s=2,e=20 , ", // empty token text, non-empty attribs
    "1 =This is the stored part with \\= \n \\n \t \\t escapes.=one two three  \u0001ąćęłńóśźż", // stored plus token stream
    "1 ==", // empty stored, no token stream
    "1 =this is a test.=", // stored + empty token stream
    "1 one,p=deadbeef two,p=0123456789abcdef three" // payloads
  };
  
  private static final String[] validParsed = {
    "1 one,s=0,e=3 two,s=4,e=7 three,s=8,e=13",
    "1 one,s=1,e=4 two,s=6,e=9 three,s=12,e=17",
    "1 one,i=22,s=123,e=128,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar",
    "1 \\ one\\ \\,,i=22,s=0,e=6 two\\=\\n\\r\\t\\n,i=1,s=7,e=15 \\\\,i=1,s=17,e=18",
    "1 i=22,s=0,e=0 i=33,s=2,e=20 i=1,s=2,e=2",
    "1 =This is the stored part with = \n \\n \t \\t escapes.=one,s=0,e=3 two,s=4,e=7 three,s=8,e=13 \u0001ąćęłńóśźż,s=15,e=25",
    "1 ==",
    "1 =this is a test.=",
    "1 one,p=deadbeef,s=0,e=3 two,p=0123456789abcdef,s=4,e=7 three,s=8,e=13"
  };

  private static final String[] invalidSimple = {
    "one two three", // missing version #
    "2 one two three", // invalid version #
    "1 o,ne two", // missing escape
    "1 one t=wo", // missing escape
    "1 one,, two", // missing attribs, unescaped comma
    "1 one,s ",   // missing attrib value
    "1 one,s= val", // missing attrib value, unescaped space
    "1 one,s=,val", // unescaped comma
    "1 =", // unescaped equals
    "1 =stored ", // unterminated stored
    "1 ===" // empty stored (ok), but unescaped = in token stream
  };

  private static final String validJson
      = json("{'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}");

  private static final String[] invalidJson = {
      json("'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]"),    // missing enclosing object
      json("{'str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"),          // missing version #
      json("{'v':'2','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"),  // invalid version #
      json("{'v':'1','str':'stored-value','tokens':[{}]}"),                             // single token no attribs
      json("{'v':'1','str':'stored-value','tokens':[{'t'}]}"),                          // missing attrib value
  };

  SchemaField field = null;
  int props = 
    FieldProperties.INDEXED | FieldProperties.STORED;
  
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-minimal.xml","schema-preanalyzed.xml");
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    field = new SchemaField("content", new TextField(), props, null);
  }
  
  @Test
  public void testValidSimple() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    PreAnalyzedParser parser = new SimplePreAnalyzedParser();
    for (int i = 0; i < valid.length; i++) {
      String s = valid[i];
      try {
        Field f = (Field)paf.fromString(field, s);
        //System.out.println(" - toString: '" + sb.toString() + "'");
        assertEquals(validParsed[i], parser.toFormattedString(f));
      } catch (Exception e) {
        e.printStackTrace();
        fail("Should pass: '" + s + "', exception: " + e);
      }
    }
  }

  private String addTwoDocs(int firstId, String field) {
    return "<add>\n"
        + doc("id", Integer.toString(firstId), field,
              json("{'v':'1','str':'document one','tokens':[{'t':'one'},{'t':'two'},{'t':'three','i':100}]}"))
        + doc("id", Integer.toString(firstId + 1), field,
              json("{'v':'1','str':'document two','tokens':[{'t':'eleven'},{'t':'twelve'},{'t':'thirteen','i':110}]}"))
        + "</add>\n";
  }

  @Test
  public void testIndexAndQueryNoSchemaAnalyzer() throws Exception {
    assertU(addTwoDocs(1, "pre_no_analyzer"));
    assertU(commit());
    assertQ(req("q", "id:(1 2)", "sort", "id asc")
        ,"//result[@numFound='2']"
        ,"//result/doc[1]/str[@name='id'][.='1']"
        ,"//result/doc[1]/str[@name='pre_no_analyzer'][.='document one']"
        ,"//result/doc[2]/str[@name='id'][.='2']"
        ,"//result/doc[2]/str[@name='pre_no_analyzer'][.='document two']"
    );
    assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'two'}]}")
        ,"//result[@numFound='1']"
    );
    assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'eleven'},{'t':'twelve'}]}")
        ,"//result[@numFound='1']"
    );
  }

  @Test
  public void testIndexAndQueryWithSchemaAnalyzer() {
    assertU(addTwoDocs(3, "pre_with_analyzer"));
    assertU(commit());
    assertQ(req("q", "id:(3 4)", "sort", "id asc")
        ,"//result[@numFound='2']"
        ,"//result/doc[1]/str[@name='id'][.='3']"
        ,"//result/doc[1]/str[@name='pre_with_analyzer'][.='document one']"
        ,"//result/doc[2]/str[@name='id'][.='4']"
        ,"//result/doc[2]/str[@name='pre_with_analyzer'][.='document two']"
    );
    assertQ(req("q", "pre_with_analyzer:(+two +three)"), "//result[@numFound='1']");
    assertQ(req("q", "pre_with_analyzer:(+eleven +twelve)"), "//result[@numFound='1']");
  }

  @Test
  public void testIndexAndQueryWithSchemaQueryAnalyzer() {
    assertU(addTwoDocs(5, "pre_with_query_analyzer"));
    assertU(commit());
    assertQ(req("q", "id:(5 6)", "sort", "id asc")
        ,"//result[@numFound='2']"
        ,"//result/doc[1]/str[@name='id'][.='5']"
        ,"//result/doc[1]/str[@name='pre_with_query_analyzer'][.='document one']"
        ,"//result/doc[2]/str[@name='id'][.='6']"
        ,"//result/doc[2]/str[@name='pre_with_query_analyzer'][.='document two']"
    );
    assertQ(req("q", "pre_with_query_analyzer:one,two"), "//result[@numFound='1']");
    assertQ(req("q", "pre_with_query_analyzer:eleven,twelve"), "//result[@numFound='1']");
  }

  @Test
  public void testInvalidSimple() {
    PreAnalyzedField paf = new PreAnalyzedField();
    paf.init(h.getCore().getLatestSchema(), Collections.<String,String>emptyMap());
    for (String s : invalidSimple) {
      try {
        paf.fromString(field, s);
        fail("should fail: '" + s + "'");
      } catch (Exception e) {
        //
      }
    }
  }

  public void testInvalidJson() throws Exception {
    PreAnalyzedField paf = new PreAnalyzedField();
    paf.init(h.getCore().getLatestSchema(), Collections.emptyMap());
    Analyzer preAnalyzer = paf.getIndexAnalyzer();
    for (String s: invalidJson) {
      TokenStream stream = null;
      try {
        stream = preAnalyzer.tokenStream("dummy", s);
        stream.reset(); // exception should be triggered here.
        fail("should fail: '" + s + "'");
      } catch (Exception e) {
        // expected
      } finally {
        if (stream != null) {
          stream.close();
        }
      }
    }
    // make sure the analyzer can now handle properly formatted input
    TokenStream stream = preAnalyzer.tokenStream("dummy", validJson);
    CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      assertFalse("zero-length token", termAttr.length() == 0);
    }
    stream.end();
    stream.close();
  }
  
  // "1 =test ąćęłńóśźż \u0001=one,i=22,s=123,e=128,p=deadbeef,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar"
  
  private static final String jsonValid = "{\"v\":\"1\",\"str\":\"test ąćęłńóśźż\",\"tokens\":[" +
      "{\"e\":128,\"i\":22,\"p\":\"DQ4KDQsODg8=\",\"s\":123,\"t\":\"one\",\"y\":\"word\"}," +
      "{\"e\":8,\"i\":1,\"s\":5,\"t\":\"two\",\"y\":\"word\"}," +
      "{\"e\":22,\"i\":1,\"s\":20,\"t\":\"three\",\"y\":\"foobar\"}" +
      "]}";
  
  @Test
  public void testParsers() {
    PreAnalyzedField paf = new PreAnalyzedField();
    // use Simple format
    HashMap<String,String> args = new HashMap<>();
    args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      Field f = (Field)paf.fromString(field, valid[0]);
    } catch (Exception e) {
      fail("Should pass: '" + valid[0] + "', exception: " + e);
    }
    // use JSON format
    args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName());
    paf.init(h.getCore().getLatestSchema(), args);
    try {
      Field f = (Field)paf.fromString(field, valid[0]);
      fail("Should fail JSON parsing: '" + valid[0] + "'");
    } catch (Exception e) {
    }
    byte[] deadbeef = new byte[]{(byte)0xd, (byte)0xe, (byte)0xa, (byte)0xd, (byte)0xb, (byte)0xe, (byte)0xe, (byte)0xf};
    PreAnalyzedParser parser = new JsonPreAnalyzedParser();
    try {
      Field f = (Field)paf.fromString(field, jsonValid);
      assertEquals(jsonValid, parser.toFormattedString(f));
    } catch (Exception e) {
      fail("Should pass: '" + jsonValid + "', exception: " + e);
    }
  }
}