/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler.component; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.params.TermVectorParams; import org.junit.BeforeClass; import org.junit.Test; /** * * **/ public class TermVectorComponentTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { initCore("solrconfig.xml","schema.xml"); assertU(adoc("id", "0", "test_posoffpaytv", "This is a title and another title", "test_posofftv", "This is a title and another title", "test_basictv", "This is a title and another title", "test_notv", "This is a title and another title", "test_postv", "This is a title and another title", "test_offtv", "This is a title and another title" )); assertU(adoc("id", "1", "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.", "test_posofftv", "The quick reb fox jumped over the lazy brown dogs.", "test_basictv", "The quick reb fox jumped over the lazy brown dogs.", "test_notv", "The quick reb fox jumped over the lazy brown dogs.", "test_postv", "The quick reb fox jumped over the lazy brown dogs.", "test_offtv", "The quick reb fox jumped over the lazy brown dogs." )); assertU(adoc("id", "2", "test_posoffpaytv", "This is a document", "test_posofftv", "This is a document", "test_basictv", "This is a document", "test_notv", "This is a document", "test_postv", "This is a document", "test_offtv", "This is a document" )); assertU(adoc("id", "3", "test_posoffpaytv", "another document", "test_posofftv", "another document", "test_basictv", "another document", "test_notv", "another document", "test_postv", "another document", "test_offtv", "another document" )); //bunch of docs that are variants on blue assertU(adoc("id", "4", "test_posoffpaytv", "blue", "test_posofftv", "blue", "test_basictv", "blue", "test_notv", "blue", "test_postv", "blue", "test_offtv", "blue" )); assertU(adoc("id", "5", "test_posoffpaytv", "blud", "test_posofftv", "blud", "test_basictv", "blud", "test_notv", "blud", "test_postv", "blud", "test_offtv", "blud" )); assertU(adoc("id", "6", "test_posoffpaytv", "boue", "test_posofftv", "boue", "test_basictv", "boue", "test_notv", "boue", "test_postv", "boue", "test_offtv", "boue" )); assertU(adoc("id", "7", "test_posoffpaytv", "glue", "test_posofftv", "glue", "test_basictv", "glue", "test_notv", "glue", "test_postv", "glue", "test_offtv", "glue" )); assertU(adoc("id", "8", "test_posoffpaytv", "blee", "test_posofftv", "blee", "test_basictv", "blee", "test_notv", "blee", "test_postv", "blee", "test_offtv", "blee" )); assertU(adoc("id", "9", "test_posoffpaytv", "blah", "test_posofftv", "blah", "test_basictv", "blah", "test_notv", "blah", "test_postv", "blah", "test_offtv", "blah" )); assertNull(h.validateUpdate(commit())); } static String tv = "tvrh"; @Test public void testBasics() throws Exception { assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // tv.fl diff from fl assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", "fl", "*,score", "tv.fl", "test_basictv,test_offtv", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // multi-valued tv.fl assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", "fl", "*,score", "tv.fl", "test_basictv", "tv.fl","test_offtv", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use fl glob assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", "fl", "*,score", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use fl, ignore things we can't handle assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", "fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); // re-use (multi-valued) fl, ignore things we can't handle assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", "fl", "score,test_basictv", "fl", "[docid],test_postv,val:sum(3,4)", TermVectorComponent.COMPONENT_NAME, "true", TermVectorParams.TF, "true") ,"/termVectors=={'0':{'uniqueKey':'0'," + " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," + " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}}" ); } @Test public void testOptions() throws Exception { assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true") ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" ); assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" , TermVectorParams.ALL, "true") ,"/termVectors/0/test_posofftv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" ); // test each combination at random final List<String> list = new ArrayList<>(); list.addAll(Arrays.asList("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true")); String[][] options = new String[][] { { TermVectorParams.TF, "'tf':1" }, { TermVectorParams.OFFSETS, "'offsets':{'start':20, 'end':27}" }, { TermVectorParams.POSITIONS, "'positions':{'position':5}" }, { TermVectorParams.DF, "'df':2" }, { TermVectorParams.TF_IDF, "'tf-idf':0.5" } }; StringBuilder expected = new StringBuilder("/termVectors/0/test_posofftv/anoth=={"); boolean first = true; for (int i = 0; i < options.length; i++) { final boolean use = random().nextBoolean(); if (use) { if (!first) { expected.append(", "); } first = false; expected.append(options[i][1]); } list.add(options[i][0]); list.add(use ? "true" : "false"); } expected.append("}"); assertJQ(req(list.toArray(new String[0])), expected.toString()); } @Test public void testPerField() throws Exception { assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" ,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true" ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv" ,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false" ,"f.test_posofftv." + TermVectorParams.POSITIONS, "false" ,"f.test_offtv." + TermVectorParams.OFFSETS, "false" ,"f.test_basictv." + TermVectorParams.DF, "false" ,"f.test_basictv." + TermVectorParams.TF, "false" ,"f.test_basictv." + TermVectorParams.TF_IDF, "false" ) ,"/termVectors/0/test_basictv=={'anoth':{},'titl':{}}" ,"/termVectors/0/test_postv/anoth=={'tf':1, 'positions':{'position':5}, 'df':2, 'tf-idf':0.5}" ,"/termVectors/0/test_offtv/anoth=={'tf':1, 'df':2, 'tf-idf':0.5}" ,"/termVectors/warnings=={ 'noTermVectors':['test_notv'], 'noPositions':['test_basictv', 'test_offtv'], 'noOffsets':['test_basictv', 'test_postv']}" ); } @Test public void testPayloads() throws Exception { // This field uses TokenOffsetPayloadTokenFilter, which // stuffs start (20) and end offset (27) into the // payload: assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true" , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true", TermVectorParams.PAYLOADS, "true") ,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}" ); } } /* * <field name="test_basictv" type="text" termVectors="true"/> <field name="test_notv" type="text" termVectors="false"/> <field name="test_postv" type="text" termVectors="true" termPositions="true"/> <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/> <field name="test_posofftv" type="text" termVectors="true" termPositions="true" termOffsets="true"/> * * */