FieldAnalysisRequestHandler.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler;

import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.AnalysisParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.commons.io.IOUtils;

import java.io.Reader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;

/**
 * Provides the ability to specify multiple field types and field names in the same request. Expected parameters:
 * <table border="1" summary="table of parameters">
 * <tr>
 * <th align="left">Name</th>
 * <th align="left">Type</th>
 * <th align="left">required</th>
 * <th align="left">Description</th>
 * <th align="left">Multi-valued</th>
 * </tr>
 * <tr>
 * <td>analysis.fieldname</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text will be analyzed based on the type of this field name.</td>
 * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified fields</td>
 * </tr>
 * <tr>
 * <td>analysis.fieldtype</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text will be analyzed based on the specified type</td>
 * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified field types</td>
 * </tr>
 * <tr>
 * <td>analysis.fieldvalue</td>
 * <td>string</td>
 * <td>no</td>
 * <td>The text that will be analyzed. The analysis will mimic the index-time analysis.</td>
 * <td>No</td>
 * </tr>
 * <tr>
 * <td>{@code analysis.query} OR {@code q}</td>
 * <td>string</td>
 * <td>no</td>
 * <td>When present, the text that will be analyzed. The analysis will mimic the query-time analysis. Note that the
 * {@code analysis.query} parameter as precedes the {@code q} parameters.</td>
 * <td>No</td>
 * </tr>
 * <tr>
 * <td>analysis.showmatch</td>
 * <td>boolean</td>
 * <td>no</td>
 * <td>When set to {@code true} and when query analysis is performed, the produced tokens of the field value
 * analysis will be marked as "matched" for every token that is produces by the query analysis</td>
 * <td>No</td>
 * </tr>
 * </table>
 * <p>Note that if neither analysis.fieldname and analysis.fieldtype is specified, then the default search field's
 * analyzer is used.</p>
 * <p>Note that if one of analysis.value or analysis.query or q must be specified</p>
 *
 * @since solr 1.4 
 */
public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {

  /**
   * {@inheritDoc}
   */
  @Override
  protected NamedList doAnalysis(SolrQueryRequest req) throws Exception {
    FieldAnalysisRequest analysisRequest = resolveAnalysisRequest(req);
    IndexSchema indexSchema = req.getSchema();
    return handleAnalysisRequest(analysisRequest, indexSchema);
  }

  @Override
  public String getDescription() {
    return "Provide a breakdown of the analysis process of field/query text";
  }

  // ================================================= Helper methods ================================================

  /**
   * Resolves the AnalysisRequest based on the parameters in the given SolrParams.
   *
   * @param req the request
   *
   * @return AnalysisRequest containing all the information about what needs to be analyzed, and using what
   *         fields/types
   */
  FieldAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) throws SolrException {
    SolrParams solrParams = req.getParams();
    FieldAnalysisRequest analysisRequest = new FieldAnalysisRequest();

    boolean useDefaultSearchField = true;
    if (solrParams.get(AnalysisParams.FIELD_TYPE) != null) {
      analysisRequest.setFieldTypes(Arrays.asList(solrParams.get(AnalysisParams.FIELD_TYPE).split(",")));
      useDefaultSearchField = false;
    }
    if (solrParams.get(AnalysisParams.FIELD_NAME) != null) {
      analysisRequest.setFieldNames(Arrays.asList(solrParams.get(AnalysisParams.FIELD_NAME).split(",")));
      useDefaultSearchField = false;
    }
    if (useDefaultSearchField) {
      if (solrParams.get(CommonParams.DF) != null) {
        analysisRequest.addFieldName(solrParams.get(CommonParams.DF));
      } else {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Field analysis request must contain one of analysis.fieldtype, analysis.fieldname or df.");
      }
    }
    analysisRequest.setQuery(solrParams.get(AnalysisParams.QUERY, solrParams.get(CommonParams.Q)));

    String value = solrParams.get(AnalysisParams.FIELD_VALUE);
    if (analysisRequest.getQuery() == null && value == null)  {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "One of analysis.fieldvalue, q, or analysis.query parameters must be specified");
    }

    Iterable<ContentStream> streams = req.getContentStreams();
    if (streams != null) {
      // NOTE: Only the first content stream is currently processed
      for (ContentStream stream : streams) {
        Reader reader = null;
        try {
          reader = stream.getReader();
          value = IOUtils.toString(reader);
        } catch (IOException e) {
          // do nothing, leave value set to the request parameter
        }
        finally {
          IOUtils.closeQuietly(reader);
        }
        break;
      }
    }

    analysisRequest.setFieldValue(value);
    analysisRequest.setShowMatch(solrParams.getBool(AnalysisParams.SHOW_MATCH, false));
    return analysisRequest;
  }

  /**
   * Handles the resolved analysis request and returns the analysis breakdown response as a named list.
   *
   * @param request The request to handle.
   * @param schema  The index schema.
   *
   * @return The analysis breakdown as a named list.
   */
  protected NamedList<NamedList> handleAnalysisRequest(FieldAnalysisRequest request, IndexSchema schema) {
    NamedList<NamedList> analysisResults = new SimpleOrderedMap<>();

    NamedList<NamedList> fieldTypeAnalysisResults = new SimpleOrderedMap<>();
    if (request.getFieldTypes() != null)  {
      for (String fieldTypeName : request.getFieldTypes()) {
        FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
        fieldTypeAnalysisResults.add(fieldTypeName, analyzeValues(request, fieldType, null));
      }
    }

    NamedList<NamedList> fieldNameAnalysisResults = new SimpleOrderedMap<>();
    if (request.getFieldNames() != null)  {
      for (String fieldName : request.getFieldNames()) {
        FieldType fieldType = schema.getFieldType(fieldName);
        fieldNameAnalysisResults.add(fieldName, analyzeValues(request, fieldType, fieldName));
      }
    }

    analysisResults.add("field_types", fieldTypeAnalysisResults);
    analysisResults.add("field_names", fieldNameAnalysisResults);

    return analysisResults;
  }

  /**
   * Analyzes the index value (if it exists) and the query value (if it exists) in the given AnalysisRequest, using
   * the Analyzers of the given field type.
   *
   * @param analysisRequest AnalysisRequest from where the index and query values will be taken
   * @param fieldType       Type of field whose analyzers will be used
   * @param fieldName       Name of the field to be analyzed.  Can be {@code null}
   *
   * @return NamedList containing the tokens produced by the analyzers of the given field, separated into an index and
   *         a query group
   */ // package access for testing
  NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) {

    final String queryValue = analysisRequest.getQuery();
    final Set<BytesRef> termsToMatch = (queryValue != null && analysisRequest.isShowMatch())
      ? getQueryTokenSet(queryValue, fieldType.getQueryAnalyzer())
      : EMPTY_BYTES_SET;

    NamedList<NamedList> analyzeResults = new SimpleOrderedMap<>();
    if (analysisRequest.getFieldValue() != null) {
      AnalysisContext context = new AnalysisContext(fieldName, fieldType, fieldType.getIndexAnalyzer(), termsToMatch);
      NamedList analyzedTokens = analyzeValue(analysisRequest.getFieldValue(), context);
      analyzeResults.add("index", analyzedTokens);
    }
    if (analysisRequest.getQuery() != null) {
      AnalysisContext context = new AnalysisContext(fieldName, fieldType, fieldType.getQueryAnalyzer());
      NamedList analyzedTokens = analyzeValue(analysisRequest.getQuery(), context);
      analyzeResults.add("query", analyzedTokens);
    }

    return analyzeResults;
  }
}