/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler; import org.apache.lucene.analysis.Token; import org.apache.solr.client.solrj.request.FieldAnalysisRequest; import org.apache.solr.common.params.AnalysisParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.ContentStream; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.commons.io.IOUtils; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import java.io.Reader; import java.io.IOException; /** * RequestHandler that provides much the same functionality as analysis.jsp. Provides the ability to specify multiple * field types and field names in the same request. Expected parameters: * <table border="1"> * <tr> * <th align="left">Name</th> * <th align="left">Type</th> * <th align="left">required</th> * <th align="left">Description</th> * <th align="left">Multi-valued</th> * </tr> * <tr> * <td>analysis.fieldname</td> * <td>string</td> * <td>no</td> * <td>When present, the text will be analyzed based on the type of this field name.</td> * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified fields</td> * </tr> * <tr> * <td>analysis.fieldtype</td> * <td>string</td> * <td>no</td> * <td>When present, the text will be analyzed based on the specified type</td> * <td>Yes, this parameter may hold a comma-separated list of values and the analysis will be performed for each of the specified field types</td> * </tr> * <tr> * <td>analysis.fieldvalue</td> * <td>string</td> * <td>yes</td> * <td>The text that will be analyzed. The analysis will mimic the index-time analysis.</td> * <td>No</td> * </tr> * <tr> * <td>{@code analysis.query} OR {@code q}</td> * <td>string</td> * <td>no</td> * <td>When present, the text that will be analyzed. The analysis will mimic the query-time analysis. Note that the * {@code analysis.query} parameter as precedes the {@code q} parameters.</td> * <td>No</td> * </tr> * <tr> * <td>analysis.showmatch</td> * <td>boolean</td> * <td>no</td> * <td>When set to {@code true} and when query analysis is performed, the produced tokens of the field value * analysis will be marked as "matched" for every token that is produces by the query analysis</td> * <td>No</td> * </tr> * </table> * <p>Note that if neither analysis.fieldname and analysis.fieldtype is specified, then the default search field's * analyzer is used.</p> * * @version $Id: FieldAnalysisRequestHandler.java 950008 2010-06-01 10:35:13Z rmuir $ * @since solr 1.4 */ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase { /** * {@inheritDoc} */ protected NamedList doAnalysis(SolrQueryRequest req) throws Exception { FieldAnalysisRequest analysisRequest = resolveAnalysisRequest(req); IndexSchema indexSchema = req.getCore().getSchema(); return handleAnalysisRequest(analysisRequest, indexSchema); } @Override public String getDescription() { return "Provide a breakdown of the analysis process of field/query text"; } @Override public String getVersion() { return "$Revision: 950008 $"; } @Override public String getSourceId() { return "$Id: FieldAnalysisRequestHandler.java 950008 2010-06-01 10:35:13Z rmuir $"; } @Override public String getSource() { return "$URL: https://svn.apache.org/repos/asf/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java $"; } // ================================================= Helper methods ================================================ /** * Resolves the AnalysisRequest based on the parameters in the given SolrParams. * * @param req the request * * @return AnalysisRequest containing all the information about what needs to be analyzed, and using what * fields/types */ FieldAnalysisRequest resolveAnalysisRequest(SolrQueryRequest req) { SolrParams solrParams = req.getParams(); FieldAnalysisRequest analysisRequest = new FieldAnalysisRequest(); boolean useDefaultSearchField = true; if (solrParams.get(AnalysisParams.FIELD_TYPE) != null) { analysisRequest.setFieldTypes(Arrays.asList(solrParams.get(AnalysisParams.FIELD_TYPE).split(","))); useDefaultSearchField = false; } if (solrParams.get(AnalysisParams.FIELD_NAME) != null) { analysisRequest.setFieldNames(Arrays.asList(solrParams.get(AnalysisParams.FIELD_NAME).split(","))); useDefaultSearchField = false; } if (useDefaultSearchField) { analysisRequest.addFieldName(req.getSchema().getDefaultSearchFieldName()); } analysisRequest.setQuery(solrParams.get(AnalysisParams.QUERY, solrParams.get(CommonParams.Q))); String value = solrParams.get(AnalysisParams.FIELD_VALUE); Iterable<ContentStream> streams = req.getContentStreams(); if (streams != null) { // NOTE: Only the first content stream is currently processed for (ContentStream stream : streams) { Reader reader = null; try { reader = stream.getReader(); value = IOUtils.toString(reader); } catch (IOException e) { // do nothing, leave value set to the request parameter } finally { IOUtils.closeQuietly(reader); } break; } } analysisRequest.setFieldValue(value); analysisRequest.setShowMatch(solrParams.getBool(AnalysisParams.SHOW_MATCH, false)); return analysisRequest; } /** * Handles the resolved analysis request and returns the analysis breakdown response as a named list. * * @param request The request to handle. * @param schema The index schema. * * @return The analysis breakdown as a named list. */ protected NamedList<NamedList> handleAnalysisRequest(FieldAnalysisRequest request, IndexSchema schema) { NamedList<NamedList> analysisResults = new SimpleOrderedMap<NamedList>(); NamedList<NamedList> fieldTypeAnalysisResults = new SimpleOrderedMap<NamedList>(); if (request.getFieldTypes() != null) { for (String fieldTypeName : request.getFieldTypes()) { FieldType fieldType = schema.getFieldTypes().get(fieldTypeName); fieldTypeAnalysisResults.add(fieldTypeName, analyzeValues(request, fieldType, null)); } } NamedList<NamedList> fieldNameAnalysisResults = new SimpleOrderedMap<NamedList>(); if (request.getFieldNames() != null) { for (String fieldName : request.getFieldNames()) { FieldType fieldType = schema.getFieldType(fieldName); fieldNameAnalysisResults.add(fieldName, analyzeValues(request, fieldType, fieldName)); } } analysisResults.add("field_types", fieldTypeAnalysisResults); analysisResults.add("field_names", fieldNameAnalysisResults); return analysisResults; } /** * Analyzes the index value (if it exists) and the query value (if it exists) in the given AnalysisRequest, using * the Analyzers of the given field type. * * @param analysisRequest AnalysisRequest from where the index and query values will be taken * @param fieldType Type of field whose analyzers will be used * @param fieldName Name of the field to be analyzed. Can be {@code null} * * @return NamedList containing the tokens produced by the analyzers of the given field, separated into an index and * a query group */ private NamedList<NamedList> analyzeValues(FieldAnalysisRequest analysisRequest, FieldType fieldType, String fieldName) { Set<String> termsToMatch = new HashSet<String>(); String queryValue = analysisRequest.getQuery(); if (queryValue != null && analysisRequest.isShowMatch()) { List<Token> tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer()); for (Token token : tokens) { termsToMatch.add(token.toString()); } } NamedList<NamedList> analyzeResults = new SimpleOrderedMap<NamedList>(); if (analysisRequest.getFieldValue() != null) { AnalysisContext context = new AnalysisContext(fieldName, fieldType, fieldType.getAnalyzer(), termsToMatch); NamedList analyzedTokens = analyzeValue(analysisRequest.getFieldValue(), context); analyzeResults.add("index", analyzedTokens); } if (analysisRequest.getQuery() != null) { AnalysisContext context = new AnalysisContext(fieldName, fieldType, fieldType.getQueryAnalyzer()); NamedList analyzedTokens = analyzeValue(analysisRequest.getQuery(), context); analyzeResults.add("query", analyzedTokens); } return analyzeResults; } }