/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.client.solrj.response;

import org.apache.solr.common.util.NamedList;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Common base class for analysis responses. It provides the shared logic that turns the raw named-list
 * structure of a response into {@link AnalysisPhase} and {@link TokenInfo} objects.
 *
 * @version $Id: AnalysisResponseBase.java 767412 2009-04-22 08:53:28Z shalin $
 * @since solr 1.4
 */
public class AnalysisResponseBase extends SolrResponseBase {

  /**
   * Converts a named list of analysis phases into a list of {@link AnalysisPhase} objects.
   * <p>
   * Every entry of the named list becomes one phase: the entry name is used as the phase's class name and
   * the entry value is cast to a list of named lists, one per token. Each token named list is converted by
   * {@link #buildTokenInfo(NamedList)}. Phases are returned in the iteration order of the named list. The
   * expected input has the form:
   * <pre>
   *  &lt;lst name="index"&gt;
   *    &lt;arr name="Tokenizer"&gt;
   *      &lt;lst&gt;
   *        &lt;str name="text"&gt;the_text&lt;/str&gt;
   *        &lt;str name="rawText"&gt;the_raw_text&lt;/str&gt; (optional)
   *        &lt;str name="type"&gt;the_type&lt;/str&gt;
   *        &lt;int name="start"&gt;1&lt;/int&gt;
   *        &lt;int name="end"&gt;3&lt;/int&gt;
   *        &lt;int name="position"&gt;1&lt;/int&gt;
   *        &lt;bool name="match"&gt;true | false&lt;/bool&gt; (optional)
   *      &lt;/lst&gt;
   *      ...
   *    &lt;/arr&gt;
   *    &lt;arr name="Filter1"&gt;
   *      ...
   *    &lt;/arr&gt;
   *    ...
   *  &lt;/lst&gt;
   * </pre>
   *
   * @param phaseNL The named list to parse; one entry per analysis phase.
   *
   * @return The analysis phases that were built, one per entry of {@code phaseNL}.
   */
  protected List<AnalysisPhase> buildPhases(NamedList<Object> phaseNL) {
    List<AnalysisPhase> result = new ArrayList<AnalysisPhase>(phaseNL.size());
    for (Map.Entry<String, Object> entry : phaseNL) {
      AnalysisPhase current = new AnalysisPhase(entry.getKey());
      // The value of a phase entry is the list of tokens that phase produced.
      for (NamedList tokenNL : (List<NamedList>) entry.getValue()) {
        current.addTokenInfo(buildTokenInfo(tokenNL));
      }
      result.add(current);
    }
    return result;
  }

  /**
   * Converts the named list describing a single token into a {@link TokenInfo}. The expected input has the
   * form:
   * <pre>
   *  &lt;lst&gt;
   *    &lt;str name="text"&gt;the_text&lt;/str&gt;
   *    &lt;str name="rawText"&gt;the_raw_text&lt;/str&gt; (optional)
   *    &lt;str name="type"&gt;the_type&lt;/str&gt;
   *    &lt;int name="start"&gt;1&lt;/int&gt;
   *    &lt;int name="end"&gt;3&lt;/int&gt;
   *    &lt;int name="position"&gt;1&lt;/int&gt;
   *    &lt;bool name="match"&gt;true | false&lt;/bool&gt; (optional)
   *  &lt;/lst&gt;
   * </pre>
   * The {@code start}, {@code end} and {@code position} values are unboxed to {@code int}, so they are
   * expected to be present. A missing {@code match} value is treated as {@code false}.
   *
   * @param tokenNL The named list holding the attributes of one token.
   *
   * @return The token info built from the given named list.
   */
  protected TokenInfo buildTokenInfo(NamedList tokenNL) {
    // Attributes are read (and cast) in a fixed order: text, rawText, type, start, end, position, match.
    final String tokenText = (String) tokenNL.get("text");
    final String tokenRawText = (String) tokenNL.get("rawText");
    final String tokenType = (String) tokenNL.get("type");
    final int startOffset = ((Integer) tokenNL.get("start")).intValue();
    final int endOffset = ((Integer) tokenNL.get("end")).intValue();
    final int streamPosition = ((Integer) tokenNL.get("position")).intValue();
    final Boolean matchFlag = (Boolean) tokenNL.get("match");

    // "match" is optional; an absent flag means "no match".
    final boolean matched = matchFlag != null && matchFlag.booleanValue();

    return new TokenInfo(tokenText, tokenRawText, tokenType, startOffset, endOffset, streamPosition, matched);
  }


  //================================================= Inner Classes ==================================================

  /**
   * A single phase of the analysis process. A phase keeps the name of the class that produced it together
   * with the tokens that were produced in it.
   */
  public static class AnalysisPhase {

    private final String className;
    private final List<TokenInfo> tokens = new ArrayList<TokenInfo>();

    AnalysisPhase(String className) {
      this.className = className;
    }

    /**
     * Returns the name of the class (analyzer, tokenizer, or filter) that produced the token stream of this
     * phase.
     *
     * @return The name of the class that produced the token stream of this phase.
     */
    public String getClassName() {
      return className;
    }

    /**
     * Returns the tokens making up the token stream produced in this phase. The returned list is the list
     * held by this phase, not a copy.
     *
     * @return The tokens making up the token stream produced in this phase.
     */
    public List<TokenInfo> getTokens() {
      return tokens;
    }

    // Appends a token to this phase; only used while the enclosing response is being parsed.
    private void addTokenInfo(TokenInfo tokenInfo) {
      tokens.add(tokenInfo);
    }

  }

  /**
   * An immutable holder for everything that is known about one token within an analysis phase.
   */
  public static class TokenInfo {

    private final String text;
    private final String rawText;
    private final String type;
    private final int start;
    private final int end;
    private final int position;
    private final boolean match;

    /**
     * Creates a new TokenInfo.
     *
     * @param text     The text of the token.
     * @param rawText  The raw text of the token. If the token is stored in the index in a special format
     *                 (e.g. dates or padded numbers) this argument should hold that value. If the token is
     *                 stored as is, this value should be {@code null}.
     * @param type     The type of the token (typically either {@code word} or {@code <ALPHANUM>}, though it
     *                 depends on the tokenizer/filter used).
     * @param start    The start position of the token in the original text it was extracted from.
     * @param end      The end position of the token in the original text it was extracted from.
     * @param position The position of the token within the token stream.
     * @param match    Indicates whether this token matches one of the query tokens.
     */
    TokenInfo(String text, String rawText, String type, int start, int end, int position, boolean match) {
      this.text = text;
      this.rawText = rawText;
      this.type = type;
      this.start = start;
      this.end = end;
      this.position = position;
      this.match = match;
    }

    /**
     * Returns the text of the token.
     *
     * @return The text of the token.
     */
    public String getText() {
      return text;
    }

    /**
     * Returns the raw text of the token. If the token is indexed in a special format (e.g. dates or padded
     * numbers), that format is returned as the raw text. Returns {@code null} if the token is indexed as is.
     *
     * @return The raw text of the token, or {@code null} if the token is indexed as is.
     */
    public String getRawText() {
      return rawText;
    }

    /**
     * Returns the type of the token. Typically this is {@code word} or {@code <ALPHANUM>}, but it ultimately
     * depends on the tokenizer and filters that are used.
     *
     * @return The type of the token.
     */
    public String getType() {
      return type;
    }

    /**
     * Returns the start position of this token within the text it was originally extracted from.
     *
     * @return The start position of this token within the original text.
     */
    public int getStart() {
      return start;
    }

    /**
     * Returns the end position of this token within the text it was originally extracted from.
     *
     * @return The end position of this token within the original text.
     */
    public int getEnd() {
      return end;
    }

    /**
     * Returns the position of this token within the produced token stream.
     *
     * @return The position of this token within the produced token stream.
     */
    public int getPosition() {
      return position;
    }

    /**
     * Tells whether this token matches one of the query tokens (if query analysis is performed).
     *
     * @return {@code true} if this token matches one of the query tokens, {@code false} otherwise.
     */
    public boolean isMatch() {
      return match;
    }

  }

}