// AnalysisResponseBase.java example
//
// Explorer
// solrcene-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.client.solrj.response;

import org.apache.solr.common.util.NamedList;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * A base class for all analysis responses.
 *
 * @version $Id: AnalysisResponseBase.java 767412 2009-04-22 08:53:28Z shalin $
 * @since solr 1.4
 */
public class AnalysisResponseBase extends SolrResponseBase {

  /**
   * Parses the given named list and builds a list of analysis phases from it. Every entry of the named list is one
   * phase: the entry name is used as the phase's class name and the entry value must be a list holding one named
   * list per token. Expects a named list of the form:
   * <pre>
   *  &lt;lst name="index"&gt;
   *      &lt;arr name="Tokenizer"&gt;
   *          &lt;lst&gt;
   *              &lt;str name="text"&gt;the_text&lt;/str&gt;
   *              &lt;str name="rawText"&gt;the_raw_text&lt;/str&gt; (optional)
   *              &lt;str name="type"&gt;the_type&lt;/str&gt;
   *              &lt;int name="start"&gt;1&lt;/int&gt;
   *              &lt;int name="end"&gt;3&lt;/int&gt;
   *              &lt;int name="position"&gt;1&lt;/int&gt;
   *              &lt;bool name="match"&gt;true | false&lt;/bool&gt; (optional)
   *          &lt;/lst&gt;
   *          ...
   *      &lt;/arr&gt;
   *      &lt;arr name="Filter1"&gt;
   *          &lt;lst&gt;
   *              &lt;str name="text"&gt;the_text&lt;/str&gt;
   *              &lt;str name="rawText"&gt;the_raw_text&lt;/str&gt; (optional)
   *              &lt;str name="type"&gt;the_type&lt;/str&gt;
   *              &lt;int name="start"&gt;1&lt;/int&gt;
   *              &lt;int name="end"&gt;3&lt;/int&gt;
   *              &lt;int name="position"&gt;1&lt;/int&gt;
   *              &lt;bool name="match"&gt;true | false&lt;/bool&gt; (optional)
   *          &lt;/lst&gt;
   *          ...
   *      &lt;/arr&gt;
   *      ...
   *  &lt;/lst&gt;
   * </pre>
   *
   * @param phaseNL The named list to parse.
   *
   * @return The built analysis phases list, in the iteration order of {@code phaseNL}.
   *
   * @throws ClassCastException if a phase value is not a list of named lists.
   */
  protected List<AnalysisPhase> buildPhases(NamedList<Object> phaseNL) {
    List<AnalysisPhase> phases = new ArrayList<AnalysisPhase>(phaseNL.size());
    for (Map.Entry<String, Object> phaseEntry : phaseNL) {
      AnalysisPhase phase = new AnalysisPhase(phaseEntry.getKey());
      // The phase value is only known as Object here, so the element type of the list cannot be verified by the
      // cast itself; a wrongly shaped value surfaces as a ClassCastException while iterating.
      @SuppressWarnings("unchecked")
      List<NamedList<?>> tokens = (List<NamedList<?>>) phaseEntry.getValue();
      for (NamedList<?> token : tokens) {
        phase.addTokenInfo(buildTokenInfo(token));
      }
      phases.add(phase);
    }
    return phases;
  }

  /**
   * Parses the given named list and builds a token info from it. Expects a named list of the form:
   * <pre>
   *  &lt;lst&gt;
   *      &lt;str name="text"&gt;the_text&lt;/str&gt;
   *      &lt;str name="rawText"&gt;the_raw_text&lt;/str&gt; (optional)
   *      &lt;str name="type"&gt;the_type&lt;/str&gt;
   *      &lt;int name="start"&gt;1&lt;/int&gt;
   *      &lt;int name="end"&gt;3&lt;/int&gt;
   *      &lt;int name="position"&gt;1&lt;/int&gt;
   *      &lt;bool name="match"&gt;true | false&lt;/bool&gt; (optional)
   *  &lt;/lst&gt;
   * </pre>
   * A missing {@code match} entry is treated as {@code false}.
   *
   * @param tokenNL The named list to parse.
   *
   * @return The built token info.
   *
   * @throws NullPointerException if the {@code start}, {@code end} or {@code position} entry is missing.
   * @throws ClassCastException   if an entry does not have the type shown above.
   */
  protected TokenInfo buildTokenInfo(NamedList<?> tokenNL) {
    String text = (String) tokenNL.get("text");
    String rawText = (String) tokenNL.get("rawText");
    String type = (String) tokenNL.get("type");
    int start = requiredInt(tokenNL, "start");
    int end = requiredInt(tokenNL, "end");
    int position = requiredInt(tokenNL, "position");
    // "match" is optional; the cast is kept so that a non-boolean value still fails loudly.
    Boolean match = (Boolean) tokenNL.get("match");
    return new TokenInfo(text, rawText, type, start, end, position, match != null && match.booleanValue());
  }

  /**
   * Reads a mandatory integer entry from a token named list. Fails with a descriptive message instead of the
   * message-less exception that auto-unboxing a {@code null} would produce.
   *
   * @param tokenNL The token named list to read from.
   * @param name    The name of the entry to read.
   *
   * @return The integer value stored under {@code name}.
   *
   * @throws NullPointerException if there is no value stored under {@code name}.
   */
  private static int requiredInt(NamedList<?> tokenNL, String name) {
    Integer value = (Integer) tokenNL.get(name);
    if (value == null) {
      throw new NullPointerException("Token is missing the required '" + name + "' entry");
    }
    return value.intValue();
  }


  //================================================= Inner Classes ==================================================

  /**
   * A phase in the analysis process. The phase holds the tokens produced in this phase and the name of the class that
   * produced them.
   */
  public static class AnalysisPhase {

    private final String className;
    private final List<TokenInfo> tokens = new ArrayList<TokenInfo>();

    AnalysisPhase(String className) {
      this.className = className;
    }

    /**
     * The name of the class (analyzer, tokenizer, or filter) that produced the token stream for this phase.
     *
     * @return The name of the class that produced the token stream for this phase.
     */
    public String getClassName() {
      return className;
    }

    // Only the enclosing class populates a phase while it parses a response.
    private void addTokenInfo(TokenInfo tokenInfo) {
      tokens.add(tokenInfo);
    }

    /**
     * Returns a list of tokens which represent the token stream produced in this phase. The returned list is the
     * phase's own list, not a copy.
     *
     * @return A list of tokens which represent the token stream produced in this phase.
     */
    public List<TokenInfo> getTokens() {
      return tokens;
    }

  }

  /**
   * Holds all information of a token as part of an analysis phase.
   */
  public static class TokenInfo {

    private final String text;
    private final String rawText;
    private final String type;
    private final int start;
    private final int end;
    private final int position;
    private final boolean match;

    /**
     * Constructs a new TokenInfo.
     *
     * @param text     The text of the token
     * @param rawText  The raw text of the token. If the token is stored in the index in a special format (e.g.
     *                 dates or padded numbers) this argument should hold this value. If the token is stored as is,
     *                 then this value should be {@code null}.
     * @param type     The type of the token (typically either {@code word} or {@code <ALPHANUM>} though it depends
     *                 on the tokenizer/filter used).
     * @param start    The start position of the token in the original text where it was extracted from.
     * @param end      The end position of the token in the original text where it was extracted from.
     * @param position The position of the token within the token stream.
     * @param match    Indicates whether this token matches one of the query tokens.
     */
    TokenInfo(String text, String rawText, String type, int start, int end, int position, boolean match) {
      this.text = text;
      this.rawText = rawText;
      this.type = type;
      this.start = start;
      this.end = end;
      this.position = position;
      this.match = match;
    }

    /**
     * Returns the text of the token.
     *
     * @return The text of the token.
     */
    public String getText() {
      return text;
    }

    /**
     * Returns the raw text of the token. If the token is indexed in a special format (e.g. date or padded numbers)
     * it will be returned as the raw text. Returns {@code null} if the token is indexed as is.
     *
     * @return Returns the raw text of the token.
     */
    public String getRawText() {
      return rawText;
    }

    /**
     * Returns the type of the token. Typically this will be {@code word} or {@code <ALPHANUM>}, but it really
     * depends on the tokenizer and filters that are used.
     *
     * @return The type of the token.
     */
    public String getType() {
      return type;
    }

    /**
     * Returns the start position of this token within the text it was originally extracted from.
     *
     * @return The start position of this token within the text it was originally extracted from.
     */
    public int getStart() {
      return start;
    }

    /**
     * Returns the end position of this token within the text it was originally extracted from.
     *
     * @return The end position of this token within the text it was originally extracted from.
     */
    public int getEnd() {
      return end;
    }

    /**
     * Returns the position of this token within the produced token stream.
     *
     * @return The position of this token within the produced token stream.
     */
    public int getPosition() {
      return position;
    }

    /**
     * Returns whether this token matches one of the query tokens (if query analysis is performed).
     *
     * @return Whether this token matches one of the query tokens (if query analysis is performed).
     */
    public boolean isMatch() {
      return match;
    }
  }

}