/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.schema;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.Version;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.plugin.AbstractPluginLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import static org.apache.solr.common.params.CommonParams.NAME;

public final class FieldTypePluginLoader
  extends AbstractPluginLoader<FieldType> {

  private static final String LUCENE_MATCH_VERSION_PARAM
    = IndexSchema.LUCENE_MATCH_VERSION_PARAM;

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  private final XPath xpath = XPathFactory.newInstance().newXPath();

  /**
   * @param schema The schema that will be used to initialize the FieldTypes
   * @param fieldTypes All FieldTypes that are instantiated by
   *        this Plugin Loader will be added to this Map
   * @param schemaAware Any SchemaAware objects that are instantiated by
   *        this Plugin Loader will be added to this collection.
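   *
   * <p>For illustration, a declaration this loader consumes from schema.xml
   * typically looks like the following (names and factories are examples only):
   * <pre>{@code
   * <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
   *   <analyzer type="index">
   *     <tokenizer class="solr.StandardTokenizerFactory"/>
   *     <filter class="solr.LowerCaseFilterFactory"/>
   *   </analyzer>
   *   <analyzer type="query">
   *     <tokenizer class="solr.StandardTokenizerFactory"/>
   *     <filter class="solr.LowerCaseFilterFactory"/>
   *   </analyzer>
   * </fieldType>
   * }</pre>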
   */
  public FieldTypePluginLoader(final IndexSchema schema,
                               final Map<String, FieldType> fieldTypes,
                               final Collection<SchemaAware> schemaAware) {
    super("[schema.xml] fieldType", FieldType.class, true, true);
    this.schema = schema;
    this.fieldTypes = fieldTypes;
    this.schemaAware = schemaAware;
  }

  private final IndexSchema schema;
  private final Map<String, FieldType> fieldTypes;
  private final Collection<SchemaAware> schemaAware;

  @Override
  protected FieldType create( SolrResourceLoader loader,
                              String name,
                              String className,
                              Node node ) throws Exception {

    FieldType ft = loader.newInstance(className, FieldType.class);
    ft.setTypeName(name);

    String expression = "./analyzer[@type='query']";
    Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
    Analyzer queryAnalyzer = readAnalyzer(anode);

    expression = "./analyzer[@type='multiterm']";
    anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
    Analyzer multiAnalyzer = readAnalyzer(anode);

    // An analyzer without a type specified, or with type="index"
    expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
    anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
    Analyzer analyzer = readAnalyzer(anode);

    // a custom similarity[Factory]
    expression = "./similarity";
    anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
    SimilarityFactory simFactory = IndexSchema.readSimilarity(loader, anode);
    if (null != simFactory) {
      ft.setSimilarity(simFactory);
    }

    if (ft instanceof HasImplicitIndexAnalyzer) {
      ft.setIsExplicitAnalyzer(false);
      if (null != queryAnalyzer && null != analyzer) {
        if (log.isWarnEnabled()) {
          log.warn("Ignoring index-time analyzer for field: " + name);
        }
      } else if (null == queryAnalyzer) {
        // Accept non-query-time analyzer as a query-time analyzer
        queryAnalyzer = analyzer;
      }
      if (null != queryAnalyzer) {
        ft.setIsExplicitQueryAnalyzer(true);
        ft.setQueryAnalyzer(queryAnalyzer);
      }
    } else {
      if (null == queryAnalyzer) {
        queryAnalyzer = analyzer;
        ft.setIsExplicitQueryAnalyzer(false);
      } else {
        ft.setIsExplicitQueryAnalyzer(true);
      }
      if (null == analyzer) {
        analyzer = queryAnalyzer;
        ft.setIsExplicitAnalyzer(false);
      } else {
        ft.setIsExplicitAnalyzer(true);
      }

      if (null != analyzer) {
        ft.setIndexAnalyzer(analyzer);
        ft.setQueryAnalyzer(queryAnalyzer);
        if (ft instanceof TextField) {
          if (null == multiAnalyzer) {
            multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer);
            ((TextField)ft).setIsExplicitMultiTermAnalyzer(false);
          } else {
            ((TextField)ft).setIsExplicitMultiTermAnalyzer(true);
          }
          ((TextField)ft).setMultiTermAnalyzer(multiAnalyzer);
        }
      }
    }
    if (ft instanceof SchemaAware) {
      schemaAware.add((SchemaAware) ft);
    }
    return ft;
  }

  @Override
  protected void init(FieldType plugin, Node node) throws Exception {
    Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), NAME);
    plugin.setArgs(schema, params);
  }

  @Override
  protected FieldType register(String name, FieldType plugin) throws Exception {
    log.trace("fieldtype defined: " + plugin);
    return fieldTypes.put(name, plugin);
  }

  // If no multiterm analyzer was specified in the schema file, derive one from
  // the query analyzer: keep every charfilter, tokenizer and tokenfilter that is
  // MultiTermAware (e.g. lowercase and ASCII-folding filters), drop the rest,
  // and fall back to a KeywordTokenizer when the tokenizer itself is not
  // multi-term aware. If the query analyzer is not a TokenizerChain at all,
  // just use a KeywordAnalyzer.
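  // Illustrative sketch (factory names are examples, not taken from any
  // particular schema): given a query analyzer configured as
  //   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
  //   <filter class="solr.LowerCaseFilterFactory"/>
  //   <filter class="solr.StopFilterFactory"/>
  // the derived multiterm chain keeps the lowercase filter (LowerCaseFilterFactory
  // is MultiTermAware), drops the stop filter (StopFilterFactory is not), and,
  // assuming the whitespace tokenizer is not MultiTermAware, tokenizes with the
  // default KeywordTokenizerFactory instead.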
  private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
    if (queryAnalyzer == null) return null;

    if (!(queryAnalyzer instanceof TokenizerChain)) {
      return new KeywordAnalyzer();
    }

    TokenizerChain tc = (TokenizerChain) queryAnalyzer;
    MultiTermChainBuilder builder = new MultiTermChainBuilder();

    CharFilterFactory[] charFactories = tc.getCharFilterFactories();
    for (CharFilterFactory fact : charFactories) {
      builder.add(fact);
    }

    builder.add(tc.getTokenizerFactory());

    for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
      builder.add(fact);
    }

    return builder.build();
  }

  private static class MultiTermChainBuilder {
    static final KeywordTokenizerFactory keyFactory = new KeywordTokenizerFactory(new HashMap<String,String>());

    ArrayList<CharFilterFactory> charFilters = null;
    ArrayList<TokenFilterFactory> filters = new ArrayList<>(2);
    TokenizerFactory tokenizer = keyFactory;

    public void add(Object current) {
      if (!(current instanceof MultiTermAwareComponent)) return;
      AbstractAnalysisFactory newComponent = ((MultiTermAwareComponent)current).getMultiTermComponent();
      if (newComponent instanceof TokenFilterFactory) {
        if (filters == null) {
          filters = new ArrayList<>(2);
        }
        filters.add((TokenFilterFactory)newComponent);
      } else if (newComponent instanceof TokenizerFactory) {
        tokenizer = (TokenizerFactory)newComponent;
      } else if (newComponent instanceof CharFilterFactory) {
        if (charFilters == null) {
          charFilters = new ArrayList<>(1);
        }
        charFilters.add((CharFilterFactory)newComponent);
      } else {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
      }
    }

    public TokenizerChain build() {
      CharFilterFactory[] charFilterArr = charFilters == null ? null : charFilters.toArray(new CharFilterFactory[charFilters.size()]);
      TokenFilterFactory[] filterArr = filters == null ? new TokenFilterFactory[0] : filters.toArray(new TokenFilterFactory[filters.size()]);
      return new TokenizerChain(charFilterArr, tokenizer, filterArr);
    }
  }

  //
  // <analyzer><tokenizer class="..."/><filter class="..." arg="..."/></analyzer>
  //
  private Analyzer readAnalyzer(Node node) throws XPathExpressionException {

    final SolrResourceLoader loader = schema.getResourceLoader();

    // parent node used to be passed in as "fieldtype"
    // if (!fieldtype.hasChildNodes()) return null;
    // Node node = DOMUtil.getChild(fieldtype,"analyzer");

    if (node == null) return null;
    NamedNodeMap attrs = node.getAttributes();
    String analyzerName = DOMUtil.getAttr(attrs, "class");

    // check for all of these up front, so we can error if used in
    // conjunction with an explicit analyzer class.
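    // That is, two mutually exclusive forms are accepted here, e.g.
    // (class names are illustrative):
    //   <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
    // or a chain of nested factories:
    //   <analyzer>
    //     <charFilter class="solr.HTMLStripCharFilterFactory"/>
    //     <tokenizer class="solr.StandardTokenizerFactory"/>
    //     <filter class="solr.LowerCaseFilterFactory"/>
    //   </analyzer>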
    NodeList charFilterNodes = (NodeList)xpath.evaluate("./charFilter", node, XPathConstants.NODESET);
    NodeList tokenizerNodes = (NodeList)xpath.evaluate("./tokenizer", node, XPathConstants.NODESET);
    NodeList tokenFilterNodes = (NodeList)xpath.evaluate("./filter", node, XPathConstants.NODESET);

    if (analyzerName != null) {

      // explicitly check for child analysis factories instead of
      // just any child nodes, because the user might have their
      // own custom nodes (ie: <description> or something like that)
      if (0 != charFilterNodes.getLength() ||
          0 != tokenizerNodes.getLength() ||
          0 != tokenFilterNodes.getLength()) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Configuration Error: Analyzer class='" + analyzerName +
            "' can not be combined with nested analysis factories");
      }

      try {
        // No need to be core-aware as Analyzers are not in the core-aware list
        final Class<? extends Analyzer> clazz = loader.findClass(analyzerName, Analyzer.class);
        Analyzer analyzer = clazz.newInstance();

        final String matchVersionStr = DOMUtil.getAttr(attrs, LUCENE_MATCH_VERSION_PARAM);
        final Version luceneMatchVersion = (matchVersionStr == null) ?
          schema.getDefaultLuceneMatchVersion() :
          Config.parseLuceneVersionString(matchVersionStr);
        if (luceneMatchVersion == null) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
              "Configuration Error: Analyzer '" + clazz.getName() +
              "' needs a 'luceneMatchVersion' parameter");
        }
        analyzer.setVersion(luceneMatchVersion);
        return analyzer;
      } catch (Exception e) {
        log.error("Cannot load analyzer: " + analyzerName, e);
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "Cannot load analyzer: " + analyzerName, e);
      }
    }

    // Load the CharFilters
    final ArrayList<CharFilterFactory> charFilters = new ArrayList<>();
    AbstractPluginLoader<CharFilterFactory> charFilterLoader =
      new AbstractPluginLoader<CharFilterFactory>("[schema.xml] analyzer/charFilter", CharFilterFactory.class, false, false) {

      @Override
      protected CharFilterFactory create(SolrResourceLoader loader, String name,
                                         String className, Node node) throws Exception {
        final Map<String,String> params = DOMUtil.toMap(node.getAttributes());
        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
        params.put(LUCENE_MATCH_VERSION_PARAM,
                   parseConfiguredVersion(configuredVersion, CharFilterFactory.class.getSimpleName()).toString());
        CharFilterFactory factory = loader.newInstance(className, CharFilterFactory.class, getDefaultPackages(),
                                                       new Class[] { Map.class }, new Object[] { params });
        factory.setExplicitLuceneMatchVersion(null != configuredVersion);
        return factory;
      }

      @Override
      protected void init(CharFilterFactory plugin, Node node) throws Exception {
        if (plugin != null) {
          charFilters.add(plugin);
        }
      }

      @Override
      protected CharFilterFactory register(String name, CharFilterFactory plugin) {
        return null; // used for map registration
      }
    };

    charFilterLoader.load(loader, charFilterNodes);
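    // Note that each nested factory may override the schema's default
    // luceneMatchVersion, e.g. (illustrative attribute values):
    //   <charFilter class="solr.MappingCharFilterFactory" mapping="mapping.txt"
    //               luceneMatchVersion="6.6.0"/>
    // parseConfiguredVersion() below resolves that attribute against the schema
    // default and warns when a deprecated version is being emulated.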
    // Load the Tokenizer
    // Although an analyzer only allows a single Tokenizer, we load a list to make sure
    // the configuration is ok
    final ArrayList<TokenizerFactory> tokenizers = new ArrayList<>(1);
    AbstractPluginLoader<TokenizerFactory> tokenizerLoader =
      new AbstractPluginLoader<TokenizerFactory>("[schema.xml] analyzer/tokenizer", TokenizerFactory.class, false, false) {

      @Override
      protected TokenizerFactory create(SolrResourceLoader loader, String name,
                                        String className, Node node) throws Exception {
        final Map<String,String> params = DOMUtil.toMap(node.getAttributes());
        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
        params.put(LUCENE_MATCH_VERSION_PARAM,
                   parseConfiguredVersion(configuredVersion, TokenizerFactory.class.getSimpleName()).toString());
        TokenizerFactory factory = loader.newInstance(className, TokenizerFactory.class, getDefaultPackages(),
                                                      new Class[] { Map.class }, new Object[] { params });
        factory.setExplicitLuceneMatchVersion(null != configuredVersion);
        return factory;
      }

      @Override
      protected void init(TokenizerFactory plugin, Node node) throws Exception {
        if (!tokenizers.isEmpty()) {
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
              "The schema defines multiple tokenizers for: " + node);
        }
        tokenizers.add(plugin);
      }

      @Override
      protected TokenizerFactory register(String name, TokenizerFactory plugin) {
        return null; // used for map registration
      }
    };

    tokenizerLoader.load(loader, tokenizerNodes);

    // Make sure something was loaded
    if (tokenizers.isEmpty()) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "analyzer without class or tokenizer");
    }

    // Load the Filters
    final ArrayList<TokenFilterFactory> filters = new ArrayList<>();
    AbstractPluginLoader<TokenFilterFactory> filterLoader =
      new AbstractPluginLoader<TokenFilterFactory>("[schema.xml] analyzer/filter", TokenFilterFactory.class, false, false) {

      @Override
      protected TokenFilterFactory create(SolrResourceLoader loader, String name,
                                          String className, Node node) throws Exception {
        final Map<String,String> params = DOMUtil.toMap(node.getAttributes());
        String configuredVersion = params.remove(LUCENE_MATCH_VERSION_PARAM);
        params.put(LUCENE_MATCH_VERSION_PARAM,
                   parseConfiguredVersion(configuredVersion, TokenFilterFactory.class.getSimpleName()).toString());
        TokenFilterFactory factory = loader.newInstance(className, TokenFilterFactory.class, getDefaultPackages(),
                                                        new Class[] { Map.class }, new Object[] { params });
        factory.setExplicitLuceneMatchVersion(null != configuredVersion);
        return factory;
      }

      @Override
      protected void init(TokenFilterFactory plugin, Node node) throws Exception {
        if (plugin != null) {
          filters.add(plugin);
        }
      }

      @Override
      protected TokenFilterFactory register(String name, TokenFilterFactory plugin) throws Exception {
        return null; // used for map registration
      }
    };
    filterLoader.load(loader, tokenFilterNodes);

    return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
                              tokenizers.get(0),
                              filters.toArray(new TokenFilterFactory[filters.size()]));
  }

  private Version parseConfiguredVersion(String configuredVersion, String pluginClassName) {
    Version version = (configuredVersion != null) ?
      Config.parseLuceneVersionString(configuredVersion) : schema.getDefaultLuceneMatchVersion();

    if (!version.onOrAfter(Version.LUCENE_6_0_0)) {
      log.warn(pluginClassName + " is using deprecated " + version +
               " emulation. You should at some point declare and reindex to at least 6.0, because " +
               "5.x emulation is deprecated and will be removed in 7.0");
    }
    return version;
  }
}
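
// Usage sketch (illustrative, not part of this class): IndexSchema drives this
// loader over the <fieldType> nodes parsed from schema.xml, roughly as follows,
// where "fieldTypeNodes" is a hypothetical NodeList of <fieldType> elements:
//
//   Map<String,FieldType> fieldTypes = new HashMap<>();
//   Collection<SchemaAware> schemaAware = new ArrayList<>();
//   FieldTypePluginLoader typeLoader =
//       new FieldTypePluginLoader(schema, fieldTypes, schemaAware);
//   typeLoader.load(schema.getResourceLoader(), fieldTypeNodes);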