/* * Licensed under the Apache License, Version 2.0 (the "License"); * * You may not use this file except in compliance with the License. * * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * * See the License for the specific language governing permissions and * limitations under the License. * * Contributions from 2013-2017 where performed either by US government * employees, or under US Veterans Health Administration contracts. * * US Veterans Health Administration contributions by government employees * are work of the U.S. Government and are not subject to copyright * protection in the United States. Portions contributed by government * employees are USGovWork (17USC ยง105). Not subject to copyright. * * Contribution by contractors to the US Veterans Health Administration * during this period are contractually contributed under the * Apache License, Version 2.0. * * See: https://www.usa.gov/government-works * * Contributions prior to 2013: * * Copyright (C) International Health Terminology Standards Development Organisation. * Licensed under the Apache License, Version 2.0. * */ package sh.isaac.provider.query.lucene; //~--- non-JDK imports -------------------------------------------------------- import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; //~--- classes ---------------------------------------------------------------- /** * {@link PerFieldAnalyzer} * An analyzer that indexes everything with the {@link StandardAnalyzer} unless the field ends * with the string "_wsa" - in which case, it is indexed with the {@link WhitespaceAnalyzer} and * a {@link LowerCaseFilter} * * This can be enhanced in the future to properly handle other languages as well. * * @author <a href="mailto:daniel.armbrust.list@gmail.com">Dan Armbrust</a> */ public class PerFieldAnalyzer extends AnalyzerWrapper { /** The Constant WHITE_SPACE_FIELD_MARKER. */ public static final String WHITE_SPACE_FIELD_MARKER = "_wsa"; //~--- fields -------------------------------------------------------------- /** The wa. */ private final WhitespaceAnalyzer wa = new WhitespaceAnalyzer(); /** The sa. */ private final StandardAnalyzer sa = new StandardAnalyzer(); //~--- constructors -------------------------------------------------------- /** * Instantiates a new per field analyzer. */ public PerFieldAnalyzer() { super(Analyzer.PER_FIELD_REUSE_STRATEGY); } //~--- methods ------------------------------------------------------------- /** * To string. * * @return the string */ @Override public String toString() { return "PerFieldAnalyzer(default=" + this.sa + ", fields ending with '" + WHITE_SPACE_FIELD_MARKER + "': " + this.wa + ")"; } /** * Wrap components. * * @param fieldName the field name * @param components the components * @return the token stream components * @see org.apache.lucene.analysis.AnalyzerWrapper#wrapComponents(java.lang.String, org.apache.lucene.analysis.Analyzer.TokenStreamComponents) */ @Override protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { if (fieldName.endsWith(WHITE_SPACE_FIELD_MARKER)) { return new TokenStreamComponents(components.getTokenizer(), new LowerCaseFilter(components.getTokenStream())); } else { return components; } } //~--- get methods --------------------------------------------------------- /** * Gets the wrapped analyzer. * * @param fieldName the field name * @return the wrapped analyzer * @see org.apache.lucene.analysis.AnalyzerWrapper#getWrappedAnalyzer(java.lang.String) */ @Override protected Analyzer getWrappedAnalyzer(String fieldName) { if (fieldName.endsWith(WHITE_SPACE_FIELD_MARKER)) { return this.wa; } else { return this.sa; } } }