SynonymMap.java example

Explorer
solrcene-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.synonym;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;

import java.util.*;

/** Mapping rules for use with {@link SynonymFilter}
 */
public class SynonymMap {
  /** @lucene.internal */
  public CharArrayMap<SynonymMap> submap; // recursive: Map<String, SynonymMap>
  /** @lucene.internal */
  public Token[] synonyms;
  int flags;

  static final int INCLUDE_ORIG=0x01;
  static final int IGNORE_CASE=0x02;

  public SynonymMap() {}
  public SynonymMap(boolean ignoreCase) {
    if (ignoreCase) flags |= IGNORE_CASE;
  }

  public boolean includeOrig() { return (flags & INCLUDE_ORIG) != 0; }
  public boolean ignoreCase() { return (flags & IGNORE_CASE) != 0; }

  /**
   * @param singleMatch  List<String>, the sequence of strings to match
   * @param replacement  List<Token> the list of tokens to use on a match
   * @param includeOrig  sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens
   * @param mergeExisting merge the replacement tokens with any other mappings that exist
   */
  public void add(List<String> singleMatch, List<Token> replacement, boolean includeOrig, boolean mergeExisting) {
    SynonymMap currMap = this;
    for (String str : singleMatch) {
      if (currMap.submap==null) {
        // for now hardcode at 2.9, as its what the old code did.
        // would be nice to fix, but shouldn't store a version in each submap!!!
        currMap.submap = new CharArrayMap<SynonymMap>(Version.LUCENE_29, 1, ignoreCase());
      }

      SynonymMap map = currMap.submap.get(str);
      if (map==null) {
        map = new SynonymMap();
        map.flags |= flags & IGNORE_CASE;
        currMap.submap.put(str, map);
      }

      currMap = map;
    }

    if (currMap.synonyms != null && !mergeExisting) {
      throw new RuntimeException("SynonymFilter: there is already a mapping for " + singleMatch);
    }
    List<Token> superset = currMap.synonyms==null ? replacement :
          mergeTokens(Arrays.asList(currMap.synonyms), replacement);
    currMap.synonyms = (Token[])superset.toArray(new Token[superset.size()]);
    if (includeOrig) currMap.flags |= INCLUDE_ORIG;
  }


  public String toString() {
    StringBuilder sb = new StringBuilder("<");
    if (synonyms!=null) {
      sb.append("[");
      for (int i=0; i<synonyms.length; i++) {
        if (i!=0) sb.append(',');
        sb.append(synonyms[i]);
      }
      if ((flags & INCLUDE_ORIG)!=0) {
        sb.append(",ORIG");
      }
      sb.append("],");
    }
    sb.append(submap);
    sb.append(">");
    return sb.toString();
  }



  /** Produces a List<Token> from a List<String> */
  public static List<Token> makeTokens(List<String> strings) {
    List<Token> ret = new ArrayList<Token>(strings.size());
    for (String str : strings) {
      //Token newTok = new Token(str,0,0,"SYNONYM");
      Token newTok = new Token(str, 0,0,"SYNONYM");
      ret.add(newTok);
    }
    return ret;
  }


  /**
   * Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
   * the tokens end up at the same position.
   *
   * Example:  [a b] merged with [c d] produces [a/b c/d]  ('/' denotes tokens in the same position)
   * Example:  [a,5 b,2] merged with [c d,4 e,4] produces [c a,5/d b,2 e,2]  (a,n means a has posInc=n)
   *
   */
  public static List<Token> mergeTokens(List<Token> lst1, List<Token> lst2) {
    ArrayList<Token> result = new ArrayList<Token>();
    if (lst1 ==null || lst2 ==null) {
      if (lst2 != null) result.addAll(lst2);
      if (lst1 != null) result.addAll(lst1);
      return result;
    }

    int pos=0;
    Iterator<Token> iter1=lst1.iterator();
    Iterator<Token> iter2=lst2.iterator();
    Token tok1 = iter1.hasNext() ? iter1.next() : null;
    Token tok2 = iter2.hasNext() ? iter2.next() : null;
    int pos1 = tok1!=null ? tok1.getPositionIncrement() : 0;
    int pos2 = tok2!=null ? tok2.getPositionIncrement() : 0;
    while(tok1!=null || tok2!=null) {
      while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
        Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
        tok.copyBuffer(tok1.buffer(), 0, tok1.length());
        tok.setPositionIncrement(pos1-pos);
        result.add(tok);
        pos=pos1;
        tok1 = iter1.hasNext() ? iter1.next() : null;
        pos1 += tok1!=null ? tok1.getPositionIncrement() : 0;
      }
      while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
        Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
        tok.copyBuffer(tok2.buffer(), 0, tok2.length());
        tok.setPositionIncrement(pos2-pos);
        result.add(tok);
        pos=pos2;
        tok2 = iter2.hasNext() ? iter2.next() : null;
        pos2 += tok2!=null ? tok2.getPositionIncrement() : 0;
      }
    }
    return result;
  }

}