BaseSynonymParserTestCase.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.synonym;


import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Util;

/**
 * Base class for testing synonym parsers.
 */
public abstract class BaseSynonymParserTestCase extends BaseTokenStreamTestCase {
  /**
   * Helper method to validate synonym parsing.
   *
   * @param synonynMap  the generated synonym map after parsing
   * @param word        word (phrase) we are validating the synonyms for. Should be the value that comes out of the analyzer.
   *                    All spaces will be replaced by word separators.
   * @param includeOrig if synonyms should include original
   * @param synonyms    actual synonyms. All word separators are replaced with a single space.
   */
  public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms)
      throws Exception {
    word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
    BytesRef value = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
    assertNotNull("No synonyms found for: " + word, value);

    ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
    final int code = bytesReader.readVInt();

    final boolean keepOrig = (code & 0x1) == 0;
    assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
        includeOrig, keepOrig);

    final int count = code >>> 1;
    assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count,
        synonyms.length, count);

    Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));

    BytesRef scratchBytes = new BytesRef();
    for (int i = 0; i < count; i++) {
      synonynMap.words.get(bytesReader.readVInt(), scratchBytes);
      String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
      assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
    }
  }

  /**
   * Validates that there are no synonyms for the given word.
   * @param synonynMap  the generated synonym map after parsing
   * @param word        word (phrase) we are validating the synonyms for. Should be the value that comes out of the analyzer.
   *                    All spaces will be replaced by word separators.
   */
  public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
    word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
    BytesRef value = Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
    assertNull("There should be no synonyms for: " + word, value);
  }

  public static void assertEntryEquals(SynonymMap synonynMap, String word, boolean includeOrig, String synonym)
      throws Exception {
    assertEntryEquals(synonynMap, word, includeOrig, new String[]{synonym});
  }

  public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] posLengths) throws IOException {
    assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths);
  }
}