/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.synonym;

import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Util;

public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {

  /** Set as a side effect by {@link #getAnalyzer} and {@link #getFlattenAnalyzer}. */
  private SynonymGraphFilter synFilter;
  private FlattenGraphFilter flattenFilter;
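  // A note for readers (not in the original file): the long-form assertAnalyzesTo calls
  // below pass, in order, the expected terms, start offsets, end offsets, token types,
  // position increments and position lengths, matching the overloads in
  // BaseTokenStreamTestCase; the shorter overloads take a subset of these arguments.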
  public void testBasicKeepOrigOneOutput() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x", true);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b",
        new String[] {"c", "x", "a", "b"},
        new int[] {0, 2, 2, 4},
        new int[] {1, 5, 3, 5},
        new String[] {"word", "SYNONYM", "word", "word"},
        new int[] {1, 1, 0, 1},
        new int[] {1, 2, 1, 1});
    a.close();
  }

  public void testMixedKeepOrig() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x", true);
    add(b, "e f", "y", false);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b c e f g",
        new String[] {"c", "x", "a", "b", "c", "y", "g"},
        new int[] {0, 2, 2, 4, 6, 8, 12},
        new int[] {1, 5, 3, 5, 7, 11, 13},
        new String[] {"word", "SYNONYM", "word", "word", "word", "SYNONYM", "word"},
        new int[] {1, 1, 0, 1, 1, 1, 1},
        new int[] {1, 2, 1, 1, 1, 1, 1});
    a.close();
  }

  public void testNoParseAfterBuffer() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "b a", "x", true);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "b b b",
        new String[] {"b", "b", "b"},
        new int[] {0, 2, 4},
        new int[] {1, 3, 5},
        new String[] {"word", "word", "word"},
        new int[] {1, 1, 1},
        new int[] {1, 1, 1});
    a.close();
  }

  public void testOneInputMultipleOutputKeepOrig() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x", true);
    add(b, "a b", "y", true);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b c",
        new String[] {"c", "x", "y", "a", "b", "c"},
        new int[] {0, 2, 2, 2, 4, 6},
        new int[] {1, 5, 5, 3, 5, 7},
        new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"},
        new int[] {1, 1, 0, 0, 1, 1, 1, 1},
        new int[] {1, 2, 2, 1, 1, 1, 1, 1});
    a.close();
  }
  /** Verify token type and positionLength after analysis. */
  public void testPositionLengthAndTypeSimple() throws Exception {
    String testFile = "spider man, spiderman";

    Analyzer analyzer = solrSynsToAnalyzer(testFile);
    assertAnalyzesToPositions(analyzer, "spider man",
        new String[] {"spiderman", "spider", "man"},
        new String[] {"SYNONYM", "word", "word"},
        new int[] {1, 0, 1},
        new int[] {2, 1, 1});
  }

  /** parse a syn file with some escaped syntax chars */
  public void testEscapedStuff() throws Exception {
    String testFile =
        "a\\=>a => b\\=>b\n" +
        "a\\,a => b\\,b";
    Analyzer analyzer = solrSynsToAnalyzer(testFile);

    assertAnalyzesTo(analyzer, "ball",
        new String[] {"ball"},
        new int[] {1});

    assertAnalyzesTo(analyzer, "a=>a",
        new String[] {"b=>b"},
        new int[] {1});

    assertAnalyzesTo(analyzer, "a,a",
        new String[] {"b,b"},
        new int[] {1});
    analyzer.close();
  }

  /** parse a syn file with bad syntax */
  public void testInvalidAnalyzesToNothingOutput() throws Exception {
    String testFile = "a => 1";
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, false);
    SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
    try {
      parser.parse(new StringReader(testFile));
      fail("didn't get expected exception");
    } catch (ParseException expected) {
      // expected exc
    }
    analyzer.close();
  }

  /** parse a syn file with bad syntax */
  public void testInvalidDoubleMap() throws Exception {
    String testFile = "a => b => c";
    Analyzer analyzer = new MockAnalyzer(random());
    SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
    try {
      parser.parse(new StringReader(testFile));
      fail("didn't get expected exception");
    } catch (ParseException expected) {
      // expected exc
    }
    analyzer.close();
  }

  /** Tests some simple examples from the solr wiki */
  public void testSimple() throws Exception {
    String testFile =
        "i-pod, ipod, ipoooood\n" +
        "foo => foo bar\n" +
        "foo => baz\n" +
        "this test, that testing";
    Analyzer analyzer = solrSynsToAnalyzer(testFile);

    assertAnalyzesTo(analyzer, "ball",
        new String[] {"ball"},
        new int[] {1});

    assertAnalyzesTo(analyzer, "i-pod",
        new String[] {"ipod", "ipoooood", "i-pod"},
        new int[] {1, 0, 0});

    assertAnalyzesTo(analyzer, "foo",
        new String[] {"foo", "baz", "bar"},
        new int[] {1, 0, 1});

    assertAnalyzesTo(analyzer, "this test",
        new String[] {"that", "this", "testing", "test"},
        new int[] {1, 0, 1, 0});
    analyzer.close();
  }

  public void testBufferLength() throws Exception {
    String testFile =
        "c => 8 2 5 6 7\n" +
        "f c e d f, 1\n" +
        "c g a f d, 6 5 5\n" +
        "e c => 4\n" +
        "g => 5\n" +
        "a g b f e => 5 0 7 7\n" +
        "b => 1";
    Analyzer analyzer = solrSynsToAnalyzer(testFile);

    String doc = "b c g a f b d";
    String[] expected = new String[] {"1", "8", "2", "5", "6", "7", "5", "a", "f", "1", "d"};
    assertAnalyzesTo(analyzer, doc, expected);
  }

  private Analyzer solrSynsToAnalyzer(String syns) throws IOException, ParseException {
    Analyzer analyzer = new MockAnalyzer(random());
    SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
    parser.parse(new StringReader(syns));
    analyzer.close();
    return getFlattenAnalyzer(parser, true);
  }

  public void testMoreThanOneLookAhead() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b c d", "x", true);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "a b c e",
        new String[] {"a", "b", "c", "e"},
        new int[] {0, 2, 4, 6},
        new int[] {1, 3, 5, 7},
        new String[] {"word", "word", "word", "word"},
        new int[] {1, 1, 1, 1},
        new int[] {1, 1, 1, 1});
    a.close();
  }

  public void testLookaheadAfterParse() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "b b", "x", true);
"b", "y", true); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "b a b b", new String[] {"y", "b", "a", "x", "b", "b"}, new int[] {0, 0, 2, 4, 4, 6}, new int[] {1, 1, 3, 7, 5, 7}, null, new int[] {1, 0, 1, 1, 0, 1}, new int[] {1, 1, 1, 2, 1, 1}, true); } public void testLookaheadSecondParse() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "b b b", "x", true); add(b, "b", "y", true); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "b b", new String[] {"y", "b", "y", "b"}, new int[] { 0, 0, 2, 2}, new int[] { 1, 1, 3, 3}, null, new int[] { 1, 0, 1, 0}, new int[] { 1, 1, 1, 1}, true); } public void testOneInputMultipleOutputNoKeepOrig() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x", false); add(b, "a b", "y", false); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c a b c", new String[] {"c", "x", "y", "c"}, new int[] { 0, 2, 2, 6}, new int[] { 1, 5, 5, 7}, new String[] {"word", "SYNONYM", "SYNONYM", "word"}, new int[] { 1, 1, 0, 1}, new int[] { 1, 1, 1, 1}); a.close(); } public void testOneInputMultipleOutputMixedKeepOrig() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x", true); add(b, "a b", "y", false); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c a b c", new String[] {"c", "x", "y", "a", "b", "c"}, new int[] { 0, 2, 2, 2, 4, 6}, new int[] { 1, 5, 5, 3, 5, 7}, new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, new int[] { 1, 1, 0, 0, 1, 1, 1, 1}, new int[] { 1, 2, 2, 1, 1, 1, 1, 1}); a.close(); } public void testSynAtEnd() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x", true); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c d e a b", new String[] {"c", "d", "e", "x", "a", "b"}, new int[] { 0, 2, 4, 6, 6, 8}, new int[] { 1, 3, 5, 9, 7, 9}, new String[] {"word", "word", "word", "SYNONYM", "word", "word"}, new int[] { 1, 1, 1, 1, 0, 1}, new int[] { 1, 1, 1, 2, 1, 1}); a.close(); } public void testTwoSynsInARow() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a", "x", false); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c a a b", new String[] {"c", "x", "x", "b"}, new int[] { 0, 2, 4, 6}, new int[] { 1, 3, 5, 7}, new String[] {"word", "SYNONYM", "SYNONYM", "word"}, new int[] { 1, 1, 1, 1}, new int[] { 1, 1, 1, 1}); a.close(); } public void testBasicKeepOrigTwoOutputs() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x y", true); add(b, "a b", "m n o", true); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c a b d", new String[] {"c", "x", "m", "a", "y", "n", "o", "b", "d"}, new int[] { 0, 2, 2, 2, 2, 2, 2, 4, 6}, new int[] { 1, 5, 5, 3, 5, 5, 5, 5, 7}, new String[] {"word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, new int[] { 1, 1, 0, 0, 1, 1, 1, 1, 1}, new int[] { 1, 1, 2, 4, 4, 1, 2, 1, 1}); a.close(); } public void testNoCaptureIfNoMatch() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x y", true); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "c d d", new String[] {"c", "d", "d"}, new int[] { 0, 2, 4}, new int[] { 1, 3, 5}, new String[] {"word", "word", "word"}, new int[] { 1, 1, 1}, new int[] { 1, 1, 1}); assertEquals(0, synFilter.getCaptureCount()); a.close(); } public void testBasicNotKeepOrigOneOutput() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(); add(b, "a b", "x", false); Analyzer a = 
    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b",
        new String[] {"c", "x"},
        new int[] {0, 2},
        new int[] {1, 5},
        new String[] {"word", "SYNONYM"},
        new int[] {1, 1},
        new int[] {1, 1});
    a.close();
  }

  public void testBasicNoKeepOrigTwoOutputs() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x y", false);
    add(b, "a b", "m n o", false);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b d",
        new String[] {"c", "x", "m", "y", "n", "o", "d"},
        new int[] {0, 2, 2, 2, 2, 2, 6},
        new int[] {1, 5, 5, 5, 5, 5, 7},
        new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
        new int[] {1, 1, 0, 1, 1, 1, 1},
        new int[] {1, 1, 2, 3, 1, 1, 1});
    a.close();
  }

  public void testIgnoreCase() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x y", false);
    add(b, "a b", "m n o", false);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c A B D",
        new String[] {"c", "x", "m", "y", "n", "o", "D"},
        new int[] {0, 2, 2, 2, 2, 2, 6},
        new int[] {1, 5, 5, 5, 5, 5, 7},
        new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
        new int[] {1, 1, 0, 1, 1, 1, 1},
        new int[] {1, 1, 2, 3, 1, 1, 1});
    a.close();
  }

  public void testDoNotIgnoreCase() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x y", false);
    add(b, "a b", "m n o", false);

    Analyzer a = getAnalyzer(b, false);
    assertAnalyzesTo(a, "c A B D",
        new String[] {"c", "A", "B", "D"},
        new int[] {0, 2, 4, 6},
        new int[] {1, 3, 5, 7},
        new String[] {"word", "word", "word", "word"},
        new int[] {1, 1, 1, 1},
        new int[] {1, 1, 1, 1});
    a.close();
  }

  public void testBufferedFinish1() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b c", "m n o", false);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a b",
        new String[] {"c", "a", "b"},
        new int[] {0, 2, 4},
        new int[] {1, 3, 5},
        new String[] {"word", "word", "word"},
        new int[] {1, 1, 1},
        new int[] {1, 1, 1});
    a.close();
  }

  public void testBufferedFinish2() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "m n o", false);
    add(b, "d e", "m n o", false);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "c a d",
        new String[] {"c", "a", "d"},
        new int[] {0, 2, 4},
        new int[] {1, 3, 5},
        new String[] {"word", "word", "word"},
        new int[] {1, 1, 1},
        new int[] {1, 1, 1});
    a.close();
  }

  public void testCanReuse() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b", "x", true);
    Analyzer a = getAnalyzer(b, true);
    for (int i = 0; i < 10; i++) {
      assertAnalyzesTo(a, "c a b",
          new String[] {"c", "x", "a", "b"},
          new int[] {0, 2, 2, 4},
          new int[] {1, 5, 3, 5},
          new String[] {"word", "SYNONYM", "word", "word"},
          new int[] {1, 1, 0, 1},
          new int[] {1, 2, 1, 1});
    }
    a.close();
  }

  /** Multiple input tokens map to a single output token */
  public void testManyToOne() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b c", "z", true);

    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "a b c d",
        new String[] {"z", "a", "b", "c", "d"},
        new int[] {0, 0, 2, 4, 6},
        new int[] {5, 1, 3, 5, 7},
        new String[] {"SYNONYM", "word", "word", "word", "word"},
        new int[] {1, 0, 1, 1, 1},
        new int[] {3, 1, 1, 1, 1});
    a.close();
  }

  public void testBufferAfterMatch() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "a b c d", "x", true);
    add(b, "a b", "y", false);

    // The 'c' token has to be buffered because SynGraphFilter
    // needs to know whether a b c d -> x matches:
    Analyzer a = getAnalyzer(b, true);
    assertAnalyzesTo(a, "f a b c e",
        new String[] {"f", "y", "c", "e"},
        new int[] {0, 2, 6, 8},
        new int[] {1, 5, 7, 9},
        new String[] {"word", "SYNONYM", "word", "word"},
        new int[] {1, 1, 1, 1},
        new int[] {1, 1, 1, 1});
    a.close();
  }

  public void testZeroSyns() throws Exception {
    Tokenizer tokenizer = new MockTokenizer();
    tokenizer.setReader(new StringReader("aa bb"));
    try {
      new SynonymGraphFilter(tokenizer, new SynonymMap.Builder(true).build(), true);
      fail("did not hit expected exception");
    } catch (IllegalArgumentException iae) {
      // expected
      assertEquals("fst must be non-null", iae.getMessage());
    }
  }

  public void testOutputHangsOffEnd() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    // b hangs off the end (no input token under it):
    add(b, "a", "a b", keepOrig);
    Analyzer a = getFlattenAnalyzer(b, true);
    assertAnalyzesTo(a, "a",
        new String[] {"a", "b"},
        new int[] {0, 0},
        new int[] {1, 1},
        null,
        new int[] {1, 1},
        new int[] {1, 1},
        true);
    a.close();
  }

  public void testDedup() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    Analyzer a = getFlattenAnalyzer(b, true);
    assertAnalyzesTo(a, "a b",
        new String[] {"ab"},
        new int[] {1});
    a.close();
  }

  public void testNoDedup() throws Exception {
    // dedup is false:
    SynonymMap.Builder b = new SynonymMap.Builder(false);
    final boolean keepOrig = false;
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    Analyzer a = getFlattenAnalyzer(b, true);
    assertAnalyzesTo(a, "a b",
        new String[] {"ab", "ab", "ab"},
        new int[] {1, 0, 0});
    a.close();
  }

  public void testMatching() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    add(b, "a b", "ab", keepOrig);
    add(b, "a c", "ac", keepOrig);
    add(b, "a", "aa", keepOrig);
    add(b, "b", "bb", keepOrig);
    add(b, "z x c v", "zxcv", keepOrig);
    add(b, "x c", "xc", keepOrig);
    Analyzer a = getFlattenAnalyzer(b, true);

    checkOneTerm(a, "$", "$");
    checkOneTerm(a, "a", "aa");
    checkOneTerm(a, "b", "bb");

    assertAnalyzesTo(a, "a $",
        new String[] {"aa", "$"},
        new int[] {1, 1});

    assertAnalyzesTo(a, "$ a",
        new String[] {"$", "aa"},
        new int[] {1, 1});

    assertAnalyzesTo(a, "a a",
        new String[] {"aa", "aa"},
        new int[] {1, 1});

    assertAnalyzesTo(a, "z x c v",
        new String[] {"zxcv"},
        new int[] {1});

    assertAnalyzesTo(a, "z x c $",
        new String[] {"z", "xc", "$"},
        new int[] {1, 1, 1});
    a.close();
  }

  public void testBasic1() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    add(b, "a", "foo", true);
    add(b, "a b", "bar fee", true);
    add(b, "b c", "dog collar", true);
    add(b, "c d", "dog harness holder extras", true);
    add(b, "m c e", "dog barks loudly", false);
    add(b, "i j k", "feep", true);
    add(b, "e f", "foo bar", false);
    add(b, "e f", "baz bee", false);
    add(b, "z", "boo", false);
    add(b, "y", "bee", true);

    Analyzer a = getFlattenAnalyzer(b, true);

    assertAnalyzesTo(a, "a b c",
        new String[] {"bar", "a", "fee", "b", "c"},
        new int[] {1, 0, 1, 0, 1});

    assertAnalyzesTo(a, "x a b c d",
        new String[] {"x", "bar", "a", "fee", "b", "dog", "c", "harness", "d", "holder", "extras"},
        new int[] {1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1});

    assertAnalyzesTo(a, "a b a",
        new String[] {"bar", "a", "fee", "b", "foo", "a"},
        new int[] {1, 0, 1, 0, 1, 0});

    // outputs no longer add to one another:
{"dog", "c", "harness", "d", "holder", "extras", "dog", "c", "harness", "d", "holder", "extras"}, new int[] {1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1}); // two outputs for same input assertAnalyzesTo(a, "e f", new String[] {"foo", "baz", "bar", "bee"}, new int[] {1, 0, 1, 0}); // verify multi-word / single-output offsets: assertAnalyzesTo(a, "g i j k g", new String[] {"g", "feep", "i", "j", "k", "g"}, new int[] {1, 1, 0, 1, 1, 1}); // mixed keepOrig true/false: assertAnalyzesTo(a, "a m c e x", new String[] {"foo", "a", "dog", "barks", "loudly", "x"}, new int[] {1, 0, 1, 1, 1, 1}); assertAnalyzesTo(a, "c d m c e x", new String[] {"dog", "c", "harness", "d", "holder", "extras", "dog", "barks", "loudly","x"}, new int[] {1, 0, 1, 0, 1, 1, 1, 1, 1, 1}); assertTrue(synFilter.getCaptureCount() > 0); // no captureStates when no syns matched assertAnalyzesTo(a, "p q r s t", new String[] {"p", "q", "r", "s", "t"}, new int[] {1, 1, 1, 1, 1}); assertEquals(0, synFilter.getCaptureCount()); // captureStates are necessary for the single-token syn case: assertAnalyzesTo(a, "p q z y t", new String[] {"p", "q", "boo", "bee", "y", "t"}, new int[] {1, 1, 1, 1, 0, 1}); assertTrue(synFilter.getCaptureCount() > 0); } public void testBasic2() throws Exception { boolean keepOrig = true; do { keepOrig = !keepOrig; SynonymMap.Builder b = new SynonymMap.Builder(true); add(b,"aaa", "aaaa1 aaaa2 aaaa3", keepOrig); add(b, "bbb", "bbbb1 bbbb2", keepOrig); Analyzer a = getFlattenAnalyzer(b, true); if (keepOrig) { assertAnalyzesTo(a, "xyzzy bbb pot of gold", new String[] {"xyzzy", "bbbb1", "bbb", "bbbb2", "pot", "of", "gold"}, new int[] {1, 1, 0, 1, 1, 1, 1}); assertAnalyzesTo(a, "xyzzy aaa pot of gold", new String[] {"xyzzy", "aaaa1", "aaa", "aaaa2", "aaaa2", "pot", "of", "gold"}, new int[] {1, 1, 0, 1, 1, 1, 1, 1}); } else { assertAnalyzesTo(a, "xyzzy bbb pot of gold", new String[] {"xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold"}, new int[] {1, 1, 1, 1, 1, 1}); assertAnalyzesTo(a, "xyzzy aaa pot of gold", new String[] {"xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold"}, new int[] {1, 1, 1, 1, 1, 1, 1}); } } while (keepOrig); } /** If we expand synonyms during indexing, it's a bit better than * SynonymFilter is today, but still necessarily has false * positive and negative PhraseQuery matches because we do not * index posLength, so we lose information. 
  /** If we expand synonyms during indexing, it's a bit better than
   *  SynonymFilter is today, but still necessarily has false
   *  positive and negative PhraseQuery matches because we do not
   *  index posLength, so we lose information. */
  public void testFlattenedGraph() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "wtf", "what the fudge", true);

    Analyzer a = getFlattenAnalyzer(b, true);

    assertAnalyzesTo(a, "wtf happened",
        new String[] {"what", "wtf", "the", "fudge", "happened"},
        new int[] {0, 0, 0, 0, 4},
        new int[] {3, 3, 3, 3, 12},
        null,
        new int[] {1, 0, 1, 1, 1},
        new int[] {1, 3, 1, 1, 1},
        true);

    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
    Document doc = new Document();
    doc.add(newTextField("field", "wtf happened", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    IndexSearcher s = newSearcher(r);

    // Good (this should not match, and doesn't):
    assertEquals(0, s.count(new PhraseQuery("field", "what", "happened")));

    // Bad (this should match, but doesn't):
    assertEquals(0, s.count(new PhraseQuery("field", "wtf", "happened")));

    // Good (this should match, and does):
    assertEquals(1, s.count(new PhraseQuery("field", "what", "the", "fudge", "happened")));

    // Bad (this should not match, but does):
    assertEquals(1, s.count(new PhraseQuery("field", "wtf", "the")));

    IOUtils.close(r, dir);
  }

  // Needs TermAutomatonQuery, which is in sandbox still:
  /*
  public void testAccurateGraphQuery1() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("field", "wtf happened", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    IndexSearcher s = newSearcher(r);

    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "what the fudge", "wtf", true);
    SynonymMap map = b.build();

    TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();

    TokenStream in = new CannedTokenStream(0, 23, new Token[] {
        token("what", 1, 1, 0, 4),
        token("the", 1, 1, 5, 8),
        token("fudge", 1, 1, 9, 14),
        token("happened", 1, 1, 15, 23),
      });

    assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    in = new CannedTokenStream(0, 12, new Token[] {
        token("wtf", 1, 1, 0, 3),
        token("happened", 1, 1, 4, 12),
      });

    assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    // "what happened" should NOT match:
    in = new CannedTokenStream(0, 13, new Token[] {
        token("what", 1, 1, 0, 4),
        token("happened", 1, 1, 5, 13),
      });
    assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    IOUtils.close(r, dir);
  }
  */
  /** If we expand synonyms at search time, the results are correct. */
  // Needs TermAutomatonQuery, which is in sandbox still:
  /*
  public void testAccurateGraphQuery2() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("field", "say wtf happened", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    IndexSearcher s = newSearcher(r);

    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "what the fudge", "wtf", true);
    SynonymMap map = b.build();

    TokenStream in = new CannedTokenStream(0, 26, new Token[] {
        token("say", 1, 1, 0, 3),
        token("what", 1, 1, 3, 7),
        token("the", 1, 1, 8, 11),
        token("fudge", 1, 1, 12, 17),
        token("happened", 1, 1, 18, 26),
      });

    TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();

    assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    // "what happened" should NOT match:
    in = new CannedTokenStream(0, 13, new Token[] {
        token("what", 1, 1, 0, 4),
        token("happened", 1, 1, 5, 13),
      });
    assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    IOUtils.close(r, dir);
  }
  */

  // Needs TermAutomatonQuery, which is in sandbox still:
  /*
  public void testAccurateGraphQuery3() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    Document doc = new Document();
    doc.add(newTextField("field", "say what the fudge happened", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    IndexSearcher s = newSearcher(r);

    SynonymMap.Builder b = new SynonymMap.Builder();
    add(b, "wtf", "what the fudge", true);
    SynonymMap map = b.build();

    TokenStream in = new CannedTokenStream(0, 15, new Token[] {
        token("say", 1, 1, 0, 3),
        token("wtf", 1, 1, 3, 6),
        token("happened", 1, 1, 7, 15),
      });

    TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();

    assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    // "what happened" should NOT match:
    in = new CannedTokenStream(0, 13, new Token[] {
        token("what", 1, 1, 0, 4),
        token("happened", 1, 1, 5, 13),
      });
    assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));

    IOUtils.close(r, dir);
  }

  private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
    final Token t = new Token(term, startOffset, endOffset);
    t.setPositionIncrement(posInc);
    t.setPositionLength(posLength);
    return t;
  }
  */

  private String randomNonEmptyString() {
    while (true) {
      String s = TestUtil.randomUnicodeString(random()).trim();
      //String s = TestUtil.randomSimpleString(random()).trim();
      if (s.length() != 0 && s.indexOf('\u0000') == -1) {
        return s;
      }
    }
  }
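  // Note: '\u0000' is excluded above because SynonymMap uses it as the separator
  // between words of a multi-token rule (SynonymMap.WORD_SEPARATOR).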
  // Adds MockGraphTokenFilter after SynFilter:
  public void testRandomGraphAfter() throws Exception {
    final int numIters = atLeast(3);
    for (int i = 0; i < numIters; i++) {
      SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random().nextBoolean();
      final boolean doFlatten = random().nextBoolean();

      final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
          TokenStream syns = new SynonymGraphFilter(tokenizer, map, ignoreCase);
          TokenStream graph = new MockGraphTokenFilter(random(), syns);
          if (doFlatten) {
            graph = new FlattenGraphFilter(graph);
          }
          return new TokenStreamComponents(tokenizer, graph);
        }
      };

      checkRandomData(random(), analyzer, 100);
      analyzer.close();
    }
  }

  public void testEmptyStringInput() throws IOException {
    final int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
      SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
      }
      final boolean ignoreCase = random().nextBoolean();

      Analyzer analyzer = getAnalyzer(b, ignoreCase);

      checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), "");
      analyzer.close();
    }
  }

  /** Simple random test; doesn't verify correctness, just that no exceptions
   *  are thrown and the stream doesn't misbehave. */
  public void testRandom2() throws Exception {
    final int numIters = atLeast(3);
    for (int i = 0; i < numIters; i++) {
      SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
      }
      final boolean ignoreCase = random().nextBoolean();
      final boolean doFlatten = random().nextBoolean();

      Analyzer analyzer;
      if (doFlatten) {
        analyzer = getFlattenAnalyzer(b, ignoreCase);
      } else {
        analyzer = getAnalyzer(b, ignoreCase);
      }

      checkRandomData(random(), analyzer, 100);
      analyzer.close();
    }
  }

  /** simple random test like testRandom2, but for larger docs */
  public void testRandomHuge() throws Exception {
    final int numIters = atLeast(3);
    for (int i = 0; i < numIters; i++) {
      SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
      final int numEntries = atLeast(10);
      if (VERBOSE) {
        System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
      }
      for (int j = 0; j < numEntries; j++) {
        add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
      }
      final boolean ignoreCase = random().nextBoolean();
      final boolean doFlatten = random().nextBoolean();

      Analyzer analyzer;
      if (doFlatten) {
        analyzer = getFlattenAnalyzer(b, ignoreCase);
      } else {
        analyzer = getAnalyzer(b, ignoreCase);
      }

      checkRandomData(random(), analyzer, 100, 1024);
      analyzer.close();
    }
  }

  public void testEmptyTerm() throws IOException {
    final int numIters = atLeast(10);
    for (int i = 0; i < numIters; i++) {
      SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
      }
      final boolean ignoreCase = random().nextBoolean();

      final Analyzer analyzer = getAnalyzer(b, ignoreCase);

      checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), "");
      analyzer.close();
    }
  }

  // LUCENE-3375
  public void testVanishingTermsNoFlatten() throws Exception {
    String testFile =
        "aaa => aaaa1 aaaa2 aaaa3\n" +
        "bbb => bbbb1 bbbb2\n";
    Analyzer analyzer = solrSynsToAnalyzer(testFile);

    assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold",
        new String[] {"xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold"});

    // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold
    assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold",
        new String[] {"xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold"});
    analyzer.close();
  }

  // LUCENE-3375
  public void testVanishingTermsWithFlatten() throws Exception {
    String testFile =
        "aaa => aaaa1 aaaa2 aaaa3\n" +
        "bbb => bbbb1 bbbb2\n";
bbbb2\n"; Analyzer analyzer = solrSynsToAnalyzer(testFile); assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" }); // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" }); analyzer.close(); } public void testBuilderDedup() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = false; add(b, "a b", "ab", keepOrig); add(b, "a b", "ab", keepOrig); add(b, "a b", "ab", keepOrig); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "a b", new String[] { "ab" }, new int[] { 1 }); a.close(); } public void testBuilderNoDedup() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(false); final boolean keepOrig = false; add(b, "a b", "ab", keepOrig); add(b, "a b", "ab", keepOrig); add(b, "a b", "ab", keepOrig); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "a b", new String[] { "ab", "ab", "ab" }, new int[] { 1, 0, 0 }); a.close(); } public void testRecursion1() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = false; add(b, "zoo", "zoo", keepOrig); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "zoo zoo $ zoo", new String[] { "zoo", "zoo", "$", "zoo" }, new int[] { 1, 1, 1, 1 }); a.close(); } public void testRecursion2() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = false; add(b, "zoo", "zoo", keepOrig); add(b, "zoo", "zoo zoo", keepOrig); Analyzer a = getAnalyzer(b, true); // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo"); assertAnalyzesTo(a, "zoo zoo $ zoo", new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" }, new int[] { 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 }); a.close(); } public void testRecursion3() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = true; add(b, "zoo zoo", "zoo", keepOrig); Analyzer a = getFlattenAnalyzer(b, true); assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "zoo", "$", "zoo"}, new int[]{1, 0, 1, 1, 1}); a.close(); } public void testRecursion4() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = true; add(b, "zoo zoo", "zoo", keepOrig); add(b, "zoo", "zoo zoo", keepOrig); Analyzer a = getFlattenAnalyzer(b, true); assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"}, new int[]{1, 0, 1, 1, 1, 0, 1}); a.close(); } public void testKeepOrig() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = true; add(b, "a b", "ab", keepOrig); add(b, "a c", "ac", keepOrig); add(b, "a", "aa", keepOrig); add(b, "b", "bb", keepOrig); add(b, "z x c v", "zxcv", keepOrig); add(b, "x c", "xc", keepOrig); Analyzer a = getAnalyzer(b, true); assertAnalyzesTo(a, "$", new String[] { "$" }, new int[] { 1 }); assertAnalyzesTo(a, "a", new String[] { "aa", "a" }, new int[] { 1, 0 }); assertAnalyzesTo(a, "a", new String[] { "aa", "a" }, new int[] { 1, 0 }); assertAnalyzesTo(a, "$ a", new String[] { "$", "aa", "a" }, new int[] { 1, 1, 0 }); assertAnalyzesTo(a, "a $", new String[] { "aa", "a", "$" }, new int[] { 1, 0, 1 }); assertAnalyzesTo(a, "$ a !", new String[] { "$", "aa", "a", "!" 
    assertAnalyzesTo(a, "$ a !",
        new String[] {"$", "aa", "a", "!"},
        new int[] {1, 1, 0, 1});

    assertAnalyzesTo(a, "a a",
        new String[] {"aa", "a", "aa", "a"},
        new int[] {1, 0, 1, 0});

    assertAnalyzesTo(a, "b",
        new String[] {"bb", "b"},
        new int[] {1, 0});

    assertAnalyzesTo(a, "z x c v",
        new String[] {"zxcv", "z", "x", "c", "v"},
        new int[] {1, 0, 1, 1, 1});

    assertAnalyzesTo(a, "z x c $",
        new String[] {"z", "xc", "x", "c", "$"},
        new int[] {1, 1, 0, 1, 1});
    a.close();
  }

  /**
   * Verify token type and positionLengths on synonyms of different word counts, with non-preserving, explicit rules.
   */
  public void testNonPreservingMultiwordSynonyms() throws Exception {
    String testFile =
        "aaa => two words\n" +
        "bbb => one two, very many multiple words\n" +
        "ee ff, gg, h i j k, h i => one\n" +
        "cc dd => usa,united states,u s a,united states of america";
    Analyzer analyzer = solrSynsToAnalyzer(testFile);

    assertAnalyzesTo(analyzer, "aaa",
        new String[] {"two", "words"},
        new int[] {0, 0},
        new int[] {3, 3},
        new String[] {"SYNONYM", "SYNONYM"},
        new int[] {1, 1},
        new int[] {1, 1});

    assertAnalyzesToPositions(analyzer, "amazing aaa",
        new String[] {"amazing", "two", "words"},
        new String[] {"word", "SYNONYM", "SYNONYM"},
        new int[] {1, 1, 1},
        new int[] {1, 1, 1});

    assertAnalyzesTo(analyzer, "p bbb s",
        new String[] {"p", "one", "very", "two", "many", "multiple", "words", "s"},
        new int[] {0, 2, 2, 2, 2, 2, 2, 6},
        new int[] {1, 5, 5, 5, 5, 5, 5, 7},
        new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
        new int[] {1, 1, 0, 1, 0, 1, 1, 1},
        new int[] {1, 1, 1, 3, 1, 1, 1, 1});

    assertAnalyzesTo(analyzer, "p ee ff s",
        new String[] {"p", "one", "s"},
        new int[] {0, 2, 8},
        new int[] {1, 7, 9},
        new String[] {"word", "SYNONYM", "word"},
        new int[] {1, 1, 1},
        new int[] {1, 1, 1});

    assertAnalyzesTo(analyzer, "p h i j s",
        new String[] {"p", "one", "j", "s"},
        new int[] {0, 2, 6, 8},
        new int[] {1, 5, 7, 9},
        new String[] {"word", "SYNONYM", "word", "word"},
        new int[] {1, 1, 1, 1},
        new int[] {1, 1, 1, 1});

    analyzer.close();
  }

  private Analyzer getAnalyzer(SynonymMap.Builder b, final boolean ignoreCase) throws IOException {
    final SynonymMap map = b.build();
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        // Make a local variable so testRandomHuge doesn't share it across threads!
        SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        TestSynonymGraphFilter.this.flattenFilter = null;
        TestSynonymGraphFilter.this.synFilter = synFilter;
        return new TokenStreamComponents(tokenizer, synFilter);
      }
    };
  }

  /** Appends FlattenGraphFilter too */
  private Analyzer getFlattenAnalyzer(SynonymMap.Builder b, boolean ignoreCase) throws IOException {
    final SynonymMap map = b.build();
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
        // Make a local variable so testRandomHuge doesn't share it across threads!
        SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        FlattenGraphFilter flattenFilter = new FlattenGraphFilter(synFilter);
        TestSynonymGraphFilter.this.synFilter = synFilter;
        TestSynonymGraphFilter.this.flattenFilter = flattenFilter;
        return new TokenStreamComponents(tokenizer, flattenFilter);
      }
    };
  }
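  // A minimal sketch (not part of the original tests) of how these two helpers map to
  // production usage: the plain SynonymGraphFilter chain is safe at search time, while
  // an index-time chain should flatten the graph first, e.g.
  //
  //   TokenStream indexChain = new FlattenGraphFilter(new SynonymGraphFilter(tokenizer, map, true));
  //
  // because the index records position increments but not position lengths.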
  private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
    if (VERBOSE) {
      //System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);
    }
    CharsRefBuilder inputCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(input.split(" +"), inputCharsRef);

    CharsRefBuilder outputCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(output.split(" +"), outputCharsRef);

    b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig);
  }

  private char[] randomBinaryChars(int minLen, int maxLen, double bias, char base) {
    int len = TestUtil.nextInt(random(), minLen, maxLen);
    char[] chars = new char[len];
    for (int i = 0; i < len; i++) {
      char ch;
      if (random().nextDouble() < bias) {
        ch = base;
      } else {
        ch = (char) (base + 1);
      }
      chars[i] = ch;
    }
    return chars;
  }

  private static String toTokenString(char[] chars) {
    StringBuilder b = new StringBuilder();
    for (char c : chars) {
      if (b.length() > 0) {
        b.append(' ');
      }
      b.append(c);
    }
    return b.toString();
  }

  private static class OneSyn {
    char[] in;
    char[] out;
    boolean keepOrig;

    @Override
    public String toString() {
      return toTokenString(in) + " --> " + toTokenString(out) + " (keepOrig=" + keepOrig + ")";
    }
  }

  public void testRandomSyns() throws Exception {
    int synCount = atLeast(10);
    double bias = random().nextDouble();
    boolean dedup = random().nextBoolean();

    boolean flatten = random().nextBoolean();

    SynonymMap.Builder b = new SynonymMap.Builder(dedup);
    List<OneSyn> syns = new ArrayList<>();
    // Makes random syns from random a / b tokens, mapping to random x / y tokens
    if (VERBOSE) {
      System.out.println("TEST: make " + synCount + " syns");
      System.out.println("  bias for a over b=" + bias);
      System.out.println("  dedup=" + dedup);
      System.out.println("  flatten=" + flatten);
    }

    int maxSynLength = 0;

    for (int i = 0; i < synCount; i++) {
      OneSyn syn = new OneSyn();
      syn.in = randomBinaryChars(1, 5, bias, 'a');
      syn.out = randomBinaryChars(1, 5, 0.5, 'x');
      syn.keepOrig = random().nextBoolean();
      syns.add(syn);
      maxSynLength = Math.max(maxSynLength, syn.in.length);
      if (VERBOSE) {
        System.out.println("  " + syn);
      }
      add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig);
    }

    // Compute max allowed lookahead for flatten filter:
    int maxFlattenLookahead = 0;
    if (flatten) {
      for (int i = 0; i < synCount; i++) {
        OneSyn syn1 = syns.get(i);
        int count = syn1.out.length;
        boolean keepOrig = syn1.keepOrig;
        for (int j = 0; j < synCount; j++) {
          OneSyn syn2 = syns.get(i);
          keepOrig |= syn2.keepOrig;
          if (syn1.in.equals(syn2.in)) {
            count += syn2.out.length;
          }
        }
        if (keepOrig) {
          count += syn1.in.length;
        }
        maxFlattenLookahead = Math.max(maxFlattenLookahead, count);
      }
    }

    // Only used w/ VERBOSE:
    Analyzer aNoFlattened;
    if (VERBOSE) {
      aNoFlattened = getAnalyzer(b, true);
    } else {
      aNoFlattened = null;
    }

    Analyzer a;
    if (flatten) {
      a = getFlattenAnalyzer(b, true);
    } else {
      a = getAnalyzer(b, true);
    }

    int iters = atLeast(20);
    for (int iter = 0; iter < iters; iter++) {

      String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a'));
      //String doc = toTokenString(randomBinaryChars(10, 50, bias, 'a'));

      if (VERBOSE) {
        System.out.println("TEST: iter=" + iter + " doc=" + doc);
      }
      Automaton expected = slowSynFilter(doc, syns, flatten);
expected:\n" + expected.toDot()); if (flatten) { Automaton unflattened = toAutomaton(aNoFlattened.tokenStream("field", new StringReader(doc))); System.out.println(" actual unflattened:\n" + unflattened.toDot()); } } Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc))); if (VERBOSE) { System.out.println(" actual:\n" + actual.toDot()); } assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength, synFilter.getMaxLookaheadUsed() <= maxSynLength); if (flatten) { assertTrue("flatten maxLookaheadUsed=" + flattenFilter.getMaxLookaheadUsed() + " maxFlattenLookahead=" + maxFlattenLookahead, flattenFilter.getMaxLookaheadUsed() <= maxFlattenLookahead); } checkAnalysisConsistency(random(), a, random().nextBoolean(), doc); // We can easily have a non-deterministic automaton at this point, e.g. if // more than one syn matched at given point, or if the syn mapped to an // output token that also happens to be in the input: try { actual = Operations.determinize(actual, 50000); } catch (TooComplexToDeterminizeException tctde) { // Unfortunately the syns can easily create difficult-to-determinize graphs: assertTrue(approxEquals(actual, expected)); continue; } try { expected = Operations.determinize(expected, 50000); } catch (TooComplexToDeterminizeException tctde) { // Unfortunately the syns can easily create difficult-to-determinize graphs: assertTrue(approxEquals(actual, expected)); continue; } assertTrue(approxEquals(actual, expected)); assertTrue(Operations.sameLanguage(actual, expected)); } a.close(); } /** Only used when true equality is too costly to check! */ private boolean approxEquals(Automaton actual, Automaton expected) { // Don't collapse these into one line else the thread stack won't say which direction failed!: boolean b1 = approxSubsetOf(actual, expected); boolean b2 = approxSubsetOf(expected, actual); return b1 && b2; } private boolean approxSubsetOf(Automaton a1, Automaton a2) { AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a1); for(int i=0;i<2000;i++) { int[] ints = ras.getRandomAcceptedString(random()); IntsRef path = new IntsRef(ints, 0, ints.length); if (accepts(a2, path) == false) { throw new RuntimeException("a2 does not accept " + path); } } // Presumed true return true; } /** Like {@link Operations#run} except the incoming automaton is allowed to be non-deterministic. */ private static boolean accepts(Automaton a, IntsRef path) { Set<Integer> states = new HashSet<>(); states.add(0); Transition t = new Transition(); for(int i=0;i<path.length;i++) { int digit = path.ints[path.offset+i]; Set<Integer> nextStates = new HashSet<>(); for(int state : states) { int count = a.initTransition(state, t); for(int j=0;j<count;j++) { a.getNextTransition(t); if (digit >= t.min && digit <= t.max) { nextStates.add(t.dest); } } } states = nextStates; if (states.isEmpty()) { return false; } } for(int state : states) { if (a.isAccept(state)) { return true; } } return false; } /** Stupid, slow brute-force, yet hopefully bug-free, synonym filter. 
  private Automaton slowSynFilter(String doc, List<OneSyn> syns, boolean flatten) {
    String[] tokens = doc.split(" +");
    if (VERBOSE) {
      System.out.println("  doc has " + tokens.length + " tokens");
    }
    int i = 0;
    Automaton.Builder a = new Automaton.Builder();
    int lastState = a.createState();
    while (i < tokens.length) {
      // Consider all possible syn matches starting at this point:
      assert tokens[i].length() == 1;
      if (VERBOSE) {
        System.out.println("  i=" + i);
      }
      List<OneSyn> matches = new ArrayList<>();
      for (OneSyn syn : syns) {
        if (i + syn.in.length <= tokens.length) {
          boolean match = true;
          for (int j = 0; j < syn.in.length; j++) {
            if (tokens[i + j].charAt(0) != syn.in[j]) {
              match = false;
              break;
            }
          }

          if (match) {
            if (matches.isEmpty() == false) {
              if (syn.in.length < matches.get(0).in.length) {
                // Greedy matching: we already found longer syns matching here
                continue;
              } else if (syn.in.length > matches.get(0).in.length) {
                // Greedy matching: all previous matches were shorter, so we drop them
                matches.clear();
              } else {
                // Keep the current matches: we allow multiple synonyms matching the same input string
              }
            }

            matches.add(syn);
          }
        }
      }

      int nextState = a.createState();

      if (matches.isEmpty() == false) {
        // We have match(es) starting at this token
        if (VERBOSE) {
          System.out.println("  matches @ i=" + i + ": " + matches);
        }
        // We keepOrig if any of the matches said to:
        boolean keepOrig = false;
        for (OneSyn syn : matches) {
          keepOrig |= syn.keepOrig;
        }

        List<Integer> flatStates;
        if (flatten) {
          flatStates = new ArrayList<>();
        } else {
          flatStates = null;
        }

        if (keepOrig) {
          // Add path for the original tokens
          addSidePath(a, lastState, nextState, matches.get(0).in, flatStates);
        }

        for (OneSyn syn : matches) {
          addSidePath(a, lastState, nextState, syn.out, flatStates);
        }

        i += matches.get(0).in.length;
      } else {
        a.addTransition(lastState, nextState, tokens[i].charAt(0));
        i++;
      }

      lastState = nextState;
    }

    a.setAccept(lastState, true);

    return topoSort(a.finish());
  }
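  // For example, with the single rule "a b" -> "x" (keepOrig=true), slowSynFilter on the
  // doc "a b" builds two parallel side paths between the same pair of states: one path
  // labeled a -> b for the original tokens and one single transition labeled x for the
  // synonym, which is the graph the real filter is expected to produce.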
  /** Just creates a side path from startState to endState with the provided tokens. */
  private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
    int lastState = startState;
    for (int i = 0; i < tokens.length; i++) {
      int nextState;
      if (i == tokens.length - 1) {
        nextState = endState;
      } else if (flatStates == null || i >= flatStates.size()) {
        nextState = a.createState();
        if (flatStates != null) {
          assert i == flatStates.size();
          flatStates.add(nextState);
        }
      } else {
        nextState = flatStates.get(i);
      }
      a.addTransition(lastState, nextState, tokens[i]);

      lastState = nextState;
    }
  }

  private Automaton toAutomaton(TokenStream ts) throws IOException {
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    Automaton a = new Automaton();
    int srcNode = -1;
    int destNode = -1;
    int state = a.createState();
    while (ts.incrementToken()) {
      assert termAtt.length() == 1;
      char c = termAtt.charAt(0);
      int posInc = posIncAtt.getPositionIncrement();
      if (posInc != 0) {
        srcNode += posInc;
        while (state < srcNode) {
          state = a.createState();
        }
      }
      destNode = srcNode + posLenAtt.getPositionLength();
      while (state < destNode) {
        state = a.createState();
      }
      a.addTransition(srcNode, destNode, c);
    }
    ts.end();
    ts.close();
    a.finishState();
    a.setAccept(destNode, true);
    return a;
  }

  /*
  private String toDot(TokenStream ts) throws IOException {
    PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
    ts.reset();
    int srcNode = -1;
    int destNode = -1;

    StringBuilder b = new StringBuilder();
    b.append("digraph Automaton {\n");
    b.append("  rankdir = LR\n");
    b.append("  node [width=0.2, height=0.2, fontsize=8]\n");
    b.append("  initial [shape=plaintext,label=\"\"]\n");
    b.append("  initial -> 0\n");

    while (ts.incrementToken()) {
      int posInc = posIncAtt.getPositionIncrement();
      if (posInc != 0) {
        srcNode += posInc;
        b.append("  ");
        b.append(srcNode);
        b.append(" [shape=circle,label=\"" + srcNode + "\"]\n");
      }
      destNode = srcNode + posLenAtt.getPositionLength();
      b.append("  ");
      b.append(srcNode);
      b.append(" -> ");
      b.append(destNode);
      b.append(" [label=\"");
      b.append(termAtt);
      b.append("\"");
      if (typeAtt.type().equals("word") == false) {
        b.append(" color=red");
      }
      b.append("]\n");
    }
    ts.end();
    ts.close();

    b.append('}');
    return b.toString();
  }
  */

  /** Renumbers nodes according to their topo sort */
  private Automaton topoSort(Automaton in) {
    int[] newToOld = Operations.topoSortStates(in);
    int[] oldToNew = new int[newToOld.length];

    Automaton.Builder a = new Automaton.Builder();
    //System.out.println("remap:");
    for (int i = 0; i < newToOld.length; i++) {
      a.createState();
      oldToNew[newToOld[i]] = i;
      //System.out.println("  " + newToOld[i] + " -> " + i);
      if (in.isAccept(newToOld[i])) {
        a.setAccept(i, true);
        //System.out.println("    **");
      }
    }

    Transition t = new Transition();
    for (int i = 0; i < newToOld.length; i++) {
      int count = in.initTransition(newToOld[i], t);
      for (int j = 0; j < count; j++) {
        in.getNextTransition(t);
        a.addTransition(i, oldToNew[t.dest], t.min, t.max);
      }
    }

    return a.finish();
  }
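  // An illustration (not exercised directly) of the state numbering toAutomaton above
  // uses, where positions become states and a token spans posLength states: the stream
  // "x" (posInc=1, posLen=2), "a" (posInc=0, posLen=1), "b" (posInc=1, posLen=1)
  // yields the transitions 0 --x--> 2, 0 --a--> 1 and 1 --b--> 2.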
  /**
   * Verify token type and positionLengths on synonyms of different word counts.
   */
  public void testPositionLengthAndType() throws Exception {
    String testFile =
        "spider man, spiderman\n" +
        "usa,united states,u s a,united states of america";
    Analyzer analyzer = new MockAnalyzer(random());
    SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
    parser.parse(new StringReader(testFile));
    analyzer.close();

    SynonymMap map = parser.build();
    analyzer = getFlattenAnalyzer(parser, true);

    BytesRef value = Util.get(map.fst, Util.toUTF32(new CharsRef("usa"), new IntsRefBuilder()));
    ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
    final int code = bytesReader.readVInt();
    final int count = code >>> 1;
    final int[] synonymsIdxs = new int[count];
    for (int i = 0; i < count; i++) {
      synonymsIdxs[i] = bytesReader.readVInt();
    }

    BytesRef scratchBytes = new BytesRef();
    map.words.get(synonymsIdxs[2], scratchBytes);

    int synonymLength = 1;
    for (int i = scratchBytes.offset; i < scratchBytes.offset + scratchBytes.length; i++) {
      if (scratchBytes.bytes[i] == SynonymMap.WORD_SEPARATOR) {
        synonymLength++;
      }
    }

    assertEquals(count, 3);
    assertEquals(synonymLength, 4);

    assertAnalyzesTo(analyzer, "spider man",
        new String[] {"spiderman", "spider", "man"},
        new int[] {0, 0, 7},
        new int[] {10, 6, 10},
        new String[] {"SYNONYM", "word", "word"},
        new int[] {1, 0, 1},
        new int[] {2, 1, 1});

    assertAnalyzesToPositions(analyzer, "amazing spider man",
        new String[] {"amazing", "spiderman", "spider", "man"},
        new String[] {"word", "SYNONYM", "word", "word"},
        new int[] {1, 1, 0, 1},
        new int[] {1, 2, 1, 1});

    // System.out.println(toDot(getAnalyzer(parser, true).tokenStream("field", new StringReader("the usa is wealthy"))));

    assertAnalyzesTo(analyzer, "the united states of america is wealthy",
        new String[] {"the", "usa", "united", "u", "united", "states", "s", "states", "a", "of", "america", "is", "wealthy"},
        new int[] {0, 4, 4, 4, 4, 11, 11, 11, 18, 18, 21, 29, 32},
        new int[] {3, 28, 10, 10, 10, 28, 17, 17, 28, 20, 28, 31, 39},
        new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "word", "word", "word", "word"},
        new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
        new int[] {1, 4, 1, 1, 1, 3, 1, 1, 2, 1, 1, 1, 1});

    assertAnalyzesToPositions(analyzer, "spiderman",
        new String[] {"spider", "spiderman", "man"},
        new String[] {"SYNONYM", "word", "SYNONYM"},
        new int[] {1, 0, 1},
        new int[] {1, 2, 1});

    assertAnalyzesTo(analyzer, "spiderman enemies",
        new String[] {"spider", "spiderman", "man", "enemies"},
        new int[] {0, 0, 0, 10},
        new int[] {9, 9, 9, 17},
        new String[] {"SYNONYM", "word", "SYNONYM", "word"},
        new int[] {1, 0, 1, 1},
        new int[] {1, 2, 1, 1});

    assertAnalyzesTo(analyzer, "the usa is wealthy",
        new String[] {"the", "united", "u", "united", "usa", "states", "s", "states", "a", "of", "america", "is", "wealthy"},
        new int[] {0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 11},
        new int[] {3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 18},
        new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"},
        new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
        new int[] {1, 1, 1, 1, 4, 3, 1, 1, 2, 1, 1, 1, 1});

    assertGraphStrings(analyzer, "the usa is wealthy", new String[] {
        "the usa is wealthy",
        "the united states is wealthy",
        "the u s a is wealthy",
        "the united states of america is wealthy",
        // Wrong. Here only due to "sausagization" of the multi word synonyms.
"the u states is wealthy", "the u states a is wealthy", "the u s of america is wealthy", "the u states of america is wealthy", "the united s a is wealthy", "the united states a is wealthy", "the united s of america is wealthy"}); assertAnalyzesTo(analyzer, "the united states is wealthy", new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "is", "wealthy"}, new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21}, new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1}, false); assertAnalyzesTo(analyzer, "the united states of balance", new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "of", "balance"}, new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21}, new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"}, new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1}, new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1}); analyzer.close(); } public void testMultiwordOffsets() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = true; add(b, "national hockey league", "nhl", keepOrig); Analyzer a = getFlattenAnalyzer(b, true); assertAnalyzesTo(a, "national hockey league", new String[]{"nhl", "national", "hockey", "league"}, new int[]{0, 0, 9, 16}, new int[]{22, 8, 15, 22}, new int[]{1, 0, 1, 1}); a.close(); } public void testIncludeOrig() throws Exception { SynonymMap.Builder b = new SynonymMap.Builder(true); final boolean keepOrig = true; add(b, "a b", "ab", keepOrig); add(b, "a c", "ac", keepOrig); add(b, "a", "aa", keepOrig); add(b, "b", "bb", keepOrig); add(b, "z x c v", "zxcv", keepOrig); add(b, "x c", "xc", keepOrig); Analyzer a = getFlattenAnalyzer(b, true); assertAnalyzesTo(a, "$", new String[]{"$"}, new int[]{1}); assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); assertAnalyzesTo(a, "$ a", new String[]{"$", "aa", "a"}, new int[]{1, 1, 0}); assertAnalyzesTo(a, "a $", new String[]{"aa", "a", "$"}, new int[]{1, 0, 1}); assertAnalyzesTo(a, "$ a !", new String[]{"$", "aa", "a", "!"}, new int[]{1, 1, 0, 1}); assertAnalyzesTo(a, "a a", new String[]{"aa", "a", "aa", "a"}, new int[]{1, 0, 1, 0}); assertAnalyzesTo(a, "b", new String[]{"bb", "b"}, new int[]{1, 0}); assertAnalyzesTo(a, "z x c v", new String[]{"zxcv", "z", "x", "c", "v"}, new int[]{1, 0, 1, 1, 1}); assertAnalyzesTo(a, "z x c $", new String[]{"z", "xc", "x", "c", "$"}, new int[]{1, 1, 0, 1, 1}); a.close(); } public void testUpperCase() throws IOException { assertMapping("word", "synonym"); assertMapping("word".toUpperCase(Locale.ROOT), "synonym"); } private void assertMapping(String inputString, String outputString) throws IOException { SynonymMap.Builder builder = new SynonymMap.Builder(false); // the rules must be lowercased up front, but the incoming tokens will be case insensitive: CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder()); CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder()); builder.add(input, output, true); 
    Analyzer analyzer = new CustomAnalyzer(builder.build());
    TokenStream tokenStream = analyzer.tokenStream("field", inputString);
    assertTokenStreamContents(tokenStream, new String[] {outputString, inputString});
  }

  static class CustomAnalyzer extends Analyzer {
    private SynonymMap synonymMap;

    CustomAnalyzer(SynonymMap synonymMap) {
      this.synonymMap = synonymMap;
    }

    @Override
    protected TokenStreamComponents createComponents(String s) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // ignoreCase=true
      return new TokenStreamComponents(tokenizer, tokenStream);
    }
  }
}