/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.geode.cache.lucene.internal.repository;

import org.apache.geode.DataSerializable;
import org.apache.geode.DataSerializer;
import org.apache.geode.cache.*;
import org.apache.geode.cache.asyncqueue.AsyncEventQueue;
import org.apache.geode.cache.lucene.*;
import org.apache.geode.cache.lucene.internal.LuceneIndexStats;
import org.apache.geode.cache.lucene.internal.LuceneServiceImpl;
import org.apache.geode.cache.lucene.internal.directory.RegionDirectory;
import org.apache.geode.cache.lucene.internal.distributed.TopEntriesCollector;
import org.apache.geode.cache.lucene.internal.filesystem.ChunkKey;
import org.apache.geode.cache.lucene.internal.filesystem.File;
import org.apache.geode.cache.lucene.internal.filesystem.FileSystemStats;
import org.apache.geode.cache.lucene.internal.repository.serializer.HeterogeneousLuceneSerializer;
import org.apache.geode.test.junit.categories.PerformanceTest;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.RAMDirectory;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;

import static org.apache.geode.distributed.ConfigurationProperties.*;

/**
 * Microbenchmark of the IndexRepository that compares an IndexRepository built on top of the cache
 * with a stock Lucene IndexWriter backed by a RAMDirectory.
 */
@Category(PerformanceTest.class)
@Ignore("Tests have no assertions")
public class IndexRepositoryImplPerformanceTest {

  private static final int NUM_WORDS = 1000;
  private static int[] COMMIT_INTERVAL = new int[] {100, 1000, 5000};
  private static int NUM_ENTRIES = 500_000;
  private static int NUM_QUERIES = 500_000;

  private StandardAnalyzer analyzer = new StandardAnalyzer();
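  /**
   * Benchmarks IndexRepositoryImpl directly: documents are written through the repository into
   * replicated file and chunk regions, with no async event queue in the path (waitForAsync is a
   * no-op here, unlike in the full-service test below).
   */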
  @Test
  public void testIndexRepository() throws Exception {
    doTest("IndexRepository", new TestCallbacks() {

      private Cache cache;
      private IndexRepositoryImpl repo;
      private IndexWriter writer;

      @Override
      public void addObject(String key, String text) throws Exception {
        repo.create(key, new TestObject(text));
      }

      @Override
      public void commit() throws Exception {
        repo.commit();
      }

      @Override
      public void init() throws Exception {
        cache = new CacheFactory().set(MCAST_PORT, "0").set(LOG_LEVEL, "error").create();
        Region<String, File> fileRegion =
            cache.<String, File>createRegionFactory(RegionShortcut.REPLICATE).create("files");
        Region<ChunkKey, byte[]> chunkRegion =
            cache.<ChunkKey, byte[]>createRegionFactory(RegionShortcut.REPLICATE).create("chunks");
        RegionDirectory dir = new RegionDirectory(fileRegion, chunkRegion,
            new FileSystemStats(cache.getDistributedSystem(), "region-index"));
        final LuceneIndexStats stats =
            new LuceneIndexStats(cache.getDistributedSystem(), "region-index");
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        writer = new IndexWriter(dir, config);
        String[] indexedFields = new String[] {"text"};
        HeterogeneousLuceneSerializer mapper = new HeterogeneousLuceneSerializer(indexedFields);
        repo = new IndexRepositoryImpl(fileRegion, writer, mapper, stats, null);
      }

      @Override
      public void cleanup() throws IOException {
        writer.close();
        cache.close();
      }

      @Override
      public void waitForAsync() throws Exception {
        // do nothing
      }

      @Override
      public int query(Query query) throws IOException {
        TopEntriesCollector collector = new TopEntriesCollector();
        repo.query(query, 100, collector);
        return collector.size();
      }
    });
  }

  /**
   * Tests our full Lucene index implementation.
   */
  @Test
  public void testLuceneIndex() throws Exception {
    doTest("LuceneIndex", new TestCallbacks() {

      private Cache cache;
      private Region<String, TestObject> region;
      private LuceneService service;

      @Override
      public void addObject(String key, String text) throws Exception {
        region.create(key, new TestObject(text));
      }

      @Override
      public void commit() throws Exception {
        // NA
      }

      @Override
      public void init() throws Exception {
        cache = new CacheFactory().set(MCAST_PORT, "0").set(LOG_LEVEL, "warning").create();
        service = LuceneServiceProvider.get(cache);
        service.createIndex("index", "/region", "text");
        region = cache.<String, TestObject>createRegionFactory(RegionShortcut.PARTITION)
            .setPartitionAttributes(
                new PartitionAttributesFactory<>().setTotalNumBuckets(1).create())
            .create("region");
      }

      @Override
      public void cleanup() throws IOException {
        cache.close();
      }

      @Override
      public void waitForAsync() throws Exception {
        AsyncEventQueue aeq =
            cache.getAsyncEventQueue(LuceneServiceImpl.getUniqueIndexName("index", "/region"));

        // We will be at most 10 ms off
        while (aeq.size() > 0) {
          Thread.sleep(10);
        }
      }

      @Override
      public int query(final Query query) throws Exception {
        LuceneQuery<Object, Object> luceneQuery = service.createLuceneQueryFactory()
            .create("index", "/region", new LuceneQueryProvider() {

              @Override
              public Query getQuery(LuceneIndex index) throws LuceneQueryException {
                return query;
              }
            });

        PageableLuceneQueryResults<Object, Object> results = luceneQuery.findPages();
        return results.size();
      }
    });
  }
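  /**
   * Benchmarks a stock Lucene IndexWriter on a RegionDirectory backed by plain ConcurrentHashMaps,
   * isolating the file-system layer from the rest of the cache machinery.
   */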
  @Test
  public void testLuceneWithRegionDirectory() throws Exception {
    doTest("RegionDirectory", new TestCallbacks() {

      private Cache cache;
      private IndexWriter writer;
      private SearcherManager searcherManager;

      @Override
      public void init() throws Exception {
        cache = new CacheFactory().set(MCAST_PORT, "0").set(LOG_LEVEL, "warning").create();
        final FileSystemStats stats =
            new FileSystemStats(cache.getDistributedSystem(), "stats");
        RegionDirectory dir = new RegionDirectory(new ConcurrentHashMap<String, File>(),
            new ConcurrentHashMap<ChunkKey, byte[]>(), stats);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        writer = new IndexWriter(dir, config);
        searcherManager = new SearcherManager(writer, true, true, null);
      }

      @Override
      public void addObject(String key, String text) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("key", key, Store.YES));
        doc.add(new TextField("text", text, Store.NO));
        writer.addDocument(doc);
      }

      @Override
      public void commit() throws Exception {
        writer.commit();
        searcherManager.maybeRefresh();
      }

      @Override
      public void cleanup() throws Exception {
        writer.close();
        cache.close();
      }

      @Override
      public void waitForAsync() throws Exception {
        // do nothing
      }

      @Override
      public int query(Query query) throws Exception {
        IndexSearcher searcher = searcherManager.acquire();
        try {
          return searcher.count(query);
        } finally {
          searcherManager.release(searcher);
        }
      }
    });
  }

  @Test
  public void testLucene() throws Exception {
    doTest("Lucene", new TestCallbacks() {

      private IndexWriter writer;
      private SearcherManager searcherManager;

      @Override
      public void init() throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        writer = new IndexWriter(dir, config);
        searcherManager = new SearcherManager(writer, true, true, null);
      }

      @Override
      public void addObject(String key, String text) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("key", key, Store.YES));
        doc.add(new TextField("text", text, Store.NO));
        writer.addDocument(doc);
      }

      @Override
      public void commit() throws Exception {
        writer.commit();
        searcherManager.maybeRefresh();
      }

      @Override
      public void cleanup() throws Exception {
        writer.close();
      }

      @Override
      public void waitForAsync() throws Exception {
        // do nothing
      }

      @Override
      public int query(Query query) throws Exception {
        IndexSearcher searcher = searcherManager.acquire();
        try {
          return searcher.count(query);
        } finally {
          searcherManager.release(searcher);
        }
      }
    });
  }
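  /**
   * Runs the benchmark for one implementation: generates NUM_WORDS distinct random words, does a
   * warm-up pass at a tenth of the load, then, for each commit interval, runs a timed pass of
   * NUM_ENTRIES writes and NUM_QUERIES / 10 queries and prints the elapsed times in milliseconds.
   */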
  private void doTest(String testName, TestCallbacks callbacks) throws Exception {

    // Create some random words. We need to be careful to make sure we get NUM_WORDS distinct
    // words here.
    Set<String> wordSet = new HashSet<String>();
    Random rand = new Random();
    while (wordSet.size() < NUM_WORDS) {
      int length = rand.nextInt(12) + 3;
      char[] text = new char[length];
      for (int i = 0; i < length; i++) {
        text[i] = (char) (rand.nextInt(26) + 97);
      }
      wordSet.add(new String(text));
    }
    List<String> words = new ArrayList<String>(wordSet.size());
    words.addAll(wordSet);

    // warm up
    writeRandomWords(callbacks, words, rand, NUM_ENTRIES / 10, NUM_QUERIES / 10,
        COMMIT_INTERVAL[0]);

    // Do the actual test
    for (int i = 0; i < COMMIT_INTERVAL.length; i++) {
      Results results =
          writeRandomWords(callbacks, words, rand, NUM_ENTRIES, NUM_QUERIES / 10,
              COMMIT_INTERVAL[i]);

      System.out.println(testName + " writes(entries=" + NUM_ENTRIES + ", commit="
          + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.writeTime));
      System.out.println(testName + " queries(entries=" + NUM_ENTRIES + ", commit="
          + COMMIT_INTERVAL[i] + "): " + TimeUnit.NANOSECONDS.toMillis(results.queryTime));
    }
  }

  private Results writeRandomWords(TestCallbacks callbacks, List<String> words, Random rand,
      int numEntries, int numQueries, int commitInterval) throws Exception {
    Results results = new Results();
    callbacks.init();
    int[] counts = new int[words.size()];
    long start = System.nanoTime();
    try {
      for (int i = 0; i < numEntries; i++) {
        int word1 = rand.nextInt(words.size());
        int word2 = rand.nextInt(words.size());
        counts[word1]++;
        counts[word2]++;
        String value = words.get(word1) + " " + words.get(word2);
        callbacks.addObject("key" + i, value);

        if (i % commitInterval == 0 && i != 0) {
          callbacks.commit();
        }
      }
      callbacks.commit();
      callbacks.waitForAsync();
      long end = System.nanoTime();
      results.writeTime = end - start;

      start = System.nanoTime();
      for (int i = 0; i < numQueries; i++) {
        int wordIndex = rand.nextInt(words.size());
        String word = words.get(wordIndex);
        Query query = new TermQuery(new Term("text", word));
        int size = callbacks.query(query);
        // int size = callbacks.query(parser.parse(word));
        // All of my tests sometimes seem to be missing a couple of words, including the stock
        // Lucene one, so the count assertion stays commented out:
        // assertIndexDetailsEquals("Error on query " + i + " word=" + word, counts[wordIndex],
        // size);
      }
      end = System.nanoTime();
      results.queryTime = end - start;

      return results;
    } finally {
      callbacks.cleanup();
    }
  }

  private static class TestObject implements DataSerializable {
    private String text;

    public TestObject() {}

    public TestObject(String text) {
      this.text = text;
    }

    @Override
    public void toData(DataOutput out) throws IOException {
      DataSerializer.writeString(text, out);
    }

    @Override
    public void fromData(DataInput in) throws IOException, ClassNotFoundException {
      text = DataSerializer.readString(in);
    }

    @Override
    public String toString() {
      return text;
    }
  }

  private interface TestCallbacks {
    void init() throws Exception;

    int query(Query query) throws Exception;

    void addObject(String key, String text) throws Exception;

    void commit() throws Exception;

    void waitForAsync() throws Exception;

    void cleanup() throws Exception;
  }

  private static class Results {
    long writeTime;
    long queryTime;
  }
}