SirenTestCase.java example

Explorer
siren-master
/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sindice.siren.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.AssertingIndexSearcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util._TestUtil;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation;
import org.sindice.siren.analysis.JsonAnalyzer;
import org.sindice.siren.analysis.MockSirenAnalyzer;
import org.sindice.siren.analysis.MockSirenDocument;
import org.sindice.siren.analysis.MockSirenReader;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class SirenTestCase extends LuceneTestCase {

  protected static final Logger logger = LoggerFactory.getLogger(SirenTestCase.class);

  public static final String DEFAULT_TEST_FIELD = "content";

  public static Analyzer newTupleAnalyzer() {
    final AnyURIAnalyzer uriAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    uriAnalyzer.setUriNormalisation(URINormalisation.FULL);
    final TupleAnalyzer analyzer = new TupleAnalyzer(TEST_VERSION_CURRENT,
      new StandardAnalyzer(TEST_VERSION_CURRENT), uriAnalyzer);
    return analyzer;
  }

  public static Analyzer newJsonAnalyzer() {
    final AnyURIAnalyzer fieldAnalyzer = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    fieldAnalyzer.setUriNormalisation(URINormalisation.FULL);
    final Analyzer literalAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
    final JsonAnalyzer analyzer = new JsonAnalyzer(TEST_VERSION_CURRENT,
      fieldAnalyzer, literalAnalyzer);
    return analyzer;
  }

  public static Analyzer newMockAnalyzer() {
    return new MockSirenAnalyzer();
  }

  private static FieldType newFieldType() {
    final FieldType ft = new FieldType();
    ft.setStored(false);
    ft.setOmitNorms(false);
    ft.setIndexed(true);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    ft.setTokenized(true);
    return ft;
  }

  protected static FieldType newStoredFieldType() {
    final FieldType ft = newFieldType();
    ft.setStored(true);
    return ft;
  }

  private FieldType newStoredNoNormFieldType() {
    final FieldType ft = newStoredFieldType();
    ft.setOmitNorms(true);
    return ft;
  }

  protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                                                          final Analyzer analyzer,
                                                          final Codec codec)
  throws IOException {
    return newRandomIndexWriter(dir, analyzer, codec,
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
    .setCodec(codec).setMergePolicy(newLogMergePolicy())
    .setSimilarity(new DefaultSimilarity()));
  }

  protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                                                          final Analyzer analyzer,
                                                          final Codec codec,
                                                          final IndexWriterConfig config)
  throws IOException {
    final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    writer.setDoRandomForceMergeAssert(true);
    return writer;
  }

  protected static IndexReader newIndexReader(final RandomIndexWriter writer)
  throws IOException {
    // We are wrapping by default the reader into a slow reader, as most of the
    // tests require an atomic reader
    return SlowCompositeReaderWrapper.wrap(writer.getReader());
  }

  /**
   * Create a new searcher over the reader. This searcher might randomly use
   * threads.
   * <p>
   * Override the original {@link LuceneTestCase#newSearcher(IndexReader)}
   * implementation in order to avoid getting {@link AssertingIndexSearcher}
   * which is incompatible with SIREn.
   */
  public static IndexSearcher newSearcher(final IndexReader r) throws IOException {
    final Random random = random();
    if (usually()) {
      // compared to the original implementation, we do not wrap to avoid
      // wrapping into an AssertingAtomicReader
      return random.nextBoolean() ? new IndexSearcher(r) : new IndexSearcher(r.getContext());
    } else {
      int threads = 0;
      final ThreadPoolExecutor ex;
      if (random.nextBoolean()) {
        ex = null;
      } else {
        threads = _TestUtil.nextInt(random, 1, 8);
        ex = new ThreadPoolExecutor(threads, threads, 0L, TimeUnit.MILLISECONDS,
            new LinkedBlockingQueue<Runnable>(),
            new NamedThreadFactory("LuceneTestCase"));
      }
      if (ex != null) {
       if (VERBOSE) {
        System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
       }
       r.addReaderClosedListener(new ReaderClosedListener() {
         @Override
         public void onClose(final IndexReader reader) {
           _TestUtil.shutdownExecutorService(ex);
         }
       });
      }
      final IndexSearcher ret = random.nextBoolean()
          ? new IndexSearcher(r, ex)
          : new IndexSearcher(r.getContext(), ex);
      return ret;
    }
  }

  protected static void addDocument(final RandomIndexWriter writer, final String data)
  throws IOException {
    final Document doc = new Document();
    doc.add(new Field(DEFAULT_TEST_FIELD, data, newStoredFieldType()));
    writer.addDocument(doc);
    writer.commit();
  }

  protected void addDocumentNoNorms(final RandomIndexWriter writer, final String data)
  throws IOException {
    final Document doc = new Document();
    doc.add(new Field(DEFAULT_TEST_FIELD, data, this.newStoredNoNormFieldType()));
    writer.addDocument(doc);
    writer.commit();
  }

  /**
   * Atomically adds a block of documents with sequentially
   * assigned document IDs.
   * <br>
   * See also {@link IndexWriter#addDocuments(Iterable)}
   */
  protected static void addDocuments(final RandomIndexWriter writer,
                                     final String[] data)
  throws IOException {
    final ArrayList<Document> docs = new ArrayList<Document>();

    for (final String entry : data) {
      final Document doc = new Document();
      doc.add(new Field(DEFAULT_TEST_FIELD, entry, newStoredFieldType()));
      docs.add(doc);
    }
    writer.addDocuments(docs);
    writer.commit();
  }

  protected static void addDocuments(final RandomIndexWriter writer,
                                     final MockSirenDocument ... sdocs)
  throws IOException {
    final ArrayList<Document> docs = new ArrayList<Document>(sdocs.length);
    for (final MockSirenDocument sdoc : sdocs) {
      final Document doc = new Document();
      doc.add(new Field(DEFAULT_TEST_FIELD, new MockSirenReader(sdoc), newFieldType()));
      docs.add(doc);
    }
    writer.addDocuments(docs);
    writer.commit();
  }

  protected void deleteAll(final RandomIndexWriter writer) throws IOException {
    writer.deleteAll();
    writer.commit();
  }

  protected void forceMerge(final RandomIndexWriter writer) throws IOException {
    writer.forceMerge(1);
  }

}