/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.AssertingIndexSearcher;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util._TestUtil;
import org.sindice.siren.analysis.AnyURIAnalyzer;
import org.sindice.siren.analysis.AnyURIAnalyzer.URINormalisation;
import org.sindice.siren.analysis.JsonAnalyzer;
import org.sindice.siren.analysis.MockSirenAnalyzer;
import org.sindice.siren.analysis.MockSirenDocument;
import org.sindice.siren.analysis.MockSirenReader;
import org.sindice.siren.analysis.TupleAnalyzer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class for SIREn tests. It extends {@link LuceneTestCase} with factory
 * methods for analyzers, field types, index writers, readers and searchers,
 * and with helpers that add documents to a {@link RandomIndexWriter} under
 * the {@link #DEFAULT_TEST_FIELD} field.
 */
public abstract class SirenTestCase extends LuceneTestCase {

  protected static final Logger logger = LoggerFactory.getLogger(SirenTestCase.class);

  /** Name of the field populated by the document helpers of this class. */
  public static final String DEFAULT_TEST_FIELD = "content";

  /**
   * Create a {@link TupleAnalyzer} built from a {@link StandardAnalyzer} and
   * an {@link AnyURIAnalyzer} configured with {@link URINormalisation#FULL}.
   */
  public static Analyzer newTupleAnalyzer() {
    final AnyURIAnalyzer uris = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    uris.setUriNormalisation(URINormalisation.FULL);
    final StandardAnalyzer literals = new StandardAnalyzer(TEST_VERSION_CURRENT);
    return new TupleAnalyzer(TEST_VERSION_CURRENT, literals, uris);
  }

  /**
   * Create a {@link JsonAnalyzer} whose first analyzer argument is an
   * {@link AnyURIAnalyzer} configured with {@link URINormalisation#FULL} and
   * whose second analyzer argument is a {@link StandardAnalyzer}.
   */
  public static Analyzer newJsonAnalyzer() {
    final AnyURIAnalyzer fields = new AnyURIAnalyzer(TEST_VERSION_CURRENT);
    fields.setUriNormalisation(URINormalisation.FULL);
    final Analyzer literals = new StandardAnalyzer(TEST_VERSION_CURRENT);
    return new JsonAnalyzer(TEST_VERSION_CURRENT, fields, literals);
  }

  /**
   * Create a new {@link MockSirenAnalyzer}.
   */
  public static Analyzer newMockAnalyzer() {
    return new MockSirenAnalyzer();
  }

  /**
   * Create the base field type used by the tests: indexed and tokenized with
   * documents, frequencies and positions, norms kept, value not stored.
   */
  private static FieldType newFieldType() {
    final FieldType type = new FieldType();
    type.setStored(false);
    type.setOmitNorms(false);
    type.setIndexed(true);
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    type.setTokenized(true);
    return type;
  }

  /**
   * Same settings as {@link #newFieldType()}, but the value is stored.
   */
  protected static FieldType newStoredFieldType() {
    final FieldType type = newFieldType();
    type.setStored(true);
    return type;
  }

  /**
   * Same settings as {@link #newStoredFieldType()}, but norms are omitted.
   */
  private FieldType newStoredNoNormFieldType() {
    final FieldType type = newStoredFieldType();
    type.setOmitNorms(true);
    return type;
  }

  /**
   * Create a {@link RandomIndexWriter} over the directory using a
   * configuration built from the given analyzer and codec, a log merge policy
   * and the {@link DefaultSimilarity}.
   */
  protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                                                          final Analyzer analyzer,
                                                          final Codec codec)
  throws IOException {
    final IndexWriterConfig config =
      newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .setCodec(codec)
        .setMergePolicy(newLogMergePolicy())
        .setSimilarity(new DefaultSimilarity());
    return newRandomIndexWriter(dir, analyzer, codec, config);
  }

  /**
   * Create a {@link RandomIndexWriter} over the directory using the given
   * configuration, with the random force merge assertion enabled.
   * <p>
   * Only {@code dir} and {@code config} are read here; {@code analyzer} and
   * {@code codec} are not used by this method.
   */
  protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
                                                          final Analyzer analyzer,
                                                          final Codec codec,
                                                          final IndexWriterConfig config)
  throws IOException {
    final RandomIndexWriter randomWriter = new RandomIndexWriter(random(), dir, config);
    randomWriter.setDoRandomForceMergeAssert(true);
    return randomWriter;
  }

  /**
   * Open a reader from the writer and wrap it with
   * {@link SlowCompositeReaderWrapper}.
   */
  protected static IndexReader newIndexReader(final RandomIndexWriter writer)
  throws IOException {
    // The reader is wrapped into a slow reader by default, since most of the
    // tests need an atomic reader
    final IndexReader composite = writer.getReader();
    return SlowCompositeReaderWrapper.wrap(composite);
  }

  /**
   * Create a new searcher over the reader. This searcher might randomly use
   * threads.
   * <p>
   * Overrides the original {@link LuceneTestCase#newSearcher(IndexReader)}
   * implementation in order to avoid getting an
   * {@link AssertingIndexSearcher}, which is incompatible with SIREn.
   */
  public static IndexSearcher newSearcher(final IndexReader r) throws IOException {
    final Random rnd = random();

    if (usually()) {
      // unlike the original implementation, the reader is not wrapped, so
      // that it never ends up inside an AssertingAtomicReader
      if (rnd.nextBoolean()) {
        return new IndexSearcher(r);
      }
      return new IndexSearcher(r.getContext());
    }

    // Rare path: randomly decide whether the searcher gets an executor
    ThreadPoolExecutor executor = null;
    if (!rnd.nextBoolean()) {
      final int poolSize = _TestUtil.nextInt(rnd, 1, 8);
      final ThreadPoolExecutor pool =
        new ThreadPoolExecutor(poolSize, poolSize, 0L, TimeUnit.MILLISECONDS,
                               new LinkedBlockingQueue<Runnable>(),
                               new NamedThreadFactory("LuceneTestCase"));
      if (VERBOSE) {
        System.out.println("NOTE: newSearcher using ExecutorService with " + poolSize + " threads");
      }
      // shut the executor down when the reader is closed
      r.addReaderClosedListener(new ReaderClosedListener() {
        @Override
        public void onClose(final IndexReader reader) {
          _TestUtil.shutdownExecutorService(pool);
        }
      });
      executor = pool;
    }

    if (rnd.nextBoolean()) {
      return new IndexSearcher(r, executor);
    }
    return new IndexSearcher(r.getContext(), executor);
  }

  /**
   * Add one document holding {@code data} in {@link #DEFAULT_TEST_FIELD}
   * (stored field type) and commit.
   */
  protected static void addDocument(final RandomIndexWriter writer, final String data)
  throws IOException {
    final Field content = new Field(DEFAULT_TEST_FIELD, data, newStoredFieldType());
    final Document document = new Document();
    document.add(content);
    writer.addDocument(document);
    writer.commit();
  }

  /**
   * Add one document holding {@code data} in {@link #DEFAULT_TEST_FIELD}
   * (stored field type with norms omitted) and commit.
   */
  protected void addDocumentNoNorms(final RandomIndexWriter writer, final String data)
  throws IOException {
    final Field content = new Field(DEFAULT_TEST_FIELD, data, this.newStoredNoNormFieldType());
    final Document document = new Document();
    document.add(content);
    writer.addDocument(document);
    writer.commit();
  }

  /**
   * Atomically adds a block of documents with sequentially
   * assigned document IDs, one document per entry of {@code data}, then
   * commits.
   * <br>
   * See also {@link IndexWriter#addDocuments(Iterable)}
   */
  protected static void addDocuments(final RandomIndexWriter writer,
                                     final String[] data)
  throws IOException {
    final ArrayList<Document> block = new ArrayList<Document>();
    for (int i = 0; i < data.length; i++) {
      final Document document = new Document();
      document.add(new Field(DEFAULT_TEST_FIELD, data[i], newStoredFieldType()));
      block.add(document);
    }
    writer.addDocuments(block);
    writer.commit();
  }

  /**
   * Add one document per {@link MockSirenDocument}, each fed through a
   * {@link MockSirenReader} into {@link #DEFAULT_TEST_FIELD} (non-stored
   * field type), as a single block, then commit.
   */
  protected static void addDocuments(final RandomIndexWriter writer,
                                     final MockSirenDocument ... sdocs)
  throws IOException {
    final ArrayList<Document> block = new ArrayList<Document>(sdocs.length);
    for (int i = 0; i < sdocs.length; i++) {
      final MockSirenReader reader = new MockSirenReader(sdocs[i]);
      final Document document = new Document();
      document.add(new Field(DEFAULT_TEST_FIELD, reader, newFieldType()));
      block.add(document);
    }
    writer.addDocuments(block);
    writer.commit();
  }

  /**
   * Call {@code deleteAll()} on the writer and commit.
   */
  protected void deleteAll(final RandomIndexWriter writer) throws IOException {
    writer.deleteAll();
    writer.commit();
  }

  /**
   * Call {@code forceMerge(1)} on the writer.
   */
  protected void forceMerge(final RandomIndexWriter writer) throws IOException {
    writer.forceMerge(1);
  }

}