/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.mockrandom;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermStats;
import org.apache.lucene.codecs.blockterms.BlockTermsReader;
import org.apache.lucene.codecs.blockterms.BlockTermsWriter;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexReader;
import org.apache.lucene.codecs.blockterms.FixedGapTermsIndexWriter;
import org.apache.lucene.codecs.blockterms.TermsIndexReaderBase;
import org.apache.lucene.codecs.blockterms.TermsIndexWriterBase;
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexReader;
import org.apache.lucene.codecs.blockterms.VariableGapTermsIndexWriter;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsReader;
import org.apache.lucene.codecs.blocktreeords.OrdsBlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.codecs.memory.FSTOrdTermsReader;
import org.apache.lucene.codecs.memory.FSTOrdTermsWriter;
import org.apache.lucene.codecs.memory.FSTTermsReader;
import org.apache.lucene.codecs.memory.FSTTermsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Randomly combines terms index impl w/ postings impls.
 *
 * <p>At write time a per-segment seed is drawn and persisted into a small
 * {@code .sd} file; at read time that seed is loaded back so the reader can
 * replay exactly the same sequence of random choices (terms-dict impl, block
 * sizes, etc.) the writer made. Because of that lock-step replay, the order
 * and number of calls made on the seeded {@link Random} in
 * {@link #fieldsConsumer} and {@link #fieldsProducer} must stay aligned.
 */
public final class MockRandomPostingsFormat extends PostingsFormat {
  /** Source of per-segment seeds; poisoned at read-time (see no-arg ctor). */
  private final Random seedRandom;
  /** Extension of the per-segment seed file. */
  private static final String SEED_EXT = "sd";

  public MockRandomPostingsFormat() {
    // This ctor should *only* be used at read-time: get NPE if you use it!
    this(null);
  }

  /**
   * Write-time constructor.
   *
   * @param random source used to derive the per-segment seed; when {@code null}
   *     (the read-time path) a booby-trapped Random is installed that throws
   *     {@link IllegalStateException} on any use, catching accidental writes.
   */
  public MockRandomPostingsFormat(Random random) {
    super("MockRandom");
    if (random == null) {
      this.seedRandom = new Random(0L) {
        @Override
        protected int next(int arg0) {
          throw new IllegalStateException("Please use MockRandomPostingsFormat(Random)");
        }
      };
    } else {
      this.seedRandom = new Random(random.nextLong());
    }
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    int minSkipInterval;
    if (state.segmentInfo.maxDoc() > 1000000) {
      // Test2BPostings can OOME otherwise:
      minSkipInterval = 3;
    } else {
      minSkipInterval = 2;
    }

    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into postings header).
    // NOTE: Currently not passed to postings writer.
    //       before, it was being passed in wrongly as acceptableOverhead!
    int skipInterval = TestUtil.nextInt(seedRandom, minSkipInterval, 10);

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: skipInterval=" + skipInterval);
    }

    final long seed = seedRandom.nextLong();

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: writing to seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
    }

    // Persist the seed so fieldsProducer can replay the same random choices:
    final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
    try (IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
      CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
      out.writeLong(seed);
      CodecUtil.writeFooter(out);
    }

    final Random random = new Random(seed);

    random.nextInt(); // consume a random for buffersize

    PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);

    final FieldsConsumer fields;
    final int t1 = random.nextInt(5);

    if (t1 == 0) {
      boolean success = false;
      try {
        fields = new FSTTermsWriter(state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else if (t1 == 1) {
      boolean success = false;
      try {
        fields = new FSTOrdTermsWriter(state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else if (t1 == 2) {
      // Use BlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing BlockTree terms dict");
      }

      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random.nextInt(100));

      boolean success = false;
      try {
        fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else if (t1 == 3) {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing Block terms dict");
      }

      boolean success = false;

      final TermsIndexWriterBase indexWriter;
      try {
        if (random.nextBoolean()) {
          int termIndexInterval = TestUtil.nextInt(random, 1, 100);
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: fixed-gap terms index (tii=" + termIndexInterval + ")");
          }
          indexWriter = new FixedGapTermsIndexWriter(state, termIndexInterval);
        } else {
          final VariableGapTermsIndexWriter.IndexTermSelector selector;
          final int n2 = random.nextInt(3);
          if (n2 == 0) {
            final int tii = TestUtil.nextInt(random, 1, 100);
            selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: variable-gap terms index (tii=" + tii + ")");
            }
          } else if (n2 == 1) {
            final int docFreqThresh = TestUtil.nextInt(random, 2, 100);
            final int tii = TestUtil.nextInt(random, 1, 100);
            selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
          } else {
            final long seed2 = random.nextLong();
            final int gap = TestUtil.nextInt(random, 2, 40);
            if (LuceneTestCase.VERBOSE) {
              System.out.println("MockRandomCodec: random-gap terms index (max gap=" + gap + ")");
            }
            selector = new VariableGapTermsIndexWriter.IndexTermSelector() {
              final Random rand = new Random(seed2);

              @Override
              public boolean isIndexTerm(BytesRef term, TermStats stats) {
                return rand.nextInt(gap) == gap/2;
              }

              @Override
              public void newField(FieldInfo fieldInfo) {
              }
            };
          }
          indexWriter = new VariableGapTermsIndexWriter(state, selector);
        }
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }

      success = false;
      try {
        fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsWriter.close();
          } finally {
            indexWriter.close();
          }
        }
      }
    } else if (t1 == 4) {
      // Use OrdsBlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: writing OrdsBlockTree");
      }

      // TODO: would be nice to allow 1 but this is very
      // slow to write
      final int minTermsInBlock = TestUtil.nextInt(random, 2, 100);
      final int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random.nextInt(100));

      boolean success = false;
      try {
        fields = new OrdsBlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
        success = true;
      } finally {
        if (!success) {
          postingsWriter.close();
        }
      }
    } else {
      // BUG!
      throw new AssertionError();
    }

    return fields;
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {

    final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
    final long seed;
    // try-with-resources: the original code leaked the input if checkIndexHeader,
    // readLong or checkFooter threw (e.g. on a corrupt seed file); the writer side
    // already closes its seed output this way.
    try (ChecksumIndexInput in = state.directory.openChecksumInput(seedFileName, state.context)) {
      CodecUtil.checkIndexHeader(in, "MockRandomSeed", 0, 0, state.segmentInfo.getId(), state.segmentSuffix);
      seed = in.readLong();
      CodecUtil.checkFooter(in);
    }

    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
    }

    // Must consume randoms in the same order as the writer did:
    final Random random = new Random(seed);

    int readBufferSize = TestUtil.nextInt(random, 1, 4096);
    if (LuceneTestCase.VERBOSE) {
      System.out.println("MockRandomCodec: readBufferSize=" + readBufferSize);
    }

    PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);

    final FieldsProducer fields;
    final int t1 = random.nextInt(5);

    if (t1 == 0) {
      boolean success = false;
      try {
        fields = new FSTTermsReader(state, postingsReader);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else if (t1 == 1) {
      boolean success = false;
      try {
        fields = new FSTOrdTermsReader(state, postingsReader);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else if (t1 == 2) {
      // Use BlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading BlockTree terms dict");
      }

      boolean success = false;
      try {
        fields = new BlockTreeTermsReader(postingsReader, state);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else if (t1 == 3) {

      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading Block terms dict");
      }

      final TermsIndexReaderBase indexReader;
      boolean success = false;
      try {
        final boolean doFixedGap = random.nextBoolean();

        // randomness diverges from writer, here:

        if (doFixedGap) {
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: fixed-gap terms index");
          }
          indexReader = new FixedGapTermsIndexReader(state);
        } else {
          final int n2 = random.nextInt(3);
          if (n2 == 1) {
            random.nextInt();
          } else if (n2 == 2) {
            random.nextLong();
          }
          if (LuceneTestCase.VERBOSE) {
            System.out.println("MockRandomCodec: variable-gap terms index");
          }
          indexReader = new VariableGapTermsIndexReader(state);
        }

        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }

      success = false;
      try {
        fields = new BlockTermsReader(indexReader, postingsReader, state);
        success = true;
      } finally {
        if (!success) {
          try {
            postingsReader.close();
          } finally {
            indexReader.close();
          }
        }
      }
    } else if (t1 == 4) {
      // Use OrdsBlockTree terms dict
      if (LuceneTestCase.VERBOSE) {
        System.out.println("MockRandomCodec: reading OrdsBlockTree terms dict");
      }

      boolean success = false;
      try {
        fields = new OrdsBlockTreeTermsReader(postingsReader, state);
        success = true;
      } finally {
        if (!success) {
          postingsReader.close();
        }
      }
    } else {
      // BUG!
      throw new AssertionError();
    }

    return fields;
  }
}