package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Random;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene46.Lucene46Codec;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
* Abstract class to do basic tests for a postings format.
* NOTE: This test focuses on the postings
* (docs/freqs/positions/payloads/offsets) impl, not the
* terms dict. The [stretch] goal is for this test to be
* so thorough in testing a new PostingsFormat that if this
* test passes, then all Lucene/Solr tests should also pass. I.e.,
* if there is some bug in a given PostingsFormat that this
* test fails to catch then this test needs to be improved! */
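// A minimal concrete subclass might look like this (sketch;
// MyPostingsFormat / TestMyPostingsFormat are placeholder names, and
// TestUtil.alwaysPostingsFormat wraps a single format as a Codec):
//
//   public class TestMyPostingsFormat extends BasePostingsFormatTestCase {
//     private final Codec codec = TestUtil.alwaysPostingsFormat(new MyPostingsFormat());
//     @Override
//     protected Codec getCodec() {
//       return codec;
//     }
//   }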
// TODO can we make it easy for testing to pair up a "random terms dict impl" with your postings base format...
// TODO test when you reuse after skipping a term or two, eg the block reuse case
/* TODO
- threads
- assert doc=-1 before any nextDoc
- if a PF passes this test but fails other tests then this
test has a bug!!
- test tricky reuse cases, eg across fields
- verify you get null if you pass needFreq/needOffset but
they weren't indexed
*/
public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
/**
* Returns the Codec to run tests against
*/
protected abstract Codec getCodec();
private enum Option {
// Sometimes use .advance():
SKIPPING,
// Sometimes reuse the DocsEnum/DocsAndPositionsEnum across terms:
REUSE_ENUMS,
// Sometimes pass non-null live docs:
LIVE_DOCS,
// Sometimes seek to term using previously saved TermState:
TERM_STATE,
// Sometimes don't fully consume docs from the enum
PARTIAL_DOC_CONSUME,
// Sometimes don't fully consume positions at each doc
PARTIAL_POS_CONSUME,
// Sometimes check payloads
PAYLOADS,
// Test w/ multiple threads
THREADS
};
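// Tests combine these options via EnumSet, e.g. EnumSet.allOf(Option.class)
// for everything, or EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)) to
// skip payload checks.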
/** Given the same random seed this always enumerates the
* same random postings */
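// E.g. (sketch), two instances built from the same seed enumerate
// identical postings:
//
//   SeedPostings a = new SeedPostings(42L, 1, 3, null, IndexOptions.DOCS_ONLY, false);
//   SeedPostings b = new SeedPostings(42L, 1, 3, null, IndexOptions.DOCS_ONLY, false);
//   assert a.nextDoc() == b.nextDoc();  // same docID sequence from both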
private static class SeedPostings extends DocsAndPositionsEnum {
// Used only to generate docIDs; this way if you pull w/
// or w/o positions you get the same docID sequence:
private final Random docRandom;
private final Random random;
public int docFreq;
private final int maxDocSpacing;
private final int payloadSize;
private final boolean fixedPayloads;
private final Bits liveDocs;
private final BytesRef payload;
private final IndexOptions options;
private final boolean doPositions;
private final boolean allowPayloads;
private int docID;
private int freq;
public int upto;
private int pos;
private int offset;
private int startOffset;
private int endOffset;
private int posSpacing;
private int posUpto;
public SeedPostings(long seed, int minDocFreq, int maxDocFreq, Bits liveDocs, IndexOptions options, boolean allowPayloads) {
random = new Random(seed);
docRandom = new Random(random.nextLong());
docFreq = TestUtil.nextInt(random, minDocFreq, maxDocFreq);
this.liveDocs = liveDocs;
this.allowPayloads = allowPayloads;
// TODO: more realistic to inversely tie this to numDocs:
maxDocSpacing = TestUtil.nextInt(random, 1, 100);
if (random.nextInt(10) == 7) {
// 10% of the time create big payloads:
payloadSize = 1 + random.nextInt(3);
} else {
payloadSize = 1 + random.nextInt(1);
}
fixedPayloads = random.nextBoolean();
byte[] payloadBytes = new byte[payloadSize];
payload = new BytesRef(payloadBytes);
this.options = options;
doPositions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS.compareTo(options) <= 0;
}
@Override
public int nextDoc() {
while(true) {
_nextDoc();
if (liveDocs == null || docID == NO_MORE_DOCS || liveDocs.get(docID)) {
return docID;
}
}
}
private int _nextDoc() {
// Must consume random:
while(posUpto < freq) {
nextPosition();
}
if (upto < docFreq) {
if (upto == 0 && docRandom.nextBoolean()) {
// Sometimes index docID = 0
} else if (maxDocSpacing == 1) {
docID++;
} else {
// TODO: sometimes have a biggish gap here!
docID += TestUtil.nextInt(docRandom, 1, maxDocSpacing);
}
if (random.nextInt(200) == 17) {
// ~0.5% of the time: very high freq
freq = TestUtil.nextInt(random, 1, 1000);
} else if (random.nextInt(10) == 7) {
// ~10% of the time: medium freq
freq = TestUtil.nextInt(random, 1, 20);
} else {
freq = TestUtil.nextInt(random, 1, 4);
}
pos = 0;
offset = 0;
posUpto = 0;
posSpacing = TestUtil.nextInt(random, 1, 100);
upto++;
return docID;
} else {
return docID = NO_MORE_DOCS;
}
}
@Override
public int docID() {
return docID;
}
@Override
public int freq() {
return freq;
}
@Override
public int nextPosition() {
if (!doPositions) {
posUpto = freq;
return 0;
}
assert posUpto < freq;
if (posUpto == 0 && random.nextBoolean()) {
// Sometimes index pos = 0
} else if (posSpacing == 1) {
pos++;
} else {
pos += TestUtil.nextInt(random, 1, posSpacing);
}
if (payloadSize != 0) {
if (fixedPayloads) {
payload.length = payloadSize;
random.nextBytes(payload.bytes);
} else {
int thisPayloadSize = random.nextInt(payloadSize);
if (thisPayloadSize != 0) {
// Variable-size payload for this position:
payload.length = thisPayloadSize;
random.nextBytes(payload.bytes);
} else {
payload.length = 0;
}
}
} else {
payload.length = 0;
}
if (!allowPayloads) {
payload.length = 0;
}
startOffset = offset + random.nextInt(5);
endOffset = startOffset + random.nextInt(10);
offset = endOffset;
posUpto++;
return pos;
}
@Override
public int startOffset() {
return startOffset;
}
@Override
public int endOffset() {
return endOffset;
}
@Override
public BytesRef getPayload() {
return payload.length == 0 ? null : payload;
}
@Override
public int advance(int target) throws IOException {
return slowAdvance(target);
}
@Override
public long cost() {
return docFreq;
}
}
private static class FieldAndTerm {
String field;
BytesRef term;
public FieldAndTerm(String field, BytesRef term) {
this.field = field;
this.term = BytesRef.deepCopyOf(term);
}
}
// Holds all postings:
private static Map<String,SortedMap<BytesRef,Long>> fields;
private static FieldInfos fieldInfos;
private static FixedBitSet globalLiveDocs;
private static List<FieldAndTerm> allTerms;
private static int maxDoc;
private static long totalPostings;
private static long totalPayloadBytes;
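// Term-name prefixes (assigned in createPostings) select the docFreq range
// below; e.g. with RANDOM_MULTIPLIER == 1 a "medium_" term draws a docFreq
// in [3000, 6000], while an unprefixed term draws one in [1, 3].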
private static SeedPostings getSeedPostings(String term, long seed, boolean withLiveDocs, IndexOptions options, boolean allowPayloads) {
int minDocFreq, maxDocFreq;
if (term.startsWith("big_")) {
minDocFreq = RANDOM_MULTIPLIER * 50000;
maxDocFreq = RANDOM_MULTIPLIER * 70000;
} else if (term.startsWith("medium_")) {
minDocFreq = RANDOM_MULTIPLIER * 3000;
maxDocFreq = RANDOM_MULTIPLIER * 6000;
} else if (term.startsWith("low_")) {
minDocFreq = RANDOM_MULTIPLIER;
maxDocFreq = RANDOM_MULTIPLIER * 40;
} else {
minDocFreq = 1;
maxDocFreq = 3;
}
return new SeedPostings(seed, minDocFreq, maxDocFreq, withLiveDocs ? globalLiveDocs : null, options, allowPayloads);
}
@BeforeClass
public static void createPostings() throws IOException {
totalPostings = 0;
totalPayloadBytes = 0;
fields = new TreeMap<>();
final int numFields = TestUtil.nextInt(random(), 1, 5);
if (VERBOSE) {
System.out.println("TEST: " + numFields + " fields");
}
maxDoc = 0;
FieldInfo[] fieldInfoArray = new FieldInfo[numFields];
int fieldUpto = 0;
while (fieldUpto < numFields) {
String field = TestUtil.randomSimpleString(random());
if (fields.containsKey(field)) {
continue;
}
fieldInfoArray[fieldUpto] = new FieldInfo(field, true, fieldUpto, false, false, true,
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
null, DocValuesType.NUMERIC, null);
fieldUpto++;
SortedMap<BytesRef,Long> postings = new TreeMap<>();
fields.put(field, postings);
Set<String> seenTerms = new HashSet<>();
int numTerms;
if (random().nextInt(10) == 7) {
numTerms = atLeast(50);
} else {
numTerms = TestUtil.nextInt(random(), 2, 20);
}
for(int termUpto=0;termUpto<numTerms;termUpto++) {
String term = TestUtil.randomSimpleString(random());
if (seenTerms.contains(term)) {
continue;
}
seenTerms.add(term);
if (TEST_NIGHTLY && termUpto == 0 && fieldUpto == 1) {
// Make 1 big term:
term = "big_" + term;
} else if (termUpto == 1 && fieldUpto == 1) {
// Make 1 medium term:
term = "medium_" + term;
} else if (random().nextBoolean()) {
// Low freq term:
term = "low_" + term;
} else {
// Very low freq term (don't multiply by RANDOM_MULTIPLIER):
term = "verylow_" + term;
}
long termSeed = random().nextLong();
postings.put(new BytesRef(term), termSeed);
// NOTE: sort of silly: we enum all the docs just to
// get the maxDoc
DocsEnum docsEnum = getSeedPostings(term, termSeed, false, IndexOptions.DOCS_ONLY, true);
int doc;
int lastDoc = 0;
while((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
lastDoc = doc;
}
maxDoc = Math.max(lastDoc, maxDoc);
}
}
fieldInfos = new FieldInfos(fieldInfoArray);
// It's the count, not the last docID:
maxDoc++;
globalLiveDocs = new FixedBitSet(maxDoc);
double liveRatio = random().nextDouble();
for(int i=0;i<maxDoc;i++) {
if (random().nextDouble() <= liveRatio) {
globalLiveDocs.set(i);
}
}
allTerms = new ArrayList<>();
for(Map.Entry<String,SortedMap<BytesRef,Long>> fieldEnt : fields.entrySet()) {
String field = fieldEnt.getKey();
for(Map.Entry<BytesRef,Long> termEnt : fieldEnt.getValue().entrySet()) {
allTerms.add(new FieldAndTerm(field, termEnt.getKey()));
}
}
if (VERBOSE) {
System.out.println("TEST: done init postings; " + allTerms.size() + " total terms, across " + fieldInfos.size() + " fields");
}
}
@AfterClass
public static void afterClass() throws Exception {
allTerms = null;
fieldInfos = null;
fields = null;
globalLiveDocs = null;
}
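// SeedFields/SeedTerms/SeedTermsEnum regenerate postings on the fly from the
// per-term seeds, so the FieldsConsumer under test can consume them as if
// reading a real in-memory segment.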
private static class SeedFields extends Fields {
final Map<String,SortedMap<BytesRef,Long>> fields;
final FieldInfos fieldInfos;
final IndexOptions maxAllowed;
final boolean allowPayloads;
public SeedFields(Map<String,SortedMap<BytesRef,Long>> fields, FieldInfos fieldInfos, IndexOptions maxAllowed, boolean allowPayloads) {
this.fields = fields;
this.fieldInfos = fieldInfos;
this.maxAllowed = maxAllowed;
this.allowPayloads = allowPayloads;
}
@Override
public Iterator<String> iterator() {
return fields.keySet().iterator();
}
@Override
public Terms terms(String field) {
SortedMap<BytesRef,Long> terms = fields.get(field);
if (terms == null) {
return null;
} else {
return new SeedTerms(terms, fieldInfos.fieldInfo(field), maxAllowed, allowPayloads);
}
}
@Override
public int size() {
return fields.size();
}
}
private static class SeedTerms extends Terms {
final SortedMap<BytesRef,Long> terms;
final FieldInfo fieldInfo;
final IndexOptions maxAllowed;
final boolean allowPayloads;
public SeedTerms(SortedMap<BytesRef,Long> terms, FieldInfo fieldInfo, IndexOptions maxAllowed, boolean allowPayloads) {
this.terms = terms;
this.fieldInfo = fieldInfo;
this.maxAllowed = maxAllowed;
this.allowPayloads = allowPayloads;
}
@Override
public TermsEnum iterator(TermsEnum reuse) {
SeedTermsEnum termsEnum;
if (reuse instanceof SeedTermsEnum) {
termsEnum = (SeedTermsEnum) reuse;
if (termsEnum.terms != terms) {
termsEnum = new SeedTermsEnum(terms, maxAllowed, allowPayloads);
}
} else {
termsEnum = new SeedTermsEnum(terms, maxAllowed, allowPayloads);
}
termsEnum.reset();
return termsEnum;
}
@Override
public long size() {
return terms.size();
}
@Override
public long getSumTotalTermFreq() {
throw new UnsupportedOperationException();
}
@Override
public long getSumDocFreq() {
throw new UnsupportedOperationException();
}
@Override
public int getDocCount() {
throw new UnsupportedOperationException();
}
@Override
public boolean hasFreqs() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return allowPayloads && fieldInfo.hasPayloads();
}
}
private static class SeedTermsEnum extends TermsEnum {
final SortedMap<BytesRef,Long> terms;
final IndexOptions maxAllowed;
final boolean allowPayloads;
private Iterator<Map.Entry<BytesRef,Long>> iterator;
private Map.Entry<BytesRef,Long> current;
public SeedTermsEnum(SortedMap<BytesRef,Long> terms, IndexOptions maxAllowed, boolean allowPayloads) {
this.terms = terms;
this.maxAllowed = maxAllowed;
this.allowPayloads = allowPayloads;
}
void reset() {
iterator = terms.entrySet().iterator();
}
@Override
public SeekStatus seekCeil(BytesRef text) {
SortedMap<BytesRef,Long> tailMap = terms.tailMap(text);
if (tailMap.isEmpty()) {
return SeekStatus.END;
} else {
iterator = tailMap.entrySet().iterator();
if (tailMap.firstKey().equals(text)) {
// Position on the found term so term()/docs() work, and next()
// advances past it:
current = iterator.next();
return SeekStatus.FOUND;
} else {
return SeekStatus.NOT_FOUND;
}
}
}
@Override
public BytesRef next() {
if (iterator.hasNext()) {
current = iterator.next();
return term();
} else {
return null;
}
}
@Override
public void seekExact(long ord) {
throw new UnsupportedOperationException();
}
@Override
public BytesRef term() {
return current.getKey();
}
@Override
public long ord() {
throw new UnsupportedOperationException();
}
@Override
public int docFreq() {
throw new UnsupportedOperationException();
}
@Override
public long totalTermFreq() {
throw new UnsupportedOperationException();
}
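// Rather than throwing, docs()/docsAndPositions() below return null when the
// requested flags ask for more than maxAllowed was indexed with: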
@Override
public final DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
if ((flags & DocsEnum.FLAG_FREQS) != 0 && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS) < 0) {
return null;
}
return getSeedPostings(current.getKey().utf8ToString(), current.getValue(), false, maxAllowed, allowPayloads);
}
@Override
public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
if (maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
return null;
}
if ((flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0 && maxAllowed.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
return null;
}
if ((flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0 && allowPayloads == false) {
return null;
}
return getSeedPostings(current.getKey().utf8ToString(), current.getValue(), false, maxAllowed, allowPayloads);
}
}
// TODO maybe instead of @BeforeClass just make a single test run: build postings & index & test it?
private FieldInfos currentFieldInfos;
// maxAllowed = the "highest" IndexOptions we can index with, but we will
// still randomly index some fields at lower IndexOptions:
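// Typical use (sketch, mirroring testRandom):
//
//   Directory dir = newFSDirectory(TestUtil.getTempDir("testPostingsFormat"));
//   FieldsProducer fp = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true, false);
//   // ... verify fp ...
//   fp.close();
//   dir.close();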
private FieldsProducer buildIndex(Directory dir, IndexOptions maxAllowed, boolean allowPayloads, boolean alwaysTestMax) throws IOException {
Codec codec = getCodec();
SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", maxDoc, false, codec, null);
int maxIndexOption = Arrays.asList(IndexOptions.values()).indexOf(maxAllowed);
if (VERBOSE) {
System.out.println("\nTEST: now build index");
}
int maxIndexOptionNoOffsets = Arrays.asList(IndexOptions.values()).indexOf(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
// TODO use allowPayloads
FieldInfo[] newFieldInfoArray = new FieldInfo[fields.size()];
for(int fieldUpto=0;fieldUpto<fields.size();fieldUpto++) {
FieldInfo oldFieldInfo = fieldInfos.fieldInfo(fieldUpto);
String pf = TestUtil.getPostingsFormat(codec, oldFieldInfo.name);
int fieldMaxIndexOption;
if (doesntSupportOffsets.contains(pf)) {
fieldMaxIndexOption = Math.min(maxIndexOptionNoOffsets, maxIndexOption);
} else {
fieldMaxIndexOption = maxIndexOption;
}
// Randomly picked the IndexOptions to index this
// field with:
IndexOptions indexOptions = IndexOptions.values()[alwaysTestMax ? fieldMaxIndexOption : random().nextInt(1+fieldMaxIndexOption)];
boolean doPayloads = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;
newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.name,
true,
fieldUpto,
false,
false,
doPayloads,
indexOptions,
null,
DocValuesType.NUMERIC,
null);
}
FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);
// Estimate that flushed segment size will be 25% of
// what we use in RAM:
long bytes = totalPostings * 8 + totalPayloadBytes;
SegmentWriteState writeState = new SegmentWriteState(null, dir,
segmentInfo, newFieldInfos,
null, new IOContext(new FlushInfo(maxDoc, bytes)));
Fields seedFields = new SeedFields(fields, newFieldInfos, maxAllowed, allowPayloads);
codec.postingsFormat().fieldsConsumer(writeState).write(seedFields);
if (VERBOSE) {
System.out.println("TEST: after indexing: files=");
for(String file : dir.listAll()) {
System.out.println(" " + file + ": " + dir.fileLength(file) + " bytes");
}
}
currentFieldInfos = newFieldInfos;
SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ);
return codec.postingsFormat().fieldsProducer(readState);
}
private static class ThreadState {
// Only used with REUSE option:
public DocsEnum reuseDocsEnum;
public DocsAndPositionsEnum reuseDocsAndPositionsEnum;
}
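// Verifies the given TermsEnum's postings against SeedPostings regenerated
// from the term's seed, exercising a random mix of the passed-in options
// (skipping, enum reuse, partial consumption, payload/offset checks, ...).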
private void verifyEnum(ThreadState threadState,
String field,
BytesRef term,
TermsEnum termsEnum,
// Maximum options (docs/freqs/positions/offsets) to test:
IndexOptions maxTestOptions,
IndexOptions maxIndexOptions,
EnumSet<Option> options,
boolean alwaysTestMax) throws IOException {
if (VERBOSE) {
System.out.println(" verifyEnum: options=" + options + " maxTestOptions=" + maxTestOptions);
}
// Make sure TermsEnum really is positioned on the
// expected term:
assertEquals(term, termsEnum.term());
// 50% of the time pass liveDocs:
boolean useLiveDocs = options.contains(Option.LIVE_DOCS) && random().nextBoolean();
Bits liveDocs;
if (useLiveDocs) {
liveDocs = globalLiveDocs;
if (VERBOSE) {
System.out.println(" use liveDocs");
}
} else {
liveDocs = null;
if (VERBOSE) {
System.out.println(" no liveDocs");
}
}
FieldInfo fieldInfo = currentFieldInfos.fieldInfo(field);
// NOTE: the expected postings can enumerate no docs if we are using liveDocs:
SeedPostings expected = getSeedPostings(term.utf8ToString(),
fields.get(field).get(term),
useLiveDocs,
maxIndexOptions,
true);
assertEquals(expected.docFreq, termsEnum.docFreq());
boolean allowFreqs = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0 &&
maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
// Check each stat ~2/3 of the time when it's allowed (always when alwaysTestMax):
boolean doCheckFreqs = allowFreqs && (alwaysTestMax || random().nextInt(3) <= 1);
boolean allowPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 &&
maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
boolean doCheckPositions = allowPositions && (alwaysTestMax || random().nextInt(3) <= 1);
boolean allowOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0 &&
maxTestOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
boolean doCheckOffsets = allowOffsets && (alwaysTestMax || random().nextInt(3) <= 1);
boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && (alwaysTestMax || random().nextInt(3) <= 1);
DocsEnum prevDocsEnum = null;
DocsEnum docsEnum;
DocsAndPositionsEnum docsAndPositionsEnum;
if (!doCheckPositions) {
if (allowPositions && random().nextInt(10) == 7) {
// 10% of the time, even though we will not check positions, pull a DocsAndPositions enum
if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
prevDocsEnum = threadState.reuseDocsAndPositionsEnum;
}
int flags = 0;
if (alwaysTestMax || random().nextBoolean()) {
flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
}
if (alwaysTestMax || random().nextBoolean()) {
flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
}
if (VERBOSE) {
System.out.println(" get DocsAndPositionsEnum (but we won't check positions) flags=" + flags);
}
threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags);
docsEnum = threadState.reuseDocsAndPositionsEnum;
docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum;
} else {
if (VERBOSE) {
System.out.println(" get DocsEnum");
}
if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
prevDocsEnum = threadState.reuseDocsEnum;
}
threadState.reuseDocsEnum = termsEnum.docs(liveDocs, prevDocsEnum, doCheckFreqs ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
docsEnum = threadState.reuseDocsEnum;
docsAndPositionsEnum = null;
}
} else {
if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) {
prevDocsEnum = threadState.reuseDocsAndPositionsEnum;
}
int flags = 0;
if (alwaysTestMax || doCheckOffsets || random().nextInt(3) == 1) {
flags |= DocsAndPositionsEnum.FLAG_OFFSETS;
}
if (alwaysTestMax || doCheckPayloads || random().nextInt(3) == 1) {
flags |= DocsAndPositionsEnum.FLAG_PAYLOADS;
}
if (VERBOSE) {
System.out.println(" get DocsAndPositionsEnum flags=" + flags);
}
threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags);
docsEnum = threadState.reuseDocsAndPositionsEnum;
docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum;
}
assertNotNull("null DocsEnum", docsEnum);
int initialDocID = docsEnum.docID();
assertEquals("inital docID should be -1" + docsEnum, -1, initialDocID);
if (VERBOSE) {
if (prevDocsEnum == null) {
System.out.println(" got enum=" + docsEnum);
} else if (prevDocsEnum == docsEnum) {
System.out.println(" got reuse enum=" + docsEnum);
} else {
System.out.println(" got enum=" + docsEnum + " (reuse of " + prevDocsEnum + " failed)");
}
}
// 10% of the time don't consume all docs:
int stopAt;
if (!alwaysTestMax && options.contains(Option.PARTIAL_DOC_CONSUME) && expected.docFreq > 1 && random().nextInt(10) == 7) {
stopAt = random().nextInt(expected.docFreq-1);
if (VERBOSE) {
System.out.println(" will not consume all docs (" + stopAt + " vs " + expected.docFreq + ")");
}
} else {
stopAt = expected.docFreq;
if (VERBOSE) {
System.out.println(" consume all docs");
}
}
double skipChance = alwaysTestMax ? 0.5 : random().nextDouble();
int numSkips = expected.docFreq < 3 ? 1 : TestUtil.nextInt(random(), 1, Math.min(20, expected.docFreq / 3));
int skipInc = expected.docFreq/numSkips;
int skipDocInc = maxDoc/numSkips;
// Sometimes do 100% skipping:
boolean doAllSkipping = options.contains(Option.SKIPPING) && random().nextInt(7) == 1;
double freqAskChance = alwaysTestMax ? 1.0 : random().nextDouble();
double payloadCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
double offsetCheckChance = alwaysTestMax ? 1.0 : random().nextDouble();
if (VERBOSE) {
if (options.contains(Option.SKIPPING)) {
System.out.println(" skipChance=" + skipChance + " numSkips=" + numSkips);
} else {
System.out.println(" no skipping");
}
if (doCheckFreqs) {
System.out.println(" freqAskChance=" + freqAskChance);
}
if (doCheckPayloads) {
System.out.println(" payloadCheckChance=" + payloadCheckChance);
}
if (doCheckOffsets) {
System.out.println(" offsetCheckChance=" + offsetCheckChance);
}
}
while (expected.upto <= stopAt) {
if (expected.upto == stopAt) {
if (stopAt == expected.docFreq) {
assertEquals("DocsEnum should have ended but didn't", DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
// Common bug is to forget to set this.doc=NO_MORE_DOCS in the enum!:
assertEquals("DocsEnum should have ended but didn't", DocsEnum.NO_MORE_DOCS, docsEnum.docID());
}
break;
}
if (options.contains(Option.SKIPPING) && (doAllSkipping || random().nextDouble() <= skipChance)) {
int targetDocID = -1;
if (expected.upto < stopAt && random().nextBoolean()) {
// Pick target we know exists:
final int skipCount = TestUtil.nextInt(random(), 1, skipInc);
for(int skip=0;skip<skipCount;skip++) {
if (expected.nextDoc() == DocsEnum.NO_MORE_DOCS) {
break;
}
}
} else {
// Pick random target (might not exist):
final int skipDocIDs = TestUtil.nextInt(random(), 1, skipDocInc);
if (skipDocIDs > 0) {
targetDocID = expected.docID() + skipDocIDs;
expected.advance(targetDocID);
}
}
if (expected.upto >= stopAt) {
int target = random().nextBoolean() ? maxDoc : DocsEnum.NO_MORE_DOCS;
if (VERBOSE) {
System.out.println(" now advance to end (target=" + target + ")");
}
assertEquals("DocsEnum should have ended but didn't", DocsEnum.NO_MORE_DOCS, docsEnum.advance(target));
break;
} else {
if (VERBOSE) {
if (targetDocID != -1) {
System.out.println(" now advance to random target=" + targetDocID + " (" + expected.upto + " of " + stopAt + ") current=" + docsEnum.docID());
} else {
System.out.println(" now advance to known-exists target=" + expected.docID() + " (" + expected.upto + " of " + stopAt + ") current=" + docsEnum.docID());
}
}
int docID = docsEnum.advance(targetDocID != -1 ? targetDocID : expected.docID());
assertEquals("docID is wrong", expected.docID(), docID);
}
} else {
expected.nextDoc();
if (VERBOSE) {
System.out.println(" now nextDoc to " + expected.docID() + " (" + expected.upto + " of " + stopAt + ")");
}
int docID = docsEnum.nextDoc();
assertEquals("docID is wrong", expected.docID(), docID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
}
}
if (doCheckFreqs && random().nextDouble() <= freqAskChance) {
if (VERBOSE) {
System.out.println(" now freq()=" + expected.freq());
}
int freq = docsEnum.freq();
assertEquals("freq is wrong", expected.freq(), freq);
}
if (doCheckPositions) {
int freq = docsEnum.freq();
int numPosToConsume;
if (!alwaysTestMax && options.contains(Option.PARTIAL_POS_CONSUME) && random().nextInt(5) == 1) {
numPosToConsume = random().nextInt(freq);
} else {
numPosToConsume = freq;
}
for(int i=0;i<numPosToConsume;i++) {
int pos = expected.nextPosition();
if (VERBOSE) {
System.out.println(" now nextPosition to " + pos);
}
assertEquals("position is wrong", pos, docsAndPositionsEnum.nextPosition());
if (doCheckPayloads) {
BytesRef expectedPayload = expected.getPayload();
if (random().nextDouble() <= payloadCheckChance) {
if (VERBOSE) {
System.out.println(" now check expectedPayload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
}
if (expectedPayload == null || expectedPayload.length == 0) {
assertNull("should not have payload", docsAndPositionsEnum.getPayload());
} else {
BytesRef payload = docsAndPositionsEnum.getPayload();
assertNotNull("should have payload but doesn't", payload);
assertEquals("payload length is wrong", expectedPayload.length, payload.length);
for(int byteUpto=0;byteUpto<expectedPayload.length;byteUpto++) {
assertEquals("payload bytes are wrong",
expectedPayload.bytes[expectedPayload.offset + byteUpto],
payload.bytes[payload.offset+byteUpto]);
}
// make a deep copy
payload = BytesRef.deepCopyOf(payload);
assertEquals("2nd call to getPayload returns something different!", payload, docsAndPositionsEnum.getPayload());
}
} else {
if (VERBOSE) {
System.out.println(" skip check payload length=" + (expectedPayload == null ? 0 : expectedPayload.length));
}
}
}
if (doCheckOffsets) {
if (random().nextDouble() <= offsetCheckChance) {
if (VERBOSE) {
System.out.println(" now check offsets: startOff=" + expected.startOffset() + " endOffset=" + expected.endOffset());
}
assertEquals("startOffset is wrong", expected.startOffset(), docsAndPositionsEnum.startOffset());
assertEquals("endOffset is wrong", expected.endOffset(), docsAndPositionsEnum.endOffset());
} else {
if (VERBOSE) {
System.out.println(" skip check offsets");
}
}
} else if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) {
if (VERBOSE) {
System.out.println(" now check offsets are -1");
}
assertEquals("startOffset isn't -1", -1, docsAndPositionsEnum.startOffset());
assertEquals("endOffset isn't -1", -1, docsAndPositionsEnum.endOffset());
}
}
}
}
}
private static class TestThread extends Thread {
private Fields fieldsSource;
private EnumSet<Option> options;
private IndexOptions maxIndexOptions;
private IndexOptions maxTestOptions;
private boolean alwaysTestMax;
private BasePostingsFormatTestCase testCase;
public TestThread(BasePostingsFormatTestCase testCase, Fields fieldsSource, EnumSet<Option> options, IndexOptions maxTestOptions,
IndexOptions maxIndexOptions, boolean alwaysTestMax) {
this.fieldsSource = fieldsSource;
this.options = options;
this.maxTestOptions = maxTestOptions;
this.maxIndexOptions = maxIndexOptions;
this.alwaysTestMax = alwaysTestMax;
this.testCase = testCase;
}
@Override
public void run() {
try {
try {
testCase.testTermsOneThread(fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
} catch (Throwable t) {
throw new RuntimeException(t);
}
} finally {
fieldsSource = null;
testCase = null;
}
}
}
private void testTerms(final Fields fieldsSource, final EnumSet<Option> options,
final IndexOptions maxTestOptions,
final IndexOptions maxIndexOptions,
final boolean alwaysTestMax) throws Exception {
if (options.contains(Option.THREADS)) {
int numThreads = TestUtil.nextInt(random(), 2, 5);
Thread[] threads = new Thread[numThreads];
for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
threads[threadUpto] = new TestThread(this, fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
threads[threadUpto].start();
}
for(int threadUpto=0;threadUpto<numThreads;threadUpto++) {
threads[threadUpto].join();
}
} else {
testTermsOneThread(fieldsSource, options, maxTestOptions, maxIndexOptions, alwaysTestMax);
}
}
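// Walks the terms in random order, sometimes re-seeking via a previously
// saved TermState, and verifies each term's postings: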
private void testTermsOneThread(Fields fieldsSource, EnumSet<Option> options,
IndexOptions maxTestOptions,
IndexOptions maxIndexOptions, boolean alwaysTestMax) throws IOException {
ThreadState threadState = new ThreadState();
// Test random terms/fields:
List<TermState> termStates = new ArrayList<>();
List<FieldAndTerm> termStateTerms = new ArrayList<>();
Collections.shuffle(allTerms, random());
int upto = 0;
while (upto < allTerms.size()) {
boolean useTermState = termStates.size() != 0 && random().nextInt(5) == 1;
FieldAndTerm fieldAndTerm;
TermsEnum termsEnum;
TermState termState = null;
if (!useTermState) {
// Seek by random field+term:
fieldAndTerm = allTerms.get(upto++);
if (VERBOSE) {
System.out.println("\nTEST: seek to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString() );
}
} else {
// Seek by previous saved TermState
int idx = random().nextInt(termStates.size());
fieldAndTerm = termStateTerms.get(idx);
if (VERBOSE) {
System.out.println("\nTEST: seek using TermState to term=" + fieldAndTerm.field + ":" + fieldAndTerm.term.utf8ToString());
}
termState = termStates.get(idx);
}
Terms terms = fieldsSource.terms(fieldAndTerm.field);
assertNotNull(terms);
termsEnum = terms.iterator(null);
if (!useTermState) {
assertTrue(termsEnum.seekExact(fieldAndTerm.term));
} else {
termsEnum.seekExact(fieldAndTerm.term, termState);
}
boolean savedTermState = false;
if (options.contains(Option.TERM_STATE) && !useTermState && random().nextInt(5) == 1) {
// Save away this TermState:
termStates.add(termsEnum.termState());
termStateTerms.add(fieldAndTerm);
savedTermState = true;
}
verifyEnum(threadState,
fieldAndTerm.field,
fieldAndTerm.term,
termsEnum,
maxTestOptions,
maxIndexOptions,
options,
alwaysTestMax);
// Sometimes save term state after pulling the enum:
if (options.contains(Option.TERM_STATE) && !useTermState && !savedTermState && random().nextInt(5) == 1) {
// Save away this TermState:
termStates.add(termsEnum.termState());
termStateTerms.add(fieldAndTerm);
useTermState = true;
}
// 10% of the time make sure you can pull another enum
// from the same term:
if (alwaysTestMax || random().nextInt(10) == 7) {
// Try same term again
if (VERBOSE) {
System.out.println("TEST: try enum again on same term");
}
verifyEnum(threadState,
fieldAndTerm.field,
fieldAndTerm.term,
termsEnum,
maxTestOptions,
maxIndexOptions,
options,
alwaysTestMax);
}
}
}
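// Checks the Fields iterator contract: removal is unsupported, and a
// NoSuchElementException is thrown once the iterator is exhausted: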
private void testFields(Fields fields) throws Exception {
Iterator<String> iterator = fields.iterator();
while (iterator.hasNext()) {
iterator.next();
try {
iterator.remove();
fail("Fields.iterator() allows for removal");
} catch (UnsupportedOperationException expected) {
// expected;
}
}
assertFalse(iterator.hasNext());
try {
iterator.next();
fail("Fields.iterator() doesn't throw NoSuchElementException when past the end");
} catch (NoSuchElementException expected) {
// expected
}
}
/** Indexes all fields/terms at the specified
* IndexOptions, and fully tests at that IndexOptions. */
private void testFull(IndexOptions options, boolean withPayloads) throws Exception {
File path = TestUtil.getTempDir("testPostingsFormat.testExact");
Directory dir = newFSDirectory(path);
// TODO test thread safety of buildIndex too
FieldsProducer fieldsProducer = buildIndex(dir, options, withPayloads, true);
testFields(fieldsProducer);
IndexOptions[] allOptions = IndexOptions.values();
int maxIndexOption = Arrays.asList(allOptions).indexOf(options);
for(int i=0;i<=maxIndexOption;i++) {
testTerms(fieldsProducer, EnumSet.allOf(Option.class), allOptions[i], options, true);
if (withPayloads) {
// If we indexed w/ payloads, also test enums w/o accessing payloads:
testTerms(fieldsProducer, EnumSet.complementOf(EnumSet.of(Option.PAYLOADS)), allOptions[i], options, true);
}
}
fieldsProducer.close();
dir.close();
TestUtil.rmDir(path);
}
public void testDocsOnly() throws Exception {
testFull(IndexOptions.DOCS_ONLY, false);
}
public void testDocsAndFreqs() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS, false);
}
public void testDocsAndFreqsAndPositions() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, false);
}
public void testDocsAndFreqsAndPositionsAndPayloads() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, true);
}
public void testDocsAndFreqsAndPositionsAndOffsets() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
}
public void testDocsAndFreqsAndPositionsAndOffsetsAndPayloads() throws Exception {
testFull(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true);
}
public void testRandom() throws Exception {
int iters = 5;
for(int iter=0;iter<iters;iter++) {
File path = TestUtil.getTempDir("testPostingsFormat");
Directory dir = newFSDirectory(path);
boolean indexPayloads = random().nextBoolean();
// TODO test thread safety of buildIndex too
FieldsProducer fieldsProducer = buildIndex(dir, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, indexPayloads, false);
testFields(fieldsProducer);
// NOTE: you can also test "weaker" index options than
// you indexed with:
testTerms(fieldsProducer, EnumSet.allOf(Option.class), IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false);
fieldsProducer.close();
fieldsProducer = null;
dir.close();
TestUtil.rmDir(path);
}
}
public void testEmptyField() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
iwc.setCodec(getCodec());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(newStringField("", "something", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
AtomicReader ar = getOnlySegmentReader(ir);
Fields fields = ar.fields();
int fieldCount = fields.size();
// -1 is allowed, if the codec doesn't implement fields.size():
assertTrue(fieldCount == 1 || fieldCount == -1);
Terms terms = ar.terms("");
assertNotNull(terms);
TermsEnum termsEnum = terms.iterator(null);
assertNotNull(termsEnum.next());
assertEquals(termsEnum.term(), new BytesRef("something"));
assertNull(termsEnum.next());
ir.close();
iw.close();
dir.close();
}
public void testEmptyFieldAndEmptyTerm() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
iwc.setCodec(getCodec());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
doc.add(newStringField("", "", Field.Store.NO));
iw.addDocument(doc);
DirectoryReader ir = iw.getReader();
AtomicReader ar = getOnlySegmentReader(ir);
Fields fields = ar.fields();
int fieldCount = fields.size();
// -1 is allowed, if the codec doesn't implement fields.size():
assertTrue(fieldCount == 1 || fieldCount == -1);
Terms terms = ar.terms("");
assertNotNull(terms);
TermsEnum termsEnum = terms.iterator(null);
assertNotNull(termsEnum.next());
assertEquals(termsEnum.term(), new BytesRef(""));
assertNull(termsEnum.next());
ir.close();
iw.close();
dir.close();
}
// Tests that "ghost" fields still work: a field that remains in FieldInfos
// even though its only postings were deleted and merged away.
// TODO: can this be improved?
public void testGhosts() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
iwc.setCodec(getCodec());
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
iw.addDocument(doc);
doc.add(newStringField("ghostField", "something", Field.Store.NO));
iw.addDocument(doc);
iw.forceMerge(1);
iw.deleteDocuments(new Term("ghostField", "something")); // delete the only term for the field
iw.forceMerge(1);
DirectoryReader ir = iw.getReader();
AtomicReader ar = getOnlySegmentReader(ir);
Fields fields = ar.fields();
// Ghost busting terms dict impls will have
// fields.size() == 0; all others must be == 1:
assertTrue(fields.size() <= 1);
Terms terms = fields.terms("ghostField");
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
BytesRef term = termsEnum.next();
if (term != null) {
DocsEnum docsEnum = termsEnum.docs(null, null);
assertTrue(docsEnum.nextDoc() == DocsEnum.NO_MORE_DOCS);
}
}
ir.close();
iw.close();
dir.close();
}
private static class TermFreqs {
long totalTermFreq;
int docFreq;
};
// LUCENE-5123: make sure we can visit postings twice
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those
// threads iterates over the map while the flushing
// thread might be adding to it:
final Map<String,TermFreqs> termFreqs = new ConcurrentHashMap<>();
final AtomicLong sumDocFreq = new AtomicLong();
final AtomicLong sumTotalTermFreq = new AtomicLong();
// TODO: would be better to use / delegate to the current
// Codec returned by getCodec()
iwc.setCodec(new Lucene46Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
PostingsFormat p = getCodec().postingsFormat();
if (p instanceof PerFieldPostingsFormat) {
p = ((PerFieldPostingsFormat) p).getPostingsFormatForField(field);
}
final PostingsFormat defaultPostingsFormat = p;
final Thread mainThread = Thread.currentThread();
if (field.equals("body")) {
// A PF that counts up some stats and then in
// the end we verify the stats match what the
// final IndexReader says, just to exercise the
// new freedom of iterating the postings more
// than once at flush/merge:
return new PostingsFormat(defaultPostingsFormat.getName()) {
@Override
public FieldsConsumer fieldsConsumer(final SegmentWriteState state) throws IOException {
final FieldsConsumer fieldsConsumer = defaultPostingsFormat.fieldsConsumer(state);
return new FieldsConsumer() {
@Override
public void write(Fields fields) throws IOException {
fieldsConsumer.write(fields);
boolean isMerge = state.context.context == IOContext.Context.MERGE;
// We only use one thread for flushing
// in this test:
assert isMerge || Thread.currentThread() == mainThread;
// We iterate the provided terms twice, so we
// exercise this new freedom with the inverted
// API; if addOnSecondPass is true, we add up
// term stats on the 2nd iteration:
boolean addOnSecondPass = random().nextBoolean();
//System.out.println("write isMerge=" + isMerge + " 2ndPass=" + addOnSecondPass);
// Gather our own stats:
Terms terms = fields.terms("body");
assert terms != null;
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(termsEnum.next() != null) {
BytesRef term = termsEnum.term();
if (random().nextBoolean()) {
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_FREQS);
} else if (docs instanceof DocsAndPositionsEnum) {
docs = termsEnum.docsAndPositions(null, (DocsAndPositionsEnum) docs, 0);
} else {
docs = termsEnum.docsAndPositions(null, null, 0);
}
int docFreq = 0;
long totalTermFreq = 0;
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
docFreq++;
totalTermFreq += docs.freq();
if (docs instanceof DocsAndPositionsEnum) {
DocsAndPositionsEnum posEnum = (DocsAndPositionsEnum) docs;
int limit = TestUtil.nextInt(random(), 1, docs.freq());
for(int i=0;i<limit;i++) {
posEnum.nextPosition();
}
}
}
String termString = term.utf8ToString();
// During merge we should only see terms
// we had already seen during a
// previous flush:
assertTrue(isMerge == false || termFreqs.containsKey(termString));
if (isMerge == false) {
if (addOnSecondPass == false) {
TermFreqs tf = termFreqs.get(termString);
if (tf == null) {
tf = new TermFreqs();
termFreqs.put(termString, tf);
}
tf.docFreq += docFreq;
tf.totalTermFreq += totalTermFreq;
sumDocFreq.addAndGet(docFreq);
sumTotalTermFreq.addAndGet(totalTermFreq);
} else if (termFreqs.containsKey(termString) == false) {
// Add placeholder (2nd pass will
// set its counts):
termFreqs.put(termString, new TermFreqs());
}
}
}
// Also test seeking the TermsEnum:
for(String term : termFreqs.keySet()) {
if (termsEnum.seekExact(new BytesRef(term))) {
if (random().nextBoolean()) {
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_FREQS);
} else if (docs instanceof DocsAndPositionsEnum) {
docs = termsEnum.docsAndPositions(null, (DocsAndPositionsEnum) docs, 0);
} else {
docs = termsEnum.docsAndPositions(null, null, 0);
}
int docFreq = 0;
long totalTermFreq = 0;
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
docFreq++;
totalTermFreq += docs.freq();
if (docs instanceof DocsAndPositionsEnum) {
DocsAndPositionsEnum posEnum = (DocsAndPositionsEnum) docs;
int limit = TestUtil.nextInt(random(), 1, docs.freq());
for(int i=0;i<limit;i++) {
posEnum.nextPosition();
}
}
}
if (isMerge == false && addOnSecondPass) {
TermFreqs tf = termFreqs.get(term);
assert tf != null;
tf.docFreq += docFreq;
tf.totalTermFreq += totalTermFreq;
sumDocFreq.addAndGet(docFreq);
sumTotalTermFreq.addAndGet(totalTermFreq);
}
//System.out.println(" term=" + term + " docFreq=" + docFreq + " ttDF=" + termToDocFreq.get(term));
assertTrue(docFreq <= termFreqs.get(term).docFreq);
assertTrue(totalTermFreq <= termFreqs.get(term).totalTermFreq);
}
}
}
};
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return defaultPostingsFormat.fieldsProducer(state);
}
};
} else {
return defaultPostingsFormat;
}
}
});
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
LineFileDocs docs = new LineFileDocs(random());
int bytesToIndex = atLeast(100) * 1024;
int bytesIndexed = 0;
while (bytesIndexed < bytesToIndex) {
Document doc = docs.nextDoc();
w.addDocument(doc);
bytesIndexed += RamUsageEstimator.sizeOf(doc);
}
IndexReader r = w.getReader();
w.close();
Terms terms = MultiFields.getTerms(r, "body");
assertEquals(sumDocFreq.get(), terms.getSumDocFreq());
assertEquals(sumTotalTermFreq.get(), terms.getSumTotalTermFreq());
TermsEnum termsEnum = terms.iterator(null);
long termCount = 0;
while(termsEnum.next() != null) {
BytesRef term = termsEnum.term();
termCount++;
TermFreqs tf = termFreqs.get(term.utf8ToString());
assertEquals(tf.docFreq, termsEnum.docFreq());
assertEquals(tf.totalTermFreq, termsEnum.totalTermFreq());
}
assertEquals(termFreqs.size(), termCount);
r.close();
dir.close();
}
}