package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
public class TestFieldCache extends LuceneTestCase {
private static AtomicReader reader;
private static int NUM_DOCS;
private static int NUM_ORDS;
private static String[] unicodeStrings;
private static BytesRef[][] multiValued;
private static Directory directory;
@BeforeClass
public static void beforeClass() throws Exception {
NUM_DOCS = atLeast(500);
NUM_ORDS = atLeast(2);
directory = newDirectory();
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
long theLong = Long.MAX_VALUE;
double theDouble = Double.MAX_VALUE;
byte theByte = Byte.MAX_VALUE;
short theShort = Short.MAX_VALUE;
int theInt = Integer.MAX_VALUE;
float theFloat = Float.MAX_VALUE;
unicodeStrings = new String[NUM_DOCS];
multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
if (VERBOSE) {
System.out.println("TEST: setUp");
}
for (int i = 0; i < NUM_DOCS; i++){
Document doc = new Document();
doc.add(newStringField("theLong", String.valueOf(theLong--), Field.Store.NO));
doc.add(newStringField("theDouble", String.valueOf(theDouble--), Field.Store.NO));
doc.add(newStringField("theByte", String.valueOf(theByte--), Field.Store.NO));
doc.add(newStringField("theShort", String.valueOf(theShort--), Field.Store.NO));
doc.add(newStringField("theInt", String.valueOf(theInt--), Field.Store.NO));
doc.add(newStringField("theFloat", String.valueOf(theFloat--), Field.Store.NO));
if (i%2 == 0) {
doc.add(newStringField("sparse", String.valueOf(i), Field.Store.NO));
}
if (i%2 == 0) {
doc.add(new IntField("numInt", i, Field.Store.NO));
}
// sometimes skip the field:
if (random().nextInt(40) != 17) {
unicodeStrings[i] = generateString(i);
doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
}
// sometimes skip the field:
if (random().nextInt(10) != 8) {
for (int j = 0; j < NUM_ORDS; j++) {
String newValue = generateString(i);
multiValued[i][j] = new BytesRef(newValue);
doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
}
Arrays.sort(multiValued[i]);
}
writer.addDocument(doc);
}
IndexReader r = writer.getReader();
reader = SlowCompositeReaderWrapper.wrap(r);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
directory.close();
directory = null;
unicodeStrings = null;
multiValued = null;
}
public void testInfoStream() throws Exception {
try {
FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos, false, "UTF-8"));
cache.getDoubles(reader, "theDouble", false);
cache.getFloats(reader, "theDouble", false);
assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1);
} finally {
FieldCache.DEFAULT.purgeAllCaches();
}
}
public void test() throws IOException {
FieldCache cache = FieldCache.DEFAULT;
double [] doubles = cache.getDoubles(reader, "theDouble", random().nextBoolean());
assertSame("Second request to cache return same array", doubles, cache.getDoubles(reader, "theDouble", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", doubles, cache.getDoubles(reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, random().nextBoolean()));
assertTrue("doubles Size: " + doubles.length + " is not: " + NUM_DOCS, doubles.length == NUM_DOCS);
for (int i = 0; i < doubles.length; i++) {
assertTrue(doubles[i] + " does not equal: " + (Double.MAX_VALUE - i), doubles[i] == (Double.MAX_VALUE - i));
}
long [] longs = cache.getLongs(reader, "theLong", random().nextBoolean());
assertSame("Second request to cache return same array", longs, cache.getLongs(reader, "theLong", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", longs, cache.getLongs(reader, "theLong", FieldCache.DEFAULT_LONG_PARSER, random().nextBoolean()));
assertTrue("longs Size: " + longs.length + " is not: " + NUM_DOCS, longs.length == NUM_DOCS);
for (int i = 0; i < longs.length; i++) {
assertTrue(longs[i] + " does not equal: " + (Long.MAX_VALUE - i) + " i=" + i, longs[i] == (Long.MAX_VALUE - i));
}
byte [] bytes = cache.getBytes(reader, "theByte", random().nextBoolean());
assertSame("Second request to cache return same array", bytes, cache.getBytes(reader, "theByte", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", bytes, cache.getBytes(reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, random().nextBoolean()));
assertTrue("bytes Size: " + bytes.length + " is not: " + NUM_DOCS, bytes.length == NUM_DOCS);
for (int i = 0; i < bytes.length; i++) {
assertTrue(bytes[i] + " does not equal: " + (Byte.MAX_VALUE - i), bytes[i] == (byte) (Byte.MAX_VALUE - i));
}
short [] shorts = cache.getShorts(reader, "theShort", random().nextBoolean());
assertSame("Second request to cache return same array", shorts, cache.getShorts(reader, "theShort", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", shorts, cache.getShorts(reader, "theShort", FieldCache.DEFAULT_SHORT_PARSER, random().nextBoolean()));
assertTrue("shorts Size: " + shorts.length + " is not: " + NUM_DOCS, shorts.length == NUM_DOCS);
for (int i = 0; i < shorts.length; i++) {
assertTrue(shorts[i] + " does not equal: " + (Short.MAX_VALUE - i), shorts[i] == (short) (Short.MAX_VALUE - i));
}
int [] ints = cache.getInts(reader, "theInt", random().nextBoolean());
assertSame("Second request to cache return same array", ints, cache.getInts(reader, "theInt", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", ints, cache.getInts(reader, "theInt", FieldCache.DEFAULT_INT_PARSER, random().nextBoolean()));
assertTrue("ints Size: " + ints.length + " is not: " + NUM_DOCS, ints.length == NUM_DOCS);
for (int i = 0; i < ints.length; i++) {
assertTrue(ints[i] + " does not equal: " + (Integer.MAX_VALUE - i), ints[i] == (Integer.MAX_VALUE - i));
}
float [] floats = cache.getFloats(reader, "theFloat", random().nextBoolean());
assertSame("Second request to cache return same array", floats, cache.getFloats(reader, "theFloat", random().nextBoolean()));
assertSame("Second request with explicit parser return same array", floats, cache.getFloats(reader, "theFloat", FieldCache.DEFAULT_FLOAT_PARSER, random().nextBoolean()));
assertTrue("floats Size: " + floats.length + " is not: " + NUM_DOCS, floats.length == NUM_DOCS);
for (int i = 0; i < floats.length; i++) {
assertTrue(floats[i] + " does not equal: " + (Float.MAX_VALUE - i), floats[i] == (Float.MAX_VALUE - i));
}
Bits docsWithField = cache.getDocsWithField(reader, "theLong");
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "theLong"));
assertTrue("docsWithField(theLong) must be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(theLong) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertTrue(docsWithField.get(i));
}
docsWithField = cache.getDocsWithField(reader, "sparse");
assertSame("Second request to cache return same array", docsWithField, cache.getDocsWithField(reader, "sparse"));
assertFalse("docsWithField(sparse) must not be class Bits.MatchAllBits", docsWithField instanceof Bits.MatchAllBits);
assertTrue("docsWithField(sparse) Size: " + docsWithField.length() + " is not: " + NUM_DOCS, docsWithField.length() == NUM_DOCS);
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
// getTermsIndex
FieldCache.DocTermsIndex termsIndex = cache.getTermsIndex(reader, "theRandomUnicodeString");
assertSame("Second request to cache return same array", termsIndex, cache.getTermsIndex(reader, "theRandomUnicodeString"));
assertTrue("doubles Size: " + termsIndex.size() + " is not: " + NUM_DOCS, termsIndex.size() == NUM_DOCS);
final BytesRef br = new BytesRef();
for (int i = 0; i < NUM_DOCS; i++) {
final BytesRef term = termsIndex.getTerm(i, br);
final String s = term == null ? null : term.utf8ToString();
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
int nTerms = termsIndex.numOrd();
// System.out.println("nTerms="+nTerms);
TermsEnum tenum = termsIndex.getTermsEnum();
BytesRef val = new BytesRef();
for (int i=1; i<nTerms; i++) {
BytesRef val1 = tenum.next();
BytesRef val2 = termsIndex.lookup(i,val);
// System.out.println("i="+i);
assertEquals(val2, val1);
}
// seek the enum around (note this isn't a great test here)
int num = atLeast(100);
for (int i = 0; i < num; i++) {
int k = _TestUtil.nextInt(random(), 1, nTerms-1);
BytesRef val1 = termsIndex.lookup(k, val);
assertEquals(TermsEnum.SeekStatus.FOUND, tenum.seekCeil(val1));
assertEquals(val1, tenum.term());
}
// test bad field
termsIndex = cache.getTermsIndex(reader, "bogusfield");
// getTerms
FieldCache.DocTerms terms = cache.getTerms(reader, "theRandomUnicodeString");
assertSame("Second request to cache return same array", terms, cache.getTerms(reader, "theRandomUnicodeString"));
assertTrue("doubles Size: " + terms.size() + " is not: " + NUM_DOCS, terms.size() == NUM_DOCS);
for (int i = 0; i < NUM_DOCS; i++) {
final BytesRef term = terms.getTerm(i, br);
final String s = term == null ? null : term.utf8ToString();
assertTrue("for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i], unicodeStrings[i] == null || unicodeStrings[i].equals(s));
}
// test bad field
terms = cache.getTerms(reader, "bogusfield");
// getDocTermOrds
DocTermOrds termOrds = cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
TermsEnum termsEnum = termOrds.getOrdTermsEnum(reader);
assertSame("Second request to cache return same DocTermOrds", termOrds, cache.getDocTermOrds(reader, "theRandomUnicodeMultiValuedField"));
DocTermOrds.TermOrdsIterator reuse = null;
for (int i = 0; i < NUM_DOCS; i++) {
reuse = termOrds.lookup(i, reuse);
final int[] buffer = new int[5];
// This will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
List<BytesRef> values = new ArrayList<BytesRef>(new LinkedHashSet<BytesRef>(Arrays.asList(multiValued[i])));
for (;;) {
int chunk = reuse.read(buffer);
if (chunk == 0) {
for (int ord = 0; ord < values.size(); ord++) {
BytesRef term = values.get(ord);
assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
}
break;
}
for(int idx=0; idx < chunk; idx++) {
int key = buffer[idx];
termsEnum.seekExact((long) key);
String actual = termsEnum.term().utf8ToString();
String expected = values.get(idx).utf8ToString();
if (!expected.equals(actual)) {
reuse = termOrds.lookup(i, reuse);
reuse.read(buffer);
}
assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
}
if (chunk <= buffer.length) {
break;
}
}
}
// test bad field
termOrds = cache.getDocTermOrds(reader, "bogusfield");
FieldCache.DEFAULT.purge(reader);
}
public void testEmptyIndex() throws Exception {
Directory dir = newDirectory();
IndexWriter writer= new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(500));
writer.close();
IndexReader r = DirectoryReader.open(dir);
AtomicReader reader = SlowCompositeReaderWrapper.wrap(r);
FieldCache.DEFAULT.getTerms(reader, "foobar");
FieldCache.DEFAULT.getTermsIndex(reader, "foobar");
FieldCache.DEFAULT.purge(reader);
r.close();
dir.close();
}
private static String generateString(int i) {
String s = null;
if (i > 0 && random().nextInt(3) == 1) {
// reuse past string -- try to find one that's not null
for(int iter = 0; iter < 10 && s == null;iter++) {
s = unicodeStrings[random().nextInt(i)];
}
if (s == null) {
s = _TestUtil.randomUnicodeString(random());
}
} else {
s = _TestUtil.randomUnicodeString(random());
}
return s;
}
public void testDocsWithField() throws Exception {
FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
assertEquals(0, cache.getCacheEntries().length);
double[] doubles = cache.getDoubles(reader, "theDouble", true);
// The double[] takes two slots (one w/ null parser, one
// w/ real parser), and docsWithField should also
// have been populated:
assertEquals(3, cache.getCacheEntries().length);
Bits bits = cache.getDocsWithField(reader, "theDouble");
// No new entries should appear:
assertEquals(3, cache.getCacheEntries().length);
assertTrue(bits instanceof Bits.MatchAllBits);
int[] ints = cache.getInts(reader, "sparse", true);
assertEquals(6, cache.getCacheEntries().length);
Bits docsWithField = cache.getDocsWithField(reader, "sparse");
assertEquals(6, cache.getCacheEntries().length);
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints[i]);
} else {
assertFalse(docsWithField.get(i));
}
}
int[] numInts = cache.getInts(reader, "numInt", random().nextBoolean());
docsWithField = cache.getDocsWithField(reader, "numInt");
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, numInts[i]);
} else {
assertFalse(docsWithField.get(i));
}
}
}
public void testGetDocsWithFieldThreadSafety() throws Exception {
final FieldCache cache = FieldCache.DEFAULT;
cache.purgeAllCaches();
int NUM_THREADS = 3;
Thread[] threads = new Thread[NUM_THREADS];
final AtomicBoolean failed = new AtomicBoolean();
final AtomicInteger iters = new AtomicInteger();
final int NUM_ITER = 200 * RANDOM_MULTIPLIER;
final CyclicBarrier restart = new CyclicBarrier(NUM_THREADS,
new Runnable() {
@Override
public void run() {
cache.purgeAllCaches();
iters.incrementAndGet();
}
});
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX] = new Thread() {
@Override
public void run() {
try {
while(!failed.get()) {
final int op = random().nextInt(3);
if (op == 0) {
// Purge all caches & resume, once all
// threads get here:
restart.await();
if (iters.get() >= NUM_ITER) {
break;
}
} else if (op == 1) {
Bits docsWithField = cache.getDocsWithField(reader, "sparse");
for (int i = 0; i < docsWithField.length(); i++) {
assertEquals(i%2 == 0, docsWithField.get(i));
}
} else {
int[] ints = cache.getInts(reader, "sparse", true);
Bits docsWithField = cache.getDocsWithField(reader, "sparse");
for (int i = 0; i < docsWithField.length(); i++) {
if (i%2 == 0) {
assertTrue(docsWithField.get(i));
assertEquals(i, ints[i]);
} else {
assertFalse(docsWithField.get(i));
}
}
}
}
} catch (Throwable t) {
failed.set(true);
restart.reset();
throw new RuntimeException(t);
}
}
};
threads[threadIDX].start();
}
for(int threadIDX=0;threadIDX<NUM_THREADS;threadIDX++) {
threads[threadIDX].join();
}
assertFalse(failed.get());
}
}