/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.common.lucene; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NoDeletionPolicy; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.Bits; import org.elasticsearch.test.ESTestCase; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; public class LuceneTests extends ESTestCase { public void testWaitForIndex() throws Exception { final MockDirectoryWrapper dir = newMockDirectory(); final AtomicBoolean succeeded = new AtomicBoolean(false); final CountDownLatch latch = new CountDownLatch(1); // Create a shadow Engine, which will freak out because there is no // index yet Thread t = new Thread(new Runnable() { @Override public void run() { try { latch.await(); if (Lucene.waitForIndex(dir, 5000)) { succeeded.set(true); } else { fail("index should have eventually existed!"); } } catch (InterruptedException e) { // ignore interruptions } catch (Exception e) { fail("should have been able to create the engine! " + e.getMessage()); } } }); t.start(); // count down latch // now shadow engine should try to be created latch.countDown(); IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); t.join(); writer.close(); dir.close(); assertTrue("index should have eventually existed", succeeded.get()); } public void testCleanIndex() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); doc = new Document(); doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.deleteDocuments(new Term("id", "2")); writer.commit(); try (DirectoryReader open = DirectoryReader.open(writer)) { assertEquals(3, open.numDocs()); assertEquals(1, open.numDeletedDocs()); assertEquals(4, open.maxDoc()); } writer.close(); if (random().nextBoolean()) { for (String file : dir.listAll()) { if (file.startsWith("_1")) { // delete a random file dir.deleteFile(file); break; } } } Lucene.cleanLuceneIndex(dir); if (dir.listAll().length > 0) { for (String file : dir.listAll()) { if (file.startsWith("extra") == false) { assertEquals(file, "write.lock"); } } } dir.close(); } public void testPruneUnreferencedFiles() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(); iwc.setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir); doc = new Document(); doc.add(new TextField("id", "4", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.deleteDocuments(new Term("id", "2")); writer.commit(); DirectoryReader open = DirectoryReader.open(writer); assertEquals(3, open.numDocs()); assertEquals(1, open.numDeletedDocs()); assertEquals(4, open.maxDoc()); open.close(); writer.close(); SegmentInfos si = Lucene.pruneUnreferencedFiles(segmentCommitInfos.getSegmentsFileName(), dir); assertEquals(si.getSegmentsFileName(), segmentCommitInfos.getSegmentsFileName()); open = DirectoryReader.open(dir); assertEquals(3, open.numDocs()); assertEquals(0, open.numDeletedDocs()); assertEquals(3, open.maxDoc()); IndexSearcher s = new IndexSearcher(open); assertEquals(s.search(new TermQuery(new Term("id", "1")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "2")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "3")), 1).totalHits, 1); assertEquals(s.search(new TermQuery(new Term("id", "4")), 1).totalHits, 0); for (String file : dir.listAll()) { assertFalse("unexpected file: " + file, file.equals("segments_3") || file.startsWith("_2")); } open.close(); dir.close(); } public void testFiles() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); iwc.setMergePolicy(NoMergePolicy.INSTANCE); iwc.setMaxBufferedDocs(2); iwc.setUseCompoundFile(true); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); Set<String> files = new HashSet<>(); for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) { files.add(f); } final boolean simpleTextCFS = files.contains("_0.scf"); assertTrue(files.toString(), files.contains("segments_1")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_0.cfs")); assertFalse(files.toString(), files.contains("_0.cfe")); } else { assertTrue(files.toString(), files.contains("_0.cfs")); assertTrue(files.toString(), files.contains("_0.cfe")); } assertTrue(files.toString(), files.contains("_0.si")); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); files.clear(); for (String f : Lucene.files(Lucene.readSegmentInfos(dir))) { files.add(f); } assertFalse(files.toString(), files.contains("segments_1")); assertTrue(files.toString(), files.contains("segments_2")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_0.cfs")); assertFalse(files.toString(), files.contains("_0.cfe")); } else { assertTrue(files.toString(), files.contains("_0.cfs")); assertTrue(files.toString(), files.contains("_0.cfe")); } assertTrue(files.toString(), files.contains("_0.si")); if (simpleTextCFS) { assertFalse(files.toString(), files.contains("_1.cfs")); assertFalse(files.toString(), files.contains("_1.cfe")); } else { assertTrue(files.toString(), files.contains("_1.cfs")); assertTrue(files.toString(), files.contains("_1.cfe")); } assertTrue(files.toString(), files.contains("_1.si")); writer.close(); dir.close(); } public void testNumDocs() throws IOException { MockDirectoryWrapper dir = newMockDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new TextField("id", "1", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); writer.commit(); SegmentInfos segmentCommitInfos = Lucene.readSegmentInfos(dir); assertEquals(1, Lucene.getNumDocs(segmentCommitInfos)); doc = new Document(); doc.add(new TextField("id", "2", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); doc = new Document(); doc.add(new TextField("id", "3", random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); writer.addDocument(doc); segmentCommitInfos = Lucene.readSegmentInfos(dir); assertEquals(1, Lucene.getNumDocs(segmentCommitInfos)); writer.commit(); segmentCommitInfos = Lucene.readSegmentInfos(dir); assertEquals(3, Lucene.getNumDocs(segmentCommitInfos)); writer.deleteDocuments(new Term("id", "2")); writer.commit(); segmentCommitInfos = Lucene.readSegmentInfos(dir); assertEquals(2, Lucene.getNumDocs(segmentCommitInfos)); int numDocsToIndex = randomIntBetween(10, 50); List<Term> deleteTerms = new ArrayList<>(); for (int i = 0; i < numDocsToIndex; i++) { doc = new Document(); doc.add(new TextField("id", "extra_" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); deleteTerms.add(new Term("id", "extra_" + i)); writer.addDocument(doc); } int numDocsToDelete = randomIntBetween(0, numDocsToIndex); Collections.shuffle(deleteTerms, random()); for (int i = 0; i < numDocsToDelete; i++) { Term remove = deleteTerms.remove(0); writer.deleteDocuments(remove); } writer.commit(); segmentCommitInfos = Lucene.readSegmentInfos(dir); assertEquals(2 + deleteTerms.size(), Lucene.getNumDocs(segmentCommitInfos)); writer.close(); dir.close(); } public void testCount() throws Exception { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); try (DirectoryReader reader = w.getReader()) { // match_all does not match anything on an empty index IndexSearcher searcher = newSearcher(reader); assertFalse(Lucene.exists(searcher, new MatchAllDocsQuery())); } Document doc = new Document(); w.addDocument(doc); doc.add(new StringField("foo", "bar", Store.NO)); w.addDocument(doc); try (DirectoryReader reader = w.getReader()) { IndexSearcher searcher = newSearcher(reader); assertTrue(Lucene.exists(searcher, new MatchAllDocsQuery())); assertFalse(Lucene.exists(searcher, new TermQuery(new Term("baz", "bar")))); assertTrue(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar")))); } w.deleteDocuments(new Term("foo", "bar")); try (DirectoryReader reader = w.getReader()) { IndexSearcher searcher = newSearcher(reader); assertFalse(Lucene.exists(searcher, new TermQuery(new Term("foo", "bar")))); } w.close(); dir.close(); } public void testAsSequentialAccessBits() throws Exception { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new KeywordAnalyzer())); Document doc = new Document(); doc.add(new StringField("foo", "bar", Store.NO)); w.addDocument(doc); doc = new Document(); w.addDocument(doc); doc = new Document(); doc.add(new StringField("foo", "bar", Store.NO)); w.addDocument(doc); try (DirectoryReader reader = DirectoryReader.open(w)) { IndexSearcher searcher = newSearcher(reader); Weight termWeight = new TermQuery(new Term("foo", "bar")).createWeight(searcher, false, 1f); assertEquals(1, reader.leaves().size()); LeafReaderContext leafReaderContext = searcher.getIndexReader().leaves().get(0); Bits bits = Lucene.asSequentialAccessBits(leafReaderContext.reader().maxDoc(), termWeight.scorer(leafReaderContext)); expectThrows(IndexOutOfBoundsException.class, () -> bits.get(-1)); expectThrows(IndexOutOfBoundsException.class, () -> bits.get(leafReaderContext.reader().maxDoc())); assertTrue(bits.get(0)); assertTrue(bits.get(0)); assertFalse(bits.get(1)); assertFalse(bits.get(1)); expectThrows(IllegalArgumentException.class, () -> bits.get(0)); assertTrue(bits.get(2)); assertTrue(bits.get(2)); expectThrows(IllegalArgumentException.class, () -> bits.get(1)); } w.close(); dir.close(); } /** * Test that the "unmap hack" is detected as supported by lucene. * This works around the following bug: https://bugs.openjdk.java.net/browse/JDK-4724038 * <p> * While not guaranteed, current status is "Critical Internal API": http://openjdk.java.net/jeps/260 * Additionally this checks we did not screw up the security logic around the hack. */ public void testMMapHackSupported() throws Exception { // add assume's here if needed for certain platforms, but we should know if it does not work. assertTrue("MMapDirectory does not support unmapping: " + MMapDirectory.UNMAP_NOT_SUPPORTED_REASON, MMapDirectory.UNMAP_SUPPORTED); } }