/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.index; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.MergeInfo; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.TrackingDirectoryWrapper; import org.apache.lucene.util.Bits; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.Version; /** JUnit adaptation of an older test case DocTest. */ public class TestDoc extends LuceneTestCase { private Path workDir; private Path indexDir; private LinkedList<Path> files; /** Set the test case. This test case needs * a few text files created in the current working directory. */ @Override public void setUp() throws Exception { super.setUp(); if (VERBOSE) { System.out.println("TEST: setUp"); } workDir = createTempDir("TestDoc"); indexDir = createTempDir("testIndex"); Directory directory = newFSDirectory(indexDir); directory.close(); files = new LinkedList<>(); files.add(createOutput("test.txt", "This is the first test file" )); files.add(createOutput("test2.txt", "This is the second test file" )); } private Path createOutput(String name, String text) throws IOException { Writer fw = null; PrintWriter pw = null; try { Path path = workDir.resolve(name); Files.deleteIfExists(path); fw = new OutputStreamWriter(Files.newOutputStream(path), StandardCharsets.UTF_8); pw = new PrintWriter(fw); pw.println(text); return path; } finally { if (pw != null) pw.close(); if (fw != null) fw.close(); } } /** This test executes a number of merges and compares the contents of * the segments created when using compound file or not using one. * * TODO: the original test used to print the segment contents to System.out * for visual validation. To have the same effect, a new method * checkSegment(String name, ...) should be created that would * assert various things about the segment. */ public void testIndexAndMerge() throws Exception { StringWriter sw = new StringWriter(); PrintWriter out = new PrintWriter(sw, true); Directory directory = newFSDirectory(indexDir); if (directory instanceof MockDirectoryWrapper) { // We create unreferenced files (we don't even write // a segments file): ((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false); } IndexWriter writer = new IndexWriter( directory, newIndexWriterConfig(new MockAnalyzer(random())). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(-1). setMergePolicy(newLogMergePolicy(10)) ); SegmentCommitInfo si1 = indexDoc(writer, "test.txt"); printSegment(out, si1); SegmentCommitInfo si2 = indexDoc(writer, "test2.txt"); printSegment(out, si2); writer.close(); SegmentCommitInfo siMerge = merge(directory, si1, si2, "_merge", false); printSegment(out, siMerge); SegmentCommitInfo siMerge2 = merge(directory, si1, si2, "_merge2", false); printSegment(out, siMerge2); SegmentCommitInfo siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", false); printSegment(out, siMerge3); directory.close(); out.close(); sw.close(); String multiFileOutput = sw.toString(); //System.out.println(multiFileOutput); sw = new StringWriter(); out = new PrintWriter(sw, true); directory = newFSDirectory(indexDir); if (directory instanceof MockDirectoryWrapper) { // We create unreferenced files (we don't even write // a segments file): ((MockDirectoryWrapper) directory).setAssertNoUnrefencedFilesOnClose(false); } writer = new IndexWriter( directory, newIndexWriterConfig(new MockAnalyzer(random())). setOpenMode(OpenMode.CREATE). setMaxBufferedDocs(-1). setMergePolicy(newLogMergePolicy(10)) ); si1 = indexDoc(writer, "test.txt"); printSegment(out, si1); si2 = indexDoc(writer, "test2.txt"); printSegment(out, si2); writer.close(); siMerge = merge(directory, si1, si2, "_merge", true); printSegment(out, siMerge); siMerge2 = merge(directory, si1, si2, "_merge2", true); printSegment(out, siMerge2); siMerge3 = merge(directory, siMerge, siMerge2, "_merge3", true); printSegment(out, siMerge3); directory.close(); out.close(); sw.close(); String singleFileOutput = sw.toString(); assertEquals(multiFileOutput, singleFileOutput); } private SegmentCommitInfo indexDoc(IndexWriter writer, String fileName) throws Exception { Path path = workDir.resolve(fileName); Document doc = new Document(); InputStreamReader is = new InputStreamReader(Files.newInputStream(path), StandardCharsets.UTF_8); doc.add(new TextField("contents", is)); writer.addDocument(doc); writer.commit(); is.close(); return writer.newestSegment(); } private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile) throws Exception { IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1))); SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context); SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context); final Codec codec = Codec.getDefault(); TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir); final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2), si, InfoStream.getDefault(), trackingDir, new FieldInfos.FieldNumbers(), context); MergeState mergeState = merger.merge(); r1.close(); r2.close();; si.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); if (useCompoundFile) { Collection<String> filesToDelete = si.files(); codec.compoundFormat().write(dir, si, context); si.setUseCompoundFile(true); for(String name : filesToDelete) { si1.info.dir.deleteFile(name); } } return new SegmentCommitInfo(si, 0, -1L, -1L, -1L); } private void printSegment(PrintWriter out, SegmentCommitInfo si) throws Exception { SegmentReader reader = new SegmentReader(si, Version.LATEST.major, newIOContext(random())); for (int i = 0; i < reader.numDocs(); i++) out.println(reader.document(i)); Fields fields = reader.fields(); for (String field : fields) { Terms terms = fields.terms(field); assertNotNull(terms); TermsEnum tis = terms.iterator(); while(tis.next() != null) { out.print(" term=" + field + ":" + tis.term()); out.println(" DF=" + tis.docFreq()); PostingsEnum positions = tis.postings(null, PostingsEnum.POSITIONS); final Bits liveDocs = reader.getLiveDocs(); while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if (liveDocs != null && liveDocs.get(positions.docID()) == false) { continue; } out.print(" doc=" + positions.docID()); out.print(" TF=" + positions.freq()); out.print(" pos="); out.print(positions.nextPosition()); for (int j = 1; j < positions.freq(); j++) out.print("," + positions.nextPosition()); out.println(""); } } } reader.close(); } }