/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.compressing;

import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.BaseStoredFieldsFormatTestCase;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.Directory;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;

public class TestCompressingStoredFieldsFormat extends BaseStoredFieldsFormatTestCase {

  static final long SECOND = 1000L;
  static final long HOUR = 60 * 60 * SECOND;
  static final long DAY = 24 * HOUR;

  @Override
  protected Codec getCodec() {
    return CompressingCodec.randomInstance(random());
  }

  public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
    iwConf.setCodec(CompressingCodec.randomInstance(random()));
    // disable CFS because this test checks file names
    iwConf.setMergePolicy(newLogMergePolicy(false));
    iwConf.setUseCompoundFile(false);

    // Cannot use RIW because this test wants CFS to stay off:
    IndexWriter iw = new IndexWriter(dir, iwConf);

    final Document validDoc = new Document();
    validDoc.add(new IntPoint("id", 0));
    validDoc.add(new StoredField("id", 0));
    iw.addDocument(validDoc);
    iw.commit();

    // make sure that #writeField will fail, to trigger an abort
    final Document invalidDoc = new Document();
    FieldType fieldType = new FieldType();
    fieldType.setStored(true);
    invalidDoc.add(new Field("invalid", fieldType) {
      @Override
      public String stringValue() {
        // TODO: really bad & scary that this causes IW to
        // abort the segment!! We should fix this.
        return null;
      }
    });

    try {
      iw.addDocument(invalidDoc);
      iw.commit();
    } catch (IllegalArgumentException iae) {
      // expected
      assertEquals(iae, iw.getTragicException());
    }
    // Writer should be closed by tragedy
    assertFalse(iw.isOpen());
    dir.close();
  }
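  // The three tests below round-trip CompressingStoredFieldsWriter's
  // variable-length encodings (ZFloat, ZDouble, TLong) against their
  // CompressingStoredFieldsReader counterparts. For ZFloat the assertions
  // pin down three size tiers: a single byte for small integer values
  // (checked over [-1, 123]), at most 4 bytes for other positive floats,
  // and at most 5 bytes for negative floats (presumably one extra leading
  // marker byte for the sign).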
  public void testZFloat() throws Exception {
    byte[] buffer = new byte[5]; // we never need more than 5 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      float f = (float) i;
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));

      // check that compression actually works
      if (i >= -1 && i <= 123) {
        assertEquals(1, out.getPosition()); // single byte compression
      }
      out.reset(buffer);
    }

    // round-trip special values
    float[] special = {
        -0.0f,
        +0.0f,
        Float.NEGATIVE_INFINITY,
        Float.POSITIVE_INFINITY,
        Float.MIN_VALUE,
        Float.MAX_VALUE,
        Float.NaN,
    };

    for (float f : special) {
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
      out.reset(buffer);
    }

    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      float f = r.nextFloat() * (random().nextInt(100) - 50);
      CompressingStoredFieldsWriter.writeZFloat(out, f);
      assertTrue("length=" + out.getPosition() + ", f=" + f,
          out.getPosition() <= ((Float.floatToIntBits(f) >>> 31) == 1 ? 5 : 4));
      in.reset(buffer, 0, out.getPosition());
      float g = CompressingStoredFieldsReader.readZFloat(in);
      assertTrue(in.eof());
      assertEquals(Float.floatToIntBits(f), Float.floatToIntBits(g));
      out.reset(buffer);
    }
  }

  public void testZDouble() throws Exception {
    byte[] buffer = new byte[9]; // we never need more than 9 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      double x = (double) i;
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));

      // check that compression actually works
      if (i >= -1 && i <= 124) {
        assertEquals(1, out.getPosition()); // single byte compression
      }
      out.reset(buffer);
    }

    // round-trip special values
    double[] special = {
        -0.0d,
        +0.0d,
        Double.NEGATIVE_INFINITY,
        Double.POSITIVE_INFINITY,
        Double.MIN_VALUE,
        Double.MAX_VALUE,
        Double.NaN
    };

    for (double x : special) {
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }
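    // The size bounds asserted below mirror the tiers checked in testZFloat:
    // at most 8 bytes for positive doubles, 9 for negative ones (again
    // presumably one extra marker byte for the sign), and at most 5 bytes
    // when the value is exactly representable as a float.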
    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      double x = r.nextDouble() * (random().nextInt(100) - 50);
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      assertTrue("length=" + out.getPosition() + ", d=" + x,
          out.getPosition() <= (x < 0 ? 9 : 8));
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }

    // same with floats
    for (int i = 0; i < 100000; i++) {
      double x = (double) (r.nextFloat() * (random().nextInt(100) - 50));
      CompressingStoredFieldsWriter.writeZDouble(out, x);
      assertTrue("length=" + out.getPosition() + ", d=" + x, out.getPosition() <= 5);
      in.reset(buffer, 0, out.getPosition());
      double y = CompressingStoredFieldsReader.readZDouble(in);
      assertTrue(in.eof());
      assertEquals(Double.doubleToLongBits(x), Double.doubleToLongBits(y));
      out.reset(buffer);
    }
  }

  public void testTLong() throws Exception {
    byte[] buffer = new byte[10]; // we never need more than 10 bytes
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    ByteArrayDataInput in = new ByteArrayDataInput(buffer);

    // round-trip small integer values
    for (int i = Short.MIN_VALUE; i < Short.MAX_VALUE; i++) {
      for (long mul : new long[] {SECOND, HOUR, DAY}) {
        long l1 = (long) i * mul;
        CompressingStoredFieldsWriter.writeTLong(out, l1);
        in.reset(buffer, 0, out.getPosition());
        long l2 = CompressingStoredFieldsReader.readTLong(in);
        assertTrue(in.eof());
        assertEquals(l1, l2);

        // check that compression actually works
        if (i >= -16 && i <= 15) {
          assertEquals(1, out.getPosition()); // single byte compression
        }
        out.reset(buffer);
      }
    }

    // round-trip random values
    Random r = random();
    for (int i = 0; i < 100000; i++) {
      final int numBits = r.nextInt(65);
      long l1 = r.nextLong() & ((1L << numBits) - 1);
      switch (r.nextInt(4)) {
        case 0:
          l1 *= SECOND;
          break;
        case 1:
          l1 *= HOUR;
          break;
        case 2:
          l1 *= DAY;
          break;
        default:
          break;
      }
      CompressingStoredFieldsWriter.writeTLong(out, l1);
      in.reset(buffer, 0, out.getPosition());
      long l2 = CompressingStoredFieldsReader.readTLong(in);
      assertTrue(in.eof());
      assertEquals(l1, l2);
      out.reset(buffer);
    }
  }

  /**
   * writes some tiny segments with incomplete compressed blocks,
   * and ensures merge recompresses them.
   */
  public void testChunkCleanup() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
    iwConf.setMergePolicy(NoMergePolicy.INSTANCE);

    // we have to enforce certain things like maxDocsPerChunk to cause dirty
    // chunks to be created by this test.
    iwConf.setCodec(CompressingCodec.randomInstance(random(), 4 * 1024, 100, false, 8));
    IndexWriter iw = new IndexWriter(dir, iwConf);
    DirectoryReader ir = DirectoryReader.open(iw);
    for (int i = 0; i < 5; i++) {
      Document doc = new Document();
      doc.add(new StoredField("text", "not very long at all"));
      iw.addDocument(doc);
      // force flush
      DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
      assertNotNull(ir2);
      ir.close();
      ir = ir2;
      // examine dirty counts:
      for (LeafReaderContext leaf : ir2.leaves()) {
        CodecReader sr = (CodecReader) leaf.reader();
        CompressingStoredFieldsReader reader = (CompressingStoredFieldsReader) sr.getFieldsReader();
        assertEquals(1, reader.getNumChunks());
        assertEquals(1, reader.getNumDirtyChunks());
      }
    }
    iw.getConfig().setMergePolicy(newLogMergePolicy());
    iw.forceMerge(1);
    DirectoryReader ir2 = DirectoryReader.openIfChanged(ir);
    assertNotNull(ir2);
    ir.close();
    ir = ir2;

    CodecReader sr = (CodecReader) getOnlyLeafReader(ir);
    CompressingStoredFieldsReader reader = (CompressingStoredFieldsReader) sr.getFieldsReader();
    // we could get lucky, and have zero, but typically one.
    assertTrue(reader.getNumDirtyChunks() <= 1);
    ir.close();
    iw.close();
    dir.close();
  }
}