/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.codecs.lucene60;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.PointsFormat;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.BasePointsFormatTestCase;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MockRandomMergePolicy;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;

/**
 * Tests Lucene60PointsFormat
 */
public class TestLucene60PointsFormat extends BasePointsFormatTestCase {

  private final Codec codec;
  private final int maxPointsInLeafNode;

  public TestLucene60PointsFormat() {
    // standard issue
    Codec defaultCodec = TestUtil.getDefaultCodec();
    if (random().nextBoolean()) {
      // randomize parameters
      maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
      double maxMBSortInHeap = 3.0 + (3*random().nextDouble());
      if (VERBOSE) {
        System.out.println("TEST: using Lucene60PointsFormat with maxPointsInLeafNode=" + maxPointsInLeafNode + " and maxMBSortInHeap=" + maxMBSortInHeap);
      }

      // sneaky impersonation!
      codec = new FilterCodec(defaultCodec.getName(), defaultCodec) {
        @Override
        public PointsFormat pointsFormat() {
          return new PointsFormat() {
            @Override
            public PointsWriter fieldsWriter(SegmentWriteState writeState) throws IOException {
              return new Lucene60PointsWriter(writeState, maxPointsInLeafNode, maxMBSortInHeap);
            }

            @Override
            public PointsReader fieldsReader(SegmentReadState readState) throws IOException {
              return new Lucene60PointsReader(readState);
            }
          };
        }
      };
    } else {
      // standard issue
      codec = defaultCodec;
      maxPointsInLeafNode = BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
    }
  }

  @Override
  protected Codec getCodec() {
    return codec;
  }

  @Override
  public void testMergeStability() throws Exception {
    assumeFalse("TODO: mess with the parameters and test gets angry!", codec instanceof FilterCodec);
    super.testMergeStability();
  }

  public void testEstimatePointCount() throws IOException {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig();
    // Avoid mockRandomMP since it may cause non-optimal merges that make the
    // number of points per leaf hard to predict
    while (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
      iwc.setMergePolicy(newMergePolicy());
    }
    IndexWriter w = new IndexWriter(dir, iwc);
    byte[] pointValue = new byte[3];
    byte[] uniquePointValue = new byte[3];
    random().nextBytes(uniquePointValue);
    final int numDocs = atLeast(10000); // make sure we have several leaves
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      if (i == numDocs / 2) {
        doc.add(new BinaryPoint("f", uniquePointValue));
      } else {
        do {
          random().nextBytes(pointValue);
        } while (Arrays.equals(pointValue, uniquePointValue));
        doc.add(new BinaryPoint("f", pointValue));
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();

    final LeafReader lr = getOnlyLeafReader(r);
    PointValues points = lr.getPointValues("f");

    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    final int numLeaves = (int) Math.ceil((double) numDocs / maxPointsInLeafNode);
    assertEquals(numLeaves * maxPointsInLeafNode,
        points.estimatePointCount(new IntersectVisitor() {
          @Override
          public void visit(int docID, byte[] packedValue) throws IOException {}

          @Override
          public void visit(int docID) throws IOException {}

          @Override
          public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
          }
        }));

    // Return 0 if no points match
    assertEquals(0,
        points.estimatePointCount(new IntersectVisitor() {
          @Override
          public void visit(int docID, byte[] packedValue) throws IOException {}

          @Override
          public void visit(int docID) throws IOException {}

          @Override
          public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
        }));

    // If only one point matches, then the point count is (maxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final long pointCount = points.estimatePointCount(new IntersectVisitor() {
      @Override
      public void visit(int docID, byte[] packedValue) throws IOException {}

      @Override
      public void visit(int docID) throws IOException {}

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        if (StringHelper.compare(3, uniquePointValue, 0, maxPackedValue, 0) > 0 ||
            StringHelper.compare(3, uniquePointValue, 0, minPackedValue, 0) < 0) {
          return Relation.CELL_OUTSIDE_QUERY;
        }
        return Relation.CELL_CROSSES_QUERY;
      }
    });
    assertTrue(""+pointCount,
        pointCount == (maxPointsInLeafNode + 1) / 2 || // common case
        pointCount == 2*((maxPointsInLeafNode + 1) / 2)); // if the point is a split value

    r.close();
    dir.close();
  }

  // The tree is always balanced in the N dims case, and leaves are
  // not all full so things are a bit different
  public void testEstimatePointCount2Dims() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    byte[][] pointValue = new byte[2][];
    pointValue[0] = new byte[3];
    pointValue[1] = new byte[3];
    byte[][] uniquePointValue = new byte[2][];
    uniquePointValue[0] = new byte[3];
    uniquePointValue[1] = new byte[3];
    random().nextBytes(uniquePointValue[0]);
    random().nextBytes(uniquePointValue[1]);
    final int numDocs = atLeast(10000); // make sure we have several leaves
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      if (i == numDocs / 2) {
        doc.add(new BinaryPoint("f", uniquePointValue));
      } else {
        do {
          random().nextBytes(pointValue[0]);
          random().nextBytes(pointValue[1]);
        } while (Arrays.equals(pointValue[0], uniquePointValue[0]) || Arrays.equals(pointValue[1], uniquePointValue[1]));
        doc.add(new BinaryPoint("f", pointValue));
      }
      w.addDocument(doc);
    }
    w.forceMerge(1);
    final IndexReader r = DirectoryReader.open(w);
    w.close();

    final LeafReader lr = getOnlyLeafReader(r);
    PointValues points = lr.getPointValues("f");

    // With >1 dims, the tree is balanced
    int actualMaxPointsInLeafNode = numDocs;
    while (actualMaxPointsInLeafNode > maxPointsInLeafNode) {
      actualMaxPointsInLeafNode = (actualMaxPointsInLeafNode + 1) / 2;
    }

    // If all points match, then the point count is numLeaves * maxPointsInLeafNode
    final int numLeaves = Integer.highestOneBit((numDocs - 1) / actualMaxPointsInLeafNode) << 1;
    assertEquals(numLeaves * actualMaxPointsInLeafNode,
        points.estimatePointCount(new IntersectVisitor() {
          @Override
          public void visit(int docID, byte[] packedValue) throws IOException {}

          @Override
          public void visit(int docID) throws IOException {}

          @Override
          public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_INSIDE_QUERY;
          }
        }));

    // Return 0 if no points match
    assertEquals(0,
        points.estimatePointCount(new IntersectVisitor() {
          @Override
          public void visit(int docID, byte[] packedValue) throws IOException {}

          @Override
          public void visit(int docID) throws IOException {}

          @Override
          public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
        }));

    // If only one point matches, then the point count is (actualMaxPointsInLeafNode + 1) / 2
    // in general, or maybe 2x that if the point is a split value
    final long pointCount = points.estimatePointCount(new IntersectVisitor() {
      @Override
      public void visit(int docID, byte[] packedValue) throws IOException {}

      @Override
      public void visit(int docID) throws IOException {}

      @Override
      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
        for (int dim = 0; dim < 2; ++dim) {
          if (StringHelper.compare(3, uniquePointValue[dim], 0, maxPackedValue, dim * 3) > 0 ||
              StringHelper.compare(3, uniquePointValue[dim], 0, minPackedValue, dim * 3) < 0) {
            return Relation.CELL_OUTSIDE_QUERY;
          }
        }
        return Relation.CELL_CROSSES_QUERY;
      }
    });
    assertTrue(""+pointCount,
        pointCount == (actualMaxPointsInLeafNode + 1) / 2 || // common case
        pointCount == 2*((actualMaxPointsInLeafNode + 1) / 2)); // if the point is a split value

    r.close();
    dir.close();
  }
}