TestMSBRadixSorter.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

public class TestMSBRadixSorter extends LuceneTestCase {

  private void test(BytesRef[] refs, int len) {
    BytesRef[] expected = Arrays.copyOf(refs, len);
    Arrays.sort(expected);

    int maxLength = 0;
    for (int i = 0; i < len; ++i) {
      BytesRef ref = refs[i];
      maxLength = Math.max(maxLength, ref.length);
    }
    switch (random().nextInt(3)) {
      case 0:
        maxLength += TestUtil.nextInt(random(), 1, 5);
        break;
      case 1:
        maxLength = Integer.MAX_VALUE;
        break;
      default:
        // leave unchanged
        break;
    }

    final int finalMaxLength = maxLength;
    new MSBRadixSorter(maxLength) {

      @Override
      protected int byteAt(int i, int k) {
        assertTrue(k < finalMaxLength);
        BytesRef ref = refs[i];
        if (ref.length <= k) {
          return -1;
        }
        return ref.bytes[ref.offset + k] & 0xff;
      }

      @Override
      protected void swap(int i, int j) {
        BytesRef tmp = refs[i];
        refs[i] = refs[j];
        refs[j] = tmp;
      }
    }.sort(0, len);
    BytesRef[] actual = Arrays.copyOf(refs, len);
    assertArrayEquals(expected, actual);
  }

  public void testEmpty() {
    test(new BytesRef[random().nextInt(5)], 0);
  }

  public void testOneValue() {
    BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] { bytes }, 1);
  }

  public void testTwoValues() {
    BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
    BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
    test(new BytesRef[] { bytes1, bytes2 }, 2);
  }

  private void testRandom(int commonPrefixLen, int maxLen) {
    byte[] commonPrefix = new byte[commonPrefixLen];
    random().nextBytes(commonPrefix);
    final int len = random().nextInt(100000);
    BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
    for (int i = 0; i < len; ++i) {
      byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
      random().nextBytes(b);
      System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
      bytes[i] = new BytesRef(b);
    }
    test(bytes, len);
  }

  public void testRandom() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 10);
    }
  }

  public void testRandomWithLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(0, 2);
    }
  }

  public void testRandomWithSharedPrefix() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 10);
    }
  }

  public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
    for (int iter = 0; iter < 10; ++iter) {
      testRandom(TestUtil.nextInt(random(), 1, 30), 2);
    }
  }

  public void testRandom2() {
    // how large our alphabet is
    int letterCount = TestUtil.nextInt(random(), 2, 10);

    // how many substring fragments to use
    int substringCount = TestUtil.nextInt(random(), 2, 10);
    Set<BytesRef> substringsSet = new HashSet<>();

    // how many strings to make
    int stringCount = atLeast(10000);

    //System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + " stringCount=" + stringCount);
    while(substringsSet.size() < substringCount) {
      int length = TestUtil.nextInt(random(), 2, 10);
      byte[] bytes = new byte[length];
      for(int i=0;i<length;i++) {
        bytes[i] = (byte) random().nextInt(letterCount);
      }
      BytesRef br = new BytesRef(bytes);
      substringsSet.add(br);
      //System.out.println("add substring count=" + substringsSet.size() + ": " + br);
    }

    BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
    double[] chance = new double[substrings.length];
    double sum = 0.0;
    for(int i=0;i<substrings.length;i++) {
      chance[i] = random().nextDouble();
      sum += chance[i];
    }

    // give each substring a random chance of occurring:
    double accum = 0.0;
    for(int i=0;i<substrings.length;i++) {
      accum += chance[i]/sum;
      chance[i] = accum;
    }

    Set<BytesRef> stringsSet = new HashSet<>();
    int iters = 0;
    while (stringsSet.size() < stringCount && iters < stringCount*5) {
      int count = TestUtil.nextInt(random(), 1, 5);
      BytesRefBuilder b = new BytesRefBuilder();
      for(int i=0;i<count;i++) {
        double v = random().nextDouble();
        accum = 0.0;
        for(int j=0;j<substrings.length;j++) {
          accum += chance[j];
          if (accum >= v) {
            b.append(substrings[j]);
            break;
          }
        }
      }
      BytesRef br = b.toBytesRef();
      stringsSet.add(br);
      //System.out.println("add string count=" + stringsSet.size() + ": " + br);
      iters++;
    }

    test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
  }
}