/* * Hibernate Search, full-text search for your domain model * * License: GNU Lesser General Public License (LGPL), version 2.1 or later * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. */ package org.hibernate.search.testsupport.textbuilder; import java.io.Serializable; import java.util.Arrays; import java.util.Comparator; import java.util.Random; import java.util.Set; /** * Test utility meant to build a consistent dictionary of words. * This is not just a random generator: like in natural * languages shorter terms have a higher frequency in a text corpus * and the dictionary size is limited. * * @author Sanne Grinovero */ public class WordDictionary { private static final Random r = new Random( 12L ); private final String[] positionalWords; private final int maxSize; private final double gaussFactor; public WordDictionary(Set<String> words) { this.positionalWords = words.toArray( new String[0] ); //sort by String length. Languages use shorter terms more often. Arrays.sort( positionalWords, new StringLengthComparator() ); maxSize = positionalWords.length; gaussFactor = ( (double) maxSize + 1 ) / 4d; } private static class StringLengthComparator implements Comparator<String>, Serializable { @Override public int compare(String o1, String o2) { return o1.length() - o2.length(); } } public String randomWord() { int position = Math.abs( (int) ( r.nextGaussian() * gaussFactor ) ); if ( position < maxSize ) { return positionalWords[position]; } else { return randomWord(); } } }