/*
* Hibernate, Relational Persistence for Idiomatic Java
*
* Copyright (c) 2010, Red Hat, Inc. and/or its affiliates or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors. All third-party contributions are
* distributed under license by Red Hat, Inc.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.hibernate.search.test.util.textbuilder;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;
import java.util.Set;
/**
 * Test utility meant to build a consistent dictionary of words.
 * This is not just a random generator: as in natural languages,
 * shorter terms are returned more frequently than longer ones,
 * and the dictionary size is limited to the words supplied at construction.
 *
 * @author Sanne Grinovero
 */
public class WordDictionary {

	/**
	 * Generator shared by all dictionaries. It is created with a fixed seed,
	 * so the sequence of draws is the same on every run.
	 */
	private static final Random r = new Random( 12L );

	/** The dictionary words, ordered from shortest to longest. */
	private final String[] positionalWords;

	/** Number of words in the dictionary. */
	private final int maxSize;

	/**
	 * Scale applied to the gaussian draw to turn it into an array index:
	 * a quarter of (size + 1), so that most draws land inside the array
	 * and favour the low (short word) positions.
	 */
	private final double gaussFactor;

	/**
	 * Builds a dictionary from the given words.
	 *
	 * @param words the terms to pick from; an empty set is accepted here,
	 * but {@link #randomWord()} will then refuse to produce a word
	 */
	public WordDictionary(Set<String> words) {
		this.positionalWords = words.toArray( new String[0] );
		//sort by String length. Languages use shorter terms more often.
		//The sort is stable, so words of equal length keep the iteration order of the Set.
		Arrays.sort( positionalWords, new StringLengthComparator() );
		maxSize = positionalWords.length;
		gaussFactor = ( (double) maxSize + 1 ) / 4d;
	}

	/**
	 * Orders strings by ascending length only; strings of equal length compare as equal.
	 */
	private static class StringLengthComparator implements Comparator<String>, Serializable {

		public int compare(String o1, String o2) {
			// lengths are never negative, so this subtraction cannot overflow
			return o1.length() - o2.length();
		}

	}

	/**
	 * Picks a word from the dictionary. The index is the absolute value of a
	 * scaled gaussian draw, so positions close to zero (the shortest words)
	 * are chosen most often. Draws falling beyond the last word are discarded
	 * and repeated.
	 *
	 * @return one of the words passed to the constructor
	 * @throws IllegalStateException if the dictionary contains no words
	 */
	public String randomWord() {
		if ( maxSize == 0 ) {
			// no index can ever be valid: fail fast rather than retrying forever
			throw new IllegalStateException( "Cannot pick a random word from an empty dictionary" );
		}
		// retry in a loop (not by recursion) so repeated misses do not grow the stack
		while ( true ) {
			int position = Math.abs( (int) ( r.nextGaussian() * gaussFactor ) );
			if ( position < maxSize ) {
				return positionalWords[position];
			}
		}
	}

}