/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.usergrid.utils;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Transforms words to singular, plural, humanized (human readable), underscore, camel case, or ordinal form. This is
* inspired by the <a href="http://api.rubyonrails.org/classes/Inflector.html">Inflector</a> class in <a
* href="http://www.rubyonrails.org">Ruby on Rails</a>, which is distributed under the <a
* href="http://wiki.rubyonrails.org/rails/pages/License">Rails license</a>.
*/
public class Inflector {

    /** Matches the start of the input, or an underscore, followed by the character that begins a word. */
    private static final Pattern CAMEL_CASE_BOUNDARY = Pattern.compile( "(^|_)(.)" );

    /** Matches the end of an upper-case run that is followed by a capitalized word (e.g. "XMLParser"). */
    private static final Pattern ACRONYM_BOUNDARY = Pattern.compile( "([A-Z]+)([A-Z][a-z])" );

    /** Matches a lower-case letter or digit immediately followed by an upper-case letter (e.g. "tN"). */
    private static final Pattern LOWER_UPPER_BOUNDARY = Pattern.compile( "([a-z\\d])([A-Z])" );

    /**
     * Shared default instance. It is initialized with the default rules and is mutable; the rule lists and the
     * uncountable set are plain, unsynchronized collections.
     */
    protected static final Inflector INSTANCE = new Inflector();


    /**
     * Returns the shared default instance.
     *
     * @return the shared inflector
     */
    public static Inflector getInstance() {
        return INSTANCE;
    }


    /** A single regular-expression rewrite rule. The expression is always matched case-insensitively. */
    protected class Rule {

        protected final String expression;
        protected final Pattern expressionPattern;
        protected final String replacement;


        /**
         * @param expression the regular expression identifying the text to rewrite
         * @param replacement the replacement (may contain group references such as <code>$1</code>); null is
         * treated as the empty string
         */
        protected Rule( String expression, String replacement ) {
            this.expression = expression;
            this.replacement = replacement != null ? replacement : "";
            expressionPattern = Pattern.compile( this.expression, Pattern.CASE_INSENSITIVE );
        }


        /**
         * Apply the rule against the input string, returning the modified string or null if the rule didn't apply
         * (and no modifications were made)
         *
         * @param input the input string
         *
         * @return the modified string if this rule applied, or null if the input was not modified by this rule
         */
        protected String apply( String input ) {
            Matcher matcher = expressionPattern.matcher( input );
            if ( !matcher.find() ) {
                return null;
            }
            return matcher.replaceAll( replacement );
        }


        /**
         * Hashes the expression with the same per-character case folding that {@link String#equalsIgnoreCase}
         * uses, so that rules considered equal by {@link #equals(Object)} always share a hash code.
         */
        @Override
        public int hashCode() {
            int hash = 0;
            for ( int i = 0; i < expression.length(); i++ ) {
                hash = 31 * hash + Character.toLowerCase( Character.toUpperCase( expression.charAt( i ) ) );
            }
            return hash;
        }


        /** Two rules are equal when they are of the same class and their expressions match ignoring case. */
        @Override
        public boolean equals( Object obj ) {
            if ( obj == this ) {
                return true;
            }
            if ( ( obj == null ) || ( obj.getClass() != this.getClass() ) ) {
                return false;
            }
            final Rule that = ( Rule ) obj;
            return expression.equalsIgnoreCase( that.expression );
        }


        @Override
        public String toString() {
            return expression + ", " + replacement;
        }
    }


    /** Pluralization rules; the first rule in the list that matches wins. */
    private final LinkedList<Rule> plurals = new LinkedList<Rule>();

    /** Singularization rules; the first rule in the list that matches wins. */
    private final LinkedList<Rule> singulars = new LinkedList<Rule>();

    /**
     * The lowercase words that are to be excluded and not processed. This set can be modified by the users via
     * {@link #getUncountables()}.
     */
    private final Set<String> uncountables = new HashSet<String>();


    /** Creates an inflector populated by {@link #initialize()}. */
    public Inflector() {
        initialize();
    }


    /**
     * Creates an inflector holding copies of the rule lists and uncountable set of another inflector.
     *
     * @param original the inflector whose rules are copied
     */
    protected Inflector( Inflector original ) {
        plurals.addAll( original.plurals );
        singulars.addAll( original.singulars );
        uncountables.addAll( original.uncountables );
    }


    /**
     * Returns a new inflector with the same rules and uncountable words; later changes to either inflector do not
     * affect the other.
     */
    @Override
    @SuppressWarnings("all")
    public Inflector clone() {
        return new Inflector( this );
    }

    // ------------------------------------------------------------------------------------------------
    // Usage functions
    // ------------------------------------------------------------------------------------------------


    /**
     * Returns the plural form of the word in the string. <p> Examples:
     * <pre>
     *   inflector.pluralize(&quot;post&quot;)               #=&gt; &quot;posts&quot;
     *   inflector.pluralize(&quot;octopus&quot;)            #=&gt; &quot;octopi&quot;
     *   inflector.pluralize(&quot;sheep&quot;)              #=&gt; &quot;sheep&quot;
     *   inflector.pluralize(&quot;words&quot;)              #=&gt; &quot;words&quot;
     *   inflector.pluralize(&quot;the blue mailman&quot;)   #=&gt; &quot;the blue mailmen&quot;
     *   inflector.pluralize(&quot;CamelOctopus&quot;)       #=&gt; &quot;CamelOctopi&quot;
     * </pre>
     * </p> <p> Note that {@link Object#toString()} is called on the supplied object, so this method works for
     * non-strings, too. The string is trimmed before it is processed. </p>
     *
     * @param word the word that is to be pluralized.
     *
     * @return the pluralized form of the word, or the (trimmed) word itself if it could not be pluralized; null if
     *         the supplied word is null
     *
     * @see #singularize(Object)
     */
    public String pluralize( Object word ) {
        return inflect( word, plurals );
    }


    /**
     * Returns the form of the word appropriate for the supplied count: the word itself (via {@link
     * Object#toString()}, untouched) when the count is 1 or -1, and the {@link #pluralize(Object) plural} form
     * otherwise.
     *
     * @param word the word that is to be pluralized.
     * @param count the number of things the word describes
     *
     * @return the word in the form matching the count, or null if the supplied word is null
     */
    public String pluralize( Object word, int count ) {
        if ( word == null ) {
            return null;
        }
        if ( ( count == 1 ) || ( count == -1 ) ) {
            return word.toString();
        }
        return pluralize( word );
    }


    /**
     * Determines whether the word is already in plural form, i.e. whether pluralizing it leaves it unchanged.
     * Surrounding whitespace is ignored.
     *
     * @param word the word to test
     *
     * @return true if {@link #pluralize(Object)} does not change the trimmed word; false otherwise or if the word
     *         is null
     */
    public boolean isPlural( Object word ) {
        if ( word == null ) {
            return false;
        }
        // pluralize() trims its input, so compare against the trimmed form as well
        return pluralize( word ).equals( word.toString().trim() );
    }


    /**
     * Returns the singular form of the word in the string. <p> Examples:
     * <pre>
     *   inflector.singularize(&quot;posts&quot;)             #=&gt; &quot;post&quot;
     *   inflector.singularize(&quot;octopi&quot;)            #=&gt; &quot;octopus&quot;
     *   inflector.singularize(&quot;sheep&quot;)             #=&gt; &quot;sheep&quot;
     *   inflector.singularize(&quot;words&quot;)             #=&gt; &quot;word&quot;
     *   inflector.singularize(&quot;the blue mailmen&quot;)  #=&gt; &quot;the blue mailman&quot;
     *   inflector.singularize(&quot;CamelOctopi&quot;)       #=&gt; &quot;CamelOctopus&quot;
     * </pre>
     * </p> <p> Note that {@link Object#toString()} is called on the supplied object, so this method works for
     * non-strings, too. The string is trimmed before it is processed. </p>
     *
     * @param word the word that is to be singularized.
     *
     * @return the singularized form of the word, or the (trimmed) word itself if it could not be singularized;
     *         null if the supplied word is null
     *
     * @see #pluralize(Object)
     */
    public String singularize( Object word ) {
        return inflect( word, singulars );
    }


    /**
     * Determines whether the word is already in singular form, i.e. whether singularizing it leaves it unchanged.
     * Surrounding whitespace is ignored.
     *
     * @param word the word to test
     *
     * @return true if {@link #singularize(Object)} does not change the trimmed word; false otherwise or if the
     *         word is null
     */
    public boolean isSingular( Object word ) {
        if ( word == null ) {
            return false;
        }
        // singularize() trims its input, so compare against the trimmed form as well
        return singularize( word ).equals( word.toString().trim() );
    }


    /** Shared implementation of pluralize/singularize: trims, skips uncountables, applies the first matching rule. */
    private String inflect( Object word, LinkedList<Rule> rules ) {
        if ( word == null ) {
            return null;
        }
        String wordStr = word.toString().trim();
        if ( ( wordStr.length() == 0 ) || isUncountable( wordStr ) ) {
            return wordStr;
        }
        for ( Rule rule : rules ) {
            String result = rule.apply( wordStr );
            if ( result != null ) {
                return result;
            }
        }
        return wordStr;
    }


    /**
     * Converts strings to UpperCamelCase or, if the <code>uppercaseFirstLetter</code> argument is false, to
     * lowerCamelCase. This method will also use any extra delimiter characters to identify word boundaries. <p>
     * Examples:
     * <pre>
     *   inflector.camelCase(&quot;active_record&quot;,false)    #=&gt; &quot;activeRecord&quot;
     *   inflector.camelCase(&quot;active_record&quot;,true)     #=&gt; &quot;ActiveRecord&quot;
     *   inflector.camelCase(&quot;first_name&quot;,false)       #=&gt; &quot;firstName&quot;
     *   inflector.camelCase(&quot;first_name&quot;,true)        #=&gt; &quot;FirstName&quot;
     *   inflector.camelCase(&quot;name&quot;,false)             #=&gt; &quot;name&quot;
     *   inflector.camelCase(&quot;name&quot;,true)              #=&gt; &quot;Name&quot;
     * </pre>
     * </p>
     *
     * @param lowerCaseAndUnderscoredWord the word that is to be converted to camel case
     * @param uppercaseFirstLetter true if the first character is to be uppercased, or false if the first character
     * is to be lowercased
     * @param delimiterChars optional characters that are used to delimit word boundaries
     *
     * @return the camel case version of the word; null if the word is null, and the empty string if the word is
     *         empty or only whitespace
     *
     * @see #underscore(String, char[])
     */
    public String camelCase( String lowerCaseAndUnderscoredWord, boolean uppercaseFirstLetter,
                             char... delimiterChars ) {
        if ( lowerCaseAndUnderscoredWord == null ) {
            return null;
        }
        lowerCaseAndUnderscoredWord = lowerCaseAndUnderscoredWord.trim();
        if ( lowerCaseAndUnderscoredWord.length() == 0 ) {
            return "";
        }
        if ( uppercaseFirstLetter ) {
            String result = lowerCaseAndUnderscoredWord;
            // Replace any extra delimiters with underscores (before the
            // underscores are converted in the next step)...
            if ( delimiterChars != null ) {
                for ( char delimiterChar : delimiterChars ) {
                    result = result.replace( delimiterChar, '_' );
                }
            }
            // Change the case at the beginning and after each underscore ...
            return uppercaseGroup( CAMEL_CASE_BOUNDARY, result, 2 );
        }
        // lowerCamelCase: same as UpperCamelCase except the original first character is lower-cased.
        // This also covers single-character input, so "A" becomes "a".
        String upperCamel = camelCase( lowerCaseAndUnderscoredWord, true, delimiterChars );
        return Character.toLowerCase( lowerCaseAndUnderscoredWord.charAt( 0 ) ) + upperCamel.substring( 1 );
    }


    /**
     * Makes an underscored form from the expression in the string (the reverse of the {@link #camelCase(String,
     * boolean, char[]) camelCase} method). Hyphens and any characters that match the supplied delimiters are also
     * changed into underscores. <p> Examples:
     * <pre>
     *   inflector.underscore(&quot;activeRecord&quot;)          #=&gt; &quot;active_record&quot;
     *   inflector.underscore(&quot;ActiveRecord&quot;)          #=&gt; &quot;active_record&quot;
     *   inflector.underscore(&quot;firstName&quot;)             #=&gt; &quot;first_name&quot;
     *   inflector.underscore(&quot;FirstName&quot;)             #=&gt; &quot;first_name&quot;
     *   inflector.underscore(&quot;name&quot;)                  #=&gt; &quot;name&quot;
     *   inflector.underscore(&quot;The.firstName&quot;, '.')    #=&gt; &quot;the_first_name&quot;
     * </pre>
     * </p>
     *
     * @param camelCaseWord the camel-cased word that is to be converted;
     * @param delimiterChars optional characters that are used to delimit word boundaries (beyond capitalization)
     *
     * @return a lower-cased version of the input, with separate words delimited by the underscore character; null
     *         if the word is null
     */
    public String underscore( String camelCaseWord, char... delimiterChars ) {
        if ( camelCaseWord == null ) {
            return null;
        }
        String result = camelCaseWord.trim();
        if ( result.length() == 0 ) {
            return "";
        }
        result = ACRONYM_BOUNDARY.matcher( result ).replaceAll( "$1_$2" );
        result = LOWER_UPPER_BOUNDARY.matcher( result ).replaceAll( "$1_$2" );
        result = result.replace( '-', '_' );
        if ( delimiterChars != null ) {
            for ( char delimiterChar : delimiterChars ) {
                result = result.replace( delimiterChar, '_' );
            }
        }
        // Locale.ROOT keeps the result independent of the JVM's default locale (e.g. Turkish dotless i)
        return result.toLowerCase( Locale.ROOT );
    }


    /**
     * Returns a copy of the (trimmed) input with the first character converted to uppercase and the remainder to
     * lowercase.
     *
     * @param words the word to be capitalized
     *
     * @return the string with the first character capitalized and the remaining characters lowercased; null if the
     *         input is null
     */
    public String capitalize( String words ) {
        if ( words == null ) {
            return null;
        }
        String result = words.trim();
        if ( result.length() == 0 ) {
            return "";
        }
        if ( result.length() == 1 ) {
            return result.toUpperCase( Locale.ROOT );
        }
        return Character.toUpperCase( result.charAt( 0 ) ) + result.substring( 1 ).toLowerCase( Locale.ROOT );
    }

    // ------------------------------------------------------------------------------------------------
    // Management methods
    // ------------------------------------------------------------------------------------------------


    /**
     * Determine whether the supplied word is considered uncountable by the {@link #pluralize(Object) pluralize}
     * and {@link #singularize(Object) singularize} methods. The comparison ignores case and surrounding
     * whitespace.
     *
     * @param word the word
     *
     * @return true if the plural and singular forms of the word are the same
     */
    public boolean isUncountable( String word ) {
        if ( word == null ) {
            return false;
        }
        return uncountables.contains( normalize( word ) );
    }


    /**
     * Get the set of words that are not processed by the Inflector. The returned set is the inflector's own set
     * and is directly modifiable; entries are expected to be lowercase.
     *
     * @return the set of uncountable words
     */
    public Set<String> getUncountables() {
        return uncountables;
    }


    /**
     * Adds a pluralization rule that takes precedence over all rules added before it.
     *
     * @param rule the regular expression (matched case-insensitively) selecting the text to rewrite
     * @param replacement the replacement, which may reference groups of the expression (e.g. <code>$1</code>)
     */
    public void addPluralize( String rule, String replacement ) {
        plurals.addFirst( new Rule( rule, replacement ) );
    }


    /**
     * Adds a singularization rule that takes precedence over all rules added before it.
     *
     * @param rule the regular expression (matched case-insensitively) selecting the text to rewrite
     * @param replacement the replacement, which may reference groups of the expression (e.g. <code>$1</code>)
     */
    public void addSingularize( String rule, String replacement ) {
        singulars.addFirst( new Rule( rule, replacement ) );
    }


    /**
     * Registers an irregular singular/plural pair by adding one pluralization and one singularization rule. The
     * first character of each word is captured and re-emitted so that its case is preserved. The words are
     * inserted into regular expressions as-is and are matched at the end of the input.
     *
     * @param singular the singular form, e.g. "person"
     * @param plural the plural form, e.g. "people"
     *
     * @throws IllegalArgumentException if either form is null or empty
     */
    public void addIrregular( String singular, String plural ) {
        if ( isNullOrEmpty( singular ) ) {
            throw new IllegalArgumentException( "singular rule may not be empty" );
        }
        if ( isNullOrEmpty( plural ) ) {
            throw new IllegalArgumentException( "plural rule may not be empty" );
        }
        String singularRemainder = singular.substring( 1 );
        String pluralRemainder = plural.substring( 1 );
        addPluralize( "(" + singular.charAt( 0 ) + ")" + singularRemainder + "$", "$1" + pluralRemainder );
        addSingularize( "(" + plural.charAt( 0 ) + ")" + pluralRemainder + "$", "$1" + singularRemainder );
    }


    /**
     * Registers words whose singular and plural forms are identical. Each word is trimmed and lower-cased before
     * it is stored; null entries are skipped.
     *
     * @param words the uncountable words
     */
    public void addUncountable( String... words ) {
        if ( ( words == null ) || ( words.length == 0 ) ) {
            return;
        }
        for ( String word : words ) {
            if ( word != null ) {
                uncountables.add( normalize( word ) );
            }
        }
    }


    /**
     * Utility method to replace all occurrences given by the specific backreference with its uppercased form, and
     * remove all other backreferences. <p> The Java {@link Pattern regular expression processing} does not support
     * the preprocessing directives <code>\l</code>, <code>\u</code>, <code>\L</code>, and <code>\U</code>. If it
     * did, such directives could be used in the replacement string to uppercase or lowercase the backreferences.
     * For example, <code>\L1</code> would lowercase the first backreference, and <code>\u3</code> would uppercase
     * the 3rd backreference. </p>
     *
     * @param input the string to process
     * @param regex the regular expression whose matches are replaced
     * @param groupNumberToUppercase the number of the capturing group whose upper-cased text replaces each match
     *
     * @return the input string with the appropriate characters converted to upper-case
     */
    protected static String replaceAllWithUppercase( String input, String regex, int groupNumberToUppercase ) {
        return uppercaseGroup( Pattern.compile( regex ), input, groupNumberToUppercase );
    }


    /** Replaces every match of the pattern with the upper-cased text of the given capturing group. */
    private static String uppercaseGroup( Pattern pattern, String input, int groupNumberToUppercase ) {
        Matcher matcher = pattern.matcher( input );
        StringBuffer sb = new StringBuffer();
        while ( matcher.find() ) {
            String captured = matcher.group( groupNumberToUppercase );
            String upper = captured == null ? "" : captured.toUpperCase( Locale.ROOT );
            // The captured text is literal: quote it so '$' and '\' are not parsed as group references
            matcher.appendReplacement( sb, Matcher.quoteReplacement( upper ) );
        }
        matcher.appendTail( sb );
        return sb.toString();
    }


    /** Trims and lower-cases a word (locale-independently) for storage in, or lookup against, the uncountables. */
    private static String normalize( String word ) {
        return word.trim().toLowerCase( Locale.ROOT );
    }


    /** Returns true if the string is null or has no characters. */
    private static boolean isNullOrEmpty( String value ) {
        return ( value == null ) || ( value.length() == 0 );
    }


    /** Completely remove all rules within this inflector. */
    public void clear() {
        uncountables.clear();
        plurals.clear();
        singulars.clear();
    }


    /**
     * Registers the default English rules. Every rule is inserted at the front of its list, so rules registered
     * later in this method take precedence over the ones registered before them; the generic rules therefore come
     * first and the special cases last.
     */
    protected void initialize() {
        addPluralize( "$", "s" );
        addPluralize( "s$", "s" );
        addPluralize( "(ax|test)is$", "$1es" );
        addPluralize( "(octop|vir)us$", "$1i" );
        addPluralize( "(octop|vir)i$", "$1i" ); // already plural
        addPluralize( "(alias|status)$", "$1es" );
        addPluralize( "(bu)s$", "$1ses" );
        addPluralize( "(buffal|tomat)o$", "$1oes" );
        addPluralize( "([ti])um$", "$1a" );
        addPluralize( "([ti])a$", "$1a" ); // already plural
        addPluralize( "sis$", "ses" );
        addPluralize( "(?:([^f])fe|([lr])f)$", "$1$2ves" );
        addPluralize( "(hive)$", "$1s" );
        addPluralize( "([^aeiouy]|qu)y$", "$1ies" );
        addPluralize( "(x|ch|ss|sh)$", "$1es" );
        // The ix/ex alternation is grouped so that the stem is required for both endings
        addPluralize( "(matr|vert|ind)(?:ix|ex)$", "$1ices" );
        addPluralize( "([ml])ouse$", "$1ice" );
        addPluralize( "([ml])ice$", "$1ice" ); // already plural
        addPluralize( "^(ox)$", "$1en" );
        addPluralize( "(quiz)$", "$1zes" );
        // Need to check for the following words that are already pluralized:
        addPluralize( "(people|men|children|sexes|moves|stadiums)$", "$1" ); // irregulars
        addPluralize( "(oxen|octopi|viri|aliases|quizzes)$", "$1" ); // special rules

        addSingularize( "s$", "" );
        addSingularize( "(s|si|u)s$", "$1s" ); // '-us' and '-ss' are already singular
        addSingularize( "(n)ews$", "$1ews" );
        addSingularize( "([ti])a$", "$1um" );
        // Only the outer group is re-emitted; it already contains the nested first-letter groups
        addSingularize( "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1sis" );
        addSingularize( "(^analy)ses$", "$1sis" );
        addSingularize( "(^analy)sis$", "$1sis" ); // already singular, but ends in 's'
        addSingularize( "([^f])ves$", "$1fe" );
        addSingularize( "(hive)s$", "$1" );
        addSingularize( "(tive)s$", "$1" );
        addSingularize( "([lr])ves$", "$1f" );
        addSingularize( "([^aeiouy]|qu)ies$", "$1y" );
        addSingularize( "(s)eries$", "$1eries" );
        addSingularize( "(m)ovies$", "$1ovie" );
        addSingularize( "(x|ch|ss|sh)es$", "$1" );
        addSingularize( "([ml])ice$", "$1ouse" );
        addSingularize( "(bus)es$", "$1" );
        addSingularize( "(o)es$", "$1" );
        addSingularize( "(shoe)s$", "$1" );
        addSingularize( "(cris|ax|test)is$", "$1is" ); // already singular, but ends in 's'
        addSingularize( "(cris|ax|test)es$", "$1is" );
        addSingularize( "(octop|vir)i$", "$1us" );
        addSingularize( "(octop|vir)us$", "$1us" ); // already singular, but ends in 's'
        addSingularize( "(alias|status)es$", "$1" );
        addSingularize( "(alias|status)$", "$1" ); // already singular, but ends in 's'
        addSingularize( "^(ox)en", "$1" );
        addSingularize( "(vert|ind)ices$", "$1ex" );
        addSingularize( "(matr)ices$", "$1ix" );
        addSingularize( "(quiz)zes$", "$1" );

        addIrregular( "person", "people" );
        addIrregular( "man", "men" );
        addIrregular( "child", "children" );
        addIrregular( "sex", "sexes" );
        addIrregular( "move", "moves" );
        addIrregular( "stadium", "stadiums" );

        addUncountable( "equipment", "information", "rice", "money", "species", "series", "fish", "sheep", "data",
                "analytics" );
    }
}