/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.usergrid.persistence.collection.serialization.impl.util; import java.util.HashSet; import java.util.LinkedList; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Transforms words to singular, plural, humanized (human readable), underscore, camel case, or ordinal form. This is * inspired by the <a href="http://api.rubyonrails.org/classes/Inflector.html">Inflector</a> class in <a * href="http://www.rubyonrails.org">Ruby on Rails</a>, which is distributed under the <a * href="http://wiki.rubyonrails.org/rails/pages/License">Rails license</a>. */ public class Inflector { protected static final Inflector INSTANCE = new Inflector(); public static Inflector getInstance() { return INSTANCE; } protected class Rule { protected final String expression; protected final Pattern expressionPattern; protected final String replacement; protected Rule( String expression, String replacement ) { this.expression = expression; this.replacement = replacement != null ? replacement : ""; expressionPattern = Pattern.compile( this.expression, Pattern.CASE_INSENSITIVE ); } /** * Apply the rule against the input string, returning the modified string or null if the rule didn't apply (and * no modifications were made) * * @param input the input string * * @return the modified string if this rule applied, or null if the input was not modified by this rule */ protected String apply( String input ) { Matcher matcher = expressionPattern.matcher( input ); if ( !matcher.find() ) { return null; } return matcher.replaceAll( replacement ); } @Override public int hashCode() { return expression.hashCode(); } @Override public boolean equals( Object obj ) { if ( obj == this ) { return true; } if ( ( obj != null ) && ( obj.getClass() == this.getClass() ) ) { final Rule that = ( Rule ) obj; if ( expression.equalsIgnoreCase( that.expression ) ) { return true; } } return false; } @Override public String toString() { return expression + ", " + replacement; } } private final LinkedList<Rule> plurals = new LinkedList<Rule>(); private final LinkedList<Rule> singulars = new LinkedList<Rule>(); /** * The lowercase words that are to be excluded and not processed. This map can be modified by the users via {@link * #getUncountables()}. */ private final Set<String> uncountables = new HashSet<String>(); public Inflector() { initialize(); } protected Inflector( Inflector original ) { plurals.addAll( original.plurals ); singulars.addAll( original.singulars ); uncountables.addAll( original.uncountables ); } @Override @SuppressWarnings("all") public Inflector clone() { return new Inflector( this ); } // ------------------------------------------------------------------------------------------------ // Usage functions // ------------------------------------------------------------------------------------------------ /** * Returns the plural form of the word in the string. <p> Examples: <p/> * <pre> * inflector.pluralize("post") #=> "posts" * inflector.pluralize("octopus") #=> "octopi" * inflector.pluralize("sheep") #=> "sheep" * inflector.pluralize("words") #=> "words" * inflector.pluralize("the blue mailman") #=> "the blue mailmen" * inflector.pluralize("CamelOctopus") #=> "CamelOctopi" * </pre> * <p/> </p> <p> Note that if the {@link Object#toString()} is called on the supplied object, so this method works * for non-strings, too. </p> * * @param word the word that is to be pluralized. * * @return the pluralized form of the word, or the word itself if it could not be pluralized * * @see #singularize(Object) */ public String pluralize( Object word ) { if ( word == null ) { return null; } String wordStr = word.toString().trim(); if ( wordStr.length() == 0 ) { return wordStr; } if ( isUncountable( wordStr ) ) { return wordStr; } for ( Rule rule : plurals ) { String result = rule.apply( wordStr ); if ( result != null ) { return result; } } return wordStr; } public String pluralize( Object word, int count ) { if ( word == null ) { return null; } if ( ( count == 1 ) || ( count == -1 ) ) { return word.toString(); } return pluralize( word ); } /** * Returns the singular form of the word in the string. <p> Examples: <p/> * <pre> * inflector.singularize("posts") #=> "post" * inflector.singularize("octopi") #=> "octopus" * inflector.singularize("sheep") #=> "sheep" * inflector.singularize("words") #=> "word" * inflector.singularize("the blue mailmen") #=> "the blue mailman" * inflector.singularize("CamelOctopi") #=> "CamelOctopus" * </pre> * <p/> </p> <p> Note that if the {@link Object#toString()} is called on the supplied object, so this method works * for non-strings, too. </p> * * @param word the word that is to be pluralized. * * @return the pluralized form of the word, or the word itself if it could not be pluralized * * @see #pluralize(Object) */ public String singularize( Object word ) { if ( word == null ) { return null; } String wordStr = word.toString().trim(); if ( wordStr.length() == 0 ) { return wordStr; } if ( isUncountable( wordStr ) ) { return wordStr; } for ( Rule rule : singulars ) { String result = rule.apply( wordStr ); if ( result != null ) { return result; } } return wordStr; } // ------------------------------------------------------------------------------------------------ // Management methods // ------------------------------------------------------------------------------------------------ /** * Determine whether the supplied word is considered uncountable by the {@link #pluralize(Object) pluralize} and * {@link #singularize(Object) singularize} methods. * * @param word the word * * @return true if the plural and singular forms of the word are the same */ public boolean isUncountable( String word ) { if ( word == null ) { return false; } String trimmedLower = word.trim().toLowerCase(); return uncountables.contains( trimmedLower ); } public void addPluralize( String rule, String replacement ) { final Rule pluralizeRule = new Rule( rule, replacement ); plurals.addFirst( pluralizeRule ); } public void addSingularize( String rule, String replacement ) { final Rule singularizeRule = new Rule( rule, replacement ); singulars.addFirst( singularizeRule ); } public void addIrregular( String singular, String plural ) { if ( org.apache.commons.lang.StringUtils.isEmpty( singular ) ) { throw new IllegalArgumentException( "singular rule may not be empty" ); } if ( org.apache.commons.lang.StringUtils.isEmpty( plural ) ) { throw new IllegalArgumentException( "plural rule may not be empty" ); } String singularRemainder = singular.length() > 1 ? singular.substring( 1 ) : ""; String pluralRemainder = plural.length() > 1 ? plural.substring( 1 ) : ""; addPluralize( "(" + singular.charAt( 0 ) + ")" + singularRemainder + "$", "$1" + pluralRemainder ); addSingularize( "(" + plural.charAt( 0 ) + ")" + pluralRemainder + "$", "$1" + singularRemainder ); } public void addUncountable( String... words ) { if ( ( words == null ) || ( words.length == 0 ) ) { return; } for ( String word : words ) { if ( word != null ) { uncountables.add( word.trim().toLowerCase() ); } } } /** Completely remove all rules within this inflector. */ public void clear() { uncountables.clear(); plurals.clear(); singulars.clear(); } protected void initialize() { Inflector inflect = this; inflect.addPluralize( "$", "s" ); inflect.addPluralize( "s$", "s" ); inflect.addPluralize( "(ax|test)is$", "$1es" ); inflect.addPluralize( "(octop|vir)us$", "$1i" ); inflect.addPluralize( "(octop|vir)i$", "$1i" ); // already plural inflect.addPluralize( "(alias|status)$", "$1es" ); inflect.addPluralize( "(bu)s$", "$1ses" ); inflect.addPluralize( "(buffal|tomat)o$", "$1oes" ); inflect.addPluralize( "([ti])um$", "$1a" ); inflect.addPluralize( "([ti])a$", "$1a" ); // already plural inflect.addPluralize( "sis$", "ses" ); inflect.addPluralize( "(?:([^f])fe|([lr])f)$", "$1$2ves" ); inflect.addPluralize( "(hive)$", "$1s" ); inflect.addPluralize( "([^aeiouy]|qu)y$", "$1ies" ); inflect.addPluralize( "(x|ch|ss|sh)$", "$1es" ); inflect.addPluralize( "(matr|vert|ind)ix|ex$", "$1ices" ); inflect.addPluralize( "([m|l])ouse$", "$1ice" ); inflect.addPluralize( "([m|l])ice$", "$1ice" ); inflect.addPluralize( "^(ox)$", "$1en" ); inflect.addPluralize( "(quiz)$", "$1zes" ); // Need to check for the following words that are already pluralized: inflect.addPluralize( "(people|men|children|sexes|moves|stadiums)$", "$1" ); // irregulars inflect.addPluralize( "(oxen|octopi|viri|aliases|quizzes)$", "$1" ); // special // rules inflect.addSingularize( "s$", "" ); inflect.addSingularize( "(s|si|u)s$", "$1s" ); // '-us' and '-ss' are // already singular inflect.addSingularize( "(n)ews$", "$1ews" ); inflect.addSingularize( "([ti])a$", "$1um" ); inflect.addSingularize( "((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$", "$1$2sis" ); inflect.addSingularize( "(^analy)ses$", "$1sis" ); inflect.addSingularize( "(^analy)sis$", "$1sis" ); // already singular, // but ends in 's' inflect.addSingularize( "([^f])ves$", "$1fe" ); inflect.addSingularize( "(hive)s$", "$1" ); inflect.addSingularize( "(tive)s$", "$1" ); inflect.addSingularize( "([lr])ves$", "$1f" ); inflect.addSingularize( "([^aeiouy]|qu)ies$", "$1y" ); inflect.addSingularize( "(s)eries$", "$1eries" ); inflect.addSingularize( "(m)ovies$", "$1ovie" ); inflect.addSingularize( "(x|ch|ss|sh)es$", "$1" ); inflect.addSingularize( "([m|l])ice$", "$1ouse" ); inflect.addSingularize( "(bus)es$", "$1" ); inflect.addSingularize( "(o)es$", "$1" ); inflect.addSingularize( "(shoe)s$", "$1" ); inflect.addSingularize( "(cris|ax|test)is$", "$1is" ); // already // singular, but // ends in 's' inflect.addSingularize( "(cris|ax|test)es$", "$1is" ); inflect.addSingularize( "(octop|vir)i$", "$1us" ); inflect.addSingularize( "(octop|vir)us$", "$1us" ); // already singular, // but ends in 's' inflect.addSingularize( "(alias|status)es$", "$1" ); inflect.addSingularize( "(alias|status)$", "$1" ); // already singular, // but ends in 's' inflect.addSingularize( "^(ox)en", "$1" ); inflect.addSingularize( "(vert|ind)ices$", "$1ex" ); inflect.addSingularize( "(matr)ices$", "$1ix" ); inflect.addSingularize( "(quiz)zes$", "$1" ); inflect.addIrregular( "person", "people" ); inflect.addIrregular( "man", "men" ); inflect.addIrregular( "child", "children" ); inflect.addIrregular( "sex", "sexes" ); inflect.addIrregular( "move", "moves" ); inflect.addIrregular( "stadium", "stadiums" ); inflect.addUncountable( "equipment", "information", "rice", "money", "species", "series", "fish", "sheep", "data", "analytics" ); } }