/*
* This software is distributed under the terms of the FSF
* Gnu Lesser General Public License (see lgpl.txt).
*
* This program is distributed WITHOUT ANY WARRANTY. See the
* GNU General Public License for more details.
*/
package com.scooterframework.common.util;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
/**
 * <p>Conversion between singular and plural form of a noun word, plus a few
 * naming helpers (camelize, underscore, titleize, humanize, tableize,
 * classify, ordinalize).</p>
 *
 * <p>Lookups are driven by the public word tables declared in this class and,
 * when no table entry matches, by a fixed sequence of suffix rules. Results
 * that differ from the input are remembered in private caches.</p>
 *
 * <p>The tables and caches are plain <code>HashMap</code>, <code>HashSet</code>
 * and <code>ArrayList</code> instances that are read and written without any
 * synchronization.</p>
 *
 * <p>All case conversions use {@link Locale#ROOT} so that results do not
 * depend on the default locale of the JVM.</p>
 *
 * @author (Fei) John Chen
 */
public class WordUtil {
    /** Cache of singular to plural conversions already resolved, keyed by the exact input word. */
    private static final Map<String, String> resolvedSingle2Plurals = new HashMap<String, String>();
    /** Words for which {@link #pluralize(String)} found nothing to change. */
    private static final Set<String> resolvedPlurals = new HashSet<String>();
    /** Cache of plural to singular conversions already resolved, keyed by the exact input word. */
    private static final Map<String, String> resolvedPlural2Singles = new HashMap<String, String>();
    /** Words for which {@link #singularize(String)} found nothing to change. */
    private static final Set<String> resolvedSingles = new HashSet<String>();

    /** Lower-case singular to plural lookup table. */
    public static final Map<String, String> single2plurals = new HashMap<String, String>();
    /** Lower-case words that are treated as already plural. */
    public static final List<String> plurals = new ArrayList<String>();
    /** Lower-case plural to singular lookup table. */
    public static final Map<String, String> plural2singles = new HashMap<String, String>();
    /** Lower-case words that are treated as always singular (uncountable). */
    public static final List<String> singles = new ArrayList<String>();

    static {
        //Irregular plurals:
        single2plurals.put("child", "children");
        single2plurals.put("corpus", "corpora");
        single2plurals.put("foot", "feet");
        single2plurals.put("goose", "geese");
        single2plurals.put("louse", "lice");
        single2plurals.put("man", "men");
        single2plurals.put("mouse", "mice");
        single2plurals.put("ox", "oxen");
        single2plurals.put("person", "people");
        single2plurals.put("tooth", "teeth");
        single2plurals.put("woman", "women");

        //Some nouns do not change at all:
        single2plurals.put("cod", "cod");
        single2plurals.put("deer", "deer");
        single2plurals.put("fish", "fish");
        single2plurals.put("offspring", "offspring");
        single2plurals.put("perch", "perch");
        single2plurals.put("sheep", "sheep");
        single2plurals.put("trout", "trout");
        single2plurals.put("species", "species");
        single2plurals.put("series", "series");

        //Other nouns that do not change:
        single2plurals.put("data", "data");
        single2plurals.put("dice", "dice");
        single2plurals.put("media", "media");

        //Singular ends in -us, plural ends in -i: alumnus/alumni, focus/foci, nucleus/nuclei,
        //octopus/octopi, radius/radii, stimulus/stimuli, virus/viri
        //Exceptions to the above
        single2plurals.put("bus", "buses");

        //Singular ends in -ex, plural ends in -ices: appendix/appendices, index/indices
        single2plurals.put("index", "indices");
        single2plurals.put("vertex", "vertices");

        //These include nouns that are traditionally plural, but are also used for singular forms:
        single2plurals.put("barracks", "barracks");
        single2plurals.put("crossroads", "crossroads");
        single2plurals.put("die", "dice");
        single2plurals.put("gallows", "gallows");
        single2plurals.put("headquarters", "headquarters");
        single2plurals.put("means", "means");
        single2plurals.put("series", "series");
        single2plurals.put("species", "species");

        //Exception to Rule 6: Some nouns ending in f or fe are made plural
        //by changing f or fe to ves. with the following exceptions:
        single2plurals.put("chief", "chiefs");
        single2plurals.put("chef", "chefs");
        single2plurals.put("dwarf", "dwarfs");
        single2plurals.put("hoof", "hoofs");
        single2plurals.put("kerchief", "kerchiefs");
        single2plurals.put("fife", "fifes");
        single2plurals.put("proof", "proofs");//m-w.com
        single2plurals.put("roof", "roofs");
        single2plurals.put("safe", "safes");
        single2plurals.put("mischief", "mischiefs");
        single2plurals.put("grief", "griefs");

        //Rule 7b: All musical terms ending in -o have plurals ending in just -s.
        single2plurals.put("cello", "cellos");
        single2plurals.put("photo", "photos");
        single2plurals.put("solo", "solos");
        single2plurals.put("soprano", "sopranos");
        single2plurals.put("studio", "studios");

        //Exception to Rule 7: Most nouns ending in o preceded by a consonant
        //is formed into a plural by adding es with the following exceptions:
        single2plurals.put("canto", "cantos");
        single2plurals.put("lasso", "lassos");
        single2plurals.put("halo", "halos");
        single2plurals.put("memento", "mementos");
        single2plurals.put("photo", "photos");
        single2plurals.put("sirocco", "siroccos");

        //Rule 7c: Plural forms of words ending in -o (-os):
        single2plurals.put("albino", "albinos");
        single2plurals.put("armadillo", "armadillos");
        single2plurals.put("auto", "autos");
        single2plurals.put("bravo", "bravos");
        single2plurals.put("bronco", "broncos");
        single2plurals.put("canto", "cantos");
        single2plurals.put("casino", "casinos");
        single2plurals.put("combo", "combos");
        single2plurals.put("gazebo", "gazebos");
        single2plurals.put("inferno", "infernos");
        single2plurals.put("kangaroo", "kangaroos");
        single2plurals.put("kilo", "kilos");
        single2plurals.put("kimono", "kimonos");
        single2plurals.put("logo", "logos");
        single2plurals.put("maraschino", "maraschinos");
        single2plurals.put("memo", "memos");
        single2plurals.put("photo", "photos");
        single2plurals.put("pimento", "pimentos");
        single2plurals.put("poncho", "ponchos");
        single2plurals.put("pro", "pros");
        single2plurals.put("sombrero", "sombreros");
        single2plurals.put("taco", "tacos");
        single2plurals.put("tattoo", "tattoos");
        single2plurals.put("torso", "torsos");
        single2plurals.put("tobacco", "tobaccos");
        single2plurals.put("typo", "typos");

        //Rule 7c: Plural forms of words ending in -o (-oes):
        single2plurals.put("echo", "echoes");
        single2plurals.put("embargo", "embargoes");
        single2plurals.put("hero", "heroes");
        single2plurals.put("potato", "potatoes");
        single2plurals.put("tomato", "tomatoes");
        single2plurals.put("torpedo", "torpedoes");
        single2plurals.put("veto", "vetoes");

        //Rule 7c: Plural forms of words ending in -o (-os or -oes):
        single2plurals.put("avocado", "avocados");
        single2plurals.put("buffalo", "buffaloes");
        single2plurals.put("cargo", "cargoes");
        single2plurals.put("desperado", "desperadoes");
        single2plurals.put("dodo", "dodoes");
        single2plurals.put("domino", "dominoes");
        single2plurals.put("ghetto", "ghettos");
        single2plurals.put("grotto", "grottoes");
        single2plurals.put("hobo", "hoboes");
        single2plurals.put("innuendo", "innuendoes");
        single2plurals.put("lasso", "lassos");
        single2plurals.put("mango", "mangoes");
        single2plurals.put("mosquito", "mosquitoes");
        single2plurals.put("motto", "mottoes");
        single2plurals.put("mulatto", "mulattos");
        single2plurals.put("no", "noes");
        single2plurals.put("peccadillo", "peccadilloes");
        single2plurals.put("tornado", "tornadoes");
        single2plurals.put("volcano", "volcanoes");
        single2plurals.put("zero", "zeros");

        //others
        single2plurals.put("forum", "forums");

        //Things that come in pairs
        plurals.add("binoculars");
        plurals.add("forceps");
        plurals.add("jeans");
        plurals.add("glasses");
        plurals.add("pajamas");
        plurals.add("pants");
        plurals.add("scissors");
        plurals.add("shorts");
        plurals.add("tongs");
        plurals.add("trousers");
        plurals.add("tweezers");

        //Nouns that end in -s but have no singular (aggregate nouns)
        plurals.add("accommodations");
        plurals.add("amends");
        plurals.add("archives");
        plurals.add("arms");
        plurals.add("bellows");
        plurals.add("bowels");
        plurals.add("brains");
        plurals.add("clothes");
        plurals.add("communications");
        plurals.add("congratulations");
        plurals.add("contents");
        plurals.add("dregs");
        plurals.add("goods");
        plurals.add("measles");
        plurals.add("mumps");
        plurals.add("oats");
        plurals.add("pinchers");
        plurals.add("shears");
        plurals.add("snuffers");
        plurals.add("stairs");
        plurals.add("thanks");
        plurals.add("vespers");
        plurals.add("victuals");

        //Nouns that are plural but do not end in -s
        plurals.add("children");
        plurals.add("cattle");
        plurals.add("corpora");
        plurals.add("data");
        plurals.add("men");
        plurals.add("people");
        plurals.add("police");
        plurals.add("women");

        //Nouns that are always singular -- uncountable
        //("copper" was previously misspelled as "cooper")
        singles.add("copper");
        singles.add("corn");
        singles.add("cotton");
        singles.add("gold");
        singles.add("information");
        singles.add("money");
        singles.add("news");
        singles.add("rice");
        singles.add("silver");
        singles.add("sugar");
        singles.add("wheat");

        plural2singles.put("dice", "dice");
        plural2singles.put("indices", "index");
        plural2singles.put("vertices", "vertex");
        plural2singles.put("movies", "movie");
        plural2singles.put("viri", "virus");
        plural2singles.put("axes", "axis");
        plural2singles.put("crises", "crisis");
        plural2singles.put("analyses", "analysis");
        plural2singles.put("diagnoses", "diagnosis");
        plural2singles.put("synopses", "synopsis");
        plural2singles.put("theses", "thesis");
        plural2singles.put("moves", "move");
        plural2singles.put("caves", "cave");
        plural2singles.put("toes", "toe");

        //merge single2plurals into plural2singles; explicit plural2singles entries win
        for (Map.Entry<String, String> entry : single2plurals.entrySet()) {
            String plural = entry.getValue();
            if (plural2singles.get(plural) == null) {
                plural2singles.put(plural, entry.getKey());
            }
        }

        //merge plural2singles into single2plurals; explicit single2plurals entries win
        for (Map.Entry<String, String> entry : plural2singles.entrySet()) {
            String single = entry.getValue();
            if (single2plurals.get(single) == null) {
                single2plurals.put(single, entry.getKey());
            }
        }
    }

    /**
     * Returns a pluralized word.
     *
     * <p>The private caches are consulted first with the exact input, then
     * the lower-cased word is looked up in the word tables, and finally the
     * suffix rules are applied. Upper-case letters of the input are carried
     * over to the result position by position.</p>
     *
     * @param word the word to be converted to plural form
     * @return pluralized string; <code>null</code> or empty input is returned as is
     */
    public static String pluralize(String word) {
        if (word == null || "".equals(word)) return word;

        String cached = resolvedSingle2Plurals.get(word);
        if (cached != null) return cached;
        if (resolvedPlurals.contains(word) || resolvedPlural2Singles.containsKey(word)) {
            return word;
        }

        String tmp = word.toLowerCase(Locale.ROOT);
        String plform = single2plurals.get(tmp);
        if (plform == null
                && (plurals.contains(tmp) || singles.contains(tmp) || plural2singles.containsKey(tmp))) {
            //known plural or uncountable word: nothing to change
            plform = tmp;
        }
        if (plform == null) {
            plform = pluralizeByRules(word, tmp);
            if (plform == null) {
                //no rule applies: remember the word as already plural
                resolvedPlurals.add(word);
                return word;
            }
        }

        plform = restoreCase(word, plform);
        if (!plform.equalsIgnoreCase(word)) {
            resolvedSingle2Plurals.put(word, plform);
            resolvedPlural2Singles.put(plform, word);
        }
        return plform;
    }

    /**
     * Applies the suffix rules of {@link #pluralize(String)}; the first
     * matching rule wins.
     *
     * @param word the original word
     * @param tmp  the lower-cased word
     * @return the plural form, or <code>null</code> when no rule applies
     */
    private static String pluralizeByRules(String word, String tmp) {
        //Rule #5: For words that end in -is, change the -is to -es to make the plural form
        if (tmp.endsWith("is")) {
            return replaceLast(tmp, "is", "es");
        }
        //Singular ends in -ix, plural ends in -ices: appendix/appendices, index/indices
        if (tmp.endsWith("ix")) {
            return replaceLast(tmp, "ix", "ices");
        }
        //Singular ends in -us, plural ends in -i: alumnus/alumni, focus/foci, nucleus/nuclei,
        //octopus/octopi, radius/radii, stimulus/stimuli, virus/viri
        if (tmp.endsWith("us")) {
            return replaceLast(tmp, "us", "i");
        }
        //Rule #2: For words that end in a "hissing" sound (-z, -x, -ch, -sh), add an -es to form the plural.
        //Note: -s is deliberately not part of this rule as it would turn "posts" into "postses".
        if (tmp.endsWith("z") || tmp.endsWith("x") || tmp.endsWith("ch") || tmp.endsWith("sh")) {
            return tmp + "es";
        }
        if (tmp.endsWith("y")) {
            //Rule #3: If the word ends in a vowel plus -y (-ay, -ey, -iy, -oy, -uy), add an -s to the word.
            if (tmp.endsWith("ay") || tmp.endsWith("ey") || tmp.endsWith("iy")
                    || tmp.endsWith("oy") || tmp.endsWith("uy")) {
                return word + "s";
            }
            //Rule #4: If the word ends in a consonant plus -y, change the -y into -ie and add an -s.
            return replaceLast(tmp, "y", "ies");
        }
        //Rule #6: Some words that end in -f or -fe have plurals that end in -ves.
        if (tmp.endsWith("f")) {
            return replaceLast(tmp, "f", "ves");
        }
        if (tmp.endsWith("fe")) {
            return replaceLast(tmp, "fe", "ves");
        }
        //Rule #7: The plurals of words ending in -o are formed by either adding -s or by adding -es
        if (tmp.endsWith("o")) {
            //All words that end in a vowel plus -o (-ao, -eo, -io, -oo, -uo) have plurals that end in just -s
            if (tmp.endsWith("ao") || tmp.endsWith("eo") || tmp.endsWith("io")
                    || tmp.endsWith("oo") || tmp.endsWith("uo")) {
                return word + "s";
            }
            //Most others by adding -es; exceptions are listed in single2plurals
            return word + "es";
        }
        //Singular ends in -um, plural ends in -a: datum/data, curriculum/curricula
        if (tmp.endsWith("um")) {
            return replaceLast(tmp, "um", "a");
        }
        //Singular ends in -on, plural ends in -a: criterion/criteria, phenomenon/phenomena
        if (tmp.endsWith("on") && !tmp.endsWith("ation")) {
            return replaceLast(tmp, "on", "a");
        }
        //Singular ends in -a, plural ends in -ae: alumna/alumnae, formula/formulae, antenna/antennae
        if (tmp.endsWith("a")) {
            return replaceLast(tmp, "a", "ae");
        }
        //Singular ends in -eau, plural ends in -eaux: bureau/bureaux, beau/beaux
        if (tmp.endsWith("eau")) {
            return replaceLast(tmp, "eau", "eaux");
        }
        //special
        if (tmp.endsWith("man")) {
            return replaceLast(tmp, "man", "men");
        }
        //Rule #1: Add an -s to form the plural of most words.
        if (!tmp.endsWith("s")) {
            return word + "s";
        }
        //Rule #8: The plurals of single capital letters, acronyms, and Arabic numerals
        //(1,2,3,...) take an -s WITHOUT an apostrophe. Only words ending in -s reach this point.
        if (word.toUpperCase(Locale.ROOT).equals(word)) {
            return word + "s";
        }
        return null;
    }

    /**
     * Returns a singularized word from a plural word.
     *
     * <p>The private caches are consulted first with the exact input, then
     * the lower-cased word is looked up in the word tables, and finally the
     * suffix rules are applied. Upper-case letters of the input are carried
     * over to the result position by position.</p>
     *
     * @param word the word to be converted to singular form
     * @return singularized string; <code>null</code> or empty input is returned as is
     */
    public static String singularize(String word) {
        if (word == null || "".equals(word)) return word;

        String cached = resolvedPlural2Singles.get(word);
        if (cached != null) return cached;
        if (resolvedSingles.contains(word) || resolvedSingle2Plurals.containsKey(word)) {
            return word;
        }

        String tmp = word.toLowerCase(Locale.ROOT);
        String sgform = plural2singles.get(tmp);
        if (sgform == null
                && (plurals.contains(tmp) || singles.contains(tmp) || single2plurals.containsKey(tmp))) {
            //plural-only, uncountable or known singular word: nothing to change
            sgform = tmp;
        }
        if (sgform == null) {
            sgform = singularizeByRules(tmp);
            if (sgform == null) {
                //no rule applies: remember the word as already singular
                resolvedSingles.add(word);
                return word;
            }
        }

        sgform = restoreCase(word, sgform);
        if (!sgform.equalsIgnoreCase(word)) {
            resolvedPlural2Singles.put(word, sgform);
            resolvedSingle2Plurals.put(sgform, word);
        }
        return sgform;
    }

    /**
     * Applies the suffix rules of {@link #singularize(String)}; the first
     * matching rule wins.
     *
     * @param tmp the lower-cased word
     * @return the singular form, or <code>null</code> when no rule applies
     */
    private static String singularizeByRules(String tmp) {
        if (tmp.endsWith("ices")) {
            return replaceLast(tmp, "ices", "ix");
        }
        if (tmp.endsWith("i")) {
            return replaceLast(tmp, "i", "us");
        }
        //-bases is excluded so that e.g. "databases" falls through to the plain -s rule
        if ((tmp.endsWith("ses") && !tmp.endsWith("bases"))
                || tmp.endsWith("zes") || tmp.endsWith("xes")
                || tmp.endsWith("ches") || tmp.endsWith("shes")) {
            return replaceLast(tmp, "es", "");
        }
        if (tmp.endsWith("ays") || tmp.endsWith("eys") || tmp.endsWith("iys")
                || tmp.endsWith("oys") || tmp.endsWith("uys")) {
            return replaceLast(tmp, "ys", "y");
        }
        if (tmp.endsWith("ies")) {
            return replaceLast(tmp, "ies", "y");
        }
        //Rule #7
        if (tmp.endsWith("aos") || tmp.endsWith("eos") || tmp.endsWith("ios")
                || tmp.endsWith("oos") || tmp.endsWith("uos")) {
            return replaceLast(tmp, "os", "o");
        }
        //Rule #7
        if (tmp.endsWith("oes")) {
            return replaceLast(tmp, "oes", "o");
        }
        if (tmp.endsWith("ives")) {
            return replaceLast(tmp, "ves", "fe");
        }
        if (tmp.endsWith("lves") || tmp.endsWith("rves") || tmp.endsWith("aves")) {
            return replaceLast(tmp, "ves", "f");
        }
        if (tmp.endsWith("ae")) {
            return replaceLast(tmp, "ae", "a");
        }
        if (tmp.endsWith("eaux")) {
            return replaceLast(tmp, "eaux", "eau");
        }
        if (tmp.endsWith("men")) {
            return replaceLast(tmp, "men", "man");
        }
        if (tmp.endsWith("s")) {
            return replaceLast(tmp, "s", "");
        }
        return null;
    }

    /**
     * Copies upper-case letters from <code>word</code> into <code>form</code>
     * at every position where <code>form</code> holds the lower-case
     * counterpart of the same letter. Only the common prefix length of the
     * two strings is examined.
     *
     * @param word the original input
     * @param form the converted word
     * @return <code>form</code> with the original capitalization restored
     */
    private static String restoreCase(String word, String form) {
        char[] chars = form.toCharArray();
        int limit = Math.min(word.length(), chars.length);
        boolean caseChanged = false;
        for (int i = 0; i < limit; i++) {
            char wChar = word.charAt(i);
            if (Character.isUpperCase(wChar) && Character.toLowerCase(wChar) == chars[i]) {
                chars[i] = wChar;
                caseChanged = true;
            }
        }
        return caseChanged ? new String(chars) : form;
    }

    /**
     * Replaces the last occurrence of an old symbol with a new symbol.
     *
     * @param data      the original string
     * @param oldSymbol the old symbol to be replaced
     * @param newSymbol the corresponding new symbol
     * @return a new string, or <code>data</code> itself when <code>data</code>
     *         or <code>oldSymbol</code> is <code>null</code> or when
     *         <code>oldSymbol</code> does not occur in <code>data</code>
     */
    public static String replaceLast(String data, String oldSymbol, String newSymbol) {
        if (data == null || oldSymbol == null) return data;
        int lastIndex = data.lastIndexOf(oldSymbol);
        if (lastIndex == -1) return data;
        return data.substring(0, lastIndex) + newSymbol
                + data.substring(lastIndex + oldSymbol.length());
    }

    /**
     * Adds more pairs of single and plural words.
     *
     * <p>The pair is stored in the private caches, which are looked up with
     * the exact (case-sensitive) input of {@link #pluralize(String)} and
     * {@link #singularize(String)}.</p>
     *
     * @param single singular form of the word
     * @param plural plural form of the word
     */
    public static void addPlural(String single, String plural) {
        resolvedSingle2Plurals.put(single, plural);
        resolvedPlural2Singles.put(plural, single);
    }

    /**
     * Converts string to Camel case with the first letter in upper case.
     *
     * @param word the word to be converted to camelized form
     * @return a camelized string
     */
    public static String camelize(String word) {
        return camelize(word, false);
    }

    /**
     * Converts string to Camel case. If {@code firstLetterInLowerCase}
     * is true, then the first letter of the result string is in lower case.
     *
     * <pre>
     * Examples:
     * camelize("hello") ==> "Hello"
     * camelize("hello world") ==> "Hello world"
     * camelize("active_record") ==> "ActiveRecord"
     * camelize("active_record", true) ==> "activeRecord"
     * </pre>
     *
     * @param word the word to be converted to camelized form
     * @param firstLetterInLowerCase true if the first character should be in lower case
     * @return a camelized string
     */
    public static String camelize(String word, boolean firstLetterInLowerCase) {
        if (word == null || "".equals(word)) return word;
        if (word.indexOf('_') == -1) {
            return camelizeOneWord(word, firstLetterInLowerCase);
        }

        StringBuilder sb = new StringBuilder(word.length());
        StringTokenizer st = new StringTokenizer(word, "_");
        boolean first = true;
        while (st.hasMoreTokens()) {
            //only the first token honors firstLetterInLowerCase
            sb.append(camelizeOneWord(st.nextToken(), first && firstLetterInLowerCase));
            first = false;
        }
        return sb.toString();
    }

    /**
     * Changes the case of the first character of a word and leaves the rest
     * of the word untouched.
     */
    private static String camelizeOneWord(String word, boolean firstLetterInLowerCase) {
        if (word == null || "".equals(word)) return word;
        String firstChar = word.substring(0, 1);
        String head = firstLetterInLowerCase
                ? firstChar.toLowerCase(Locale.ROOT)
                : firstChar.toUpperCase(Locale.ROOT);
        return head + word.substring(1);
    }

    /**
     * {@code underscore} is the reverse of {@code camelize} method.
     * Dashes are turned into underscores, every letter A-Z is lower-cased,
     * and an underscore is inserted before such a letter when it directly
     * follows a letter a-z.
     *
     * <pre>
     * Examples:
     * underscore("Hello world") ==> "hello world"
     * underscore("ActiveRecord") ==> "active_record"
     * underscore("The RedCross") ==> "the red_cross"
     * underscore("ABCD") ==> "abcd"
     * </pre>
     *
     * @param phase the original string
     * @return an underscored string
     */
    public static String underscore(String phase) {
        if (phase == null || "".equals(phase)) return phase;
        String text = phase.replace('-', '_');
        int total = text.length();
        StringBuilder sb = new StringBuilder(total + 8);
        for (int i = 0; i < total; i++) {
            char c = text.charAt(i);
            if (!isInA2Z(c)) {
                sb.append(c);
                continue;
            }
            if (i > 0 && isIna2z(text.charAt(i - 1))) {
                sb.append('_');
            }
            //Character.toLowerCase is locale independent
            sb.append(Character.toLowerCase(c));
        }
        return sb.toString();
    }

    /** Tells whether the character is one of the ASCII letters A-Z. */
    private static boolean isInA2Z(char c) {
        return c >= 'A' && c <= 'Z';
    }

    /** Tells whether the character is one of the ASCII letters a-z. */
    private static boolean isIna2z(char c) {
        return c >= 'a' && c <= 'z';
    }

    /**
     * Humanizes the string (see {@link #humanize(String)}) and then
     * capitalizes every letter a-z that starts the string or follows a space.
     *
     * <pre>
     * Examples:
     * titleize("ch 1: Java-ActiveRecordIsFun") ==> "Ch 1: Java Active Record Is Fun"
     * </pre>
     *
     * @param phase the original string
     * @return a titleized string
     */
    public static String titleize(String phase) {
        if (phase == null || "".equals(phase)) return phase;
        String text = humanize(phase);
        int total = text.length();
        StringBuilder sb = new StringBuilder(total);
        for (int i = 0; i < total; i++) {
            char c = text.charAt(i);
            boolean wordStart = (i == 0) || text.charAt(i - 1) == ' ';
            if (wordStart && isIna2z(c)) {
                sb.append(Character.toUpperCase(c));
            }
            else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Replaces all dashes and underscores by spaces and capitalizes the first
     * word. Also removes a trailing "_id" and any "_id" that is followed by
     * a space.
     *
     * <pre>
     * Examples:
     * humanize("active_record") ==> "Active record"
     * humanize("post_id") ==> "Post"
     * </pre>
     *
     * @param phase the original string
     * @return a humanized string
     */
    public static String humanize(String phase) {
        if (phase == null || "".equals(phase)) return phase;
        String text = underscore(phase);
        //append a space so that a trailing "_id" is matched by the replacement below
        if (text.endsWith("_id")) text += " ";
        return camelize(text.replaceAll("_id ", " ").replace('_', ' ').trim());
    }

    /**
     * Returns a database table name corresponding to the input model class
     * name.
     *
     * <pre>
     * Examples:
     * tableize("Person") ==> "people"
     * tableize("LineItem") ==> "line_items"
     * </pre>
     *
     * @param modelClassName java class name of the model
     * @return the table name of the java model class name
     */
    public static String tableize(String modelClassName) {
        return pluralize(underscore(modelClassName));
    }

    /**
     * Returns a model class name corresponding to the input database
     * table name.
     *
     * <pre>
     * Examples:
     * classify("people") ==> "Person"
     * classify("line_items") ==> "LineItem"
     * </pre>
     *
     * @param tableName name of the database table
     * @return a java model class name
     */
    public static String classify(String tableName) {
        return camelize(singularize(tableName));
    }

    /**
     * Returns an ordinalized string. Numbers whose last two digits are 11,
     * 12 or 13 take "th"; otherwise the last digit selects "st", "nd", "rd"
     * or "th".
     *
     * <pre>
     * Examples:
     * ordinalize(100) ==> "100th"
     * ordinalize(1003) ==> "1003rd"
     * ordinalize(11) ==> "11th"
     * </pre>
     *
     * @param number the number
     * @return an ordinalized string for the number
     */
    public static String ordinalize(int number) {
        //the remainder lies in (-100, 100), so Math.abs cannot overflow here
        int lastTwo = Math.abs(number % 100);
        String suffix;
        if (lastTwo >= 11 && lastTwo <= 13) {
            suffix = "th";
        }
        else {
            switch (lastTwo % 10) {
                case 1:  suffix = "st"; break;
                case 2:  suffix = "nd"; break;
                case 3:  suffix = "rd"; break;
                default: suffix = "th"; break;
            }
        }
        return number + suffix;
    }
}