/*
* RHQ Management Platform
* Copyright (C) 2005-2008 Red Hat, Inc.
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation version 2 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
package org.rhq.enterprise.gui.util;
import java.util.List;
import java.util.Map;
import java.util.Arrays;
import java.util.Locale;
import java.util.regex.Matcher;
import org.jvnet.inflector.rule.SuffixInflectionRule;
import org.jvnet.inflector.rule.RegexReplacementRule;
import org.jvnet.inflector.rule.CategoryInflectionRule;
import org.jvnet.inflector.rule.AbstractRegexReplacementRule;
import org.jvnet.inflector.rule.IrregularMappingRule;
import static org.jvnet.inflector.rule.AbstractRegexReplacementRule.disjunction;
import static org.jvnet.inflector.rule.IrregularMappingRule.toMap;
import org.jvnet.inflector.Rule;
import org.jvnet.inflector.RuleBasedPluralizer;
/**
* A tweaked copy of {@link org.jvnet.inflector.lang.en.NounPluralizer}. Comments labeled with "[RHQ]" describe the
* tweaks.
*
* @author Ian Springer
*/
public class CustomEnglishPluralizer extends RuleBasedPluralizer {
// TODO understand this regex better and compare to Perl!
private static final String POSTFIX_ADJECTIVE_REGEX =
"(" +
"(?!major|lieutenant|brigadier|adjutant)\\S+(?=(?:-|\\s+)general)|" +
"court(?=(?:-|\\s+)martial)" +
")(.*)";
private static final String[] PREPOSITIONS = {
"about", "above", "across", "after", "among", "around", "at", "athwart", "before", "behind",
"below", "beneath", "beside", "besides", "between", "betwixt", "beyond", "but", "by",
"during", "except", "for", "from", "in", "into", "near", "of", "off", "on", "onto", "out", "over",
"since", "till", "to", "under", "until", "unto", "upon", "with",
};
private static final Map<String, String> NOMINATIVE_PRONOUNS = toMap(new String[][]{
// nominative reflexive
{ "i", "we" }, { "myself", "ourselves" },
{ "you", "you" }, { "yourself", "yourselves" },
{ "she", "they" }, { "herself", "themselves" },
{ "he", "they" }, { "himself", "themselves" },
{ "it", "they" }, { "itself", "themselves" },
{ "they", "they" }, { "themself", "themselves" },
// possessive
{ "mine", "ours" },
{ "yours", "yours" },
{ "hers", "theirs" },
{ "his", "theirs" },
{ "its", "theirs" },
{ "theirs", "theirs" },
});
private static final Map<String, String> ACCUSATIVE_PRONOUNS = toMap(new String[][]{
// accusative reflexive
{ "me", "us" }, { "myself", "ourselves" },
{ "you", "you" }, { "yourself", "yourselves" },
{ "her", "them" }, { "herself", "themselves" },
{ "him", "them" }, { "himself", "themselves" },
{ "it", "them" }, { "itself", "themselves" },
{ "them", "them" }, { "themself", "themselves" },
});
private static final Map<String, String> IRREGULAR_NOUNS = toMap(new String[][]{
{ "child", "children" },
{ "brother", "brothers" }, // irregular classical form
{ "loaf", "loaves" },
{ "hoof", "hoofs" }, // irregular classical form
{ "beef", "beefs" }, // irregular classical form
{ "money", "monies" },
{ "mongoose", "mongooses" },
{ "ox", "oxen" },
{ "cow", "cows" }, // irregular classical form
{ "soliloquy", "soliloquies" },
{ "graffito", "graffiti" },
{ "prima donna", "prima donnas" }, // irregular classical form
{ "octopus", "octopuses" }, // irregular classical form
{ "genie", "genies" }, // irregular classical form
{ "ganglion", "ganglions" }, // irregular classical form
{ "trilby", "trilbys" },
{ "turf", "turfs" }, // irregular classical form
{ "numen", "numina" },
{ "atman", "atmas" },
{ "occiput", "occiputs" }, // irregular classical form
// Words ending in -s
{ "corpus", "corpuses" }, // irregular classical form
{ "opus", "opuses" }, // irregular classical form
{ "genus", "genera" },
{ "mythos", "mythoi" },
{ "penis", "penises" }, // irregular classical form
{ "testis", "testes" },
{ "atlas", "atlases" }, // irregular classical form
});
private static final String[] CATEGORY_UNINFLECTED_NOUNS = {
// Fish and herd animals
".*fish", "tuna", "salmon", "mackerel", "trout",
"bream", "sea[- ]bass", "carp", "cod", "flounder", "whiting",
".*deer", ".*sheep",
// Nationals ending in -ese
"Portuguese", "Amoyese", "Borghese", "Congoese", "Faroese",
"Foochowese", "Genevese", "Genoese", "Gilbertese", "Hottentotese",
"Kiplingese", "Kongoese", "Lucchese", "Maltese", "Nankingese",
"Niasese", "Pekingese", "Piedmontese", "Pistoiese", "Sarawakese",
"Shavese", "Vermontese", "Wenchowese", "Yengeese",
".*[nrlm]ese",
// Diseases
".*pox",
// Other oddities
"graffiti", "djinn",
// Words ending in -s
// Pairs or groups subsumed to a singular
"breeches", "britches", "clippers", "gallows", "hijinks",
"headquarters", "pliers", "scissors", "testes", "herpes",
"pincers", "shears", "proceedings", "trousers",
// Unassimilated Latin 4th declension
"cantus", "coitus", "nexus",
// Recent imports
"contretemps", "corps", "debris",
".*ois", "siemens",
// Diseases
".*measles", "mumps",
// Others
"diabetes", "jackanapes", "series", "species", "rabies",
"chassis", "innings", "news", "mews",
};
private static final String[] CATEGORY_MAN_MANS_RULE = {
"human",
"Alabaman", "Bahaman", "Burman", "German",
"Hiroshiman", "Liman", "Nakayaman", "Oklahoman",
"Panaman", "Selman", "Sonaman", "Tacoman", "Yakiman",
"Yokohaman", "Yuman",
};
private static final String[] CATEGORY_EX_ICES_RULE = {
"codex", "murex", "silex",
};
private static final String[] CATEGORY_IX_ICES_RULE = {
"radix", "helix",
};
private static final String[] CATEGORY_UM_A_RULE = {
"bacterium", "agendum", "desideratum", "erratum",
"stratum", "datum", "ovum", "extremum",
"candelabrum",
};
private static final String[] CATEGORY_US_I_RULE = {
"alumnus", "alveolus", "bacillus", "bronchus",
"locus", "nucleus", "stimulus", "meniscus",
};
private static final String[] CATEGORY_ON_A_RULE = {
"criterion", "perihelion", "aphelion",
"phenomenon", "prolegomenon", "noumenon",
"organon", "asyndeton", "hyperbaton",
};
private static final String[] CATEGORY_A_AE_RULE = {
"alumna", "alga", "vertebra", "persona"
};
private static final String[] CATEGORY_O_OS_RULE = {
"albino", "archipelago", "armadillo",
"commando", "crescendo", "fiasco",
"ditto", "dynamo", "embryo",
"ghetto", "guano", "inferno",
"jumbo", "lumbago", "magneto",
"manifesto", "medico", "octavo",
"photo", "pro", "quarto",
"canto", "lingo", "generalissimo",
"stylo", "rhino", "casino",
"auto", "macro", "zero",
"solo", "soprano", "basso", "alto",
"contralto", "tempo", "piano", "virtuoso",
};
private static final String[] CATEGORY_SINGULAR_S_RULE = {
".*ss",
"acropolis", "aegis", "alias", "asbestos", "bathos", "bias",
"bronchitis", "bursitis", "caddis", "cannabis",
"canvas", "chaos", "cosmos", "dais", "digitalis",
"epidermis", "ethos", "eyas", "gas", "glottis",
"hubris", "ibis", "lens", "mantis", "marquis", "metropolis",
"pathos", "pelvis", "polis", "rhinoceros",
"sassafras", "trellis", ".*us", "[A-Z].*es",
"ephemeris", "iris", "clitoris",
"chrysalis", "epididymis",
// Inflammations
".*itis",
};
// References to Steps are to those in Conway's paper
private final List<Rule> rules = Arrays.asList(new Rule[] {
// Blank word
new RegexReplacementRule("^(\\s)$", "$1"),
// Nouns that do not inflect in the plural (such as "fish") [Step 2]
new CategoryInflectionRule(CATEGORY_UNINFLECTED_NOUNS, "-", "-"),
// Compounds [Step 12]
new AbstractRegexReplacementRule("(?i)^(?:" + POSTFIX_ADJECTIVE_REGEX + ")$") {
@Override public String replace(Matcher m) {
return pluralize(m.group(1)) + m.group(2);
}
},
new AbstractRegexReplacementRule(
"(?i)(.*?)((?:-|\\s+)(?:" + disjunction(PREPOSITIONS) + "|d[eu])(?:-|\\s+))a(?:-|\\s+)(.*)") {
@Override public String replace(Matcher m) {
return pluralize(m.group(1)) +
m.group(2) +
pluralize(m.group(3));
}
},
new AbstractRegexReplacementRule(
"(?i)(.*?)((-|\\s+)(" + disjunction(PREPOSITIONS) + "|d[eu])((-|\\s+)(.*))?)") {
@Override public String replace(Matcher m) {
return pluralize(m.group(1)) + m.group(2);
}
},
// Pronouns [Step 3]
new IrregularMappingRule(NOMINATIVE_PRONOUNS, "(?i)" + disjunction(NOMINATIVE_PRONOUNS.keySet())),
new IrregularMappingRule(ACCUSATIVE_PRONOUNS, "(?i)" + disjunction(ACCUSATIVE_PRONOUNS.keySet())),
new IrregularMappingRule(ACCUSATIVE_PRONOUNS,
"(?i)(" + disjunction(PREPOSITIONS) + "\\s)" +
"(" + disjunction(ACCUSATIVE_PRONOUNS.keySet()) + ")") {
@Override public String replace(Matcher m) {
return m.group(1) + mappings.get(m.group(2).toLowerCase());
}
},
// Standard irregular plurals (such as "children") [Step 4]
new IrregularMappingRule(IRREGULAR_NOUNS, "(?i)(.*)\\b" + disjunction(IRREGULAR_NOUNS.keySet()) + "$"),
new CategoryInflectionRule(CATEGORY_MAN_MANS_RULE, "-man", "-mans"),
new RegexReplacementRule("(?i)(\\S*)(person)$", "$1people"),
// Families of irregular plurals for common suffixes (such as "-men") [Step 5]
new SuffixInflectionRule("-man", "-man", "-men"),
new SuffixInflectionRule("-[lm]ouse", "-ouse", "-ice"),
new SuffixInflectionRule("-tooth", "-tooth", "-teeth"),
new SuffixInflectionRule("-goose", "-goose", "-geese"),
new SuffixInflectionRule("-foot", "-foot", "-feet"),
// Assimilated irregular plurals [Step 6]
new SuffixInflectionRule("-ceps", "-", "-"),
new SuffixInflectionRule("-zoon", "-zoon", "-zoa"),
new SuffixInflectionRule("-[csx]is", "-is", "-es"),
new CategoryInflectionRule(CATEGORY_EX_ICES_RULE, "-ex", "-ices"),
new CategoryInflectionRule(CATEGORY_IX_ICES_RULE, "-ix", "-ices"),
new CategoryInflectionRule(CATEGORY_UM_A_RULE, "-um", "-a"),
new CategoryInflectionRule(CATEGORY_US_I_RULE, "-us", "-i"),
new CategoryInflectionRule(CATEGORY_ON_A_RULE, "-on", "-a"),
new CategoryInflectionRule(CATEGORY_A_AE_RULE, "-a", "-ae"),
// Classical irregular plurals [Step 7]
// Classical plurals have not been implemented
// Nouns ending in sibilants (such as "churches") [Step 8]
new CategoryInflectionRule(CATEGORY_SINGULAR_S_RULE, "-s", "-ses"),
new RegexReplacementRule("^([A-Z].*s)$", "$1es"),
new SuffixInflectionRule("-[cs]h", "-h", "-hes"),
new SuffixInflectionRule("-x", "-x", "-xes"),
new SuffixInflectionRule("-z", "-z", "-zes"),
// Nouns ending with "-f" or "-fe" take "-ves" in the plural (such as "halves") [Step 9]
new SuffixInflectionRule("-[aeo]lf", "-f", "-ves"),
new SuffixInflectionRule("-[^d]eaf", "-f", "-ves"),
new SuffixInflectionRule("-arf", "-f", "-ves"),
new SuffixInflectionRule("-[nlw]ife", "-fe", "-ves"),
// Nouns ending with "-y" [Step 10]
new SuffixInflectionRule("-[aeiou]y", "-y", "-ys"),
// NOTE: [RHQ] Comment out the below rule, because it will pluralize capitalized words ending in "y"
// (e.g. ConnectionFactory) by appending "s" (e.g. ConnectionFactorys) rather than "ies". (ips, 04/24/08)
//new RegexReplacementRule("^([A-Z].*y)$", "$1s"),
new SuffixInflectionRule("-y", "-y", "-ies"),
// Nouns ending with "-o" [Step 11]
new CategoryInflectionRule(CATEGORY_O_OS_RULE, "-o", "-os"),
new SuffixInflectionRule("-[aeiou]o", "-o", "-os"),
new SuffixInflectionRule("-o", "-o", "-oes"),
// Default rule: add "s" [Step 13]
new SuffixInflectionRule("-", "-s"),
});
public CustomEnglishPluralizer() {
setRules(this.rules);
setLocale(Locale.ENGLISH);
}
@Override
protected String postProcess(String trimmedWord, String pluralizedWord) {
// NOTE: [RHQ] Don't call super.postprocess(), since it will pluralize acronyms (e.g. CPU) as all caps (e.g.
// CPUS). (ips, 04/24/08)
return pluralizedWord;
}
}