package edu.northwestern.at.utils.corpuslinguistics.inflector.pluralizer;
/* Please see the license information in the header below. */
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.northwestern.at.utils.*;
import edu.northwestern.at.utils.corpuslinguistics.inflector.wordrule.*;
/** A {@link Pluralizer} implemented using an ordered list of {@link edu.northwestern.at.utils.corpuslinguistics.inflector.wordrule.WordRule}s.
 *
 *  <p>
 *  The rules are tried in list order and the first rule that applies
 *  produces the plural form.
 *  </p>
 *
 *  <p>
 *  You may specify a fallback {@link Pluralizer} that is invoked when
 *  none of the rules match.  This allows you to override some rules of
 *  another {@link Pluralizer}.
 *  </p>
 *
 *  <p>
 *  This class preserves leading and trailing whitespace, so individual
 *  rules don't need to explicitly handle whitespace.
 *  </p>
 *
 *  <p>
 *  Case is also preserved -- that is, the output of all uppercase input
 *  is automatically uppercased, and the output of titlecase input is
 *  automatically titlecased.  This means rules can act in a
 *  case-insensitive manner.
 *  </p>
 *
 *  <p>
 *  Original code written by Tom White under the Apache v2 license.
 *  Modified by Philip R. Burns for integration into MorphAdorner.
 *  </p>
 */
public class RuleBasedPluralizer
    implements Pluralizer
{
    /** Fallback used when the caller does not supply one. */
    protected static final Pluralizer NOOP_PLURALIZER = new NoopPluralizer();

    /** Splits input into leading whitespace, the word, and trailing
     *  whitespace.  Compiled once instead of on every call.
     */
    private static final Pattern WORD_WITH_WHITESPACE_PATTERN =
        Pattern.compile( "\\A(\\s*)(.+?)(\\s*)\\Z" );

    /** Matches a word made up solely of uppercase letters. */
    private static final Pattern ALL_UPPERCASE_PATTERN =
        Pattern.compile( "^\\p{Lu}+$" );

    /** Matches a word whose first character is an uppercase letter. */
    private static final Pattern LEADING_UPPERCASE_PATTERN =
        Pattern.compile( "^\\p{Lu}.*" );

    /** The rules to try, in order. */
    protected List<WordRule> rules;

    /** Locale used when uppercasing the pluralized output. */
    protected Locale locale;

    /** Pluralizer consulted when no rule produces a result. */
    protected Pluralizer fallbackPluralizer;

    /** Construct a pluralizer with an empty list of rules.
     *
     *  <p>
     *  The default locale is used, and the noop pluralizer is installed
     *  as the fall back pluralizer so that {@link #pluralize(String,int)}
     *  does not fail with a <code>NullPointerException</code> when no
     *  rule matches.
     *  </p>
     */
    public RuleBasedPluralizer()
    {
        this.rules              = ListFactory.createNewList();
        this.locale             = Locale.getDefault();
        this.fallbackPluralizer = NOOP_PLURALIZER;
    }

    /** Construct a pluralizer with a list of rules.
     *
     *  <p>
     *  A noop pluralizer is used as the fall back pluralizer when none of
     *  the specified rules applies.
     *  </p>
     *
     *  @param  rules   The rules to apply, in order.
     *  @param  locale  The locale specifying the language of the pluralizer.
     */
    public RuleBasedPluralizer( List<WordRule> rules , Locale locale )
    {
        this( rules , locale , NOOP_PLURALIZER );
    }

    /** Construct a pluralizer with a list of rules and a backup pluralizer.
     *
     *  <p>
     *  The fall back pluralizer is invoked when none of the specified rules
     *  applies.
     *  </p>
     *
     *  @param  rules               The rules to apply, in order.
     *  @param  locale              The locale specifying the language of
     *                              the pluralizer.
     *  @param  fallbackPluralizer  The pluralizer to use when no rules match.
     */
    public RuleBasedPluralizer
    (
        List<WordRule> rules ,
        Locale locale ,
        Pluralizer fallbackPluralizer
    )
    {
        this.rules              = rules;
        this.locale             = locale;
        this.fallbackPluralizer = fallbackPluralizer;
    }

    /** Get fall back pluralizer.
     *
     *  @return     The fall back pluralizer.
     */
    public Pluralizer getFallbackPluralizer()
    {
        return fallbackPluralizer;
    }

    /** Set the fall back pluralizer.
     *
     *  @param  fallbackPluralizer  The fall back pluralizer.  When
     *                              <code>null</code>, unmatched input is
     *                              returned unchanged.
     */
    public void setFallbackPluralizer( Pluralizer fallbackPluralizer )
    {
        this.fallbackPluralizer = fallbackPluralizer;
    }

    /** Get the locale.
     *
     *  @return     The pluralizer locale.
     */
    public Locale getLocale()
    {
        return locale;
    }

    /** Set the pluralizer locale.
     *
     *  @param  locale  The pluralizer locale.
     */
    public void setLocale( Locale locale )
    {
        this.locale = locale;
    }

    /** Get the pluralizer rules.
     *
     *  @return     The pluralizer rules.  This is the list held by the
     *              pluralizer, not a copy.
     */
    public List<WordRule> getRules()
    {
        return rules;
    }

    /** Set the pluralizer rules.
     *
     *  @param  rules   The pluralizer rules.
     */
    public void setRules( List<WordRule> rules )
    {
        this.rules = rules;
    }

    /** Pluralize a noun or pronoun.
     *
     *  @param  nounOrPronoun   The singular form of the noun or pronoun.
     *
     *  @return                 The plural form of the noun or pronoun.
     */
    public String pluralize( String nounOrPronoun )
    {
        // Any number other than 1 requests the plural form.
        return pluralize( nounOrPronoun , 2 );
    }

    /** Pluralize a noun or pronoun.
     *
     *  <p>
     *  Leading and trailing whitespace is set aside before the rules are
     *  applied and restored afterwards.  When no rule matches, the
     *  original (untrimmed) input is handed to the fall back pluralizer.
     *  </p>
     *
     *  @param  nounOrPronoun   The singular form of the noun or pronoun.
     *  @param  number          The number for the noun or pronoun.
     *
     *  @return                 The form of the noun or pronoun for the
     *                          specified number.  The input is returned
     *                          unchanged when <code>number</code> is 1,
     *                          when the input is <code>null</code> or
     *                          cannot be split into whitespace and word,
     *                          or when no rule matches and there is no
     *                          fall back pluralizer.
     */
    public String pluralize( String nounOrPronoun , int number )
    {
        if ( ( number == 1 ) || ( nounOrPronoun == null ) )
        {
            return nounOrPronoun;
        }

        Matcher matcher =
            WORD_WITH_WHITESPACE_PATTERN.matcher( nounOrPronoun );

        if ( !matcher.matches() )
        {
            return nounOrPronoun;
        }

        String pre          = matcher.group( 1 );
        String trimmedWord  = matcher.group( 2 );
        String post         = matcher.group( 3 );

        String plural       = pluralizeInternal( trimmedWord );

        if ( plural == null )
        {
            // No rule matched:  defer to the fall back pluralizer, or
            // leave the input alone when none is configured.
            Pluralizer fallback = fallbackPluralizer;

            return
                ( fallback == null ) ?
                    nounOrPronoun :
                    fallback.pluralize( nounOrPronoun , number );
        }

        return pre + postProcess( trimmedWord , plural ) + post;
    }

    /** Apply list of rules to a noun or pronoun.
     *
     *  @param  nounOrPronoun   Singular noun or pronoun.
     *
     *  @return                 Plural form of the noun or pronoun as
     *                          produced by the first rule that applies,
     *                          or <code>null</code> when no rule matches
     *                          or no rules are set.
     */
    protected String pluralizeInternal( String nounOrPronoun )
    {
        if ( rules == null )
        {
            return null;
        }

        for ( WordRule rule : rules )
        {
            if ( rule.applies( nounOrPronoun ) )
            {
                return rule.apply( nounOrPronoun );
            }
        }

        return null;
    }

    /** Fix case of pluralized word.
     *
     *  <p>
     *  If <code>trimmedWord</code> is all uppercase, then
     *  <code>pluralizedWord</code> is uppercased.
     *  If <code>trimmedWord</code> starts with an uppercase letter, then
     *  the first character of <code>pluralizedWord</code> is uppercased.
     *  Otherwise <code>pluralizedWord</code> is returned as is.
     *  </p>
     *
     *  @param  trimmedWord     The input word, with leading and trailing
     *                          whitespace removed.
     *  @param  pluralizedWord  The pluralized word.
     *
     *  @return                 The <code>pluralizedWord</code> after
     *                          processing.  A <code>null</code> or empty
     *                          <code>pluralizedWord</code> is returned
     *                          unchanged.
     */
    protected String postProcess
    (
        String trimmedWord ,
        String pluralizedWord
    )
    {
        // Nothing to recase; this also avoids substring( 0 , 1 ) on an
        // empty result.
        if  (   ( trimmedWord == null ) ||
                ( pluralizedWord == null ) ||
                ( pluralizedWord.length() == 0 )
            )
        {
            return pluralizedWord;
        }

        if ( ALL_UPPERCASE_PATTERN.matcher( trimmedWord ).matches() )
        {
            return pluralizedWord.toUpperCase( locale );
        }

        if ( LEADING_UPPERCASE_PATTERN.matcher( trimmedWord ).matches() )
        {
            // Uppercase the whole first code point so that a surrogate
            // pair is not split in half.
            int firstLength =
                Character.charCount( pluralizedWord.codePointAt( 0 ) );

            return
                pluralizedWord.substring( 0 , firstLength ).toUpperCase( locale ) +
                pluralizedWord.substring( firstLength );
        }

        return pluralizedWord;
    }
}