package edu.northwestern.at.utils.corpuslinguistics.inflector.wordrule;

/*  Please see the license information in the header below. */

import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** An abstract word rule specified using a regular expression and a replacement.
 *
 *  <p>
 *  The regular expression is compiled once, at construction time.
 *  Subclasses must implement {@link #replace} to perform the actual
 *  replacement.
 *  </p>
 *
 *  <p>
 *  Original code written by Tom White under the Apache v2 license.
 *  Modified by Philip R. Burns for integration into MorphAdorner.
 *  </p>
 */

public abstract class AbstractRegexReplacementRule implements WordRule
{
    /** The compiled regular expression used to match words. */

    protected final Pattern pattern;

    /** Construct a rule using the given regular expression.
     *
     *  @param  regex   The regular expression used to match words.
     *                  Match information is available to subclasses in the
     *                  {@link #replace} method.
     */

    public AbstractRegexReplacementRule( String regex )
    {
        this.pattern = Pattern.compile( regex );
    }

    /** Tests to see if this rule applies for the given word.
     *
     *  <p>
     *  The entire word must match the regular expression; a match on
     *  only part of the word is not sufficient.
     *  </p>
     *
     *  @param  word    The word that is being tested.
     *
     *  @return         <code>true</code> if this rule should be applied,
     *                  <code>false</code> otherwise.
     */

    public boolean applies( String word )
    {
        return pattern.matcher( word ).matches();
    }

    /** Applies this rule to the word, transforming it into a new form.
     *
     *  @param  word    The word to which to apply this rule.
     *
     *  @return         The transformed word, as produced by
     *                  {@link #replace}.
     *
     *  @throws IllegalArgumentException
     *                  when the word does not match the pattern.
     */

    public String apply( String word )
    {
        Matcher matcher = pattern.matcher( word );

        if ( !matcher.matches() )
        {
            throw new IllegalArgumentException(
                "Word '" + word + "' does not match regex: " +
                pattern.pattern() );
        }

        return replace( matcher );
    }

    /** Form the disjunction of the given regular expression patterns.
     *
     *  <p>
     *  The alternatives are joined with "|" in array order and wrapped
     *  in a non-capturing group.  For example, if "patterns" contains
     *  "a" and "b", then the disjunction is "(?:a|b)", that is,
     *  "a or b".  An empty array yields "(?:)".
     *  </p>
     *
     *  @param  patterns    An array of regular expression patterns.
     *
     *  @return             A pattern that matches if any of the input
     *                      patterns match.
     */

    public static String disjunction( String[] patterns )
    {
        //  Accumulate in a StringBuilder: repeated String concatenation
        //  in a loop copies the partial result on every iteration.

        StringBuilder regex = new StringBuilder( "(?:" );

        for ( int i = 0; i < patterns.length; i++ )
        {
            if ( i > 0 )
            {
                regex.append( '|' );
            }

            regex.append( patterns[ i ] );
        }

        return regex.append( ')' ).toString();
    }

    /** Form the disjunction of the given regular expression patterns.
     *
     *  <p>
     *  The alternatives are joined with "|" in the set's iteration
     *  order and wrapped in a non-capturing group.  For example, if
     *  "patterns" contains "a" and "b", then the disjunction is
     *  "(?:a|b)" or "(?:b|a)", that is, "a or b".
     *  </p>
     *
     *  @param  patterns    A set of regular expression patterns.
     *
     *  @return             A pattern that matches if any of the input
     *                      patterns match.
     */

    public static String disjunction( Set<String> patterns )
    {
        return disjunction(
            patterns.toArray( new String[ patterns.size() ] ) );
    }

    /** Use the state in the given {@link Matcher} to perform a replacement.
     *
     *  <p>
     *  When called from {@link #apply}, the matcher has already
     *  matched the entire word successfully.
     *  </p>
     *
     *  @param  matcher     The matcher used to match the word.
     *
     *  @return             The transformed word.
     */

    public abstract String replace( Matcher matcher );
}