/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.util; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * This class provides a repository for common regex patterns so * that we don't keep recompiling them over and over again. Some * convenience methods are provided to make the interface more * similar to the convenience functions on String. The convenience * methods on String are deprecated except for one-shot patterns * (which, by definition, are not in loops). * * @author wren ng thornton <wren@users.sourceforge.net> * @version $LastChangedDate: 2009-03-28 07:40:25 -0400 (Sat, 28 Mar 2009) $ */ public class Regex { // Alas, Pattern is final, thus no subclassing and this indirection private final Pattern pattern; //=============================================================== // Singletons -- add all common patterns here //=============================================================== /** * A pattern to match if the complete string is empty except * for whitespace and end-of-line comments beginning with * an octothorpe (<code>#</code>). */ public static final Regex commentOrEmptyLine = new Regex("^\\s*(?:\\#.*)?$"); // BUG: this should be replaced by a real regex for numbers. // Perhaps "^[\\+\\-]?\\d+(?:\\.\\d+)?$" is enough. // This is only used by JoshuaDecoder.writeConfigFile so far. /** * A pattern to match floating point numbers. (Current * implementation is overly permissive.) */ public static final Regex floatingNumber = new Regex("^[\\d\\.\\-\\+]+"); // Common patterns for splitting /** * A pattern for splitting on one or more whitespace. */ public static final Regex spaces = new Regex("\\s+"); /** * A pattern for splitting on one or more whitespace. */ public static final Regex tabs = new Regex("\\t+"); /** * A pattern for splitting on the equals character, with * optional whitespace on each side. */ public static final Regex equalsWithSpaces = new Regex("\\s*=\\s*"); /** * A pattern for splitting on three vertical pipes, with * one or more whitespace on each side. */ public static final Regex threeBarsWithSpace = new Regex("\\s\\|{3}\\s"); //=============================================================== // Constructor //=============================================================== public Regex(String regex) throws PatternSyntaxException { this.pattern = Pattern.compile(regex); } //=============================================================== // Convenience Methods //=============================================================== /** * Returns whether the input string matches this * <code>Regex</code>. */ public final boolean matches(String input) { return this.pattern.matcher(input).matches(); } /** * Split a character sequence, removing instances of this * <code>Regex</code>. */ public final String[] split(CharSequence input) { return this.pattern.split(input); } /** * Split a character sequence, removing instances of this * <code>Regex</code>, up to a limited number of segments. */ public final String[] split(CharSequence input, int limit) { return this.pattern.split(input, limit); } /** * Replace all substrings of the input which match this * <code>Regex</code> with the specified replacement string. */ public final String replaceAll(String input, String replacement) { return this.pattern.matcher(input).replaceAll(replacement); } /** * Replace the first substring of the input which matches * this <code>Regex</code> with the specified replacement * string. */ public final String replaceFirst(String input, String replacement) { return this.pattern.matcher(input).replaceFirst(replacement); } }