/*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version. You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
package org.aitools.programd.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.aitools.util.runtime.Errors;
/**
 * Provides utility methods for pattern-oriented tasks: validating AIML patterns, translating them into regular
 * expressions, normalizing input, and testing whether an input is matched by a pattern.
 *
 * @author <a href="mailto:noel@aitools.org">Noel Bush</a>
 */
public class PatternArbiter {

  /**
   * The regular expression that defines AIML pattern syntax: one or more words separated by single spaces, where each
   * word is a wildcard (<code>*</code> or <code>_</code>) or a run of upper-case letters and digits.
   */
  private static final Pattern AIML_PATTERN = Pattern
      .compile("(\\*|_|[\\p{javaUpperCase}\\p{javaDigit}]+)( (\\*|_|[\\p{javaUpperCase}\\p{javaDigit}]+))*");

  /**
   * The generic normalization regex: matches any run of characters that are not upper-case letters, lower-case
   * letters, whitespace or digits.
   */
  private static final Pattern NON_ALPHANUMERIC = Pattern
      .compile("[^\\p{javaUpperCase}\\p{javaLowerCase}\\p{javaWhitespace}\\p{javaDigit} ]+");

  /** A pattern that matches one or more consecutive space characters. */
  private static final Pattern MLC_SPACES = Pattern.compile(" +");

  /** Matches an AIML wildcard (<code>*</code> or <code>_</code>); compiled once instead of on every call. */
  private static final Pattern WILDCARD = Pattern.compile("(\\*|_)");

  /** The regex fragment substituted for each AIML wildcard: one or more space-separated words. */
  private static final String WILDCARD_EXPANSION = "[^ ]+( [^ ]+)*";

  /**
   * Translates the given AIML pattern to a regular expression and compiles it into a Pattern object. Useful if you need
   * to do a ton of tests with a pattern.
   *
   * @param pattern the pattern to compile
   * @param ignoreCase whether to ignore case in matching
   * @return the compiled pattern (translated to regex)
   *
   * @throws NotAnAIMLPatternException if the pattern is not a valid AIML pattern (validity is checked with
   *         {@link #isValidAIMLPattern(String)} and does not depend on <code>ignoreCase</code>)
   */
  public static Pattern compile(String pattern, boolean ignoreCase) throws NotAnAIMLPatternException {
    // Check the pattern for validity. If it is invalid, throw an exception with a helpful message.
    if (!isValidAIMLPattern(pattern)) {
      throw new NotAnAIMLPatternException(String.format("\"%s\" does not match the definition of AIML pattern.",
          pattern), pattern);
    }
    // A valid AIML pattern contains only upper-case letters, digits, spaces and wildcards, so everything except the
    // wildcards can be used as a regex verbatim.
    int flags = ignoreCase ? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE : 0;
    return Pattern.compile(WILDCARD.matcher(pattern).replaceAll(WILDCARD_EXPANSION), flags);
  }

  /**
   * Applies a generic set of normalizations to an input, to prepare it for pattern matching. Every run of characters
   * that are not upper-case letters, lower-case letters, digits or whitespace is replaced by a single space; runs of
   * spaces are then collapsed to one space and the result is trimmed. Whitespace characters other than the space
   * character are left in place (apart from what trimming removes at the ends).
   *
   * @param string the input to normalize
   * @return the normalized input
   */
  public static String genericallyNormalize(String string) {
    String stripped = NON_ALPHANUMERIC.matcher(string).replaceAll(" ");
    return MLC_SPACES.matcher(stripped).replaceAll(" ").trim();
  }

  /**
   * Determines whether a given string is a valid AIML pattern.
   *
   * @param pattern the string to check
   * @return whether the string is a valid AIML pattern
   */
  public static boolean isValidAIMLPattern(String pattern) {
    return AIML_PATTERN.matcher(pattern).matches();
  }

  /**
   * For testing. Reads lines from standard input until end of input or a line reading "exit" (in any case). Lines
   * that start with <code>;</code> and blank lines are echoed unchanged; every other line must have the form
   * <code>literal|PATTERN|(y/n)|(t/f)</code> and is run as one test whose outcome is printed on its own line. A
   * summary is printed when the end of input is reached.
   *
   * @param args not used
   */
  @SuppressWarnings("boxing")
  public static void main(String[] args) {
    BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
    int failures = 0;
    int successes = 0;

    while (true) {
      String theLine;
      try {
        theLine = console.readLine();
      }
      catch (IOException e) {
        System.out.println("Cannot read from console!");
        return;
      }
      if (theLine == null) {
        break;
      }
      // equalsIgnoreCase is locale-independent, unlike toLowerCase().equals(...).
      if (theLine.equalsIgnoreCase("exit")) {
        System.out.println("Exiting.");
        return;
      }
      // Comment lines and blank lines are echoed, not tested.
      if (theLine.startsWith(";") || theLine.trim().length() == 0) {
        System.out.println(theLine);
        continue;
      }

      StringTokenizer tokenizer = new StringTokenizer(theLine, "|");
      String literal;
      String pattern;
      boolean ignoreCase;
      boolean prediction;
      try {
        literal = tokenizer.nextToken();
        pattern = tokenizer.nextToken();
        ignoreCase = tokenizer.nextToken().equals("y");
        prediction = tokenizer.nextToken().equals("t");
      }
      catch (NoSuchElementException e) {
        System.out.println("Improperly formatted input. Use: literal|PATTERN|(y/n)|(t/f)");
        continue;
      }

      // nanoTime is monotonic, so the elapsed time cannot be skewed by wall-clock adjustments.
      long start = System.nanoTime();
      boolean matched;
      try {
        matched = matches(literal, pattern, ignoreCase);
      }
      catch (NotAnAIMLPatternException e) {
        System.out.println(String.format("Exception: \"%s\"", Errors.describe(e)));
        // An invalid pattern counts as "no match" when compared against the prediction.
        matched = false;
      }
      long time = (System.nanoTime() - start) / 1000000L;

      StringBuilder report = new StringBuilder("[TEST ");
      if (matched == prediction) {
        successes++;
        report.append("PASSED] ");
      }
      else {
        failures++;
        report.append("FAILED] ");
      }
      if (!matched) {
        report.append("no ");
      }
      report.append(String.format("match: %s | %s%s (%d ms)", literal, pattern, ignoreCase ? " (ignoreCase)" : "",
          time));
      // Terminate the line so that successive results do not run together.
      System.out.println(report);
    }

    System.out
        .println(String.format("%d tests: %d successes, %d failures.", successes + failures, successes, failures));
  }

  /**
   * Decides whether a given pattern matches a given literal, in an isolated context, according to the AIML
   * pattern-matching specification.
   *
   * Indicates whether the given literal is matched by the given pattern. Note that the mechanism here is very simple:
   * the AIML pattern is converted into an equivalent regular expression, and a match test is performed. This appears to
   * be much more reliable than an old method that "manually" checked the match.
   *
   * This method applies {@link #genericallyNormalize(String)} to the literal before matching, which removes all
   * punctuation from the input.
   *
   * @param literal the literal string to check
   * @param pattern the pattern to try to match against it
   * @param ignoreCase whether or not to ignore case
   * @return <code>true</code> if <code>pattern</code> matches <code>literal</code>, <code>false</code> if not
   * @throws NotAnAIMLPatternException if the pattern is not a valid AIML pattern (validity does not depend on
   *         <code>ignoreCase</code>)
   */
  public static boolean matches(String literal, String pattern, boolean ignoreCase) throws NotAnAIMLPatternException {
    Pattern regex = compile(pattern, ignoreCase);
    return regex.matcher(genericallyNormalize(literal)).matches();
  }
}