package client.net.sf.saxon.ce.regex;

import client.net.sf.saxon.ce.om.SequenceIterator;
import client.net.sf.saxon.ce.trans.XPathException;

import java.util.List;

/**
 * Glue class to interface the Jakarta regex engine to Saxon.
 *
 * <p>An instance holds the pattern in {@link UnicodeString} form, the flags string it was
 * built with, and the compiled {@link REProgram}. Every operation creates its own
 * {@link REMatcher} over that compiled program.</p>
 */
public class ARegularExpression implements RegularExpression {

    /** The pattern as supplied to the constructor, converted to a UnicodeString. */
    UnicodeString rawPattern;

    /** The flags string exactly as supplied to the constructor. */
    String rawFlags;

    /** The compiled program produced by RECompiler for {@link #rawPattern}. */
    REProgram regex;

    /**
     * Compile a regular expression.
     *
     * @param pattern      the regular expression to be compiled
     * @param flags        the flags string; handed to REFlags together with hostLanguage
     * @param hostLanguage handed to REFlags together with the flags
     * @param warnings     if not null, every warning reported by the compiler is appended
     *                     to this list
     * @throws XPathException with code FORX0001 if constructing the REFlags raises a
     *                        RESyntaxException, or with code FORX0002 if a RESyntaxException
     *                        is raised while converting or compiling the pattern
     */
    public ARegularExpression(CharSequence pattern, String flags, String hostLanguage, List<String> warnings)
            throws XPathException {
        rawFlags = flags;
        final REFlags parsedFlags = parseFlags(flags, hostLanguage);
        try {
            rawPattern = GeneralUnicodeString.makeUnicodeString(pattern);
            RECompiler compiler = new RECompiler();
            compiler.setFlags(parsedFlags);
            regex = compiler.compile(rawPattern);
            if (warnings != null) {
                for (String warning : compiler.getWarnings()) {
                    warnings.add(warning);
                }
            }
        } catch (RESyntaxException e) {
            throw new XPathException(e.getMessage(), "FORX0002");
        }
    }

    /**
     * Build the REFlags object, reporting a syntax problem as an XPathException
     * with error code FORX0001.
     */
    private static REFlags parseFlags(String flags, String hostLanguage) throws XPathException {
        try {
            return new REFlags(flags, hostLanguage);
        } catch (RESyntaxException e) {
            throw new XPathException(e.getMessage(), "FORX0001");
        }
    }

    /** Create a fresh matcher over the compiled program. */
    private REMatcher newMatcher() {
        return new REMatcher(regex);
    }

    /** Convert a character sequence to the UnicodeString form used by the matcher. */
    private static UnicodeString toUnicode(CharSequence chars) {
        return GeneralUnicodeString.makeUnicodeString(chars);
    }

    /**
     * Determine whether the regular expression matches a given string in its entirety.
     *
     * <p>For a zero-length input no matcher is run; the result is whatever the compiled
     * program reports from {@code isNullable()}.</p>
     *
     * @param input the string to match
     * @return true if the string matches, false otherwise
     */
    public boolean matches(CharSequence input) {
        final boolean emptyInput = input.length() == 0;
        if (emptyInput) {
            return regex.isNullable();
        }
        return newMatcher().anchoredMatch(toUnicode(input));
    }

    /**
     * Determine whether the regular expression contains a match of a given string.
     * The search is started at position 0 of the input.
     *
     * @param input the string to match
     * @return true if the string matches, false otherwise
     */
    public boolean containsMatch(CharSequence input) {
        return newMatcher().match(toUnicode(input), 0);
    }

    /**
     * Use this regular expression to tokenize an input string.
     *
     * @param input the string to be tokenized
     * @return an ATokenIterator over the input, driven by a new matcher for this expression
     *         (the tokens are described elsewhere as StringValue objects; that is not
     *         visible from this class)
     */
    public SequenceIterator tokenize(CharSequence input) {
        final UnicodeString subject = toUnicode(input);
        final REMatcher matcher = newMatcher();
        return new ATokenIterator(subject, matcher);
    }

    /**
     * Use this regular expression to analyze an input string, in support of the XSLT
     * analyze-string instruction. The resulting RegexIterator provides both the matching and
     * non-matching substrings, and allows them to be distinguished. It also provides access
     * to matched subgroups.
     *
     * @param input the character string to be analyzed using the regular expression
     * @return an iterator over matched and unmatched substrings
     */
    public RegexIterator analyze(CharSequence input) {
        final UnicodeString subject = toUnicode(input);
        final REMatcher matcher = newMatcher();
        return new ARegexIterator(subject, rawPattern, matcher);
    }

    /**
     * Replace all substrings of a supplied input string that match the regular expression
     * with a replacement string.
     *
     * @param input       the input string on which replacements are to be performed
     * @param replacement the replacement string in the format of the XPath replace() function
     * @return the result of performing the replacement
     * @throws XPathException with code FORX0003 if the regular expression matches the empty
     *                        string, or with code FORX0004 if the substitution raises a
     *                        RESyntaxException (for example for an invalid replacement string)
     */
    public CharSequence replace(CharSequence input, CharSequence replacement) throws XPathException {
        // The same matcher instance is used for the empty-string probe and the substitution.
        final REMatcher matcher = newMatcher();
        final boolean matchesEmpty = matcher.match("");
        if (matchesEmpty) {
            throw new XPathException("The regular expression must not be one that matches a zero-length string", "FORX0003");
        }
        // Both conversions happen outside the try block, so only subst() failures map to FORX0004.
        final UnicodeString subject = toUnicode(input);
        final UnicodeString substitute = toUnicode(replacement);
        try {
            return matcher.subst(subject, substitute);
        } catch (RESyntaxException e) {
            throw new XPathException(e.getMessage(), "FORX0004");
        }
    }
}

// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.