/* * Copyright (c) 2002-2012 Alibaba Group Holding Limited. * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.alibaba.citrus.util.regex; import static com.alibaba.citrus.util.Assert.*; import static com.alibaba.citrus.util.BasicConstant.*; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** * 这个类将一个包含通配符的类名, 编译成正则表达式. 格式描述如下: * <ul> * <li>合法的<em>类名字符</em>包括: 字母/数字/下划线/'$';</li> * <li>合法的<em>类名分隔符</em>为小数点".";</li> * <li>"*"代表0个或多个<em>类名字符</em>;</li> * <li>"?"代表1个<em>类名字符</em>;</li> * <li>"**"代表0个或多个<em>类名字符</em>或<em>类名分隔符</em>;</li> * <li>不能连续出现3个"*";</li> * <li>不能连续出现2个<em>类名分隔符</em>;</li> * <li>"**"的前后只能是<em>类名分隔符</em>.</li> * </ul> * <p> * 转换后的正则表达式, 对每一个通配符建立<em>引用变量</em>, 依次为<code>$1</code>, <code>$2</code>, ... * </p> * * @author Michael Zhou */ public class ClassNameWildcardCompiler { /** 强制从头匹配 */ public static final int MATCH_PREFIX = 0x1000; // 私有常量 private static final char ESCAPE_CHAR = '\\'; private static final char DOT = '.'; private static final char UNDERSCORE = '_'; private static final char DOLLAR = '$'; private static final char STAR = '*'; private static final char QUESTION = '?'; private static final String REGEX_MATCH_PREFIX = "^"; private static final String REGEX_WORD_BOUNDARY = "\\b"; private static final String REGEX_DOT = "\\."; private static final String REGEX_DOT_NO_DUP = "\\.(?!\\.)"; private static final String REGEX_CLASS_NAME_CHAR = "[\\w\\$]"; private static final String REGEX_CLASS_NAME_SINGLE_CHAR = "(" + REGEX_CLASS_NAME_CHAR + ")"; private static final String REGEX_CLASS_NAME = "(" + REGEX_CLASS_NAME_CHAR + "*)"; private static final String REGEX_CLASS_NAME_FULL = "(" + REGEX_CLASS_NAME_CHAR + "+(?:" + REGEX_DOT_NO_DUP + REGEX_CLASS_NAME_CHAR + "*)*(?=" + REGEX_DOT + "|$)|)" + REGEX_DOT + "?"; private static final String REGEX_END_OF_NAME = "(?=" + REGEX_DOT + "|$)"; // 上一个token的状态 private static final int LAST_TOKEN_START = 0; private static final int LAST_TOKEN_DOT = 1; private static final int LAST_TOKEN_CLASS_NAME = 2; private static final int LAST_TOKEN_STAR = 3; private static final int LAST_TOKEN_DOUBLE_STAR = 4; private static final int LAST_TOKEN_QUESTION = 5; private ClassNameWildcardCompiler() { } /** 将包含通配符的类名, 编译成正则表达式. */ public static Pattern compileClassName(String pattern) throws PatternSyntaxException { return compileClassName(pattern, 0); } /** 将包含通配符的类名, 编译成正则表达式. */ public static Pattern compileClassName(String pattern, int options) throws PatternSyntaxException { return Pattern.compile(classNameToRegex(pattern, options), options); } /** * 取得相关度数值。 * <p> * 所谓相关度数值,即除去分隔符和通配符以后,剩下的字符长度。 * 相关度数值可用来对匹配结果排序。例如:a.b.c既匹配a又匹配*,但显然前者为更“相关”的匹配。 * </p> */ public static int getClassNameRelevancy(String pattern) { pattern = normalizeClassName(pattern); if (pattern == null) { return 0; } int relevant = 0; for (int i = 0; i < pattern.length(); i++) { switch (pattern.charAt(i)) { case DOT: case STAR: case QUESTION: continue; default: relevant++; } } return relevant; } /** 将包含通配符的类名, 编译成正则表达式. */ public static String classNameToRegex(String pattern, int options) throws PatternSyntaxException { pattern = assertNotNull(normalizeClassName(pattern), "pattern"); int lastToken = LAST_TOKEN_START; StringBuilder buf = new StringBuilder(pattern.length() * 2); boolean matchPrefix = (options & MATCH_PREFIX) != 0; if (matchPrefix) { buf.append(REGEX_MATCH_PREFIX); } for (int i = 0; i < pattern.length(); i++) { char ch = pattern.charAt(i); switch (ch) { case DOT: // dot后面不能是dot, dot不能作为字符串的开始 if (lastToken == LAST_TOKEN_DOT || lastToken == LAST_TOKEN_START) { throw new PatternSyntaxException("Syntax Error", pattern, i); } // 因为**已经包括了dot, 所以不需要额外地匹配dot if (lastToken != LAST_TOKEN_DOUBLE_STAR) { buf.append(REGEX_DOT_NO_DUP); } lastToken = LAST_TOKEN_DOT; break; case STAR: int j = i + 1; if (j < pattern.length() && pattern.charAt(j) == STAR) { i = j; // **前面只能是dot if (lastToken != LAST_TOKEN_START && lastToken != LAST_TOKEN_DOT) { throw new PatternSyntaxException("Syntax Error", pattern, i); } lastToken = LAST_TOKEN_DOUBLE_STAR; buf.append(REGEX_CLASS_NAME_FULL); } else { // *前面不能是*或** if (lastToken == LAST_TOKEN_STAR || lastToken == LAST_TOKEN_DOUBLE_STAR) { throw new PatternSyntaxException("Syntax Error", pattern, i); } lastToken = LAST_TOKEN_STAR; buf.append(REGEX_CLASS_NAME); } break; case QUESTION: if (lastToken == LAST_TOKEN_START) { buf.append(REGEX_WORD_BOUNDARY).append(REGEX_CLASS_NAME_SINGLE_CHAR); // 前边界 } else if (i + 1 == pattern.length()) { buf.append(REGEX_CLASS_NAME_SINGLE_CHAR).append(REGEX_END_OF_NAME); // 后边界 } else { buf.append(REGEX_CLASS_NAME_SINGLE_CHAR); } lastToken = LAST_TOKEN_QUESTION; break; default: // **后只能是dot if (lastToken == LAST_TOKEN_DOUBLE_STAR) { throw new PatternSyntaxException("Syntax Error", pattern, i); } if (Character.isLetterOrDigit(ch) || ch == UNDERSCORE) { // 加上word边界, 进行整字匹配 if (lastToken == LAST_TOKEN_START) { buf.append(REGEX_WORD_BOUNDARY).append(ch); // 前边界 } else if (i + 1 == pattern.length()) { buf.append(ch).append(REGEX_WORD_BOUNDARY); // 后边界 } else { buf.append(ch); } } else if (ch == DOLLAR) { buf.append(ESCAPE_CHAR).append(DOLLAR); } else { throw new PatternSyntaxException("Syntax Error", pattern, i); } lastToken = LAST_TOKEN_CLASS_NAME; } } return buf.toString(); } /** * 规格化类名。 * <ul> * <li>除去两端空白</li> * <li>将"/"和"\\"转换成"."</li> * <li>将重复的"."转换成单个的"."</li> * <li>除去首尾的"."</li> * </ul> */ public static String normalizeClassName(String name) { if (name == null) { return null; } name = name.trim(); name = name.replaceAll("[/\\\\\\.]+", "."); name = name.replaceAll("^\\.|\\.$", EMPTY_STRING); return name; } /** * 将类名转化成路径名。 * <ul> * <li>规格化类名</li> * <li>将"."转换成"/"</li> * </ul> */ public static String classNameToPathName(String name) { name = normalizeClassName(name); if (name == null) { return null; } name = name.replace('.', '/'); return name; } }