/* license-start * * Copyright (C) 2008 - 2013 Crispico, <http://www.crispico.com/>. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation version 3. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details, at <http://www.gnu.org/licenses/>. * * Contributors: * Crispico - Initial API and implementation * * license-end */ package org.flowerplatform.common.regex; /** * @author Sorin */ public class RegexUtil { /** * In regex there are special characters so actually to find a character X, that is considered special, \X must be used. * But in java to write to \ character in a string you must double it so to write regex in java to find X character "\\X" must be inside a string. * * NOTE : understand the difference between greedy and lazy search (e.g * and *?) */ public static final String ANY_CHAR = "."; public static final String WORD = "\\w"; // letter or digit public static final String WHITESPACE = "\\s"; // any character like space, tab , \r , \n // Regex Operators public static final String STOP_BEFORE = "?="; public static final String DONT_CAPTURE = "?:"; public static final String EXCLUDE = "^"; public static final String MULTIPLE_TIMES = "*"; // It will try to stop as late as the next regex part can be matched , e.g "/* abc */ def*/" stops at the last "*/" public static final String MULTIPLE_TIMES_END_AS_SOON_AS_POSSIBLE = "*?"; // It will try to stop as soon as the next regex part can be matched, e.g "/* abc */ def*/" stops at the first "*/" public static final String NOT_MORE_THAN_ONCE = "?"; // by default greedy, (if it can parse it then take it) public static final String AT_LEAST_ONCE_END_AS_SOON_AS_POSSIBLE = "+?"; // Regex formated characters and group of characters public static final String COMMA = ","; public static final String OPEN_BRACKET = "\\{"; public static final String CLOSE_BRACKET = "\\}"; public static final String OPEN_PARENTHESIS = "\\("; public static final String CLOSE_PARENTHESIS = "\\)"; public static final String OPEN_ANGLE_PARENTHESIS = "\\<"; public static final String CLOSE_ANGLE_PARENTHESIS = "\\>"; public static final String OPEN_SQUARE_PARENTHESIS = "\\["; public static final String CLOSE_SQUARE_PARENTHESIS = "\\]"; public static final String SLASH_R = "\\r"; public static final String SLASH_N = "\\n"; public static final String DOLLAR = "\\$"; public static final String EXCLAMATION = "\\!"; public static final String UNDERSCORE = "_"; public static final String SLASH = "/"; // represent the character used to open or close a comment / public static final String STAR = "\\*"; // represents the second character used to open or close a multiline comment * public static final String MINUS = "\\-"; public static final String IDENTIFIER_BEGGINING_CHAR = "[a-zA-Z" + UNDERSCORE + DOLLAR + "]"; // letter , _ , $ public static final String IDENTIFIER_AFTER_BEGGINING_CHAR = "[" + WORD + UNDERSCORE + DOLLAR + "]"; // letter, digit, _ , $ public static final String STOP_BEFORE_OPEN_BRACKET_CHAR = "(" + STOP_BEFORE + OPEN_BRACKET + ")"; // matcher will stop before open bracket public static final String IDENTIFIER = IDENTIFIER_BEGGINING_CHAR + IDENTIFIER_AFTER_BEGGINING_CHAR + MULTIPLE_TIMES; // longest sequence of identifier characters, with at least a character public static final String CAPTURE_IDENTIFIER = "(" + IDENTIFIER + ")"; public static final String CLASS_KEYWORD = "\\bclass\\b"; // word that starts and ends with class public static final String INTERFACE_KEYWORD = "\\binterface\\b"; // word that starts and ends with interface public static final String XML_CDATA_START = OPEN_ANGLE_PARENTHESIS + EXCLAMATION + OPEN_SQUARE_PARENTHESIS + "CDATA" + OPEN_SQUARE_PARENTHESIS; // <![CDATA[ public static final String XML_CDATA_END = CLOSE_SQUARE_PARENTHESIS + CLOSE_ANGLE_PARENTHESIS + CLOSE_ANGLE_PARENTHESIS; // ]]> // Regex comment utilities public static final String MULTI_LINE_COMMENT = // something like /* ... */ SLASH + STAR + // start with /* ANY_CHAR + MULTIPLE_TIMES_END_AS_SOON_AS_POSSIBLE + // it needs greedy to match as soon as the */ is found STAR + SLASH; // end with */ public static final String XML_MULTI_LINE_COMMENT = // something like <!-- ... --> OPEN_ANGLE_PARENTHESIS + EXCLAMATION + MINUS + MINUS + // start with <!-- ANY_CHAR + MULTIPLE_TIMES_END_AS_SOON_AS_POSSIBLE + // it needs greedy to match as soon as the */ is found MINUS + MINUS + CLOSE_ANGLE_PARENTHESIS; // end with --> public static final String SINGLE_LINE_COMMENT = // something like // .... \r\n SLASH + SLASH + // start with // "[" + EXCLUDE + SLASH_R + EXCLUDE + SLASH_N + "]" + MULTIPLE_TIMES + // any char except \r or \n SLASH_R + NOT_MORE_THAN_ONCE + SLASH_N + NOT_MORE_THAN_ONCE; // \r or \n or \r\n public static final String SPACE_OR_COMMENT = "(" + DONT_CAPTURE + WHITESPACE + "|" + MULTI_LINE_COMMENT + "|" + SINGLE_LINE_COMMENT + ")" + AT_LEAST_ONCE_END_AS_SOON_AS_POSSIBLE; public static final String SPACES_OR_COMMENTS_OPTIONAL = // tries to pass over all whitespaces or comments if there are any "(" + DONT_CAPTURE + WHITESPACE + "|" + MULTI_LINE_COMMENT + "|" + SINGLE_LINE_COMMENT + ")" + MULTIPLE_TIMES; }