package project.scangen.regex;

import java.util.HashSet;
import java.util.Set;

/**
 * Methodology for regex expansion into (, ), * and |'s.
 *
 * <p>Rewrites the convenience syntax of a regular expression -- character
 * classes ({@code [a-c_]}), negated classes ({@code [^a] IN [a-z]}) and the
 * one-or-more operator ({@code x+}) -- into an equivalent expression that only
 * uses grouping, alternation and Kleene star.
 *
 * <p>The class keeps no state between calls.
 *
 * @author Chad Stewart
 */
public class RegexExpander {

    /** Characters that are written with a leading backslash when emitted as literals. */
    private static final String SPECIAL_CHARS = " \\*+|[]().\"'";

    /**
     * Expands character classes, negated classes and {@code +} in {@code s}.
     *
     * <ul>
     * <li>Unescaped spaces are removed; {@code \x} pairs are copied through untouched.</li>
     * <li>{@code [..]} becomes {@code ((literals)|(range)|(range)...)}: one group
     * holding the individual members (if any) followed by one group per range.</li>
     * <li>{@code [^..] IN [..]} becomes one group holding every member of the
     * second class that is not a member of the first. The order of the
     * alternatives follows {@link HashSet} iteration order.</li>
     * <li>{@code x+} becomes {@code x(x*)}, {@code \x+} becomes {@code \x(\x*)} and
     * {@code (g)+} becomes {@code (g)(g)*}. A {@code +} inside a character class
     * is an ordinary member.</li>
     * </ul>
     *
     * @param s the regular expression to expand
     * @return the expanded expression
     * @throws StringIndexOutOfBoundsException if the expression is malformed: a
     *         {@code +} with nothing before it, an unmatched {@code ]}, a negated
     *         class that is not followed by a second class, or unbalanced
     *         parentheses before a {@code +}
     * @throws IllegalArgumentException if a range has its bounds reversed
     */
    public static String expandRegex(String s) {
        int pos = 0;                 // scan position; local so concurrent calls cannot interfere
        int classOpen = -1;          // index of the pending unescaped '[', or -1 outside a class
        boolean afterEscape = false; // true when the two characters before pos are an escape pair

        while (pos < s.length()) {
            char c = s.charAt(pos);
            if (c == ' ') {
                // Escaped spaces are skipped by the branch below, so any space
                // reached here is insignificant. The next character slides into
                // pos, which is why neither pos nor afterEscape changes.
                s = s.substring(0, pos) + s.substring(pos + 1);
            } else if (c == '\\') {
                pos += 2;
                afterEscape = true;
            } else if (c == '[') {
                classOpen = pos; // the innermost (last) '[' wins
                pos++;
                afterEscape = false;
            } else if (c == ']') {
                if (classOpen < 0) {
                    throw new StringIndexOutOfBoundsException("Unmatched ']' at index " + pos);
                }
                String body = s.substring(classOpen + 1, pos);
                int end = pos; // last index of the text being replaced
                String replacement;
                if (body.startsWith("^")) {
                    int setOpen = indexOfUnescaped(s, '[', pos + 1);
                    int setClose = setOpen < 0 ? -1 : indexOfUnescaped(s, ']', setOpen + 1);
                    if (setClose < 0) {
                        throw new StringIndexOutOfBoundsException(
                                "Negated class at index " + classOpen + " is not followed by a [set]");
                    }
                    replacement = negate(body.substring(1), s.substring(setOpen + 1, setClose));
                    end = setClose;
                } else {
                    replacement = "(" + expandClass(body) + ")";
                }
                // Splice in place so text before and after the class survives,
                // and resume right after the replacement: it is fully expanded
                // and escaped, so it must never be scanned again.
                s = s.substring(0, classOpen) + replacement + s.substring(end + 1);
                pos = classOpen + replacement.length();
                classOpen = -1;
                afterEscape = false;
            } else if (c == '+' && classOpen < 0) {
                if (pos == 0) {
                    throw new StringIndexOutOfBoundsException("'+' at index 0 has nothing to repeat");
                }
                String replacement;
                if (afterEscape) {
                    // The operand is the whole escape pair, e.g. \. or \)
                    replacement = "(" + s.substring(pos - 2, pos) + "*)";
                } else if (s.charAt(pos - 1) == ')') {
                    replacement = findSub(s.substring(0, pos)) + "*";
                } else {
                    replacement = "(" + s.charAt(pos - 1) + "*)";
                }
                s = s.substring(0, pos) + replacement + s.substring(pos + 1);
                pos += replacement.length();
                afterEscape = false;
            } else {
                pos++;
                afterEscape = false;
            }
        }
        return s;
    }

    /**
     * Collapses a run of directly nested parentheses at the first {@code (} of
     * {@code s} into a single pair, e.g. {@code ((a))} becomes {@code (a)}.
     *
     * <p>The string is returned unchanged when it has no {@code (}, when there
     * is no {@code )} after the opening run, or when the closing parentheses
     * are not one uninterrupted run matching the opening run.
     *
     * @param s the expression to simplify
     * @return the simplified expression, or {@code s} itself if nothing applies
     */
    public static String stripOuterParens(String s) {
        int firstParen = s.indexOf("(");
        if (firstParen < 0) {
            return s;
        }
        int numParens = 1;
        int maxParens = 1;
        int idx = firstParen + 1;
        for (; idx < s.length(); ++idx) {
            if (s.charAt(idx) == '(') {
                ++numParens;
                ++maxParens;
            } else {
                break;
            }
        }
        idx = s.indexOf(")", idx);
        if (idx < 0) {
            // No closing parenthesis at all: nothing to strip.
            return s;
        }
        for (; idx < s.length(); ++idx) {
            if (s.charAt(idx) == ')') {
                --numParens;
                if (numParens == 0) {
                    StringBuilder sb = new StringBuilder();
                    sb.append(s.substring(0, firstParen));
                    sb.append("(");
                    sb.append(s.substring(firstParen + maxParens, idx - maxParens + 1));
                    sb.append(")");
                    sb.append(s.substring(idx + 1));
                    return sb.toString();
                }
            } else {
                return s;
            }
        }
        return s;
    }

    /*
     * Negation handler: builds one group with every member of universeBody
     * that is not a member of excludedBody. Both arguments are class bodies,
     * i.e. the text between the brackets (without the leading '^').
     */
    private static String negate(String excludedBody, String universeBody) {
        Set<Character> chars = new HashSet<Character>();
        String universe = classChars(universeBody);
        for (int k = 0; k < universe.length(); k++) {
            chars.add(universe.charAt(k));
        }
        String excluded = classChars(excludedBody);
        for (int k = 0; k < excluded.length(); k++) {
            chars.remove(excluded.charAt(k));
        }
        StringBuilder remaining = new StringBuilder();
        for (char c : chars) {
            remaining.append(c);
        }
        return group(remaining);
    }

    /*
     * Finds the parenthesised group that ends at the last character of s and
     * returns it including its parentheses. Escaped parentheses are literals
     * and do not take part in the matching.
     */
    private static String findSub(String s) {
        int idx = s.length() - 2;
        int depth = 1;
        while (depth != 0) {
            char c = s.charAt(idx); // runs off the front (and throws) when unbalanced
            if (!isEscaped(s, idx)) {
                if (c == ')') {
                    depth++;
                } else if (c == '(') {
                    depth--;
                }
            }
            idx--;
        }
        return s.substring(idx + 1);
    }

    /*
     * Expands a (non-negated) class body into "(literals)|(range)|(range)...".
     * The literals group is omitted when the class has none; an empty class
     * yields "()".
     */
    private static String expandClass(String body) {
        String[] parsed = parseClassBody(body);
        String bounds = parsed[1];
        StringBuilder out = new StringBuilder();
        if (parsed[0].length() > 0) {
            out.append(group(parsed[0]));
        }
        for (int k = 0; k < bounds.length(); k += 2) {
            StringBuilder members = new StringBuilder();
            // int counter so a range ending at '\uffff' cannot wrap around
            for (int c = bounds.charAt(k); c <= bounds.charAt(k + 1); c++) {
                members.append((char) c);
            }
            if (out.length() > 0) {
                out.append('|');
            }
            out.append(group(members));
        }
        return out.length() == 0 ? "()" : out.toString();
    }

    /*
     * Lists every character matched by a class body: the literals first, then
     * each range written out in full. Duplicates are kept.
     */
    private static String classChars(String body) {
        String[] parsed = parseClassBody(body);
        String bounds = parsed[1];
        StringBuilder sb = new StringBuilder(parsed[0]);
        for (int k = 0; k < bounds.length(); k += 2) {
            for (int c = bounds.charAt(k); c <= bounds.charAt(k + 1); c++) {
                sb.append((char) c);
            }
        }
        return sb.toString();
    }

    /*
     * Splits a class body into its literal members and its ranges.
     *
     * Returns a two-element array: [0] holds the literal members in order,
     * [1] holds the range bounds as consecutive (low, high) character pairs.
     * "\x" stands for the literal x; a '-' with no member on either side is a
     * literal '-'.
     */
    private static String[] parseClassBody(String body) {
        StringBuilder singles = new StringBuilder();
        StringBuilder bounds = new StringBuilder();
        int n = body.length();
        int p = 0;
        while (p < n) {
            char lo = body.charAt(p);
            if (lo == '\\' && p + 1 < n) {
                lo = body.charAt(p + 1);
                p += 2;
            } else {
                p++;
            }
            if (p + 1 < n && body.charAt(p) == '-') {
                int q = p + 1;
                char hi = body.charAt(q);
                if (hi == '\\' && q + 1 < n) {
                    hi = body.charAt(q + 1);
                    q += 2;
                } else {
                    q++;
                }
                if (hi < lo) {
                    throw new IllegalArgumentException(
                            "Reversed range '" + lo + "-" + hi + "' in character class [" + body + "]");
                }
                bounds.append(lo).append(hi);
                p = q;
            } else {
                singles.append(lo);
            }
        }
        return new String[] { singles.toString(), bounds.toString() };
    }

    /*
     * Writes the given characters as one alternation group "(a|b|c)", putting
     * a backslash in front of every special character.
     */
    private static String group(CharSequence members) {
        StringBuilder sb = new StringBuilder("(");
        for (int k = 0; k < members.length(); k++) {
            if (k > 0) {
                sb.append('|');
            }
            char c = members.charAt(k);
            if (SPECIAL_CHARS.indexOf(c) >= 0) {
                sb.append('\\');
            }
            sb.append(c);
        }
        return sb.append(')').toString();
    }

    /*
     * True when the character at idx is preceded by an odd number of
     * backslashes, i.e. it is the second half of an escape pair.
     */
    private static boolean isEscaped(String s, int idx) {
        int slashes = 0;
        for (int k = idx - 1; k >= 0 && s.charAt(k) == '\\'; k--) {
            slashes++;
        }
        return (slashes & 1) == 1;
    }

    /*
     * Index of the first unescaped occurrence of ch at or after from, or -1.
     * from must not point into the middle of an escape pair.
     */
    private static int indexOfUnescaped(String s, char ch, int from) {
        for (int k = Math.max(from, 0); k < s.length(); k++) {
            char c = s.charAt(k);
            if (c == '\\') {
                k++; // skip the escaped character as well
            } else if (c == ch) {
                return k;
            }
        }
        return -1;
    }
}