/* * JBoss, Home of Professional Open Source. * Copyright 2014, Red Hat Middleware LLC, and individual contributors * as indicated by the @author tags. See the copyright.txt file in the * distribution for a full listing of individual contributors. * * This is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This software is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this software; if not, write to the Free * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA, or see the FSF site: http://www.fsf.org. */ package org.jboss.wsf.stack.cxf.addressRewrite; import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; final class SEDProcessor { private static enum SEDCommand { SUBSTITUTE, TRANSLATE } private static final int[] EMPTY_OCCURRENCE = new int[0]; private final SEDCommand command; private final String replacement; private final int[] occurrences; private final Pattern regexp; private final SedArguments args; private final CharMap charMap; private SEDProcessor(String script, SEDCommand command) { this.command = command; this.args = deriveArgs(script); if (args.isIgnoreCase()) { this.regexp = Pattern.compile(getRegexp(args), Pattern.CASE_INSENSITIVE); } else { this.regexp = Pattern.compile(getRegexp(args)); } this.replacement = getReplacement(args); this.occurrences = args.isOccurrenceSet() ? args.getOccurrence() : EMPTY_OCCURRENCE; for (int i = 0; i < occurrences.length; i++) { if (occurrences[i] <= 0) { throw new IllegalArgumentException("invalid occurrence index " + occurrences[i] + " in sed command"); } } Arrays.sort(occurrences); if (command == SEDCommand.TRANSLATE) { this.charMap = new CharMap(args.getString1(), args.getString2()); } else { this.charMap = null; } } /** * Returns the regexp operand from args, either called "regexp" or * "string1". If none of the two is set, an empty string is returned. * * @param args * the args with operand values * @return the regexp argument from "regexp" or "string1" or an empty string * of none of the two operands is set */ private static String getRegexp(SedArguments args) { if (args.isRegexpSet()) { return args.getRegexp(); } if (args.isString1Set()) { return args.getString1(); } return ""; } /** * Returns the replacement operand from args, either called "replacement" or * "string2". If none of the two is set, an empty string is returned. * * @param args * the args with operand values * @return the replacement argument from "replacement" or "string2" or an * empty string of none of the two operands is set */ private static String getReplacement(SedArguments args) { if (args.isReplacementSet()) { return args.getReplacement(); } if (args.isString2Set()) { return args.getString2(); } return ""; } private SedArguments deriveArgs(String script) { final int start = findStartTrimWhitespace(script) + 1; final int mid = indexOfNextDelimiter(script, start); final int end = indexOfNextDelimiter(script, mid); if (mid < 0 || end < 0) { throw new IllegalArgumentException("invalid script for sed command: " + script); } if (command == SEDCommand.SUBSTITUTE) { SedArguments args = parseSubstituteFlags(script, end + 1); args.setRegexp(script.substring(start + 1, mid)); args.setReplacement(script.substring(mid + 1, end)); return args; } else if (command == SEDCommand.TRANSLATE) { SedArguments args = new SedArguments(); args.setScript(script); args.setTranslate(true); final int scriptEnd = findEndTrimWhitespace(script); if (end + 1 < scriptEnd) { throw new IllegalArgumentException("non-whitespace characters found after " + command + " command in sed script: " + script); } args.setString1(script.substring(start + 1, mid)); args.setString2(script.substring(mid + 1, end)); return args; } else { throw new IllegalStateException(); } } private static SedArguments parseSubstituteFlags(String script, int start) { final int end = findWhitespace(script, start); if (end < findEndTrimWhitespace(script)) { throw new IllegalArgumentException("extra non-whitespace characters found after substitute command in sed script: " + script); } SedArguments args = new SedArguments(); args.setScript(script); args.setSubstitute(true); if (start < end) { //g, I flags int index; for (index = end - 1; index >= start; index--) { final char flag = script.charAt(index); if (flag == 'g') { args.setGlobal(true); } else if (flag == 'I') { args.setIgnoreCase(true); } else { break; } } //occurrence index if (index >= start) { final String occurrenceStr = script.substring(start, index + 1); final int occurrence; try { occurrence = Integer.parseInt(occurrenceStr); } catch (NumberFormatException e) { throw new IllegalArgumentException("invalid substitute flags in sed script: " + script, e); } if (occurrence <= 0) { throw new IllegalArgumentException("invalid occurrence index " + occurrence + " in sed script: " + script); } args.setOccurrence(occurrence); } } return args; } public String processLine(String line) { if (command == SEDCommand.SUBSTITUTE) { final Matcher matcher = regexp.matcher(line); if (matcher.find()) { boolean matches = true; final StringBuffer changed = new StringBuffer();//cannot use StringBuilder here since matcher does not support it if (occurrences.length > 0) { int current = 1; for (int i = 0; i < occurrences.length; i++) { final int occurrence = occurrences[i]; while (matches && current < occurrence) { matches = matcher.find(); current++; } if (matches) { matcher.appendReplacement(changed, replacement); } else { break; } } if (matches && occurrences.length == 1 && args.isGlobal()) { matches = matcher.find(); while (matches) { matcher.appendReplacement(changed, replacement); matches = matcher.find(); } } } else { while (matches) { matcher.appendReplacement(changed, replacement); matches = args.isGlobal() && matcher.find(); } } matcher.appendTail(changed); return changed.toString(); } else { return line; } } else if (command == SEDCommand.TRANSLATE) { char[] changed = null; final int len = line.length(); for (int i = 0; i < len; i++) { final char src = line.charAt(i); final char dst = charMap.map(src); if (dst != 0) { if (changed == null) { changed = new char[len]; for (int j = 0; j < i; j++) { changed[j] = line.charAt(j); } } changed[i] = dst; } else { if (changed != null) { changed[i] = src; } } } return changed != null ? String.valueOf(changed) : line; } else { return null; } } /** * Returns the index of the next delimiter in the given sed script. The * character at {@code indexOfPreviousDelimiter} is taken as delimiter. The * method handles escaped delimiters and returns -1 if no further delimiter * is found. * * @param script * the script to analyze * @param indexOfPreviousDelimiter * the index of the previous delimiter * @return the index of the next delimiter after * {@code indexOfPreviousDelimiter}, or -1 if no further delimiter * exists of if {@code indexOfNextDelimiter < 0} */ private static int indexOfNextDelimiter(String script, int indexOfPreviousDelimiter) { if (indexOfPreviousDelimiter < 0 || script.length() <= indexOfPreviousDelimiter) { return -1; } final char delim = script.charAt(indexOfPreviousDelimiter); if (delim == '\\') { throw new IllegalArgumentException("invalid delimiter '\\' in sed script: " + script); } int index = indexOfPreviousDelimiter; do { index = script.indexOf(delim, index + 1); } while (index >= 0 && isEscaped(script, index)); return index; } private static boolean isEscaped(String script, int index) { int backslashCount = 0; index--; while (index >= 0 && script.charAt(index) == '\\') { backslashCount++; index--; } // an uneven count of backslashes means that the character at position // index is escaped (an even count of backslashes escapes backslashes) return backslashCount % 2 == 1; } /** * Finds and returns the start of the given sequence after trimming * whitespace characters from the left. * * @param s * the character sequence * @return the index containing the first non-whitespace character, or the * length of the character sequence if all characters are blank */ private static int findStartTrimWhitespace(CharSequence s) { final int len = s.length(); for (int i = 0; i < len; i++) { if (!Character.isWhitespace(s.charAt(i))) { return i; } } return len; } /** * Finds and returns the first whitespace character in the given sequence at * or after start. Returns the length of the string if no whitespace is * found. * * @param s * the character sequence * @param start * the first index to consider in the char sequence * @return the index containing the first whitespace character at or after * start, or the length of the character sequence if all characters * are blank */ private static int findWhitespace(CharSequence s, int start) { final int len = s.length(); for (int i = start; i < len; i++) { if (Character.isWhitespace(s.charAt(i))) { return i; } } return len; } /** * Finds and returns the end of the given character sequence after trimming * white space characters from the right. Whitespace characters are defined * by {@link Character#isWhitespace(char)}. . * * @param s * the character sequence * @return the index after the last non-whitespace character, or zero if all * characters are blank */ private static int findEndTrimWhitespace(CharSequence s) { for (int i = s.length(); i > 0; i--) { if (!Character.isWhitespace(s.charAt(i - 1))) { return i; } } return 0; } public static SEDProcessor newInstance(String script) { final int len = script.length(); final int scriptStart = findStartTrimWhitespace(script); if (scriptStart < len) { final char firstChar = script.charAt(scriptStart); if (firstChar == 's') { return new SEDProcessor(script, SEDCommand.SUBSTITUTE); } else if (firstChar == 'y') { return new SEDProcessor(script, SEDCommand.TRANSLATE); } } throw new IllegalArgumentException("invalid script"); } }