package com.floreysoft.jmte.util;
import java.util.ArrayList;
import java.util.List;
/**
* Parser for embedded mini languages.
*
* <p>
* <ul>
* <li>Solves Demarcation: Where does an embedded language begin and where does
* it end
* <ul>
* <li>Escaping
* <li>Quotation
* <li>Graceful reaction to and recovery from invalid input
* </ul>
* </li>
* <li>Lays ground for common patterns of mini langauge processing
* <ul>
* <li>all kinds of nested brackets
* <li>segmentation of data
* <li>not loosing context
* <li>context sensitive parsing aka lexer modes/states
* </ul>
* </li>
* </ul>
* </p>
*
* Not thread safe.
*
* @author olli
*
*/
public final class MiniParser {
public final static char DEFAULT_ESCAPE_CHAR = '\\';
public final static char DEFAULT_QUOTE_CHAR = '"';
public static MiniParser defaultInstance() {
return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
false, false);
}
public static MiniParser trimmedInstance() {
return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
true, false);
}
public static MiniParser ignoreCaseInstance() {
return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, true,
false, false);
}
public static MiniParser fullRawInstance() {
return new MiniParser((char) -1, (char) -1, false, false, true);
}
public static MiniParser rawOutputInstance() {
return new MiniParser(DEFAULT_ESCAPE_CHAR, DEFAULT_QUOTE_CHAR, false,
false, true);
}
private final char escapeChar;
private final char quoteChar;
private final boolean ignoreCase;
private final boolean trim;
private final boolean rawOutput;
private transient boolean escaped = false;
private transient boolean quoted = false;
public MiniParser(final char escapeChar, final char quoteChar,
final boolean ignoreCase, final boolean trim,
final boolean rawOutput) {
this.escapeChar = escapeChar;
this.quoteChar = quoteChar;
this.ignoreCase = ignoreCase;
this.trim = trim;
this.rawOutput = rawOutput;
}
public String replace(final String input, final String oldString,
final String newString) {
try {
if (oldString == null || oldString.equals("")) {
return input;
}
StringBuilder buffer = new StringBuilder();
for (int index = 0; index < input.length(); index++) {
if (input.regionMatches(ignoreCase, index, oldString, 0,
oldString.length())) {
buffer.append(newString);
index += oldString.length() - 1;
} else {
char c = input.charAt(index);
append(buffer, c);
}
}
return buffer.toString();
} finally {
escaped = false;
quoted = false;
}
}
public List<String> split(final String input, final char separator) {
return split(input, separator, Integer.MAX_VALUE);
}
public List<String> split(final String input, final char separator,
final int maxSegments) {
return splitInternal(input, false, separator, null, maxSegments);
}
public List<String> split(final String input, final String separatorSet) {
return split(input, separatorSet, Integer.MAX_VALUE);
}
public List<String> split(final String input, final String separatorSet,
final int maxSegments) {
return splitInternal(input, false, (char) -1, separatorSet, maxSegments);
}
public List<String> splitOnWhitespace(final String input,
final int maxSegments) {
return splitInternal(input, true, (char) -1, null, maxSegments);
}
public List<String> splitOnWhitespace(final String input) {
return splitOnWhitespace(input, Integer.MAX_VALUE);
}
// Common implementation for single char separator and string set separator.
// Has the benefit of shared code and caliper mini benchmarks showed no
// measurable performance penalty for additional check which separator to
// use
private List<String> splitInternal(final String input,
final boolean splitOnWhitespace, final char separator,
final String separatorSet, final int maxSegments) {
if (input == null) {
return null;
}
try {
final List<String> segments = new ArrayList<String>();
StringBuilder buffer = new StringBuilder();
for (int index = 0; index < input.length(); index++) {
final char c = input.charAt(index);
boolean separatedByWhitespace = false;
if (splitOnWhitespace) {
for (; index < input.length()
&& Character.isWhitespace(input.charAt(index)); index++) {
separatedByWhitespace = true;
}
if (separatedByWhitespace) {
index--;
}
}
final boolean separates = separatedByWhitespace
|| (separatorSet != null ? separatorSet.indexOf(c) != -1
: c == separator);
// in case we are not already in the last segment and there is
// an
// unsecaped, unquoted separator, this segment is now done
if (segments.size() != maxSegments - 1 && separates
&& !isEscaped()) {
finish(segments, buffer);
buffer = new StringBuilder();
} else {
append(buffer, c);
}
}
if (!splitOnWhitespace || buffer.length() != 0) {
finish(segments, buffer);
}
return segments;
} finally {
escaped = false;
quoted = false;
}
}
private void finish(final List<String> segments, StringBuilder buffer) {
String string = buffer.toString();
segments.add(trim ? string.trim() : string);
}
public int lastIndexOf(final String input, final String substring) {
return indexOfInternal(input, substring, true);
}
public int indexOf(final String input, final String substring) {
return indexOfInternal(input, substring, false);
}
private int indexOfInternal(final String input, final String substring,
boolean last) {
int resultIndex = -1;
for (int index = 0; index < input.length(); index++) {
if (input.regionMatches(ignoreCase, index, substring, 0, substring
.length())
&& !isEscaped()) {
resultIndex = index;
if (!last) {
break;
}
}
}
return resultIndex;
}
public List<String> scan(final String input, final String splitStart,
final String splitEnd) {
return scan(input, splitStart, splitEnd, false);
}
public List<String> greedyScan(final String input, final String splitStart,
final String splitEnd) {
return scan(input, splitStart, splitEnd, true);
}
public List<String> scan(final String input, final String splitStart,
final String splitEnd, boolean greedy) {
if (input == null) {
return null;
}
try {
final List<String> segments = new ArrayList<String>();
StringBuilder buffer = new StringBuilder();
boolean started = false;
int lastIndexOfEnd = greedy ? lastIndexOfEnd = lastIndexOf(input,
splitEnd) : -1;
char c;
int index = 0;
while (index < input.length()) {
c = input.charAt(index);
final boolean greedyCond = !started || !greedy
|| index == lastIndexOfEnd;
final String separator = started ? splitEnd : splitStart;
if (input.regionMatches(ignoreCase, index, separator, 0,
separator.length())
&& !isEscaped() && greedyCond) {
finish(segments, buffer);
buffer = new StringBuilder();
started = !started;
index += separator.length();
} else {
append(buffer, c);
index++;
}
}
// add trailing element to result
if (buffer.length() != 0) {
finish(segments, buffer);
}
return segments;
} finally {
escaped = false;
quoted = false;
}
}
public String unescape(final String input) {
final StringBuilder unescaped = new StringBuilder();
for (int i = 0; i < input.length(); i++) {
final char c = input.charAt(i);
append(unescaped, c);
}
return unescaped.toString();
}
// the heart of it all
private void append(StringBuilder buffer, char c) {
// version manually simplified
// final boolean shouldAppend = rawOutput || escaped
// || (c != quoteChar && c != escapeChar);
// final boolean newEscaped = c == escapeChar && !escaped;
// final boolean newQuoted = (c == quoteChar && !escaped) ? !quoted
// : quoted;
// side-effect free version directly extracted from if
// final boolean shouldAppend = (c == escapeChar && (escaped ||
// rawOutput))
// || (c == quoteChar && (escaped || rawOutput))
// || !(c == quoteChar || c == escapeChar);
// final boolean newEscaped = c == escapeChar ? !escaped
// : (c == quoteChar ? false : false);
// final boolean newQuoted = c == escapeChar ? quoted
// : (c == quoteChar ? (!escaped ? !quoted : quoted) : quoted);
// if (shouldAppend) {
// buffer.append(c);
// }
//
// escaped = newEscaped;
// quoted = newQuoted;
// original version
// XXX needed to revert to this original version as micro benchmark
// tests
// showed a slow down of more than 100%
if (c == escapeChar) {
if (escaped || rawOutput) {
buffer.append(c);
}
escaped = !escaped;
} else if (c == quoteChar) {
if (escaped) {
buffer.append(c);
escaped = false;
} else {
quoted = !quoted;
if (rawOutput) {
buffer.append(c);
}
}
} else {
buffer.append(c);
escaped = false;
}
}
private boolean isEscaped() {
return escaped || quoted;
}
}