package com.smartandroid.sa.tag.parser;
import com.smartandroid.sa.tag.helper.StringUtil;
import com.smartandroid.sa.tag.helper.Validate;
/**
* A character queue with parsing helpers.
*
* @author Jonathan Hedley
*/
public class TokenQueue {
    /** Backing data; only the part from {@link #pos} onwards is still "on the queue". */
    private String queue;
    /** Index into {@link #queue} of the next character to be read. */
    private int pos = 0;

    private static final char ESC = '\\'; // escape char for chomp balanced.

    /**
     * Create a new TokenQueue.
     *
     * @param data
     *            string of data to back queue; checked with
     *            {@code Validate.notNull}.
     */
    public TokenQueue(String data) {
        Validate.notNull(data);
        queue = data;
    }

    /**
     * Is the queue empty?
     *
     * @return true if no data left in queue.
     */
    public boolean isEmpty() {
        return remainingLength() == 0;
    }

    /** @return number of characters not yet consumed. */
    private int remainingLength() {
        return queue.length() - pos;
    }

    /**
     * Retrieves but does not remove the first character from the queue.
     *
     * @return First character, or 0 if empty.
     */
    public char peek() {
        return isEmpty() ? 0 : queue.charAt(pos);
    }

    /**
     * Add a character to the start of the queue (will be the next character
     * retrieved).
     *
     * @param c
     *            character to add
     */
    public void addFirst(Character c) {
        addFirst(c.toString());
    }

    /**
     * Add a string to the start of the queue. Already-consumed data is
     * discarded from the backing string.
     *
     * @param seq
     *            string to add.
     */
    public void addFirst(String seq) {
        // not very performant, but an edge case
        queue = seq + queue.substring(pos);
        pos = 0;
    }

    /**
     * Tests if the next characters on the queue match the sequence. Case
     * insensitive.
     *
     * @param seq
     *            String to check queue for.
     * @return true if the next characters match.
     */
    public boolean matches(String seq) {
        return queue.regionMatches(true, pos, seq, 0, seq.length());
    }

    /**
     * Case sensitive match test.
     *
     * @param seq
     *            string to case sensitively check for
     * @return true if matched, false if not
     */
    public boolean matchesCS(String seq) {
        return queue.startsWith(seq, pos);
    }

    /**
     * Tests if the next characters match any of the sequences. Case
     * insensitive.
     *
     * @param seq
     *            list of strings to case insensitively check for
     * @return true if any matched, false if none did
     */
    public boolean matchesAny(String... seq) {
        for (String s : seq) {
            if (matches(s)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if the next character is equal to any of the supplied
     * characters. Case sensitive.
     *
     * @param seq
     *            characters to check for
     * @return true if the queue is not empty and its next character is one
     *         of {@code seq}
     */
    public boolean matchesAny(char... seq) {
        if (isEmpty()) {
            return false;
        }
        char next = queue.charAt(pos);
        for (char c : seq) {
            if (next == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests if the queue starts with {@code <} immediately followed by a
     * letter.
     *
     * @return true if the next two characters look like the start of a tag
     */
    public boolean matchesStartTag() {
        // micro opt for matching "<x"
        return remainingLength() >= 2
                && queue.charAt(pos) == '<'
                && Character.isLetter(queue.charAt(pos + 1));
    }

    /**
     * Tests if the queue matches the sequence (as with match), and if they do,
     * removes the matched string from the queue.
     *
     * @param seq
     *            String to search for, and if found, remove from queue.
     * @return true if found and removed, false if not found.
     */
    public boolean matchChomp(String seq) {
        if (!matches(seq)) {
            return false;
        }
        pos += seq.length();
        return true;
    }

    /**
     * Tests if queue starts with a whitespace character, as decided by
     * {@code StringUtil.isWhitespace}.
     *
     * @return if starts with whitespace
     */
    public boolean matchesWhitespace() {
        return !isEmpty() && StringUtil.isWhitespace(queue.charAt(pos));
    }

    /**
     * Test if the queue matches a word character (letter or digit).
     *
     * @return if matches a word character
     */
    public boolean matchesWord() {
        return !isEmpty() && Character.isLetterOrDigit(queue.charAt(pos));
    }

    /**
     * Drops the next character off the queue. Does nothing if the queue is
     * empty.
     */
    public void advance() {
        if (!isEmpty()) {
            pos++;
        }
    }

    /**
     * Consume one character off queue.
     *
     * @return first character on queue.
     * @throws StringIndexOutOfBoundsException
     *             if the queue is empty; the queue position is left
     *             untouched in that case.
     */
    public char consume() {
        // Read first, then advance: if charAt throws on an empty queue the
        // position must not move past the end (that would make isEmpty()
        // report false afterwards).
        char c = queue.charAt(pos);
        pos++;
        return c;
    }

    /**
     * Consumes the supplied sequence of the queue. If the queue does not start
     * with the supplied sequence, will throw an illegal state exception -- but
     * you should be running match() against that condition.
     * <p>
     * Case insensitive.
     *
     * @param seq
     *            sequence to remove from head of queue.
     * @throws IllegalStateException
     *             if the queue does not start with {@code seq}
     */
    public void consume(String seq) {
        // matches() is false whenever seq is longer than what remains, so no
        // separate length check is needed before advancing.
        if (!matches(seq)) {
            throw new IllegalStateException(
                    "Queue did not match expected sequence");
        }
        pos += seq.length();
    }

    /**
     * Pulls a string off the queue, up to but exclusive of the match sequence,
     * or to the queue running out.
     *
     * @param seq
     *            String to end on (and not include in return, but leave on
     *            queue). <b>Case sensitive.</b>
     * @return The matched data consumed from queue.
     */
    public String consumeTo(String seq) {
        int offset = queue.indexOf(seq, pos);
        if (offset == -1) {
            return remainder();
        }
        String consumed = queue.substring(pos, offset);
        pos = offset;
        return consumed;
    }

    /**
     * Pulls a string off the queue, up to but exclusive of the match sequence,
     * or to the queue running out. Like {@link #consumeTo(String)}, but the
     * terminator is matched case insensitively.
     *
     * @param seq
     *            String to end on (and not include in return, but leave on
     *            queue). An empty string matches immediately.
     * @return The data consumed from queue.
     */
    public String consumeToIgnoreCase(String seq) {
        if (seq.length() == 0) {
            // An empty terminator matches at the current position; nothing is
            // consumed (same result as consumeTo("")).
            return "";
        }
        int start = pos;
        String first = seq.substring(0, 1);
        // If the first character has no distinct upper/lower case forms, a
        // plain indexOf can be used to jump between candidate positions.
        boolean canScan = first.toLowerCase().equals(first.toUpperCase());
        while (!isEmpty()) {
            if (matches(seq)) {
                break;
            }
            if (canScan) {
                int skip = queue.indexOf(first, pos) - pos;
                if (skip == 0) {
                    // this char is the skip char, but not a match, so force
                    // advance of pos
                    pos++;
                } else if (skip < 0) {
                    // no chance of finding, grab to end
                    pos = queue.length();
                } else {
                    pos += skip;
                }
            } else {
                pos++;
            }
        }
        return queue.substring(start, pos);
    }

    /**
     * Consumes to the first sequence provided, or to the end of the queue.
     * Leaves the terminator on the queue.
     *
     * @param seq
     *            any number of terminators to consume to. <b>Case
     *            insensitive.</b>
     * @return consumed string
     */
    // todo: method name. not good that consumeTo cares for case, and consume to
    // any doesn't. And the only use for this
    // is a case sensitive time...
    public String consumeToAny(String... seq) {
        int start = pos;
        while (!isEmpty() && !matchesAny(seq)) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Pulls a string off the queue (like consumeTo), and then pulls off the
     * matched string (but does not return it).
     * <p>
     * If the queue runs out of characters before finding the seq, will return
     * as much as it can (and queue will go isEmpty() == true).
     *
     * @param seq
     *            String to match up to, and not include in return, and to pull
     *            off queue. <b>Case sensitive.</b>
     * @return Data matched from queue.
     */
    public String chompTo(String seq) {
        String data = consumeTo(seq);
        matchChomp(seq);
        return data;
    }

    /**
     * Case insensitive variant of {@link #chompTo(String)}: consumes up to the
     * terminator, then removes the terminator itself if present.
     *
     * @param seq
     *            String to match up to, and not include in return, and to pull
     *            off queue. Case insensitive.
     * @return Data consumed from queue before the terminator.
     */
    public String chompToIgnoreCase(String seq) {
        String data = consumeToIgnoreCase(seq); // case insensitive scan
        matchChomp(seq);
        return data;
    }

    /**
     * Pulls a balanced string off the queue. E.g. if queue is
     * "(one (two) three) four", (,) will return "one (two) three", and leave
     * " four" on the queue. Unbalanced openers and closers can be escaped (with
     * \). Those escapes will be left in the returned string, which is suitable
     * for regexes (where we need to preserve the escape), but unsuitable for
     * contains text strings; use unescape for that.
     * <p>
     * Any character directly preceded by a backslash is ignored for balancing
     * purposes, even when that backslash was itself preceded by a backslash.
     *
     * @param open
     *            opener
     * @param close
     *            closer
     * @return data matched from the queue, or an empty string if nothing was
     *         captured
     */
    public String chompBalanced(char open, char close) {
        int start = -1;
        int end = -1;
        int depth = 0;
        char last = 0;
        do {
            if (isEmpty()) {
                break;
            }
            char c = consume();
            if (last != ESC) {
                if (c == open) {
                    depth++;
                    if (start == -1) {
                        start = pos;
                    }
                } else if (c == close) {
                    depth--;
                }
            }
            if (depth > 0 && last != 0) {
                end = pos; // don't include the outer match pair in the return
            }
            last = c;
        } while (depth > 0);
        return (end >= 0) ? queue.substring(start, end) : "";
    }

    /**
     * Unescapes a \ escaped string: a single backslash is dropped, and a pair
     * of backslashes collapses to one literal backslash.
     *
     * @param in
     *            backslash escaped string
     * @return unescaped string
     */
    public static String unescape(String in) {
        StringBuilder out = new StringBuilder(in.length());
        // true when the previous character was a backslash that has not yet
        // been paired with the character it escapes
        boolean pendingEscape = false;
        for (int i = 0; i < in.length(); i++) {
            char c = in.charAt(i);
            if (c == ESC && !pendingEscape) {
                pendingEscape = true; // drop the escape char itself
            } else {
                out.append(c);
                // an escaped backslash must not escape what follows it
                pendingEscape = false;
            }
        }
        return out.toString();
    }

    /**
     * Pulls the next run of whitespace characters off the queue.
     *
     * @return true if at least one whitespace character was consumed
     */
    public boolean consumeWhitespace() {
        boolean seen = false;
        while (matchesWhitespace()) {
            pos++;
            seen = true;
        }
        return seen;
    }

    /**
     * Retrieves the next run of word type (letter or digit) off the queue.
     *
     * @return String of word characters from queue, or empty string if none.
     */
    public String consumeWord() {
        return consumeWordCharsOr();
    }

    /**
     * Consume a tag name off the queue (word or :, _, -)
     *
     * @return tag name
     */
    public String consumeTagName() {
        return consumeWordCharsOr(':', '_', '-');
    }

    /**
     * Consume a CSS element selector (tag name, but | instead of : for
     * namespaces, to not conflict with :pseudo selects).
     *
     * @return tag name
     */
    public String consumeElementSelector() {
        return consumeWordCharsOr('|', '_', '-');
    }

    /**
     * Consume a CSS identifier (ID or class) off the queue (letter, digit, -,
     * _) http://www.w3.org/TR/CSS2/syndata.html#value-def-identifier
     *
     * @return identifier
     */
    public String consumeCssIdentifier() {
        return consumeWordCharsOr('-', '_');
    }

    /**
     * Consume an attribute key off the queue (letter, digit, -, _, :)
     *
     * @return attribute key
     */
    public String consumeAttributeKey() {
        return consumeWordCharsOr('-', '_', ':');
    }

    /**
     * Consumes the leading run of characters that are letters, digits, or one
     * of the supplied extra characters.
     *
     * @param extras
     *            additional characters accepted as part of the run
     * @return the consumed run, or an empty string if none matched
     */
    private String consumeWordCharsOr(char... extras) {
        int start = pos;
        // both predicates return false on an empty queue, ending the loop
        while (matchesWord() || matchesAny(extras)) {
            pos++;
        }
        return queue.substring(start, pos);
    }

    /**
     * Consume and return whatever is left on the queue.
     *
     * @return remainder of queue.
     */
    public String remainder() {
        final String remainder = queue.substring(pos);
        pos = queue.length();
        return remainder;
    }

    /**
     * Returns the unconsumed part of the queue without consuming it.
     *
     * @return the remaining queue contents
     */
    @Override
    public String toString() {
        return queue.substring(pos);
    }
}