/*
 *  This file is part of the X10 project (http://x10-lang.org).
 *
 *  This file is licensed to You under the Eclipse Public License (EPL);
 *  You may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *      http://www.opensource.org/licenses/eclipse-1.0.php
 *
 * This file was originally derived from the Polyglot extensible compiler framework.
 *
 *  (C) Copyright 2000-2007 Polyglot project group, Cornell University
 *  (C) Copyright IBM Corporation 2007-2012.
 */

package polyglot.util;

import java.util.StringTokenizer;
import java.util.NoSuchElementException;

/**
 * A string tokenizer that understands quotes and escape characters.
 * <p>
 * A token ends at the first delimiter character that is neither quoted nor
 * escaped. While scanning a token:
 * <ul>
 * <li>the escape character is dropped and the character following it is
 *     taken literally, both inside and outside quotes;</li>
 * <li>an unescaped quote character opens a quoted section that runs until
 *     the same quote character appears again unescaped; the quote
 *     characters themselves are not part of the token;</li>
 * <li>quoted and unquoted runs that touch each other form a single
 *     token.</li>
 * </ul>
 * A token that ends while a quote is still open, or right after an escape
 * character, is reported with an {@link IllegalArgumentException}.
 *
 * @author Igor Peshansky, IBM Corporation
 */
public class QuotedStringTokenizer extends StringTokenizer {
    /** Delimiters used when the caller does not supply any. */
    private static final String DEFAULT_DELIM = " \t\n\r\f";
    /** Quote characters used when the caller does not supply any. */
    private static final String DEFAULT_QUOTES = "\"\'";
    /** Escape character used when the caller does not supply one. */
    private static final char DEFAULT_ESCAPE = '\\';

    /* Have to keep copies because StringTokenizer makes everything private */
    protected final String str;
    protected String delim;
    protected final String quotes;
    protected final char escape;
    protected final boolean returnDelims;
    protected int pos = 0;
    protected final int len;

    /**
     * Constructs a string tokenizer for the specified string.
     * The default delimiters for StringTokenizer are used.
     * "\"\'" are used as quotes, and '\\' is used as the escape character.
     *
     * @param str the string to tokenize
     */
    public QuotedStringTokenizer(String str) {
        this(str, DEFAULT_DELIM, DEFAULT_QUOTES, DEFAULT_ESCAPE, false);
    }

    /**
     * Constructs a string tokenizer for the specified string.
     * The default delimiters for StringTokenizer are used.
     * "\"\'" are used as quotes, and escape is used as the escape character.
     *
     * @param str the string to tokenize
     * @param escape the escape character
     * @throws IllegalArgumentException if escape is a delimiter or a quote
     */
    public QuotedStringTokenizer(String str, char escape) {
        this(str, DEFAULT_DELIM, DEFAULT_QUOTES, escape, false);
    }

    /**
     * Constructs a string tokenizer for the specified string.
     * "\"\'" are used as quotes, and '\\' is used as the escape character.
     *
     * @param str the string to tokenize
     * @param delim the delimiter characters
     * @throws IllegalArgumentException if delim contains a quote or the
     *         escape character
     */
    public QuotedStringTokenizer(String str, String delim) {
        this(str, delim, DEFAULT_QUOTES, DEFAULT_ESCAPE, false);
    }

    /**
     * Constructs a string tokenizer for the specified string.
     * "\"\'" are used as quotes, and escape is used as the escape character.
     *
     * @param str the string to tokenize
     * @param delim the delimiter characters
     * @param escape the escape character
     * @throws IllegalArgumentException if delim contains a quote or the
     *         escape character, or if escape is a quote
     */
    public QuotedStringTokenizer(String str, String delim, char escape) {
        this(str, delim, DEFAULT_QUOTES, escape, false);
    }

    /**
     * Constructs a string tokenizer for the specified string.
     * Quotes cannot be delimiters, and the escape character can be neither a
     * quote nor a delimiter.
     *
     * @param str the string to tokenize
     * @param delim the delimiter characters
     * @param quotes the quote characters
     * @param escape the escape character
     * @param returnDelims whether each delimiter character is itself
     *        returned as a one-character token
     * @throws IllegalArgumentException if a quote character is a delimiter,
     *         or the escape character is a delimiter or a quote
     */
    public QuotedStringTokenizer(String str, String delim, String quotes, char escape, boolean returnDelims) {
        super(str, delim, returnDelims);
        checkDelimiters(delim, quotes, escape);
        if (quotes.indexOf(escape) >= 0) {
            throw new IllegalArgumentException("Invalid escape character '" + escape + "'");
        }
        this.str = str;
        this.len = str.length();
        this.delim = delim;
        this.quotes = quotes;
        this.escape = escape;
        this.returnDelims = returnDelims;
    }

    /**
     * Verifies that a delimiter set is compatible with the quote and escape
     * characters: no quote character and not the escape character may be a
     * delimiter. Quotes are checked first, in order, then the escape.
     *
     * @throws IllegalArgumentException on the first offending character
     */
    private static void checkDelimiters(String delim, String quotes, char escape) {
        for (int i = 0; i < quotes.length(); i++) {
            char quote = quotes.charAt(i);
            if (delim.indexOf(quote) >= 0) {
                throw new IllegalArgumentException("Invalid quote character '" + quote + "'");
            }
        }
        if (delim.indexOf(escape) >= 0) {
            throw new IllegalArgumentException("Invalid escape character '" + escape + "'");
        }
    }

    /** Tells whether the character is in the current delimiter set. */
    private boolean isDelimiter(char c) {
        return delim.indexOf(c) >= 0;
    }

    /**
     * Returns the position of the next non-delimiter character.
     * Pre-condition: not inside a quoted string (token).
     */
    private int skipDelim(int pos) {
        while (pos < len && isDelimiter(str.charAt(pos))) {
            pos++;
        }
        return pos;
    }

    /**
     * Returns the position of the next delimiter character after the token
     * (or the end of the string).
     * If out is not null, the token's characters, with quote and escape
     * characters stripped, are appended to it.
     * Pre-condition: not on a delimiter.
     *
     * @throws IllegalArgumentException if the string ends inside a quoted
     *         section or directly after an escape character
     */
    private int skipToken(int pos, StringBuilder out) {
        boolean escaped = false;
        boolean quoted = false;
        char quote = '\000';
        for (; pos < len; pos++) {
            char curr = str.charAt(pos);
            if (escaped) {
                // the previous character was the escape: take this one literally
                escaped = false;
                if (out != null) out.append(curr);
            } else if (curr == escape) {
                // escape character: dropped, affects the next character only
                escaped = true;
            } else if (quoted) {
                if (curr == quote) {
                    // closing quote; only the quote that opened the section closes it
                    quoted = false;
                    quote = '\000';
                } else if (out != null) {
                    out.append(curr);
                }
            } else if (quotes.indexOf(curr) >= 0) {
                // opening quote
                quoted = true;
                quote = curr;
            } else if (isDelimiter(curr)) {
                // unquoted delimiter ends the token
                break;
            } else if (out != null) {
                out.append(curr);
            }
        }
        if (escaped || quoted) {
            throw new IllegalArgumentException("Unterminated quoted string");
        }
        return pos;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string.
     * When delimiters are not returned as tokens, this advances the current
     * position past any leading delimiters.
     * Pre-condition: not inside a quoted string (token).
     *
     * @return true if the current position is before the end of the string
     */
    @Override
    public boolean hasMoreTokens() {
        if (!returnDelims) {
            pos = skipDelim(pos);
        }
        return pos < len;
    }

    /**
     * Returns the next token from this string tokenizer.
     *
     * @return the next token with quote and escape characters removed, or,
     *         when delimiters are returned, a single delimiter character
     * @throws NoSuchElementException if there are no more tokens
     * @throws IllegalArgumentException if the token ends inside a quoted
     *         section or directly after an escape character
     */
    @Override
    public String nextToken() {
        if (!returnDelims) {
            pos = skipDelim(pos);
        }
        if (pos >= len) {
            throw new NoSuchElementException();
        }
        if (returnDelims && isDelimiter(str.charAt(pos))) {
            int start = pos++;
            return str.substring(start, pos);
        }
        StringBuilder token = new StringBuilder();
        pos = skipToken(pos, token);
        return token.toString();
    }

    /**
     * Returns the next token in this string tokenizer's string, after
     * switching to a new set of delimiters. The new set stays in effect for
     * subsequent calls.
     * <p>
     * The new delimiters are subject to the same restriction as in the
     * constructor: they may contain neither a quote character nor the escape
     * character. An invalid set is rejected before any state changes, so the
     * previous delimiters remain in effect.
     *
     * @param delim the new delimiter characters
     * @return the next token, as for {@link #nextToken()}
     * @throws NullPointerException if delim is null
     * @throws IllegalArgumentException if delim contains a quote or the
     *         escape character, or the token is unterminated
     * @throws NoSuchElementException if there are no more tokens
     */
    @Override
    public String nextToken(String delim) {
        checkDelimiters(delim, quotes, escape);
        this.delim = delim;
        return nextToken();
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method
     * can be called before it generates an exception. The current position
     * is not advanced.
     *
     * @return the number of tokens remaining, counting each delimiter
     *         character as a token when delimiters are returned
     * @throws IllegalArgumentException if a remaining token ends inside a
     *         quoted section or directly after an escape character
     */
    @Override
    public int countTokens() {
        int tokens = 0;
        int delims = 0;
        int curr = pos;
        while (curr < len) {
            if (isDelimiter(str.charAt(curr))) {
                curr++;
                delims++;
            } else {
                curr = skipToken(curr, null);
                tokens++;
            }
        }
        return returnDelims ? tokens + delims : tokens;
    }

    /**
     * Returns the same value as the hasMoreTokens method.
     */
    @Override
    public boolean hasMoreElements() {
        return hasMoreTokens();
    }

    /**
     * Returns the same value as the nextToken method, except that its declared
     * return value is Object rather than String.
     */
    @Override
    public Object nextElement() {
        return nextToken();
    }
}