/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.util; import java.util.NoSuchElementException; /** * A tokenizer, similar to java's StringTokenizer but allows for quoted * character strings which can include other separators. */ public class Tokenizer { /** The string being parsed */ protected String source; /** The index of the first unreturned char in source */ protected int p; /** The set of delimiter characters */ protected String delim; /** If true then delimiters should be returned as tokens */ protected boolean returnDelims; /** Literal string delimiters */ protected String literalDelim; /** The lex state */ protected int state; /** A lookahead for tokens */ protected String lookahead; /** State flag: normal parse */ protected static final int NORMAL = 1; /** State flag: start of literal */ protected static final int LITERAL_START = 2; /** State flag: end of literal */ protected static final int LITERAL_END = 3; /** * Constructor. * @param str the source string to be parsed * @param delim The set of delimiter characters * @param literalDelim Literal string delimiters * @param returnDelims If true then delimiters should be returned as tokens */ public Tokenizer(String str, String delim, String literalDelim, boolean returnDelims) { this.source = str; this.delim = delim; this.literalDelim = literalDelim; this.returnDelims = returnDelims; p = 0; state = NORMAL; } /** * Return the next token. * @throws java.util.NoSuchElementException if there are no more tokens available */ public String nextToken() { String result = null; if (lookahead != null) { result = lookahead; lookahead = null; } else { result = getNextToken(); } if (result == null) { throw new NoSuchElementException("No more elements in tokenized string"); } if (!returnDelims) { if (result.length() == 1) { char c = result.charAt(0); if (delim.indexOf(c) != -1 || literalDelim.indexOf(c) != -1) { return nextToken(); } } } return result; } /** * Test if there are more tokens which can be returned. */ public boolean hasMoreTokens() { if (lookahead == null) lookahead = getNextToken(); return lookahead != null; } /** * Find the next token which can either be a delimiter or a real token. */ private String getNextToken() { if (p >= source.length()) { return null; } switch(state) { case NORMAL: if (is(literalDelim)) { state = LITERAL_START; p++; return source.substring(p-1, p); } else if (is(delim)) { p++; return source.substring(p-1, p); } else { int start = p; p++; while (p < source.length() && ! is(delim)) p++; return source.substring(start, p); } case LITERAL_START: char delim = source.charAt(p-1); StringBuilder literal = new StringBuilder(); while (p < source.length()) { char c = source.charAt(p); if (c == '\\') { p++; if (p >= source.length()) break; c = source.charAt(p); } else { if (c == delim) break; } literal.append(c); p++; } state = LITERAL_END; return literal.toString(); case LITERAL_END: state = NORMAL; p++; return source.substring(p-1, p); } return null; } /** * Returns true if the current character is contained in the given classification. */ private boolean is(String classification) { return classification.indexOf(source.charAt(p)) != -1; } }