/* * Copyright (C) 2014 Alec Dhuse * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package co.foldingmap.dataStructures; /** * * @author Alec */ public class SmartTokenizer { private int currentIndex; private String string; public SmartTokenizer(String string) { this.string = string; this.currentIndex = 0; } /** * Returns the text between a delimiter of the same character, starting at * the current index pointer. Handles if that character is escaped by '\' * * @param delim * @return */ public String getContent(char delim) { boolean continueLoop; char currentChar, prevChar; int startIndex; String content; startIndex = string.indexOf(delim, currentIndex); if (startIndex >= 0) { currentIndex = startIndex + 1; continueLoop = true; prevChar = ' ' ; content = ""; while (continueLoop && currentIndex < string.length()) { currentChar = string.charAt(currentIndex); if (currentIndex > 0) prevChar = string.charAt(currentIndex - 1); if (currentChar == delim && prevChar != '\\') { continueLoop = false; content = string.substring(startIndex + 1, currentIndex); currentIndex++; } else { currentIndex++; } } return content; } else { currentIndex = string.length(); return ""; } } /** * Returns the text between the two delimiting characters. * Deals with nested characters. * * @param startDelim * @param endDelim * @return */ public String getContent(char startDelim, char endDelim) { char currentChar; int openDelims, startIndex; startIndex = string.indexOf(startDelim, currentIndex); openDelims = 1; currentIndex = startIndex + 1; while (openDelims > 0 && currentIndex < string.length()) { currentChar = string.charAt(currentIndex); if (currentChar == startDelim) { openDelims++; } else if (currentChar == endDelim) { openDelims--; } currentIndex++; } return string.substring(startIndex + 1, currentIndex - 1); } /** * Returns the next char in the tokenizer, without advancing the index * pointer. Spaces are ignored. * * @return */ public char getNextChar() { if (currentIndex < string.length()) { //Skip over whitespace moveToNextNonWhiteSpace(); return string.charAt(currentIndex); } else { //end of string, return blank char. return 0; } } /** * Returns a String consisting of the whitespace starting at the current index ending at the * first occurrence of non-whitespace. * * @return */ public String getNextWhiteSpace() { int startIndex = currentIndex; moveToNextNonWhiteSpace(); int endIndex = currentIndex; return string.substring(startIndex, endIndex); } /** * Returns text from the current pointer to the first instance of a given * char. The index pointer is set to one past the stop point. * * If the end of the String is reach without reaching that character, * the substring from the index pointer to the String end will be returned. * * @param character * @return */ public String getTextTo(char character) { String text; int charIndex = string.indexOf(character, currentIndex); if (charIndex > 0) { text = string.substring(currentIndex, charIndex); currentIndex = charIndex + 1; } else { text = string.substring(currentIndex); currentIndex = string.length(); } return text; } /** * Returns if there is more of the string after the index pointer. * * @return */ public boolean hasMore() { if (currentIndex < string.length()) { return true; } else { return false; } } /** * Returns the index of the first occurrence of a given String. * Returns -1 if the string is not found. * * @param searchString * @return */ public int indexOf(String searchString) { return string.indexOf(searchString); } private boolean isWhitespace(char c) { if (c == ' ' || c == '\n' || c == '\t') { return true; } else { return false; } } /** * Jumps the current pointer index to after the next occurrence of a given * char. * * @param character */ public void jumpAfterChar(char character) { int newIndex = string.indexOf(character, currentIndex); if (newIndex >= 0) { currentIndex = newIndex + 1; } else { currentIndex = string.length(); } } /** * Increments the pointer to the next non whitespace character. * */ public void moveToNextNonWhiteSpace() { while ((currentIndex < string.length()) && (string.charAt(currentIndex) == ' ' || string.charAt(currentIndex) == '\n' || string.charAt(currentIndex) == '\t')) { currentIndex++; } } /** * Increments the pointer to the next whitespace character. * */ public void moveToNextWhiteSpace() { while ((currentIndex < string.length()) && string.charAt(currentIndex) != ' ' && string.charAt(currentIndex) != '\n' && string.charAt(currentIndex) != '\t') { currentIndex++; } } /** * Returns the next white space delimited token. If index pointer is in the * middle of a token, the pointer is incremented to the next whitespace * character to start the token. * * @return */ public String nextToken() { int startIndex; if (currentIndex > 0) { if (!isWhitespace(string.charAt(currentIndex - 1))) moveToNextWhiteSpace(); } startIndex = currentIndex; moveToNextNonWhiteSpace(); //find the next whitespace moveToNextWhiteSpace(); return (string.substring(startIndex, currentIndex)).trim(); } /** * Returns the text after the index pointer. * * @return */ @Override public String toString() { return string.substring(currentIndex); } }