/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;

import org.apache.commons.lang3.ArrayUtils;

/**
 * Tokenizes a string based on delimiters (separators)
 * and supporting quoting and ignored character concepts.
 * <p>
 * This class can split a String into many smaller strings. It aims
 * to do a similar job to {@link java.util.StringTokenizer StringTokenizer},
 * however it offers much more control and flexibility including implementing
 * the <code>ListIterator</code> interface. By default, it is set up
 * like <code>StringTokenizer</code>.
 * <p>
 * The input String is split into a number of <i>tokens</i>.
 * Each token is separated from the next String by a <i>delimiter</i>.
 * One or more delimiter characters must be specified.
 * <p>
 * Each token may be surrounded by quotes.
 * The <i>quote</i> matcher specifies the quote character(s).
 * A quote may be escaped within a quoted section by duplicating itself.
 * <p>
 * Between each token and the delimiter are potentially characters that need trimming.
 * The <i>trimmer</i> matcher specifies these characters.
 * One usage might be to trim whitespace characters.
 * <p>
 * At any point outside the quotes there might potentially be invalid characters.
 * The <i>ignored</i> matcher specifies these characters to be removed.
 * One usage might be to remove new line characters.
 * <p>
 * Empty tokens may be removed or returned as null.
 * <pre>
 * "a,b,c"         - Three tokens "a","b","c"   (comma delimiter)
 * " a, b , c "    - Three tokens "a","b","c"   (default CSV processing trims whitespace)
 * "a, ", b ,", c" - Three tokens "a, " , " b ", ", c" (quoted text untouched)
 * </pre>
 * <p>
 *
 * This tokenizer has the following properties and options:
 *
 * <table>
 *  <tr>
 *   <th>Property</th><th>Type</th><th>Default</th>
 *  </tr>
 *  <tr>
 *   <td>delim</td><td>CharSetMatcher</td><td>{ \t\n\r\f}</td>
 *  </tr>
 *  <tr>
 *   <td>quote</td><td>NoneMatcher</td><td>{}</td>
 *  </tr>
 *  <tr>
 *   <td>ignore</td><td>NoneMatcher</td><td>{}</td>
 *  </tr>
 *  <tr>
 *   <td>trimmer</td><td>NoneMatcher</td><td>{}</td>
 *  </tr>
 *  <tr>
 *   <td>emptyTokenAsNull</td><td>boolean</td><td>false</td>
 *  </tr>
 *  <tr>
 *   <td>ignoreEmptyTokens</td><td>boolean</td><td>true</td>
 *  </tr>
 * </table>
 *
 * @since 2.2
 * @version $Id: StrTokenizer.java 1199894 2011-11-09 17:53:59Z ggregory $
 */
public class StrTokenizer implements ListIterator<String>, Cloneable {

    /** Shared immutable-by-convention prototype cloned by the CSV factory methods. */
    private static final StrTokenizer CSV_TOKENIZER_PROTOTYPE;
    /** Shared immutable-by-convention prototype cloned by the TSV factory methods. */
    private static final StrTokenizer TSV_TOKENIZER_PROTOTYPE;
    static {
        CSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
        CSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.commaMatcher());
        CSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
        CSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
        CSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
        CSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
        CSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);

        TSV_TOKENIZER_PROTOTYPE = new StrTokenizer();
        TSV_TOKENIZER_PROTOTYPE.setDelimiterMatcher(StrMatcher.tabMatcher());
        TSV_TOKENIZER_PROTOTYPE.setQuoteMatcher(StrMatcher.doubleQuoteMatcher());
        TSV_TOKENIZER_PROTOTYPE.setIgnoredMatcher(StrMatcher.noneMatcher());
        TSV_TOKENIZER_PROTOTYPE.setTrimmerMatcher(StrMatcher.trimMatcher());
        TSV_TOKENIZER_PROTOTYPE.setEmptyTokenAsNull(false);
        TSV_TOKENIZER_PROTOTYPE.setIgnoreEmptyTokens(false);
    }

    /** The text to work on. */
    private char[] chars;
    /** The parsed tokens, or null if not yet tokenized. */
    private String[] tokens;
    /** The current iteration position. */
    private int tokenPos;

    /** The delimiter matcher. */
    private StrMatcher delimMatcher = StrMatcher.splitMatcher();
    /** The quote matcher. */
    private StrMatcher quoteMatcher = StrMatcher.noneMatcher();
    /** The ignored matcher. */
    private StrMatcher ignoredMatcher = StrMatcher.noneMatcher();
    /** The trimmer matcher. */
    private StrMatcher trimmerMatcher = StrMatcher.noneMatcher();

    /** Whether to return empty tokens as null. */
    private boolean emptyAsNull = false;
    /** Whether to ignore empty tokens. */
    private boolean ignoreEmptyTokens = true;

    //-----------------------------------------------------------------------
    /**
     * Returns a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
     *
     * @return a clone of <code>CSV_TOKENIZER_PROTOTYPE</code>.
     */
    private static StrTokenizer getCSVClone() {
        return (StrTokenizer) CSV_TOKENIZER_PROTOTYPE.clone();
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value strings.
     * The default for CSV processing will be trim whitespace from both ends
     * (which can be overridden with the setTrimmer method).
     * <p>
     * You must call a "reset" method to set the string which you want to parse.
     * @return a new tokenizer instance which parses Comma Separated Value strings
     */
    public static StrTokenizer getCSVInstance() {
        return getCSVClone();
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value strings
     * initializing it with the given input.  The default for CSV processing
     * will be trim whitespace from both ends (which can be overridden with
     * the setTrimmer method).
     *
     * @param input  the text to parse
     * @return a new tokenizer instance which parses Comma Separated Value strings
     */
    public static StrTokenizer getCSVInstance(String input) {
        StrTokenizer tok = getCSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Gets a new tokenizer instance which parses Comma Separated Value strings
     * initializing it with the given input.  The default for CSV processing
     * will be trim whitespace from both ends (which can be overridden with
     * the setTrimmer method).
     *
     * @param input  the text to parse
     * @return a new tokenizer instance which parses Comma Separated Value strings
     */
    public static StrTokenizer getCSVInstance(char[] input) {
        StrTokenizer tok = getCSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Returns a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
     *
     * @return a clone of <code>TSV_TOKENIZER_PROTOTYPE</code>.
     */
    private static StrTokenizer getTSVClone() {
        return (StrTokenizer) TSV_TOKENIZER_PROTOTYPE.clone();
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings.
     * The default for TSV processing will be trim whitespace from both ends
     * (which can be overridden with the setTrimmer method).
     * <p>
     * You must call a "reset" method to set the string which you want to parse.
     * @return a new tokenizer instance which parses Tab Separated Value strings.
     */
    public static StrTokenizer getTSVInstance() {
        return getTSVClone();
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings.
     * The default for TSV processing will be trim whitespace from both ends
     * (which can be overridden with the setTrimmer method).
     * @param input  the string to parse
     * @return a new tokenizer instance which parses Tab Separated Value strings.
     */
    public static StrTokenizer getTSVInstance(String input) {
        StrTokenizer tok = getTSVClone();
        tok.reset(input);
        return tok;
    }

    /**
     * Gets a new tokenizer instance which parses Tab Separated Value strings.
     * The default for TSV processing will be trim whitespace from both ends
     * (which can be overridden with the setTrimmer method).
     * @param input  the string to parse
     * @return a new tokenizer instance which parses Tab Separated Value strings.
     */
    public static StrTokenizer getTSVInstance(char[] input) {
        StrTokenizer tok = getTSVClone();
        tok.reset(input);
        return tok;
    }

    //-----------------------------------------------------------------------
    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed
     * as per StringTokenizer, but with no text to tokenize.
     * <p>
     * This constructor is normally used with {@link #reset(String)}.
     */
    public StrTokenizer() {
        super();
        this.chars = null;
    }

    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed
     * as per StringTokenizer.
     *
     * @param input  the string which is to be parsed
     */
    public StrTokenizer(String input) {
        super();
        if (input != null) {
            chars = input.toCharArray();
        } else {
            chars = null;
        }
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character.
     *
     * @param input  the string which is to be parsed
     * @param delim  the field delimiter character
     */
    public StrTokenizer(String input, char delim) {
        this(input);
        setDelimiterChar(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter string.
     *
     * @param input  the string which is to be parsed
     * @param delim  the field delimiter string
     */
    public StrTokenizer(String input, String delim) {
        this(input);
        setDelimiterString(delim);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher.
     *
     * @param input  the string which is to be parsed
     * @param delim  the field delimiter matcher
     */
    public StrTokenizer(String input, StrMatcher delim) {
        this(input);
        setDelimiterMatcher(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character
     * and handling quotes using the specified quote character.
     *
     * @param input  the string which is to be parsed
     * @param delim  the field delimiter character
     * @param quote  the field quoted string character
     */
    public StrTokenizer(String input, char delim, char quote) {
        this(input, delim);
        setQuoteChar(quote);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher
     * and handling quotes using the specified quote matcher.
     *
     * @param input  the string which is to be parsed
     * @param delim  the field delimiter matcher
     * @param quote  the field quoted string matcher
     */
    public StrTokenizer(String input, StrMatcher delim, StrMatcher quote) {
        this(input, delim);
        setQuoteMatcher(quote);
    }

    /**
     * Constructs a tokenizer splitting on space, tab, newline and formfeed
     * as per StringTokenizer.
     *
     * @param input  the string which is to be parsed, not cloned
     */
    public StrTokenizer(char[] input) {
        super();
        this.chars = ArrayUtils.clone(input);
    }

    /**
     * Constructs a tokenizer splitting on the specified character.
     *
     * @param input  the string which is to be parsed, not cloned
     * @param delim  the field delimiter character
     */
    public StrTokenizer(char[] input, char delim) {
        this(input);
        setDelimiterChar(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified string.
     *
     * @param input  the string which is to be parsed, not cloned
     * @param delim  the field delimiter string
     */
    public StrTokenizer(char[] input, String delim) {
        this(input);
        setDelimiterString(delim);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher.
     *
     * @param input  the string which is to be parsed, not cloned
     * @param delim  the field delimiter matcher
     */
    public StrTokenizer(char[] input, StrMatcher delim) {
        this(input);
        setDelimiterMatcher(delim);
    }

    /**
     * Constructs a tokenizer splitting on the specified delimiter character
     * and handling quotes using the specified quote character.
     *
     * @param input  the string which is to be parsed, not cloned
     * @param delim  the field delimiter character
     * @param quote  the field quoted string character
     */
    public StrTokenizer(char[] input, char delim, char quote) {
        this(input, delim);
        setQuoteChar(quote);
    }

    /**
     * Constructs a tokenizer splitting using the specified delimiter matcher
     * and handling quotes using the specified quote matcher.
     *
     * @param input  the string which is to be parsed, not cloned
     * @param delim  the field delimiter character
     * @param quote  the field quoted string character
     */
    public StrTokenizer(char[] input, StrMatcher delim, StrMatcher quote) {
        this(input, delim);
        setQuoteMatcher(quote);
    }

    // API
    //-----------------------------------------------------------------------
    /**
     * Gets the number of tokens found in the String.
     *
     * @return the number of matched tokens
     */
    public int size() {
        checkTokenized();
        return tokens.length;
    }

    /**
     * Gets the next token from the String.
     * Equivalent to {@link #next()} except it returns null rather than
     * throwing {@link NoSuchElementException} when no tokens remain.
     *
     * @return the next sequential token, or null when no more tokens are found
     */
    public String nextToken() {
        if (hasNext()) {
            return tokens[tokenPos++];
        }
        return null;
    }

    /**
     * Gets the previous token from the String.
     *
     * @return the previous sequential token, or null when no more tokens are found
     */
    public String previousToken() {
        if (hasPrevious()) {
            return tokens[--tokenPos];
        }
        return null;
    }

    /**
     * Gets a copy of the full token list as an independent modifiable array.
     *
     * @return the tokens as a String array
     */
    public String[] getTokenArray() {
        checkTokenized();
        return tokens.clone();
    }

    /**
     * Gets a copy of the full token list as an independent modifiable list.
     *
     * @return the tokens as a String array
     */
    public List<String> getTokenList() {
        checkTokenized();
        // ArrayList copy constructor both presizes and copies in one step
        return new ArrayList<String>(java.util.Arrays.asList(tokens));
    }

    /**
     * Resets this tokenizer, forgetting all parsing and iteration already completed.
     * <p>
     * This method allows the same tokenizer to be reused for the same String.
     *
     * @return this, to enable chaining
     */
    public StrTokenizer reset() {
        tokenPos = 0;
        tokens = null;
        return this;
    }

    /**
     * Reset this tokenizer, giving it a new input string to parse.
     * In this manner you can re-use a tokenizer with the same settings
     * on multiple input lines.
     *
     * @param input  the new string to tokenize, null sets no text to parse
     * @return this, to enable chaining
     */
    public StrTokenizer reset(String input) {
        reset();
        if (input != null) {
            this.chars = input.toCharArray();
        } else {
            this.chars = null;
        }
        return this;
    }

    /**
     * Reset this tokenizer, giving it a new input string to parse.
     * In this manner you can re-use a tokenizer with the same settings
     * on multiple input lines.
     *
     * @param input  the new character array to tokenize, not cloned, null sets no text to parse
     * @return this, to enable chaining
     */
    public StrTokenizer reset(char[] input) {
        reset();
        this.chars = ArrayUtils.clone(input);
        return this;
    }

    // ListIterator
    //-----------------------------------------------------------------------
    /**
     * Checks whether there are any more tokens.
     *
     * @return true if there are more tokens
     */
    public boolean hasNext() {
        checkTokenized();
        return tokenPos < tokens.length;
    }

    /**
     * Gets the next token.
     *
     * @return the next String token
     * @throws NoSuchElementException if there are no more elements
     */
    public String next() {
        if (hasNext()) {
            return tokens[tokenPos++];
        }
        throw new NoSuchElementException();
    }

    /**
     * Gets the index of the next token to return.
     *
     * @return the next token index
     */
    public int nextIndex() {
        return tokenPos;
    }

    /**
     * Checks whether there are any previous tokens that can be iterated to.
     *
     * @return true if there are previous tokens
     */
    public boolean hasPrevious() {
        checkTokenized();
        return tokenPos > 0;
    }

    /**
     * Gets the token previous to the last returned token.
     *
     * @return the previous token
     */
    public String previous() {
        if (hasPrevious()) {
            return tokens[--tokenPos];
        }
        throw new NoSuchElementException();
    }

    /**
     * Gets the index of the previous token.
     *
     * @return the previous token index
     */
    public int previousIndex() {
        return tokenPos - 1;
    }

    /**
     * Unsupported ListIterator operation.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("remove() is unsupported");
    }

    /**
     * Unsupported ListIterator operation.
     * @param obj this parameter ignored.
     * @throws UnsupportedOperationException always
     */
    public void set(String obj) {
        throw new UnsupportedOperationException("set() is unsupported");
    }

    /**
     * Unsupported ListIterator operation.
     * @param obj this parameter ignored.
     * @throws UnsupportedOperationException always
     */
    public void add(String obj) {
        throw new UnsupportedOperationException("add() is unsupported");
    }

    // Implementation
    //-----------------------------------------------------------------------
    /**
     * Checks if tokenization has been done, and if not then do it.
     */
    private void checkTokenized() {
        if (tokens == null) {
            // still call tokenize(null, ...) for null chars, as a subclass may do some work
            List<String> split = chars == null
                    ? tokenize(null, 0, 0)
                    : tokenize(chars, 0, chars.length);
            tokens = split.toArray(new String[split.size()]);
        }
    }

    /**
     * Internal method to performs the tokenization.
     * <p>
     * Most users of this class do not need to call this method. This method
     * will be called automatically by other (public) methods when required.
     * <p>
     * This method exists to allow subclasses to add code before or after the
     * tokenization. For example, a subclass could alter the character array,
     * offset or count to be parsed, or call the tokenizer multiple times on
     * multiple strings. It is also possible to filter the results.
     * <p>
     * <code>StrTokenizer</code> will always pass a zero offset and a count
     * equal to the length of the array to this method, however a subclass
     * may pass other values, or even an entirely different array.
     *
     * @param chars  the character array being tokenized, may be null
     * @param offset  the start position within the character array, must be valid
     * @param count  the number of characters to tokenize, must be valid
     * @return the modifiable list of String tokens, unmodifiable if null array or zero count
     */
    protected List<String> tokenize(char[] chars, int offset, int count) {
        if (chars == null || count == 0) {
            return Collections.emptyList();
        }
        StrBuilder buf = new StrBuilder();
        List<String> tokens = new ArrayList<String>();
        int pos = offset;

        // loop around the entire buffer
        while (pos >= 0 && pos < count) {
            // find next token
            pos = readNextToken(chars, pos, count, buf, tokens);

            // handle case where end of string is a delimiter
            if (pos >= count) {
                addToken(tokens, "");
            }
        }
        return tokens;
    }

    /**
     * Adds a token to a list, paying attention to the parameters we've set.
     *
     * @param list  the list to add to
     * @param tok  the token to add
     */
    private void addToken(List<String> list, String tok) {
        if (tok == null || tok.length() == 0) {
            if (isIgnoreEmptyTokens()) {
                return;
            }
            if (isEmptyTokenAsNull()) {
                tok = null;
            }
        }
        list.add(tok);
    }

    /**
     * Reads character by character through the String to get the next token.
     *
     * @param chars  the character array being tokenized
     * @param start  the first character of field
     * @param len  the length of the character array being tokenized
     * @param workArea  a temporary work area
     * @param tokens  the list of parsed tokens
     * @return the starting position of the next field (the character
     *  immediately after the delimiter), or -1 if end of string found
     */
    private int readNextToken(char[] chars, int start, int len, StrBuilder workArea, List<String> tokens) {
        // skip all leading whitespace, unless it is the
        // field delimiter or the quote character
        while (start < len) {
            int removeLen = Math.max(
                    getIgnoredMatcher().isMatch(chars, start, start, len),
                    getTrimmerMatcher().isMatch(chars, start, start, len));
            if (removeLen == 0 ||
                getDelimiterMatcher().isMatch(chars, start, start, len) > 0 ||
                getQuoteMatcher().isMatch(chars, start, start, len) > 0) {
                break;
            }
            start += removeLen;
        }

        // handle reaching end
        if (start >= len) {
            addToken(tokens, "");
            return -1;
        }

        // handle empty token
        int delimLen = getDelimiterMatcher().isMatch(chars, start, start, len);
        if (delimLen > 0) {
            addToken(tokens, "");
            return start + delimLen;
        }

        // handle found token
        int quoteLen = getQuoteMatcher().isMatch(chars, start, start, len);
        if (quoteLen > 0) {
            return readWithQuotes(chars, start + quoteLen, len, workArea, tokens, start, quoteLen);
        }
        return readWithQuotes(chars, start, len, workArea, tokens, 0, 0);
    }

    /**
     * Reads a possibly quoted string token.
     *
     * @param chars  the character array being tokenized
     * @param start  the first character of field
     * @param len  the length of the character array being tokenized
     * @param workArea  a temporary work area
     * @param tokens  the list of parsed tokens
     * @param quoteStart  the start position of the matched quote, 0 if no quoting
     * @param quoteLen  the length of the matched quote, 0 if no quoting
     * @return the starting position of the next field (the character
     *  immediately after the delimiter, or if end of string found,
     *  then the length of string
     */
    private int readWithQuotes(char[] chars, int start, int len, StrBuilder workArea,
                               List<String> tokens, int quoteStart, int quoteLen) {
        // Loop until we've found the end of the quoted
        // string or the end of the input
        workArea.clear();
        int pos = start;
        boolean quoting = quoteLen > 0;
        int trimStart = 0;

        while (pos < len) {
            // quoting mode can occur several times throughout a string
            // we must switch between quoting and non-quoting until we
            // encounter a non-quoted delimiter, or end of string
            if (quoting) {
                // In quoting mode
                // If we've found a quote character, see if it's
                // followed by a second quote.  If so, then we need
                // to actually put the quote character into the token
                // rather than end the token.
                if (isQuote(chars, pos, len, quoteStart, quoteLen)) {
                    if (isQuote(chars, pos + quoteLen, len, quoteStart, quoteLen)) {
                        // matched pair of quotes, thus an escaped quote
                        workArea.append(chars, pos, quoteLen);
                        pos += quoteLen * 2;
                        trimStart = workArea.size();
                        continue;
                    }

                    // end of quoting
                    quoting = false;
                    pos += quoteLen;
                    continue;
                }

                // copy regular character from inside quotes
                workArea.append(chars[pos++]);
                trimStart = workArea.size();

            } else {
                // Not in quoting mode

                // check for delimiter, and thus end of token
                int delimLen = getDelimiterMatcher().isMatch(chars, pos, start, len);
                if (delimLen > 0) {
                    // return condition when end of token found
                    addToken(tokens, workArea.substring(0, trimStart));
                    return pos + delimLen;
                }

                // check for quote, and thus back into quoting mode
                if (quoteLen > 0 && isQuote(chars, pos, len, quoteStart, quoteLen)) {
                    quoting = true;
                    pos += quoteLen;
                    continue;
                }

                // check for ignored (outside quotes), and ignore
                int ignoredLen = getIgnoredMatcher().isMatch(chars, pos, start, len);
                if (ignoredLen > 0) {
                    pos += ignoredLen;
                    continue;
                }

                // check for trimmed character
                // don't yet know if its at the end, so copy to workArea
                // use trimStart to keep track of trim at the end
                int trimmedLen = getTrimmerMatcher().isMatch(chars, pos, start, len);
                if (trimmedLen > 0) {
                    workArea.append(chars, pos, trimmedLen);
                    pos += trimmedLen;
                    continue;
                }

                // copy regular character from outside quotes
                workArea.append(chars[pos++]);
                trimStart = workArea.size();
            }
        }

        // return condition when end of string found
        addToken(tokens, workArea.substring(0, trimStart));
        return -1;
    }

    /**
     * Checks if the characters at the index specified match the quote
     * already matched in readNextToken().
     *
     * @param chars  the character array being tokenized
     * @param pos  the position to check for a quote
     * @param len  the length of the character array being tokenized
     * @param quoteStart  the start position of the matched quote, 0 if no quoting
     * @param quoteLen  the length of the matched quote, 0 if no quoting
     * @return true if a quote is matched
     */
    private boolean isQuote(char[] chars, int pos, int len, int quoteStart, int quoteLen) {
        for (int i = 0; i < quoteLen; i++) {
            if (pos + i >= len || chars[pos + i] != chars[quoteStart + i]) {
                return false;
            }
        }
        return true;
    }

    // Delimiter
    //-----------------------------------------------------------------------
    /**
     * Gets the field delimiter matcher.
     *
     * @return the delimiter matcher in use
     */
    public StrMatcher getDelimiterMatcher() {
        return this.delimMatcher;
    }

    /**
     * Sets the field delimiter matcher.
     * <p>
     * The delimiter is used to separate one token from another.
     *
     * @param delim  the delimiter matcher to use
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterMatcher(StrMatcher delim) {
        if (delim == null) {
            this.delimMatcher = StrMatcher.noneMatcher();
        } else {
            this.delimMatcher = delim;
        }
        return this;
    }

    /**
     * Sets the field delimiter character.
     *
     * @param delim  the delimiter character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterChar(char delim) {
        return setDelimiterMatcher(StrMatcher.charMatcher(delim));
    }

    /**
     * Sets the field delimiter string.
     *
     * @param delim  the delimiter string to use
     * @return this, to enable chaining
     */
    public StrTokenizer setDelimiterString(String delim) {
        return setDelimiterMatcher(StrMatcher.stringMatcher(delim));
    }

    // Quote
    //-----------------------------------------------------------------------
    /**
     * Gets the quote matcher currently in use.
     * <p>
     * The quote character is used to wrap data between the tokens.
     * This enables delimiters to be entered as data.
     * The default value is '"' (double quote).
     *
     * @return the quote matcher in use
     */
    public StrMatcher getQuoteMatcher() {
        return quoteMatcher;
    }

    /**
     * Set the quote matcher to use.
     * <p>
     * The quote character is used to wrap data between the tokens.
     * This enables delimiters to be entered as data.
     *
     * @param quote  the quote matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setQuoteMatcher(StrMatcher quote) {
        if (quote != null) {
            this.quoteMatcher = quote;
        }
        return this;
    }

    /**
     * Sets the quote character to use.
     * <p>
     * The quote character is used to wrap data between the tokens.
     * This enables delimiters to be entered as data.
     *
     * @param quote  the quote character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setQuoteChar(char quote) {
        return setQuoteMatcher(StrMatcher.charMatcher(quote));
    }

    // Ignored
    //-----------------------------------------------------------------------
    /**
     * Gets the ignored character matcher.
     * <p>
     * These characters are ignored when parsing the String, unless they are
     * within a quoted region.
     * The default value is not to ignore anything.
     *
     * @return the ignored matcher in use
     */
    public StrMatcher getIgnoredMatcher() {
        return ignoredMatcher;
    }

    /**
     * Set the matcher for characters to ignore.
     * <p>
     * These characters are ignored when parsing the String, unless they are
     * within a quoted region.
     *
     * @param ignored  the ignored matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoredMatcher(StrMatcher ignored) {
        if (ignored != null) {
            this.ignoredMatcher = ignored;
        }
        return this;
    }

    /**
     * Set the character to ignore.
     * <p>
     * This character is ignored when parsing the String, unless it is
     * within a quoted region.
     *
     * @param ignored  the ignored character to use
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoredChar(char ignored) {
        return setIgnoredMatcher(StrMatcher.charMatcher(ignored));
    }

    // Trimmer
    //-----------------------------------------------------------------------
    /**
     * Gets the trimmer character matcher.
     * <p>
     * These characters are trimmed off on each side of the delimiter
     * until the token or quote is found.
     * The default value is not to trim anything.
     *
     * @return the trimmer matcher in use
     */
    public StrMatcher getTrimmerMatcher() {
        return trimmerMatcher;
    }

    /**
     * Sets the matcher for characters to trim.
     * <p>
     * These characters are trimmed off on each side of the delimiter
     * until the token or quote is found.
     *
     * @param trimmer  the trimmer matcher to use, null ignored
     * @return this, to enable chaining
     */
    public StrTokenizer setTrimmerMatcher(StrMatcher trimmer) {
        if (trimmer != null) {
            this.trimmerMatcher = trimmer;
        }
        return this;
    }

    //-----------------------------------------------------------------------
    /**
     * Gets whether the tokenizer currently returns empty tokens as null.
     * The default for this property is false.
     *
     * @return true if empty tokens are returned as null
     */
    public boolean isEmptyTokenAsNull() {
        return this.emptyAsNull;
    }

    /**
     * Sets whether the tokenizer should return empty tokens as null.
     * The default for this property is false.
     *
     * @param emptyAsNull  whether empty tokens are returned as null
     * @return this, to enable chaining
     */
    public StrTokenizer setEmptyTokenAsNull(boolean emptyAsNull) {
        this.emptyAsNull = emptyAsNull;
        return this;
    }

    //-----------------------------------------------------------------------
    /**
     * Gets whether the tokenizer currently ignores empty tokens.
     * The default for this property is true.
     *
     * @return true if empty tokens are not returned
     */
    public boolean isIgnoreEmptyTokens() {
        return ignoreEmptyTokens;
    }

    /**
     * Sets whether the tokenizer should ignore and not return empty tokens.
     * The default for this property is true.
     *
     * @param ignoreEmptyTokens  whether empty tokens are not returned
     * @return this, to enable chaining
     */
    public StrTokenizer setIgnoreEmptyTokens(boolean ignoreEmptyTokens) {
        this.ignoreEmptyTokens = ignoreEmptyTokens;
        return this;
    }

    //-----------------------------------------------------------------------
    /**
     * Gets the String content that the tokenizer is parsing.
     *
     * @return the string content being parsed
     */
    public String getContent() {
        if (chars == null) {
            return null;
        }
        return new String(chars);
    }

    //-----------------------------------------------------------------------
    /**
     * Creates a new instance of this Tokenizer. The new instance is reset so
     * that it will be at the start of the token list.
     * If a {@link CloneNotSupportedException} is caught, return <code>null</code>.
     *
     * @return a new instance of this Tokenizer which has been reset.
     */
    @Override
    public Object clone() {
        try {
            return cloneReset();
        } catch (CloneNotSupportedException ex) {
            return null;
        }
    }

    /**
     * Creates a new instance of this Tokenizer. The new instance is reset so that
     * it will be at the start of the token list.
     *
     * @return a new instance of this Tokenizer which has been reset.
     * @throws CloneNotSupportedException if there is a problem cloning
     */
    Object cloneReset() throws CloneNotSupportedException {
        // this method exists to enable 100% test coverage
        StrTokenizer cloned = (StrTokenizer) super.clone();
        if (cloned.chars != null) {
            cloned.chars = cloned.chars.clone();
        }
        cloned.reset();
        return cloned;
    }

    //-----------------------------------------------------------------------
    /**
     * Gets the String content that the tokenizer is parsing.
     *
     * @return the string content being parsed
     */
    @Override
    public String toString() {
        if (tokens == null) {
            return "StrTokenizer[not tokenized yet]";
        }
        return "StrTokenizer" + getTokenList();
    }

}