// Tokenizer.java example
//
// Explorer
// cocoon-master
// - cocoon-BRANCH_2_1_X
//   - src
//   - tools
//     - src
//       - anttasks
//         DocumentCache.java
//         ManifestToolTask.java
//         PoolSetterTask.java
//         SitemapTask.java
//         XConfToolTask.java
//       - loader
//         Loader.java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.util;

import java.util.Enumeration;
import java.util.NoSuchElementException;

/**
 * Replacement for StringTokenizer in java.util, because of bug in the
 * Sun's implementation.
 *
 * <p>This tokenizer reports empty tokens: a delimiter at the very start of
 * the string, two adjacent delimiters, and a delimiter at the very end of
 * the string each produce an empty string token. An empty input string
 * yields exactly one empty token.</p>
 *
 * <p>Instances keep a mutable read position and perform no
 * synchronization.</p>
 *
 * @deprecated Use commons lang. This class will be removed in 2.2.
 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek</A>
 * @version CVS $Id$
 */
public class Tokenizer implements Enumeration {

  /**
   * Default delimiters " \t\n\r\f":
   * the space character, the tab character, the newline character,
   * the carriage-return character, and the form-feed character.
   */
  public static final String DEFAULT_DELIMITERS = " \t\n\r\f";

  /**
   * String to tokenize.
   */
  private final String str;

  /**
   * Delimiters. May be replaced by {@link #nextToken(String)}.
   */
  private String delim;

  /**
   * Flag indicating whether to return the delimiters as tokens.
   */
  private final boolean returnTokens;

  /**
   * Position at which the previous call to nextToken started,
   * or -1 if no token has been read since construction or reset.
   */
  private int previous = -1;

  /**
   * Current position in str string.
   */
  private int current = 0;

  /**
   * Maximal position in str string (its length).
   */
  private final int max;

  /**
   * Constructs a string tokenizer for the specified string. All characters
   * in the delim argument are the delimiters for separating tokens.
   * If the returnTokens flag is true, then the delimiter characters are
   * also returned as tokens. Each delimiter is returned as a string of
   * length one. If the flag is false, the delimiter characters are skipped
   * and only serve as separators between tokens.
   *
   * @param str           a string to be parsed
   * @param delim         the delimiters
   * @param returnTokens  flag indicating whether to return the delimiters
   *                      as tokens
   *
   * @exception NullPointerException  if str is null
   */
  public Tokenizer(String str, String delim, boolean returnTokens) {
    this.str = str;
    this.delim = delim;
    this.returnTokens = returnTokens;
    this.max = str.length();
  }

  /**
   * Constructs a string tokenizer for the specified string. The characters
   * in the delim argument are the delimiters for separating tokens.
   * Delimiter characters themselves will not be treated as tokens.
   *
   * @param str          a string to be parsed
   * @param delim        the delimiters
   */
  public Tokenizer(String str, String delim) {
    this(str, delim, false);
  }

  /**
   * Constructs a string tokenizer for the specified string. The character
   * in the delim argument is the delimiter for separating tokens.
   * The delimiter character itself will not be treated as a token.
   *
   * @param str          a string to be parsed
   * @param delim        the delimiter
   */
  public Tokenizer(String str, char delim) {
    this(str, String.valueOf(delim), false);
  }

  /**
   * Constructs a string tokenizer for the specified string. The tokenizer
   * uses the default delimiter set, which is " \t\n\r\f": the space
   * character, the tab character, the newline character, the carriage-return
   * character, and the form-feed character. Delimiter characters themselves
   * will not be treated as tokens.
   *
   * @param str          a string to be parsed
   */
  public Tokenizer(String str) {
    this(str, DEFAULT_DELIMITERS, false);
  }

  /**
   * Tests if there are more tokens available from this tokenizer's string.
   * If this method returns true, then a subsequent call to nextToken with
   * no argument will successfully return a token.
   *
   * @return true if and only if there is at least one token in the string
   * after the current position; false otherwise.
   */
  public boolean hasMoreTokens() {
    return current < max || atFinalEmptyToken();
  }

  /**
   * Returns the next token from this string tokenizer.
   *
   * @return the next token from this string tokenizer
   *
   * @exception NoSuchElementException  if there are no more tokens in this
   *                                    tokenizer's string
   */
  public String nextToken() throws NoSuchElementException {
    if (atFinalEmptyToken()) {
      // Step past max so that this last empty token is reported only once.
      current++;
      return "";
    }

    if (current >= max) {
      throw new NoSuchElementException();
    }

    int start = current;
    String result = null;

    if (isDelimiter(start)) {
      if (previous == -1
        || (returnTokens && previous != current && isDelimiter(previous))) {
        // Empty token in front of a leading delimiter, or between two
        // adjacent delimiters when delimiters are returned. The delimiter
        // itself is deliberately NOT consumed here: the next call either
        // returns it (returnTokens) or skips it as a separator. Consuming
        // it at this point would lose the empty token that follows a
        // leading delimiter (e.g. the second token of ",,a").
        result = "";
      } else if (returnTokens) {
        // The delimiter itself is the token.
        result = str.substring(start, ++current);
      } else {
        // Plain separator in front of the token scanned below.
        current++;
      }
    }

    previous = start;

    if (result != null) {
      return result;
    }

    // Scan up to (not including) the next delimiter or the end of input.
    int tokenStart = current;
    while (current < max && !isDelimiter(current)) {
      current++;
    }

    return str.substring(tokenStart, current);
  }

  /**
   * Returns the next token in this string tokenizer's string. First, the
   * set of characters considered to be delimiters by this Tokenizer
   * object is changed to be the characters in the string delim.
   * Then the next token in the string after the current position is
   * returned. The current position is advanced beyond the recognized token.
   * The new delimiter set remains the default after this call.
   *
   * @param delim the new delimiters
   *
   * @return the next token, after switching to the new delimiter set
   *
   * @exception NoSuchElementException  if there are no more tokens in this
   *                                    tokenizer's string.
   */
  public String nextToken(String delim) throws NoSuchElementException {
    this.delim = delim;
    return nextToken();
  }

  /**
   * Returns the same value as the hasMoreTokens method. It exists so that
   * this class can implement the Enumeration interface.
   *
   * @return true if there are more tokens; false otherwise.
   */
  public boolean hasMoreElements() {
    return hasMoreTokens();
  }

  /**
   * Returns the same value as the nextToken method, except that its
   * declared return value is Object rather than String. It exists so that
   * this class can implement the Enumeration interface.
   *
   * @return the next token in the string
   *
   * @exception NoSuchElementException  if there are no more tokens in this
   *                                    tokenizer's string
   */
  public Object nextElement() {
    return nextToken();
  }

  /**
   * Calculates the number of times that this tokenizer's nextToken method
   * can be called before it generates an exception. The current position
   * is not advanced.
   *
   * <p>The count is obtained by running the tokenizer to the end from its
   * current state and then restoring that state, so it always agrees with
   * what hasMoreTokens and nextToken will actually deliver, also when some
   * tokens have already been consumed.</p>
   *
   * @return  the number of tokens remaining in the string using the
   *          current delimiter set
   */
  public int countTokens() {
    final int savedPrevious = previous;
    final int savedCurrent = current;
    int count = 0;

    try {
      while (hasMoreTokens()) {
        nextToken();
        count++;
      }
    } finally {
      // Restore the read position even if tokenizing failed part-way.
      previous = savedPrevious;
      current = savedCurrent;
    }

    return count;
  }

  /**
   * Resets this tokenizer's state so the tokenizing starts from the
   * beginning. The delimiter set currently in use is kept.
   */
  public void reset() {
    previous = -1;
    current = 0;
  }

  /**
   * Constructs a string tokenizer for the specified string. All characters
   * in the delim argument are the delimiters for separating tokens.
   * If the returnTokens flag is true, then the delimiter characters are
   * also returned as tokens. Each delimiter is returned as a string of
   * length one. If the flag is false, the delimiter characters are skipped
   * and only serve as separators between tokens. Then tokenizes the str
   * and returns a String[] array with the tokens.
   *
   * @param str           a string to be parsed
   * @param delim         the delimiters
   * @param returnTokens  flag indicating whether to return the delimiters
   *                      as tokens
   *
   * @return array with tokens; never contains null elements
   */
  public static String[] tokenize(String str, String delim,
    boolean returnTokens) {

    Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens);
    String[] tokens = new String[tokenizer.countTokens()];

    for (int i = 0; i < tokens.length; i++) {
      tokens[i] = tokenizer.nextToken();
    }

    return tokens;
  }

  /**
   * Tells whether the only thing left to report is the final empty token:
   * the single token of an empty input string, or the empty token that
   * follows a trailing delimiter when delimiters are returned as tokens.
   */
  private boolean atFinalEmptyToken() {
    return current == max
      && (max == 0 || (returnTokens && isDelimiter(previous)));
  }

  /**
   * Tells whether the character at the given index of the input string
   * belongs to the current delimiter set.
   */
  private boolean isDelimiter(int index) {
    return delim.indexOf(str.charAt(index)) >= 0;
  }
}