Tokenizer.java example

Explorer

cogitolearning-examples-master
- CogPar
  - src
    - uk
      - co
        cogitolearning
        cogpar
        AdditionExpressionNode.java
        ConstantExpressionNode.java
        EvaluationException.java
        ExponentiationExpressionNode.java
        ExpressionNode.java
        ExpressionNodeVisitor.java
        FunctionExpressionNode.java
        MultiplicationExpressionNode.java
        Parser.java
        ParserException.java
        SequenceExpressionNode.java
        SetVariable.java
        Test.java
        Token.java
        Tokenizer.java
        VariableExpressionNode.java
- PropertyAnimations
  - src
    - uk
      - co
        cogitolearning
        propertyanimations_example
        FpsTimeListener.java
        HsvEvaluator.java
        MainActivity.java
        MatrixEvaluator.java
        PropertyAnimation01.java
        PropertyAnimation02.java
        PropertyAnimation03.java
        PropertyAnimation04.java
        PropertyAnimation05.java
        PropertyAnimation06.java
        PropertyAnimation07.java
        PropertyAnimation08.java
        PropertyAnimation09.java
- SceneExample
  - src
    - uk
      - co
        cogitolearning
        sceneexample
        MainActivity.java

/*
 * This software and all files contained in it are distributed under the MIT license.
 * 
 * Copyright (c) 2013 Cogito Learning Ltd
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package uk.co.cogitolearning.cogpar;

import java.util.LinkedList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A class for reading an input string and separating it into tokens that can be
 * fed into Parser.
 * 
 * The user can add regular expressions that will be matched against the front
 * of the string. Regular expressions should not contain beginning-of-string or
 * end-of-string anchors or any capturing groups as these will be added by the
 * tokenizer itself.
 */
public class Tokenizer
{
  /**
   * Internal class holding the information about a token type.
   *
   * Declared static because it never accesses the enclosing Tokenizer
   * instance.
   */
  private static class TokenInfo
  {
    /** the regular expression to match against */
    public final Pattern regex;
    /** the token id that the regular expression is linked to */
    public final int token;

    /**
     * Construct TokenInfo with its values
     * @param regex the compiled regular expression to match against
     * @param token the token id that the regular expression is linked to
     */
    public TokenInfo(Pattern regex, int token)
    {
      this.regex = regex;
      this.token = token;
    }
  }

  /** 
   * a list of TokenInfo objects
   * 
   * Each token type corresponds to one entry in the list. The entries are
   * tried in the order in which they were added.
   */
  private final LinkedList<TokenInfo> tokenInfos;
  
  /** the list of tokens produced when tokenizing the input */
  private final LinkedList<Token> tokens;

  /** a tokenizer that can handle mathematical expressions, created on first use */
  private static Tokenizer expressionTokenizer = null;

  /**
   * Default constructor. Creates a tokenizer that recognizes no token types
   * until they are registered via add.
   */
  public Tokenizer()
  {
    tokenInfos = new LinkedList<TokenInfo>();
    tokens = new LinkedList<Token>();
  }

  /**
   * A static method that returns a tokenizer for mathematical expressions.
   * 
   * The tokenizer is created on the first call and the same instance is
   * returned on every subsequent call. No synchronization is performed.
   * 
   * @return a tokenizer that can handle mathematical expressions
   */
  public static Tokenizer getExpressionTokenizer()
  {
    if (expressionTokenizer == null)
    {
      expressionTokenizer = createExpressionTokenizer();
    }
    return expressionTokenizer;
  }

  /**
   * A static method that actually creates a tokenizer for mathematical expressions
   * @return a tokenizer that can handle mathematical expressions
   */
  private static Tokenizer createExpressionTokenizer()
  {
    Tokenizer tokenizer = new Tokenizer();

    tokenizer.add("[+-]", Token.PLUSMINUS);
    tokenizer.add("[*/]", Token.MULTDIV);
    tokenizer.add("\\^", Token.RAISED);

    // Function names must be registered before the generic variable pattern,
    // because token types are tried in insertion order. The negative lookahead
    // stops a function name from matching the front of a longer identifier.
    String funcs = FunctionExpressionNode.getAllFunctions();
    tokenizer.add("(" + funcs + ")(?!\\w)", Token.FUNCTION);

    tokenizer.add("\\(", Token.OPEN_BRACKET);
    tokenizer.add("\\)", Token.CLOSE_BRACKET);
    tokenizer.add("(?:\\d+\\.?|\\.\\d)\\d*(?:[Ee][-+]?\\d+)?", Token.NUMBER);
    tokenizer.add("[a-zA-Z]\\w*", Token.VARIABLE);

    return tokenizer;
  }

  /**
   * Add a regular expression and a token id to the internal list of recognized tokens.
   * 
   * The expression is wrapped in a group and anchored to the beginning of the
   * input before it is compiled.
   * 
   * @param regex the regular expression to match against 
   * @param token the token id that the regular expression is linked to
   * @throws java.util.regex.PatternSyntaxException if the resulting expression is not valid
   */
  public void add(String regex, int token)
  {
    tokenInfos.add(new TokenInfo(Pattern.compile("^(" + regex + ")"), token));
  }

  /**
   * Tokenize an input string.
   * 
   * The result of tokenizing can be accessed via getTokens. Any tokens from a
   * previous call are discarded. Leading and trailing whitespace of the input
   * and whitespace between tokens is ignored; the position stored in each
   * token is its offset within the trimmed input.
   * 
   * @param str the string to tokenize
   * @throws ParserException if none of the registered token types matches
   *         at some point in the input
   * @throws NullPointerException if str is null
   */
  public void tokenize(String str)
  {
    String input = str.trim();
    int length = input.length();
    tokens.clear();

    // Walk a cursor over the one trimmed string instead of re-creating the
    // remaining input after every token.
    int pos = 0;
    while (pos < length)
    {
      boolean match = false;
      for (TokenInfo info : tokenInfos)
      {
        Matcher m = info.regex.matcher(input);
        // With the default anchoring and opaque bounds the region behaves like
        // a stand-alone string, so "^" matches at pos.
        m.region(pos, length);
        // A zero-length match consumes nothing and would loop forever, so it
        // is treated as "no match" and the next token type is tried.
        if (m.lookingAt() && m.end() > pos)
        {
          match = true;
          tokens.add(new Token(info.token, m.group().trim(), pos));
          pos = skipWhitespace(input, m.end());
          break;
        }
      }
      if (!match)
      {
        throw new ParserException("Unexpected character in input: " + input.substring(pos));
      }
    }
  }

  /**
   * Advance past whitespace, using the same definition as String.trim
   * (every character up to and including the space character).
   * 
   * @param input the string being scanned
   * @param from the index to start scanning at
   * @return the index of the first non-whitespace character at or after from,
   *         or the length of input if there is none
   */
  private static int skipWhitespace(String input, int from)
  {
    int index = from;
    while (index < input.length() && input.charAt(index) <= ' ')
    {
      index++;
    }
    return index;
  }

  /**
   * Get the tokens generated in the last call to tokenize.
   * 
   * The returned list is the tokenizer's internal list; it is cleared and
   * refilled by the next call to tokenize.
   * 
   * @return a list of tokens to be fed to Parser
   */
  public LinkedList<Token> getTokens()
  {
    return tokens;
  }

}