/* * Copyright 2004-2015 the Seasar Foundation and the Others. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied. See the License for the specific language * governing permissions and limitations under the License. */ package org.seasar.framework.util; /** * トークンを認識するクラスです。 * * @author higa * */ public class Tokenizer { /** * EOFをあらわします。 */ public static final int TT_EOF = -1; /** * Quoteをあらわします。 */ public static final int TT_QUOTE = '\''; /** * 単語をあらわします。 */ public static final int TT_WORD = -3; private static final int TT_NOTHING = -4; private static final int NEED_CHAR = Integer.MAX_VALUE; private static final int QUOTE = '\''; private static final byte CT_WHITESPACE = 1; private static final byte CT_ALPHA = 4; private byte[] ctype; private static byte[] defaultCtype = new byte[256]; private String str; private int colno = 0; private int ttype = TT_NOTHING; private String sval; private char[] buf = new char[20]; private int peekc = NEED_CHAR; private byte peekct = 0; static { setup(defaultCtype); } /** * {@link Tokenizer}を作成します。 * * @param str * 文字列 */ public Tokenizer(String str) { this(str, defaultCtype); } /** * {@link Tokenizer}を作成します。 * * @param str * 文字列 * @param ctype * 文字のタイプの配列 */ public Tokenizer(String str, byte[] ctype) { this.str = str; this.ctype = ctype; } /** * @param ctype2 * 文字のタイプの配列 */ protected static void setup(byte[] ctype2) { wordChars(ctype2, 'a', 'z'); wordChars(ctype2, 'A', 'Z'); wordChars(ctype2, '0', '9'); wordChar(ctype2, '@'); wordChar(ctype2, '|'); wordChar(ctype2, '_'); wordChar(ctype2, '?'); wordChar(ctype2, '>'); wordChar(ctype2, '='); wordChar(ctype2, '!'); wordChar(ctype2, '<'); wordChar(ctype2, '"'); wordChar(ctype2, '~'); wordChar(ctype2, '*'); wordChar(ctype2, '.'); // ordinaryChar(ctype2, '='); // ordinaryChar(ctype2, ','); whitespaceChars(ctype2, 0, ' '); } /** * 単語用の文字として設定します。 * * @param ctype2 * 文字のタイプの配列 * @param low * @param hi */ protected static void wordChars(byte[] ctype2, int low, int hi) { if (low < 0) { low = 0; } if (hi >= ctype2.length) { hi = ctype2.length - 1; } while (low <= hi) { ctype2[low++] |= CT_ALPHA; } } /** * 単語用の文字として設定します。 * * @param ctype2 * 文字のタイプの配列 * @param val * 文字コード */ protected static void wordChar(byte[] ctype2, int val) { ctype2[val] |= CT_ALPHA; } /** * 空白用の文字として設定します。 * * @param ctype2 * 文字のタイプの配列 * @param low * 最小の文字コード * @param hi * 最大の文字コード */ protected static void whitespaceChars(byte[] ctype2, int low, int hi) { if (low < 0) { low = 0; } if (hi >= ctype2.length) { hi = ctype2.length - 1; } while (low <= hi) { ctype2[low++] = CT_WHITESPACE; } } /** * 単独で存在する文字として設定します。 * * @param ctype2 * 文字のタイプの配列 * @param ch * 文字コード */ protected static void ordinaryChar(byte[] ctype2, int ch) { if (ch >= 0 && ch < ctype2.length) { ctype2[ch] = 0; } } /** * 文字列の値を返します。 * * @return 文字列の値 */ public final String getStringValue() { return sval; } /** * 次のトークンに進めます。 * * @return トークンのタイプ */ public int nextToken() { initVal(); if (processEOF()) { return ttype; } if (processWhitespace()) { return ttype; } if (processWord()) { return ttype; } if (processQuote()) { return ttype; } if (processOrdinary()) { return ttype; } return ttype = peekc; } /** * 既に読み込んだ文字列を返します。 * * @return 既に読み込んだ文字列 */ public final String getReadString() { return str.substring(0, colno - 1); } private int read() { if (colno >= str.length()) { return -1; } return str.charAt(colno++); } private void initVal() { sval = null; } private boolean processEOF() { if (peekc < 0) { ttype = TT_EOF; return true; } if (peekc == NEED_CHAR) { peekc = read(); if (peekc < 0) { ttype = TT_EOF; return true; } } return false; } private boolean processWhitespace() { peekct = peekc < 256 ? ctype[peekc] : CT_ALPHA; while ((peekct & CT_WHITESPACE) != 0) { if (peekc == '\r') { peekc = read(); if (peekc == '\n') { peekc = read(); } } else { peekc = read(); } if (peekc < 0) { ttype = TT_EOF; return true; } peekct = peekc < 256 ? ctype[peekc] : CT_ALPHA; } return false; } private boolean processWord() { if ((peekct & CT_ALPHA) != 0) { int i = 0; do { if (i >= buf.length) { char nb[] = new char[buf.length * 2]; System.arraycopy(buf, 0, nb, 0, buf.length); buf = nb; } buf[i++] = (char) peekc; peekc = read(); peekct = peekc < 0 ? CT_WHITESPACE : (peekc < 256 ? ctype[peekc] : CT_ALPHA); } while ((peekct & (CT_ALPHA)) != 0); sval = String.copyValueOf(buf, 0, i); ttype = TT_WORD; return true; } return false; } private boolean processQuote() { if (peekc == QUOTE) { ttype = QUOTE; int i = 0; int d = read(); int c = d; while (d >= 0) { if (d == QUOTE) { int d2 = read(); if (d2 == QUOTE) { c = QUOTE; } else { d = d2; break; } } else { c = d; } if (i >= buf.length) { char nb[] = new char[buf.length * 2]; System.arraycopy(buf, 0, nb, 0, buf.length); buf = nb; } buf[i++] = (char) c; d = read(); } peekc = d; sval = String.copyValueOf(buf, 0, i); return true; } return false; } private boolean processOrdinary() { if (peekct == 0) { ttype = peekc; peekc = read(); peekct = peekc < 0 ? CT_WHITESPACE : (peekc < 256 ? ctype[peekc] : CT_ALPHA); return true; } return false; } }