/**********************************************************************
Copyright (c) 2009 Asfun Net.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
**********************************************************************/
package net.asfun.jangod.parse;
import static net.asfun.jangod.parse.ParserConstants.*;
/**
 * Splits a template string into {@link Token}s.
 *
 * <p>The scanner walks the input one character at a time with a single
 * character of look-ahead. A {@code TOKEN_PREFIX} followed by one of the kind
 * markers ({@code TOKEN_NOTE}, {@code TOKEN_MACRO}, {@code TOKEN_TAG},
 * {@code TOKEN_ECHO}) opens a token; the matching kind marker followed by
 * {@code TOKEN_POSTFIX} closes it. Text between such tokens is emitted with
 * kind {@code TOKEN_FIXED}. Comment openers are counted so that comments may
 * nest.
 *
 * <p>Call {@link #init(String)} first, then {@link #getNextToken()} repeatedly
 * until it returns {@code null}.
 */
public class Tokenizer {

    /** Characters of the input being scanned. */
    private char[] is;
    /** Index of the next character to read. */
    private int currPost = 0;
    /** Index at which the token currently being scanned starts. */
    private int tokenStart = 0;
    /** Length of the token that is about to be emitted. */
    private int tokenLength = 0;
    /** Kind of the token currently being scanned; -1 until the first character decides it. */
    private int tokenKind = -1;
    /** Number of characters in the input. */
    private int length = 0;
    /** Start index of the token that is about to be emitted. */
    private int lastStart = 0;
    /** Current comment nesting depth. */
    private int inComment = 0;

    /**
     * Loads a new input and rewinds the scanner state.
     *
     * @param inputstream the text to tokenize
     */
    public void init(String inputstream) {
        is = inputstream.toCharArray();
        length = is.length;
        currPost = 0;
        tokenStart = 0;
        lastStart = 0;
        tokenKind = -1;
        inComment = 0;
    }

    /**
     * Scans forward and returns the next token.
     *
     * @return the next token, or {@code null} once the whole input has been consumed
     * @throws ParseException propagated from {@code Token.newToken}
     */
    public Token getNextToken() throws ParseException {
        while (currPost < length) {
            final char current = is[currPost++];
            if (currPost == length) {
                // The final character was just consumed: flush whatever is pending.
                return getEndToken();
            }
            // Look-ahead is always in range here because currPost < length.
            final char next = is[currPost];

            if (current == TOKEN_PREFIX) {
                // A new token may be starting.
                if (next == TOKEN_NOTE) {
                    final boolean alreadyInComment = inComment > 0;
                    inComment++;
                    if (!alreadyInComment && beginToken(next)) {
                        // The prefix is read again on the next call, which
                        // re-counts this comment opener; undo the increment.
                        inComment--;
                        return newToken(TOKEN_FIXED);
                    }
                } else if (next == TOKEN_MACRO || next == TOKEN_TAG || next == TOKEN_ECHO) {
                    final boolean outsideComment = inComment <= 0;
                    if (outsideComment
                            && (matchToken(next) || tokenKind <= 0)
                            && beginToken(next)) {
                        return newToken(TOKEN_FIXED);
                    }
                }
            } else if (current == TOKEN_MACRO || current == TOKEN_TAG || current == TOKEN_ECHO2) {
                // The current token may be closing.
                if (inComment <= 0
                        && matchToken(current)
                        && next == TOKEN_POSTFIX
                        && endToken()) {
                    final int finishedKind = tokenKind;
                    tokenKind = TOKEN_FIXED;
                    return newToken(finishedKind);
                }
            } else if (current == TOKEN_NOTE) {
                // A comment may be closing; only the outermost one ends the token.
                if (matchToken(current) && next == TOKEN_POSTFIX) {
                    if (--inComment <= 0 && endToken()) {
                        tokenKind = TOKEN_FIXED;
                        return newToken(TOKEN_NOTE);
                    }
                }
            } else if (tokenKind == -1) {
                // First ordinary character of the input: treat it as fixed text.
                tokenKind = TOKEN_FIXED;
            }
        }
        return null;
    }

    /**
     * Records that a token of the given kind opens at the prefix just read.
     * When text precedes the prefix, the scanner is stepped back onto the
     * prefix so that the preceding text can be emitted first.
     *
     * @param kind the kind marker that follows the prefix
     * @return {@code true} if pending text must be emitted before the new token
     */
    private boolean beginToken(char kind) {
        tokenLength = currPost - tokenStart - 1;
        tokenKind = kind;
        if (tokenLength <= 0) {
            return false;
        }
        lastStart = tokenStart;
        tokenStart = --currPost;
        return true;
    }

    /**
     * Marks the current token as ending at the postfix under the look-ahead
     * and moves the scanner past that postfix.
     *
     * @return {@code true} if the finished token has a positive length
     */
    private boolean endToken() {
        tokenLength = currPost - tokenStart + 1;
        if (tokenLength <= 0) {
            return false;
        }
        lastStart = tokenStart;
        tokenStart = ++currPost;
        return true;
    }

    /**
     * Builds the token covering everything from the current token start up to
     * the read position. Its kind is {@code TOKEN_NOTE} while a comment is
     * still open, otherwise {@code TOKEN_FIXED}.
     */
    private Token getEndToken() throws ParseException {
        tokenLength = currPost - tokenStart;
        final int type = inComment > 0 ? TOKEN_NOTE : TOKEN_FIXED;
        final String image = String.valueOf(is, tokenStart, tokenLength);
        return Token.newToken(type, image);
    }

    /**
     * Builds a token of the given kind from the {@code tokenLength} characters
     * starting at {@code lastStart}.
     */
    private Token newToken(int kind) throws ParseException {
        final String image = String.valueOf(is, lastStart, tokenLength);
        return Token.newToken(kind, image);
    }

    /**
     * Tells whether the given marker pairs with the kind of the token being
     * scanned. {@code TOKEN_ECHO} and {@code TOKEN_ECHO2} pair with each
     * other; every other marker pairs with itself.
     */
    private boolean matchToken(char kind) {
        final int partner;
        if (kind == TOKEN_ECHO) {
            partner = TOKEN_ECHO2;
        } else if (kind == TOKEN_ECHO2) {
            partner = TOKEN_ECHO;
        } else {
            partner = kind;
        }
        return tokenKind == partner;
    }
}