/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 1997-2010 Oracle and/or its affiliates. All rights reserved.
*
* Oracle and Java are registered trademarks of Oracle and/or its affiliates.
* Other names may be trademarks of their respective owners.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common
* Development and Distribution License("CDDL") (collectively, the
* "License"). You may not use this file except in compliance with the
* License. You can obtain a copy of the License at
* http://www.netbeans.org/cddl-gplv2.html
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
* specific language governing permissions and limitations under the
* License. When distributing the software, include this License Header
* Notice in each file and include the License file at
* nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the
* License Header, with the fields enclosed by brackets [] replaced by
* your own identifying information:
* "Portions Copyrighted [year] [name of copyright owner]"
*
* Contributor(s):
*
* The Original Software is NetBeans. The Initial Developer of the Original
* Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
* Microsystems, Inc. All Rights Reserved.
*
* If you wish your version of this file to be governed by only the CDDL
* or only the GPL Version 2, indicate your decision by adding
* "[Contributor] elects to include this software in this distribution
* under the [CDDL or GPL Version 2] license." If you do not indicate a
* single choice of license, a recipient has the option to distribute
* your version of this file under either the CDDL, the GPL Version 2 or
* to extend the choice of license to its licensees as provided above.
* However, if you add GPL Version 2 code and therefore, elected the GPL
* Version 2 license, then the option applies only if the new code is
* made subject to such option by the copyright holder.
*/
package org.netbeans.modules.ruby.rhtml.lexer;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.netbeans.api.lexer.Token;
import org.netbeans.modules.ruby.rhtml.lexer.api.RhtmlTokenId;
import org.netbeans.spi.lexer.Lexer;
import org.netbeans.spi.lexer.LexerInput;
import org.netbeans.spi.lexer.LexerRestartInfo;
import org.netbeans.spi.lexer.TokenFactory;
/**
 * Lexer for RHTML documents: splits the input into HTML content, Ruby code
 * and the delimiters that separate them.
 * <p>
 * Recognized delimiters are the scriptlet openers {@code <%}, {@code <%=},
 * {@code <%#} and {@code <%-}, the closer {@code %>}, and a {@code %} that is
 * the first non-whitespace character of a line (the rest of that line is
 * returned as Ruby). {@code %%} in content and {@code <%%} not followed by
 * {@code >} are left in the HTML token.
 * <p>
 * The lexer is a small state machine; {@link #state()} exposes the current
 * state so that lexing can be restarted at a token boundary.
 *
 * @author Marek Fukala
 * @author Tor Norbye
 *
 * @todo <%% should be treated as HTML (<%) -- ditto for %%>
 *
 * @version 1.00
 */
public final class RhtmlLexer implements Lexer<RhtmlTokenId> {

    private static final Logger LOGGER = Logger.getLogger(RhtmlLexer.class.getName());

    private static final int EOF = LexerInput.EOF;

    // Internal analyzer states
    private static final int INIT = 0; // initial lexer state = content language
    private static final int ISA_LT = 1; // after '<' char
    private static final int ISA_LT_PC = 2; // after '<%' - comment or directive or scriptlet
    private static final int ISI_SCRIPTLET = 3; // inside Ruby scriptlet
    private static final int ISI_SCRIPTLET_PC = 4; // just after % in scriptlet
    private static final int ISI_COMMENT_SCRIPTLET = 5; // inside a Ruby comment scriptlet
    private static final int ISI_COMMENT_SCRIPTLET_PC = 6; // just after % in a Ruby comment scriptlet
    private static final int ISI_EXPR_SCRIPTLET = 7; // inside Ruby expression scriptlet
    private static final int ISI_EXPR_SCRIPTLET_PC = 8; // just after % in an expression scriptlet
    private static final int ISI_RUBY_LINE = 9; // just after % in an %-line

    private final LexerInput input;
    private final TokenFactory<RhtmlTokenId> tokenFactory;

    /** Main internal lexer state; one of the constants above. */
    private int state = INIT;

    /**
     * Creates a lexer over the input supplied by {@code info}.
     *
     * @param info restart info; its state is either {@code null} (start in the
     *        initial state) or an {@code Integer} previously returned by
     *        {@link #state()}
     */
    public RhtmlLexer(LexerRestartInfo<RhtmlTokenId> info) {
        this.input = info.input();
        this.tokenFactory = info.tokenFactory();
        if (info.state() == null) {
            this.state = INIT;
        } else {
            this.state = ((Integer) info.state()).intValue();
        }
    }

    /**
     * Returns the current lexer state.
     *
     * @return the internal state number, boxed as an {@code Integer}
     */
    public Object state() {
        return state;
    }

    /**
     * Recognizes the next token.
     *
     * @return the next token, or {@code null} when only EOF is left to read
     */
    public Token<RhtmlTokenId> nextToken() {
        while (true) {
            int actChar = input.read();
            if (actChar == EOF) {
                if (input.readLengthEOF() == 1) {
                    return null; // only the EOF was read
                }
                // There is pending text in front of the EOF: un-read the EOF
                // (the next nextToken() call will return null) and emit the
                // last token below.
                input.backup(1);
                break;
            }

            // A non-null result ends the token; null means "keep scanning".
            Token<RhtmlTokenId> result = null;
            switch (state) {
                case INIT:
                    if (actChar == '<') {
                        state = ISA_LT;
                    } else if (actChar == '%') {
                        result = percentInContent();
                    }
                    break;

                case ISA_LT:
                    if (actChar == '%') {
                        state = ISA_LT_PC;
                    } else if (actChar != '<') {
                        state = INIT; // just content
                    }
                    // On '<' stay in ISA_LT: this '<' may itself start "<%"
                    // (e.g. "<<%= x %>").
                    break;

                case ISA_LT_PC:
                    result = afterOpenerPrefix(actChar);
                    break;

                case ISI_SCRIPTLET:
                    if (actChar == '%') {
                        state = ISI_SCRIPTLET_PC;
                    }
                    break;

                case ISI_SCRIPTLET_PC:
                    result = afterPercentInScriptlet(actChar, ISI_SCRIPTLET, RhtmlTokenId.RUBY);
                    break;

                case ISI_EXPR_SCRIPTLET:
                    if (actChar == '%') {
                        state = ISI_EXPR_SCRIPTLET_PC;
                    }
                    break;

                case ISI_EXPR_SCRIPTLET_PC:
                    result = afterPercentInScriptlet(actChar, ISI_EXPR_SCRIPTLET, RhtmlTokenId.RUBY_EXPR);
                    break;

                case ISI_COMMENT_SCRIPTLET:
                    if (actChar == '%') {
                        state = ISI_COMMENT_SCRIPTLET_PC;
                    }
                    break;

                case ISI_COMMENT_SCRIPTLET_PC:
                    result = afterPercentInScriptlet(actChar, ISI_COMMENT_SCRIPTLET, RhtmlTokenId.RUBYCOMMENT);
                    break;

                case ISI_RUBY_LINE:
                    result = restOfRubyLine(actChar);
                    break;
            }
            if (result != null) {
                return result;
            }
        }

        // EOF was reached with text still pending; emit it according to the
        // state the scanner stopped in.
        switch (state) {
            case INIT:
                if (input.readLength() == 0) {
                    return null;
                }
                return token(RhtmlTokenId.HTML);
            case ISA_LT:
                // A trailing '<' never became "<%": it is plain content.
                state = INIT;
                return token(RhtmlTokenId.HTML);
            case ISA_LT_PC:
                if (input.readLength() == 2) {
                    // just "<%"
                    state = INIT;
                    return token(RhtmlTokenId.DELIMITER);
                }
                // Content followed by "<%": return the content first; the
                // "<%" is read again by the next call.
                input.backup(2);
                state = INIT;
                return token(RhtmlTokenId.HTML);
            case ISI_SCRIPTLET_PC:
                return pendingPercentAtEof(ISI_SCRIPTLET, RhtmlTokenId.RUBY);
            case ISI_SCRIPTLET:
                state = INIT;
                return token(RhtmlTokenId.RUBY);
            case ISI_EXPR_SCRIPTLET_PC:
                return pendingPercentAtEof(ISI_EXPR_SCRIPTLET, RhtmlTokenId.RUBY_EXPR);
            case ISI_EXPR_SCRIPTLET:
                state = INIT;
                return token(RhtmlTokenId.RUBY_EXPR);
            case ISI_COMMENT_SCRIPTLET_PC:
                return pendingPercentAtEof(ISI_COMMENT_SCRIPTLET, RhtmlTokenId.RUBYCOMMENT);
            case ISI_COMMENT_SCRIPTLET:
                state = INIT;
                return token(RhtmlTokenId.RUBYCOMMENT);
            default:
                LOGGER.warning("RhtmlLexer - unhandled state : " + state); // NOI18N
        }
        return null;
    }

    /** Nothing to release. */
    public void release() {
    }

    /**
     * Creates a token of the given id from the text read so far, logging a
     * warning (with a stack trace) if that text is empty.
     */
    private Token<RhtmlTokenId> token(RhtmlTokenId id) {
        if (input.readLength() == 0) {
            String message = "Error - token length is zero!; state = " + state; // NOI18N
            LOGGER.log(Level.WARNING, message, new IllegalStateException(message));
        }
        return tokenFactory.createToken(id);
    }

    /**
     * Handles a '%' read in content (INIT state).
     *
     * @return a token to emit, or {@code null} if the '%' is ordinary content
     */
    private Token<RhtmlTokenId> percentInContent() {
        int peek = input.read();
        if (peek == '%') {
            // %% means just %
            return null;
        }
        // Un-read the peeked character. A peeked EOF is un-read as well: it
        // counts as one read, so leaving it pending would make the backup
        // below undo the EOF instead of the '%'.
        input.backup(1);

        if (input.readLength() == 1) {
            // The '%' is the first character of the token: start of a %-line
            state = ISI_RUBY_LINE;
            return token(RhtmlTokenId.DELIMITER);
        }

        // Walk back over the text in front of the '%' (-2: skip the '%')
        CharSequence text = input.readText();
        for (int i = text.length() - 2; i >= 0; i--) {
            char c = text.charAt(i);
            if (c == '\n') {
                // Only whitespace between the line start and the '%': finish
                // the content token here; the '%' is read again by the next
                // call and becomes the delimiter.
                input.backup(1);
                return token(RhtmlTokenId.HTML);
            }
            if (!Character.isWhitespace(c)) {
                // The % is not the beginning of a line
                break;
            }
        }
        return null;
    }

    /**
     * Handles the character following "<%" (ISA_LT_PC state).
     *
     * @return a token to emit, or {@code null} for the "<%%" escape, which
     *         stays part of the content
     */
    private Token<RhtmlTokenId> afterOpenerPrefix(int actChar) {
        switch (actChar) {
            case '=':
                return openDelimiter(ISI_EXPR_SCRIPTLET, true);
            case '#':
                return openDelimiter(ISI_COMMENT_SCRIPTLET, true);
            case '-':
                return openDelimiter(ISI_SCRIPTLET, true);
            case '%': {
                int peek = input.read();
                input.backup(1); // un-read the peek, including a peeked EOF
                if (peek == '>') {
                    // "<%%>" is a "<%" opener directly followed by a "%>"
                    // closer; the second '%' belongs to the closer
                    return openDelimiter(ISI_SCRIPTLET, false);
                }
                if (input.readLength() == 3) {
                    // "<%%" is just an escape for "<%" in HTML
                    state = INIT;
                    return null;
                }
                return contentBeforeOpener();
            }
            default:
                // plain "<%"; the third character is part of the Ruby code
                return openDelimiter(ISI_SCRIPTLET, false);
        }
    }

    /**
     * Emits an opening delimiter once '<', '%' and one more character are read.
     * If content precedes the delimiter, the content is emitted instead and
     * the delimiter is read again by the next call.
     *
     * @param scriptletState state to enter after the delimiter
     * @param includeThirdChar whether the third character belongs to the
     *        delimiter ("<%=") or to what follows it ("<%")
     */
    private Token<RhtmlTokenId> openDelimiter(int scriptletState, boolean includeThirdChar) {
        if (input.readLength() != 3) {
            return contentBeforeOpener();
        }
        if (!includeThirdChar) {
            input.backup(1);
        }
        state = scriptletState;
        return token(RhtmlTokenId.DELIMITER);
    }

    /** Un-reads the three opener characters and emits the preceding content as HTML. */
    private Token<RhtmlTokenId> contentBeforeOpener() {
        input.backup(3);
        state = INIT;
        return token(RhtmlTokenId.HTML);
    }

    /**
     * Handles the character following a '%' inside a scriptlet.
     *
     * @param scriptletState the "inside" state of the current scriptlet kind
     * @param contentId token id for that scriptlet's content
     * @return the "%>" delimiter, the content in front of it, or {@code null}
     *         if the '%' did not start a closer
     */
    private Token<RhtmlTokenId> afterPercentInScriptlet(int actChar, int scriptletState,
            RhtmlTokenId contentId) {
        if (actChar != '>') {
            state = scriptletState;
            return null;
        }
        if (input.readLength() == 2) {
            // just the '%>' symbol read
            state = INIT;
            return token(RhtmlTokenId.DELIMITER);
        }
        // Return the scriptlet content; '%>' is read again by the next call
        input.backup(2);
        state = scriptletState;
        return token(contentId);
    }

    /**
     * Consumes the remainder of a %-line up to, but not including, the newline
     * (or up to EOF) and switches back to content.
     *
     * @return the Ruby token, or {@code null} if the line is empty
     */
    private Token<RhtmlTokenId> restOfRubyLine(int actChar) {
        while (actChar != '\n') {
            actChar = input.read();
            if (actChar == EOF) {
                break;
            }
        }
        if (actChar == '\n') {
            input.backup(1);
        }
        state = INIT;
        if (input.readLength() > 0) {
            return token(RhtmlTokenId.RUBY);
        }
        return null;
    }

    /**
     * Emits the last token when EOF directly follows a '%' inside a scriptlet.
     * A lone '%' is emitted as a delimiter; otherwise the content in front of
     * it is emitted first and the '%' is read again by the next call.
     */
    private Token<RhtmlTokenId> pendingPercentAtEof(int scriptletState, RhtmlTokenId contentId) {
        if (input.readLength() == 1) {
            state = INIT;
            return token(RhtmlTokenId.DELIMITER);
        }
        input.backup(1);
        state = scriptletState;
        return token(contentId);
    }
}