/* ***** BEGIN LICENSE BLOCK ***** * Version: GPL 3 * * This program is Copyright (C) 2007-2008 Aptana, Inc. All Rights Reserved * This program is licensed under the GNU General Public license, version 3 (GPL). * * This program is distributed in the hope that it will be useful, but * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or * NONINFRINGEMENT. Redistribution, except as permitted by the GPL, * is prohibited. * * You can redistribute and/or modify this program under the terms of the GPL, * as published by the Free Software Foundation. You should * have received a copy of the GNU General Public License, Version 3 along * with this program; if not, write to the Free Software Foundation, Inc., 51 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Aptana provides a special exception to allow redistribution of this file * with certain other code and certain additional terms * pursuant to Section 7 of the GPL. You may view the exception and these * terms on the web at http://www.aptana.com/legal/gpl/. * * You may view the GPL, and Aptana's exception and additional terms in the file * titled license-jaxer.html in the main distribution folder of this program. * * Any modifications to this file must keep this entire header intact. 
* * ***** END LICENSE BLOCK ***** */
package com.aptana.ide.editor.xml.parsing;

import java.text.ParseException;

import com.aptana.ide.editor.xml.lexing.XMLTokenTypes;
import com.aptana.ide.editors.unified.parsing.UnifiedParser;
import com.aptana.ide.lexer.ILexer;
import com.aptana.ide.lexer.Lexeme;
import com.aptana.ide.lexer.LexerException;
import com.aptana.ide.lexer.Range;
import com.aptana.ide.parsing.IParseState;
import com.aptana.ide.parsing.ParserInitializationException;
import com.aptana.ide.parsing.nodes.IParseNode;

/**
 * Common base for XML parsers built on {@link UnifiedParser}. It provides lexeme advancing (with
 * rescans for unrecognized input and for comment starts), lexer initialization, parse-state
 * creation, and helpers that scan a DOCTYPE declaration and the text that follows a tag.
 *
 * @author Kevin Lindsey
 */
public class XMLParserBase extends UnifiedParser
{
    // lexer groups only this class switches to
    private static final String TEXT_GROUP = "text"; //$NON-NLS-1$
    private static final String ERROR_GROUP = "error"; //$NON-NLS-1$
    private static final String UNCLOSED_COMMENT_GROUP = "unclosed-comment"; //$NON-NLS-1$

    // name handed to ILexer.find() to locate the end of a DOCTYPE declaration
    private static final String DOCTYPE_DECLARATION_DELIMITER = "doctype-declaration-delimiter"; //$NON-NLS-1$

    // lexer groups shared with subclasses
    protected static final String DEFAULT_GROUP = "default"; //$NON-NLS-1$
    protected static final String XML_DECLARATION_GROUP = "xml-declaration"; //$NON-NLS-1$
    protected static final String CDATA_SECTION_GROUP = "cdata-section"; //$NON-NLS-1$
    protected static final String PROCESSING_INSTRUCTION_GROUP = "processing-instruction"; //$NON-NLS-1$
    public static final String DOCTYPE_DECLARATION_GROUP = "doctype-declaration"; //$NON-NLS-1$

    /**
     * The parse node whose range is extended by {@link #advance()} each time a lexeme is left
     * behind. May be null, in which case no range is updated.
     */
    protected IParseNode _currentElement;

    /**
     * Creates a parser for the XML MIME type ({@code XMLMimeType.MimeType}).
     *
     * @throws ParserInitializationException
     *             propagated from the delegated constructor
     */
    public XMLParserBase() throws ParserInitializationException
    {
        this(XMLMimeType.MimeType);
    }

    /**
     * Creates a parser for the given language.
     *
     * @param language
     *            the language identifier passed on to the superclass constructor
     * @throws ParserInitializationException
     *             propagated from the superclass constructor
     */
    public XMLParserBase(String language) throws ParserInitializationException
    {
        super(language);
    }

    /**
     * Advances to the next lexeme in the lexeme stream.
     * <p>
     * Before moving on, the lexeme being left (when it is neither null nor EOS) is folded into the
     * range of {@link #_currentElement}, if there is one. The next lexeme is then read from the
     * lexer with two recovery rescans:
     * <ul>
     * <li>if the lexer returns no lexeme although it is not at end of stream, the lexer is switched
     * to the error group and asked again;</li>
     * <li>if the lexeme is a {@code START_COMMENT}, the lexer is rewound to the lexeme's offset,
     * switched to the unclosed-comment group, and asked again.</li>
     * </ul>
     * A non-null result is recorded through {@code addLexeme}; otherwise the current lexeme becomes
     * EOS. This method does not switch the lexer group back after either rescan.
     *
     * @throws LexerException
     *             if the lexer fails to switch groups or to scan
     */
    protected void advance() throws LexerException
    {
        ILexer lexer = this.getLexer();
        Lexeme nextLexeme = EOS;

        // grow the current element's range over the lexeme we are about to leave
        if (this._currentElement != null && this.currentLexeme != null && this.currentLexeme != EOS)
        {
            this._currentElement.includeLexemeInRange(this.currentLexeme);
        }

        if (!lexer.isEOS())
        {
            nextLexeme = lexer.getNextLexeme();

            if (nextLexeme == null && !lexer.isEOS())
            {
                // Switch to error group.
                // NOTE: We want setGroup's exception to propagate since
                // that indicates an internal inconsistency when it
                // fails
                lexer.setGroup(ERROR_GROUP);
                nextLexeme = lexer.getNextLexeme();
            }

            if (nextLexeme != null && nextLexeme.typeIndex == XMLTokenTypes.START_COMMENT)
            {
                // rewind to the start of the comment and rescan it with the
                // unclosed-comment group
                lexer.setCurrentOffset(nextLexeme.offset);
                lexer.setGroup(UNCLOSED_COMMENT_GROUP);
                nextLexeme = lexer.getNextLexeme();
            }

            if (nextLexeme == null)
            {
                // couldn't recover from error, so mark as end of stream
                // NOTE: We may want to throw an exception here since we
                // should be able to return at least an ERROR token
                nextLexeme = EOS;
            }
            else
            {
                this.addLexeme(nextLexeme);
            }
        }

        this.currentLexeme = nextLexeme;
    }

    /**
     * Creates an {@link XMLParseState}, as a root state when {@code parent} is null and as a child
     * of {@code parent} otherwise.
     *
     * @param parent
     *            the parent parse state, or null for none
     * @return the new parse state
     * @see com.aptana.ide.parsing.AbstractParser#createParseState(com.aptana.ide.parsing.IParseState)
     */
    public IParseState createParseState(IParseState parent)
    {
        return (parent == null) ? new XMLParseState() : new XMLParseState(parent);
    }

    /**
     * Configures the lexer for this parser's language: whitespace tokens are put in the ignore set
     * and the lexer is placed in the default group.
     *
     * @throws LexerException
     *             if the lexer rejects the language or group
     * @see com.aptana.ide.parsing.AbstractParser#initializeLexer()
     */
    public void initializeLexer() throws LexerException
    {
        ILexer lexer = this.getLexer();
        String language = this.getLanguage();

        // ignore whitespace
        lexer.setIgnoreSet(language, new int[] { XMLTokenTypes.WHITESPACE });

        lexer.setLanguageAndGroup(language, DEFAULT_GROUP);
    }

    /**
     * Scans over a DOCTYPE declaration.
     * <p>
     * The lexer's EOF offset is temporarily moved to the end of the declaration (or to the end of
     * the source when no delimiter is found), the lexer is switched to the doctype-declaration
     * group, and lexemes are consumed until end of stream is reported. The original EOF offset and
     * the default group are always restored afterwards, and the parser is re-primed with one more
     * {@link #advance()}.
     *
     * @throws LexerException
     *             if the lexer fails to switch groups or to scan
     */
    protected void parseDocTypeDeclaration() throws LexerException
    {
        // NOTE: [KEL] ideally, this will be a nested language, but since we're in a bit of flux
        // with respect to colorization and parsing, I'm inlining DTD parsing here
        ILexer lexer = this.getLexer();
        int initialOffset = lexer.getEOFOffset();

        try
        {
            // find end of doctype section and apply to lexer virtual eof
            Range range = lexer.find(DOCTYPE_DECLARATION_DELIMITER);
            int offset = range.isEmpty() ? lexer.getSourceLength() : range.getEndingOffset();

            lexer.setEOFOffset(offset);

            // change groups
            lexer.setGroup(DOCTYPE_DECLARATION_GROUP);

            // process until eof (always advances at least once)
            do
            {
                this.advance();
            }
            while (!this.isEOS());
        }
        finally
        {
            // restore original eof
            lexer.setEOFOffset(initialOffset);
            lexer.setGroup(DEFAULT_GROUP);

            // re-prime
            this.advance();
        }
    }

    /**
     * Steps past the end of a tag while the lexer is in the text group, then returns the lexer to
     * the default group.
     * <p>
     * If that step produced EOS or an {@code ERROR} lexeme, the input is rescanned in the default
     * group: an {@code ERROR} lexeme is first removed and the lexer rewound to its offset, then
     * {@link #advance()} is called again.
     *
     * @param verify
     *            when true, the current lexeme must be {@code GREATER_THAN} (checked through
     *            {@code assertAndAdvance}); when false, the current lexeme is skipped without a
     *            check
     * @throws LexerException
     *             if the lexer fails to switch groups or to scan
     * @throws ParseException
     *             declared for the verifying path through {@code assertAndAdvance}
     */
    protected void parseText(boolean verify) throws LexerException, ParseException
    {
        // get reference to lexer
        ILexer lexer = this.getLexer();

        // switch to text group
        lexer.setGroup(TEXT_GROUP);

        // advance over '>' or '/>'
        if (verify)
        {
            this.assertAndAdvance(XMLTokenTypes.GREATER_THAN, "error.tag.end.close"); //$NON-NLS-1$
        }
        else
        {
            this.advance();
        }

        // switch back to default group
        lexer.setGroup(DEFAULT_GROUP);

        if (this.currentLexeme == EOS || this.isType(XMLTokenTypes.ERROR))
        {
            if (this.currentLexeme != EOS)
            {
                // discard the error lexeme and rewind so it is scanned again
                lexer.setCurrentOffset(this.currentLexeme.offset);
                this.removeLexeme(this.currentLexeme);
            }

            // rescan in case we have a false EOS
            this.advance();
        }
    }
}