/**
* This file Copyright (c) 2005-2008 Aptana, Inc. This program is
* dual-licensed under both the Aptana Public License and the GNU General
* Public license. You may elect to use one or the other of these licenses.
*
* This program is distributed in the hope that it will be useful, but
* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
* NONINFRINGEMENT. Redistribution, except as permitted by whichever of
* the GPL or APL you select, is prohibited.
*
* 1. For the GPL license (GPL), you can redistribute and/or modify this
* program under the terms of the GNU General Public License,
* Version 3, as published by the Free Software Foundation. You should
* have received a copy of the GNU General Public License, Version 3 along
* with this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Aptana provides a special exception to allow redistribution of this file
* with certain other free and open source software ("FOSS") code and certain additional terms
* pursuant to Section 7 of the GPL. You may view the exception and these
* terms on the web at http://www.aptana.com/legal/gpl/.
*
* 2. For the Aptana Public License (APL), this program and the
* accompanying materials are made available under the terms of the APL
* v1.0 which accompanies this distribution, and is available at
* http://www.aptana.com/legal/apl/.
*
* You may view the GPL, Aptana's exception and additional terms, and the
* APL in the file titled license.html at the root of the corresponding
* plugin containing this source file.
*
* Any modifications to this file must keep this entire header intact.
*/
package com.aptana.ide.editor.html.parsing;
import java.util.HashMap;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.aptana.ide.editor.html.parsing.nodes.HTMLParseNodeFactory;
import com.aptana.ide.parsing.IParseState;
import com.aptana.ide.parsing.ParseStateChild;
import com.aptana.ide.parsing.nodes.IParseNodeFactory;
/**
 * Parse state for HTML content. On every edit the source is scanned for a
 * <code>&lt;!DOCTYPE root PUBLIC "pubId" "system"&gt;</code> declaration; the pieces that were found are exposed
 * through {@link #getRootElement()}, {@link #getPubId()}, {@link #getSystem()} and {@link #getDocumentType()}. The
 * class also reports, per tag name, whether the end tag is required, optional or forbidden and whether the element is
 * empty.
 *
 * @author Kevin Lindsey
 */
public class HTMLParseState extends ParseStateChild
{
    /*
     * Fields
     */

    // public identifiers that setEditState maps to an HTMLDocumentType value
    private static final String HTML_2_0 = "-//IETF//DTD HTML//EN"; //$NON-NLS-1$
    private static final String HTML_3_2 = "-//W3C//DTD HTML 3.2 Final//EN"; //$NON-NLS-1$
    private static final String HTML_4_0_1_STRICT = "-//W3C//DTD HTML 4.01//EN"; //$NON-NLS-1$
    private static final String HTML_4_0_1_TRANSITIONAL = "-//W3C//DTD HTML 4.01 Transitional//EN"; //$NON-NLS-1$
    private static final String HTML_4_0_1_FRAMESET = "-//W3C//DTD HTML 4.01 Frameset//EN"; //$NON-NLS-1$
    private static final String XHTML_1_0_STRICT = "-//W3C//DTD XHTML 1.0 Strict//EN"; //$NON-NLS-1$
    private static final String XHTML_1_0_TRANSITIONAL = "-//W3C//DTD XHTML 1.0 Transitional//EN"; //$NON-NLS-1$
    private static final String XHTML_1_0_FRAMESET = "-//W3C//DTD XHTML 1.0 Frameset//EN"; //$NON-NLS-1$
    private static final String XHTML_1_1_STRICT = "-//W3C//DTD XHTML 1.1//EN"; //$NON-NLS-1$

    // group 1: root element, group 2: quoted public id, group 3: optional quoted system id
    private static final Pattern _docTypeSniffer;

    // public id (String) -> document type (Integer)
    private static final HashMap _docTypeIndex;

    // lower-case tag name (String) -> HTMLTagInfo flags (Integer)
    private static final HashMap _endTagInfo;

    private String _rootElement;
    private String _pubId;
    private String _system;
    private int _documentType;

    /*
     * Properties
     */

    /**
     * getDocumentType
     *
     * @return the document type determined by the most recent call to
     *         {@link #setEditState(String, String, int, int)}
     */
    public int getDocumentType()
    {
        return this._documentType;
    }

    /**
     * getRootElement
     *
     * @return the root element named in the DOCTYPE declaration, or null if the most recent edit did not contain a
     *         recognizable declaration
     */
    public String getRootElement()
    {
        return this._rootElement;
    }

    /**
     * getPubId
     *
     * @return the public identifier of the DOCTYPE declaration without its surrounding quotes, or null if the most
     *         recent edit did not contain a recognizable declaration
     */
    public String getPubId()
    {
        return this._pubId;
    }

    /**
     * getSystem
     *
     * @return the system identifier of the DOCTYPE declaration without its surrounding quotes, or null if the
     *         declaration did not specify one or no declaration was recognized
     */
    public String getSystem()
    {
        return this._system;
    }

    /*
     * Constructors
     */

    /**
     * static constructor
     */
    static
    {
        _docTypeSniffer = Pattern
                .compile("<!DOCTYPE\\s+(\\S+)\\s+PUBLIC\\s+((?:'[^']+')|(?:\"[^\"]+\"))(?:\\s+((?:'[^']+')|(?:\"[^\"]+\")))?"); //$NON-NLS-1$

        _docTypeIndex = new HashMap();
        registerDocType(HTML_2_0, HTMLDocumentType.HTML_2_0);
        registerDocType(HTML_3_2, HTMLDocumentType.HTML_3_2);
        registerDocType(HTML_4_0_1_STRICT, HTMLDocumentType.HTML_4_0_1_STRICT);
        registerDocType(HTML_4_0_1_TRANSITIONAL, HTMLDocumentType.HTML_4_0_1_TRANSITIONAL);
        registerDocType(HTML_4_0_1_FRAMESET, HTMLDocumentType.HTML_4_0_1_FRAMESET);
        registerDocType(XHTML_1_0_STRICT, HTMLDocumentType.XHTML_1_0_STRICT);
        registerDocType(XHTML_1_0_TRANSITIONAL, HTMLDocumentType.XHTML_1_0_TRANSITIONAL);
        registerDocType(XHTML_1_0_FRAMESET, HTMLDocumentType.XHTML_1_0_FRAMESET);
        registerDocType(XHTML_1_1_STRICT, HTMLDocumentType.XHTML_1_1_STRICT);

        int forbiddenAndEmpty = HTMLTagInfo.END_FORBIDDEN | HTMLTagInfo.EMPTY;
        int optional = HTMLTagInfo.END_OPTIONAL;

        _endTagInfo = new HashMap();
        registerEndTag("area", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("base", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("basefont", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("body", optional); //$NON-NLS-1$
        registerEndTag("br", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("col", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("colgroup", optional); //$NON-NLS-1$
        registerEndTag("dd", optional); //$NON-NLS-1$
        registerEndTag("dt", optional); //$NON-NLS-1$
        registerEndTag("frame", forbiddenAndEmpty); //$NON-NLS-1$
        // this entry used to be a second "area" registration, which silently replaced the forbidden/empty flags
        // registered for "area" above and left "head" out of the table
        registerEndTag("head", optional); //$NON-NLS-1$
        registerEndTag("hr", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("html", optional); //$NON-NLS-1$
        registerEndTag("img", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("input", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("isindex", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("li", optional); //$NON-NLS-1$
        registerEndTag("link", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("meta", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("option", optional); //$NON-NLS-1$
        registerEndTag("p", optional); //$NON-NLS-1$
        registerEndTag("param", forbiddenAndEmpty); //$NON-NLS-1$
        registerEndTag("tbody", optional); //$NON-NLS-1$
        registerEndTag("td", optional); //$NON-NLS-1$
        registerEndTag("tfoot", optional); //$NON-NLS-1$
        registerEndTag("th", optional); //$NON-NLS-1$
        registerEndTag("thead", optional); //$NON-NLS-1$
        registerEndTag("tr", optional); //$NON-NLS-1$
    }

    /**
     * Create a new instance of HTMLParseState
     */
    public HTMLParseState()
    {
        super(HTMLMimeType.MimeType);
    }

    /**
     * Create a new instance of HTMLParseState
     *
     * @param parent
     *            The parent IParseState
     */
    public HTMLParseState(IParseState parent)
    {
        super(HTMLMimeType.MimeType, parent);
    }

    /*
     * Methods
     */

    /**
     * Associates a DOCTYPE public identifier with a document type.
     *
     * @param pubId
     *            The public identifier exactly as it appears between the quotes of the declaration
     * @param documentType
     *            One of the HTMLDocumentType constants
     */
    private static void registerDocType(String pubId, int documentType)
    {
        _docTypeIndex.put(pubId, new Integer(documentType));
    }

    /**
     * Records the end-tag flags for a tag.
     *
     * @param tagName
     *            The tag name in lower case
     * @param flags
     *            A combination of HTMLTagInfo flags
     */
    private static void registerEndTag(String tagName, int flags)
    {
        _endTagInfo.put(tagName, new Integer(flags));
    }

    /**
     * Looks up the end-tag flags registered for a tag name, ignoring case.
     *
     * @param tagName
     *            The tag name; may be null
     * @return the registered flags, or null if the name is null or has no entry
     */
    private static Integer lookupEndTagFlags(String tagName)
    {
        if (tagName == null)
        {
            return null;
        }

        // an explicit locale keeps names such as "IMG" or "LI" from being lower-cased with a dotless i when the
        // default locale is Turkish or Azeri
        return (Integer) _endTagInfo.get(tagName.toLowerCase(Locale.ENGLISH));
    }

    /**
     * Removes the first and last character of a quoted DOCTYPE identifier.
     *
     * @param quoted
     *            A string that starts and ends with a quote character
     * @return the text between the quotes
     */
    private static String stripQuotes(String quoted)
    {
        return quoted.substring(1, quoted.length() - 1);
    }

    /**
     * @see com.aptana.ide.parsing.ParseStateChild#createParseNodeFactory()
     */
    protected IParseNodeFactory createParseNodeFactory()
    {
        return new HTMLParseNodeFactory(this);
    }

    /**
     * getCloseTagType
     *
     * @param tagName
     *            The name of the tag to test; case is ignored
     * @return the END_MASK portion of the flags registered for the tag. HTMLTagInfo.END_REQUIRED is returned when the
     *         tag has no entry, when tagName is null, or when the current document type is not ordered below
     *         HTMLDocumentType.XHTML_1_0_STRICT
     */
    public int getCloseTagType(String tagName)
    {
        int result = HTMLTagInfo.END_REQUIRED;

        // the table is only consulted for document types ordered below the XHTML types
        if (this._documentType < HTMLDocumentType.XHTML_1_0_STRICT)
        {
            Integer flags = lookupEndTagFlags(tagName);

            if (flags != null)
            {
                result = flags.intValue() & HTMLTagInfo.END_MASK;
            }
        }

        return result;
    }

    /**
     * isEmptyTagType
     *
     * @param tagName
     *            The name of the tag to test; case is ignored
     * @return true if the tag is registered with the HTMLTagInfo.EMPTY flag; false otherwise, including when tagName
     *         is null
     */
    public boolean isEmptyTagType(String tagName)
    {
        Integer flags = lookupEndTagFlags(tagName);

        return flags != null && (flags.intValue() & HTMLTagInfo.EMPTY) == HTMLTagInfo.EMPTY;
    }

    /**
     * Updates the edit state and re-reads the DOCTYPE declaration from the new source. All DOCTYPE-derived properties
     * are replaced on every call, so they describe the current source only.
     *
     * @see com.aptana.ide.parsing.ParseStateChild#setEditState(java.lang.String, java.lang.String, int, int)
     */
    public void setEditState(String source, String insertedSource, int offset, int removeLength)
    {
        super.setEditState(source, insertedSource, offset, removeLength);

        // start from "nothing known" so that values found by an earlier edit do not outlive a declaration that has
        // since been removed or broken
        String rootElement = null;
        String pubId = null;
        String system = null;
        int documentType = HTMLDocumentType.UNKNOWN;

        // cheap pre-check before running the regular expression
        int doctypeStart = (source != null) ? source.indexOf("<!DOCTYPE") : -1; //$NON-NLS-1$

        if (doctypeStart != -1)
        {
            Matcher match = _docTypeSniffer.matcher(source);

            // begin searching at the declaration instead of copying the tail of the document
            if (match.find(doctypeStart))
            {
                // grab doctype pieces and strip their opening and closing quotes
                rootElement = match.group(1);
                pubId = stripQuotes(match.group(2));
                system = match.group(3);

                // the system identifier is optional, in which case the group is null
                if (system != null)
                {
                    system = stripQuotes(system);
                }

                // see if we can determine the document type
                if (rootElement.equals("html") || rootElement.equals("HTML")) //$NON-NLS-1$ //$NON-NLS-2$
                {
                    Integer knownType = (Integer) _docTypeIndex.get(pubId);

                    if (knownType != null)
                    {
                        documentType = knownType.intValue();
                    }
                }
            }
        }

        this._rootElement = rootElement;
        this._pubId = pubId;
        this._system = system;
        this._documentType = documentType;
    }
}