// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
/**
* Provides a generic implementation of the abstract {@link StartTagType} class based on the most common start tag behaviour.
* <p>
* This class is only of interest to users who wish to create <a href="TagType.html#Custom">custom tag types</a>.
* <p>
* The only external difference between this class and its abstract superclass {@link StartTagType} is that it provides a default
* implementation of the {@link #constructTagAt(Source, int pos)} method.
* <p>
* Most of the <a href="Tag.html#Predefined">predefined</a> start tag types are implemented using this class or a subclass of it.
*
* @see EndTagTypeGenericImplementation
*/
public class StartTagTypeGenericImplementation extends StartTagType {
final boolean nameCharAfterPrefixAllowed;
/**
* Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
* <p>
* This is equivalent to calling
* <br /><code>new </code>{@link #StartTagTypeGenericImplementation(String,String,String,EndTagType,boolean,boolean,boolean) StartTagTypeGenericImplementation}<code>(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false)</code>.
*
* @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
* @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
* @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
* @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
* @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
*/
protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag) {
this(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false);
}
/**
* Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
*
* @param description a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
* @param startDelimiter the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
* @param closingDelimiter the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
* @param correspondingEndTagType the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
* @param isServerTag indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
* @param hasAttributes indicates whether the new start tag type {@linkplain #hasAttributes() has attributes}.
* @param isNameAfterPrefixRequired indicates whether a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}.
*/
protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag, final boolean hasAttributes, final boolean isNameAfterPrefixRequired) {
super(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,hasAttributes,isNameAfterPrefixRequired);
nameCharAfterPrefixAllowed=(getNamePrefix().length()==0 || !Character.isLetter(getNamePrefix().charAt(getNamePrefix().length()-1)));
}
/**
* Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
* <br />(<a href="TagType.html#DefaultImplementation">default implementation</a> method)
* <p>
* This default implementation performs the following steps:
* <ol class="Separated">
* <li>
* If a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}, search for a valid
* {@linkplain Tag#isXMLName(CharSequence) XML tag name} directly after the
* {@linkplain #getNamePrefix() name prefix} using the {@link Source#getNameEnd(int pos)} method.
* If one is found, set the {@linkplain Tag#getName() name} to include it, otherwise return <code>null</code>.
* <li>
* If the last character of the {@linkplain #getNamePrefix() name prefix} is a letter
* (indicating that the prefix includes the full {@linkplain Tag#getName() name} of the tag),
* and the character following the prefix in the source text is also a letter
* or any other valid {@linkplain Tag#isXMLNameChar(char) XML name character},
* return <code>null</code>.
* <br />Example: the source text "<code><?xmlt ?></code>" should not be recognised as an
* {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction}, which has the prefix "<code><?xml</code>".
* <li>
* If the tag type {@linkplain #hasAttributes() has attributes}, call
* {@link #parseAttributes(Source,int,String) parseAttributes(source,pos,name)} to parse them.
* Return <code>null</code> if too many errors occur while parsing the attributes.
* <li>
* Find the {@linkplain Tag#getEnd() end} of the tag using the {@link #getEnd(Source, int pos)} method,
* where <code>pos</code> is either the end of the {@linkplain StartTag#getAttributes() attributes} segment or the end of the
* {@linkplain Tag#getName() name} depending on whether the tag type {@linkplain #hasAttributes() has attributes}.
* Return <code>null</code> if the end of the tag can not be found.
* <li>
* Construct the {@link StartTag} object using the
* {@link #constructStartTag(Source,int,int,String,Attributes) constructStartTag(Source, int pos, int end, String name, Attributes)}
* method with the argument values collected over the previous steps.
* </ol>
* <p>
* See {@link TagType#constructTagAt(Source, int pos)} for more important information about this method.
*
* @param source the {@link Source} document.
* @param pos the position in the source document.
* @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
*/
protected Tag constructTagAt(final Source source, final int pos) {
final ParseText parseText=source.getParseText();
final int nameBegin=pos+1;
String name=getNamePrefix();
int nameEnd=nameBegin+getNamePrefix().length();
if (isNameAfterPrefixRequired()) {
final int extendedNameEnd=source.getNameEnd(nameEnd);
if (extendedNameEnd==-1) return null;
name=source.getName(nameBegin,extendedNameEnd);
nameEnd=extendedNameEnd;
} else if (!nameCharAfterPrefixAllowed && Tag.isXMLNameChar(parseText.charAt(nameEnd))) {
return null;
}
int end;
Attributes attributes=null;
if (hasAttributes()) {
// it is necessary to get the attributes so that we can be sure that the search on the closing delimiter doesn't pick up
// anything from the attribute values, which can legally contain ">" characters.
attributes=parseAttributes(source,pos,name);
if (attributes==null) return null; // happens if attributes not properly formed
end=getEnd(source,attributes.getEnd()); // should always return a valid end
} else {
end=getEnd(source,nameEnd);
if (end<0) {
if (end==-1 && source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("StartTag ").append(name).append(" at ")).append(" not recognised as type '").append(getDescription()).append("' because it has no closing delimiter").toString());
return null;
}
}
return constructStartTag(source,pos,end,name,attributes);
}
/**
* Returns the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document.
* <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
* <p>
* This default implementation simply searches for the first occurrence of the
* {@linkplain #getClosingDelimiter() closing delimiter} after the specified position, and returns the position immediately
* after the end of it.
* <p>
* If the closing delimiter is not found, the value <code>-1</code> is returned.
*
* @param source the {@link Source} document.
* @param pos the position in the source document.
* @return the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document, or <code>-1</code> if the end of the tag can not be found.
*/
protected int getEnd(final Source source, final int pos) {
final int delimiterBegin=source.getParseText().indexOf(getClosingDelimiter(),pos);
return (delimiterBegin==-1 ? -1 : delimiterBegin+getClosingDelimiter().length());
}
}