StartTagTypeGenericImplementation.java example

Explorer
uima_prolog-master
- uima-PrologInterface-Examples
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Provides a generic implementation of the abstract {@link StartTagType} class based on the most common start tag behaviour.
 * <p>
 * This class is only of interest to users who wish to create <a href="TagType.html#Custom">custom tag types</a>.
 * <p>
 * The only external difference between this class and its abstract superclass {@link StartTagType} is that it provides a default
 * implementation of the {@link #constructTagAt(Source, int pos)} method.
 * <p>
 * Most of the <a href="Tag.html#Predefined">predefined</a> start tag types are implemented using this class or a subclass of it.
 *
 * @see EndTagTypeGenericImplementation
 */
public class StartTagTypeGenericImplementation extends StartTagType {
	final boolean nameCharAfterPrefixAllowed;

	/**
	 * Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
	 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
	 * <p>
	 * This is equivalent to calling
	 * <br /><code>new </code>{@link #StartTagTypeGenericImplementation(String,String,String,EndTagType,boolean,boolean,boolean) StartTagTypeGenericImplementation}<code>(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false)</code>.
	 *
	 * @param description  a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
	 * @param startDelimiter  the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
	 * @param closingDelimiter  the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
	 * @param correspondingEndTagType  the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
	 * @param isServerTag  indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
	 */
	protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag) {
		this(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,false,false);
	}

	/**
	 * Constructs a new <code>StartTagTypeGenericImplementation</code> object with the specified properties.
	 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
	 *
	 * @param description  a {@linkplain #getDescription() description} of the new start tag type useful for debugging purposes.
	 * @param startDelimiter  the {@linkplain #getStartDelimiter() start delimiter} of the new start tag type.
	 * @param closingDelimiter  the {@linkplain #getClosingDelimiter() closing delimiter} of the new start tag type.
	 * @param correspondingEndTagType  the {@linkplain #getCorrespondingEndTagType() corresponding end tag type} of the new start tag type.
	 * @param isServerTag  indicates whether the new start tag type is a {@linkplain #isServerTag() server tag}.
	 * @param hasAttributes  indicates whether the new start tag type {@linkplain #hasAttributes() has attributes}.
	 * @param isNameAfterPrefixRequired  indicates whether a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}.
	 */
	protected StartTagTypeGenericImplementation(final String description, final String startDelimiter, final String closingDelimiter, final EndTagType correspondingEndTagType, final boolean isServerTag, final boolean hasAttributes, final boolean isNameAfterPrefixRequired) {
		super(description,startDelimiter,closingDelimiter,correspondingEndTagType,isServerTag,hasAttributes,isNameAfterPrefixRequired);
		nameCharAfterPrefixAllowed=(getNamePrefix().length()==0 || !Character.isLetter(getNamePrefix().charAt(getNamePrefix().length()-1)));
	}

	/**
	 * Constructs a tag of this type at the specified position in the specified source document if it matches all of the required features.
	 * <br />(<a href="TagType.html#DefaultImplementation">default implementation</a> method)
	 * <p>
	 * This default implementation performs the following steps:
	 * <ol class="Separated">
	 *  <li>
	 *   If a {@linkplain #isNameAfterPrefixRequired() name is required after the prefix}, search for a valid
	 *   {@linkplain Tag#isXMLName(CharSequence) XML tag name} directly after the
	 *   {@linkplain #getNamePrefix() name prefix} using the {@link Source#getNameEnd(int pos)} method.
	 *   If one is found, set the {@linkplain Tag#getName() name} to include it, otherwise return <code>null</code>.
	 *  <li>
	 *   If the last character of the {@linkplain #getNamePrefix() name prefix} is a letter
	 *   (indicating that the prefix includes the full {@linkplain Tag#getName() name} of the tag),
	 *   and the character following the prefix in the source text is also a letter
	 *   or any other valid {@linkplain Tag#isXMLNameChar(char) XML name character},
	 *   return <code>null</code>.
	 *   <br />Example: the source text "<code><?xmlt ?></code>" should not be recognised as an
	 *   {@linkplain #XML_PROCESSING_INSTRUCTION XML processing instruction}, which has the prefix "<code><?xml</code>".
	 *  <li>
	 *   If the tag type {@linkplain #hasAttributes() has attributes}, call
	 *   {@link #parseAttributes(Source,int,String) parseAttributes(source,pos,name)} to parse them.
	 *   Return <code>null</code> if too many errors occur while parsing the attributes.
	 *  <li>
	 *   Find the {@linkplain Tag#getEnd() end} of the tag using the {@link #getEnd(Source, int pos)} method,
	 *   where <code>pos</code> is either the end of the {@linkplain StartTag#getAttributes() attributes} segment or the end of the
	 *   {@linkplain Tag#getName() name} depending on whether the tag type {@linkplain #hasAttributes() has attributes}.
	 *   Return <code>null</code> if the end of the tag can not be found.
	 *  <li>
	 *   Construct the {@link StartTag} object using the
	 *   {@link #constructStartTag(Source,int,int,String,Attributes) constructStartTag(Source, int pos, int end, String name, Attributes)}
	 *   method with the argument values collected over the previous steps.
	 * </ol>
	 * <p> 
	 * See {@link TagType#constructTagAt(Source, int pos)} for more important information about this method.
	 *
	 * @param source  the {@link Source} document.
	 * @param pos  the position in the source document.
	 * @return a tag of this type at the specified position in the specified source document if it meets all of the required features, or <code>null</code> if it does not meet the criteria.
	 */
	protected Tag constructTagAt(final Source source, final int pos) {
		final ParseText parseText=source.getParseText();
		final int nameBegin=pos+1;
		String name=getNamePrefix();
		int nameEnd=nameBegin+getNamePrefix().length();
		if (isNameAfterPrefixRequired()) {
			final int extendedNameEnd=source.getNameEnd(nameEnd);
			if (extendedNameEnd==-1) return null;
			name=source.getName(nameBegin,extendedNameEnd);
			nameEnd=extendedNameEnd;
		} else if (!nameCharAfterPrefixAllowed && Tag.isXMLNameChar(parseText.charAt(nameEnd))) {
			return null;
		}
		int end;
		Attributes attributes=null;
		if (hasAttributes()) {
			// it is necessary to get the attributes so that we can be sure that the search on the closing delimiter doesn't pick up
			// anything from the attribute values, which can legally contain ">" characters.
			attributes=parseAttributes(source,pos,name);
			if (attributes==null) return null; // happens if attributes not properly formed
			end=getEnd(source,attributes.getEnd()); // should always return a valid end
		} else {
			end=getEnd(source,nameEnd);
			if (end<0) {
				if (end==-1 && source.logger.isInfoEnabled()) source.logger.info(source.getRowColumnVector(pos).appendTo(new StringBuilder(200).append("StartTag ").append(name).append(" at ")).append(" not recognised as type '").append(getDescription()).append("' because it has no closing delimiter").toString());
				return null;
			}
		}
		return constructStartTag(source,pos,end,name,attributes);
	}

	/**
	 * Returns the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document.
	 * <br />(<a href="TagType.html#ImplementationAssistance">implementation assistance</a> method)
	 * <p>
	 * This default implementation simply searches for the first occurrence of the
	 * {@linkplain #getClosingDelimiter() closing delimiter} after the specified position, and returns the position immediately
	 * after the end of it.
	 * <p>
	 * If the closing delimiter is not found, the value <code>-1</code> is returned.
	 *
	 * @param source  the {@link Source} document.
	 * @param pos  the position in the source document.
	 * @return the {@linkplain Tag#getEnd() end} of a tag of this type, starting from the specified position in the specified source document, or <code>-1</code> if the end of the tag can not be found.
	 */
	protected int getEnd(final Source source, final int pos) {
		final int delimiterBegin=source.getParseText().indexOf(getClosingDelimiter(),pos);
		return (delimiterBegin==-1 ? -1 : delimiterBegin+getClosingDelimiter().length());
	}
}