// SourceFormatter.java example
//
// Explorer
// uima_prolog-master
// - uima-PrologInterface-Examples
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.2
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Formats HTML source by laying out each non-inline-level element on a new line with an appropriate indent.
 * <p>
 * Any indentation present in the original source text is removed.
 * <p>
 * Use one of the following methods to obtain the output:
 * <ul>
 *  <li>{@link #writeTo(Writer)}</li>
 *  <li>{@link #appendTo(Appendable)}</li>
 *  <li>{@link #toString()}</li>
 *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
 * </ul>
 * <p>
 * The output text is functionally equivalent to the original source and should be rendered identically unless specified below.
 * <p>
 * The following points describe the process in general terms.
 * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.
 * <p>
 * <ul>
 *  <li>Every element that is not an {@linkplain HTMLElements#getInlineLevelElementNames() inline-level element} appears on a new line
 *   with an indent corresponding to its {@linkplain Element#getDepth() depth} in the <a href="Source.html#DocumentElementHierarchy">document element hierarchy</a>.
 *  <li>The indent is formed by writing <i>n</i> repetitions of the string specified in the {@link #setIndentString(String) IndentString} property,
 *   where <i>n</i> is the depth of the indentation.
 *  <li>The {@linkplain Element#getContent() content} of an indented element starts on a new line and is indented at a depth one greater than that of the element,
 *   with the end tag appearing on a new line at the same depth as the start tag.
 *   If the content contains only text and {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements},
 *   it may continue on the same line as the start tag.  Additionally, if the output content contains no new lines, the end tag may also continue on the same line.
 *  <li>The content of preformatted elements such as {@link HTMLElementName#PRE PRE} and {@link HTMLElementName#TEXTAREA TEXTAREA} is not indented,
 *   nor is the white space modified in any way.
 *  <li>Only {@linkplain StartTagType#NORMAL normal} and {@linkplain StartTagType#DOCTYPE_DECLARATION document type declaration} elements are indented.
 *   All others are treated as {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}.
 *  <li>White space and indentation inside HTML {@linkplain StartTagType#COMMENT comments}, {@linkplain StartTagType#CDATA_SECTION CDATA sections}, or any
 *   {@linkplain TagType#isServerTag() server tag} is preserved, 
 *   but with the indentation of new lines starting at a depth one greater than that of the surrounding text.
 *  <li>White space and indentation inside {@link HTMLElementName#SCRIPT SCRIPT} elements is preserved, 
 *   but with the indentation of new lines starting at a depth one greater than that of the <code>SCRIPT</code> element.
 *  <li>If the {@link #setTidyTags(boolean) TidyTags} property is set to <code>true</code>,
 *   every tag in the document is replaced with the output from its {@link Tag#tidy()} method.
 *   If this property is set to <code>false</code>, the tag from the original text is used, including all white space,
 *   but with any new lines indented at a depth one greater than that of the element.
 *  <li>If the {@link #setCollapseWhiteSpace(boolean) CollapseWhiteSpace} property
 *   is set to <code>true</code>, every string of one or more {@linkplain Segment#isWhiteSpace(char) white space} characters
 *   located outside of a tag is replaced with a single space in the output.
 *   White space located adjacent to a non-inline-level element tag (except {@linkplain TagType#isServerTag() server tags}) may be removed.
 *  <li>If the {@link #setIndentAllElements(boolean) IndentAllElements} property
 *   is set to <code>true</code>, every element appears indented on a new line, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}.
 *   This generates output that is a good representation of the actual <a href="Source.html#DocumentElementHierarchy">document element hierarchy</a>,
 *   but is very likely to introduce white space that compromises the functional equivalency of the document.
 *  <li>The {@link #setNewLine(String) NewLine} property specifies the character sequence
 *   to use for each <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output document.
 *  <li>If the source document contains {@linkplain TagType#isServerTag() server tags}, the functional equivalency of the output document may be compromised.
 * </ul>
 * <p>
 * Formatting an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
 */
public final class SourceFormatter implements CharStreamSource {
	// The segment containing the HTML to be formatted.
	private final Segment segment;
	// String written once per level of indentation; defaults to a single tab character.
	private String indentString="\t";
	// Whether each tag is replaced with the output of its tidy() method.
	private boolean tidyTags=false;
	// Whether white space in the text between tags is collapsed.
	private boolean collapseWhiteSpace=false;
	// Whether all non-essential line breaks are removed; the Processor also engages white space collapsing when this is set.
	private boolean removeLineBreaks=false;
	// Whether every element is indented on a new line, including inline-level elements.
	private boolean indentAllElements=false;
	// Newline string used in the output; while null, getNewLine() resolves it from the source document and stores the result here.
	private String newLine=null;

	/**
	 * Constructs a new <code>SourceFormatter</code> based on the specified {@link Segment}.
	 * @param segment  the segment containing the HTML to be formatted.
	 * @see Source#getSourceFormatter()
	 */
	public SourceFormatter(final Segment segment) {
		// Only the reference is stored here; no formatting work happens until output is requested.
		this.segment = segment;
	}

	// Documentation inherited from CharStreamSource
	public void writeTo(final Writer writer) throws IOException {
		// A Writer is an Appendable, so the formatted output is produced by the appendTo method.
		this.appendTo(writer);
		// Flush afterwards so that the complete output has been passed on by the writer.
		writer.flush();
	}

	// Documentation inherited from CharStreamSource
	public void appendTo(final Appendable appendable) throws IOException {
		// Take a copy of every property value so the processor works with fixed settings.
		final String indent=getIndentString();
		final boolean tidy=getTidyTags();
		final boolean collapse=getCollapseWhiteSpace();
		final boolean removeBreaks=getRemoveLineBreaks();
		final boolean indentAll=getIndentAllElements();
		// The indentation of SCRIPT elements is at present tied to the IndentAllElements property.
		final boolean indentScripts=getIndentAllElements();
		final String lineBreak=getNewLine();
		final Processor processor=new Processor(segment,indent,tidy,collapse,removeBreaks,indentAll,indentScripts,lineBreak);
		processor.appendTo(appendable);
	}

	// Documentation inherited from CharStreamSource
	public long getEstimatedMaximumOutputLength() {
		// The estimate is twice the length of the segment being formatted.
		// The multiplication is performed in long arithmetic (note the 2L literal):
		// if segment.length() is an int, as the CharSequence contract suggests, an int
		// multiplication would silently overflow for segments longer than
		// Integer.MAX_VALUE/2 characters before the result is widened to long.
		return 2L*segment.length();
	}

	// Documentation inherited from CharStreamSource
	public String toString() {
		// Delegate to the shared CharStreamSource utility to build the string.
		final String formatted=CharStreamSourceUtil.toString(this);
		return formatted;
	}

	/**
	 * Sets the string to be used for indentation.
	 * <p>
	 * The default value is a string containing a single tab character (U+0009).
	 * <p>
	 * The most commonly used indent strings are <code>"\t"</code> (single tab), <code>" "</code> (single space), <code>"  "</code> (2 spaces), and <code>"    "</code> (4 spaces).
	 * 
	 * @param indentString  the string to be used for indentation, must not be <code>null</code>.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getIndentString()
	 */
	public SourceFormatter setIndentString(final String indentString) {
		if (indentString!=null) {
			this.indentString=indentString;
			return this; // returned to allow chaining of property setters
		}
		// A null indent string is never accepted.
		throw new IllegalArgumentException("indentString property must not be null");
	}

	/**
	 * Returns the string to be used for indentation.
	 * <p>
	 * See the {@link #setIndentString(String)} method for a full description of this property.
	 *
	 * @return the string to be used for indentation.
	 */
	public String getIndentString() {
		return this.indentString;
	}

	/**
	 * Sets whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method.
	 * <p>
	 * The default value is <code>false</code>.
	 * <p>
	 * If this property is set to <code>false</code>, the tag from the original text is used, including all white space,
	 * but with any new lines indented at a depth one greater than that of the element.
	 *
	 * @param tidyTags  specifies whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getTidyTags()
	 */
	public SourceFormatter setTidyTags(final boolean tidyTags) {
		this.tidyTags = tidyTags;
		return this; // returned to allow chaining of property setters
	}

	/**
	 * Indicates whether the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method.
	 * <p>
	 * See the {@link #setTidyTags(boolean)} method for a full description of this property.
	 * 
	 * @return <code>true</code> if the original text of each tag is to be replaced with the output from its {@link Tag#tidy()} method, otherwise <code>false</code>.
	 */
	public boolean getTidyTags() {
		return this.tidyTags;
	}

	/**
	 * Sets whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed.
	 * <p>
	 * The default value is <code>false</code>.
	 * <p>
	 * If this property is set to <code>true</code>, every string of one or more {@linkplain Segment#isWhiteSpace(char) white space} characters
	 * located outside of a tag is replaced with a single space in the output.
	 * White space located adjacent to a non-inline-level element tag (except {@linkplain TagType#isServerTag() server tags}) may be removed.
	 *
	 * @param collapseWhiteSpace  specifies whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getCollapseWhiteSpace()
	 */
	public SourceFormatter setCollapseWhiteSpace(final boolean collapseWhiteSpace) {
		this.collapseWhiteSpace = collapseWhiteSpace;
		return this; // returned to allow chaining of property setters
	}
	
	/**
	 * Indicates whether {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed.
	 * <p>
	 * See the {@link #setCollapseWhiteSpace(boolean collapseWhiteSpace)} method for a full description of this property.
	 * 
	 * @return <code>true</code> if {@linkplain Segment#isWhiteSpace(char) white space} in the text between the tags is to be collapsed, otherwise <code>false</code>.
	 */
	public boolean getCollapseWhiteSpace() {
		return this.collapseWhiteSpace;
	}

	/**
	 * Sets whether all non-essential line breaks are removed.
	 * <p>
	 * The default value is <code>false</code>.
	 * <p>
	 * If this property is set to <code>true</code>, only essential line breaks are retained in the output.
	 * <p>
	 * Setting this property automatically engages the {@link #setCollapseWhiteSpace(boolean) CollapseWhiteSpace} option, regardless of its property setting.
	 * <p>
	 * It is recommended to set the {@link #setTidyTags(boolean) TidyTags} property when this option is used so that non-essential line breaks are also removed from tags.
	 *
	 * @param removeLineBreaks  specifies whether all non-essential line breaks are removed.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getRemoveLineBreaks()
	 */
	SourceFormatter setRemoveLineBreaks(final boolean removeLineBreaks) {
		this.removeLineBreaks = removeLineBreaks;
		return this; // returned to allow chaining of property setters
	}
	
	/**
	 * Indicates whether all non-essential line breaks are removed.
	 * <p>
	 * See the {@link #setRemoveLineBreaks(boolean removeLineBreaks)} method for a full description of this property.
	 * 
	 * @return <code>true</code> if all non-essential line breaks are removed, otherwise <code>false</code>.
	 */
	boolean getRemoveLineBreaks() {
		return this.removeLineBreaks;
	}

	/**
	 * Sets whether all elements are to be indented, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements} and those with preformatted contents.
	 * <p>
	 * The default value is <code>false</code>.
	 * <p>
	 * If this property is set to <code>true</code>, every element appears indented on a new line, including
	 * {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements}.
	 * <p>
	 * This generates output that is a good representation of the actual <a href="Source.html#DocumentElementHierarchy">document element hierarchy</a>,
	 * but is very likely to introduce white space that compromises the functional equivalency of the document.
	 *
	 * @param indentAllElements  specifies whether all elements are to be indented.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getIndentAllElements()
	 */
	public SourceFormatter setIndentAllElements(final boolean indentAllElements) {
		this.indentAllElements = indentAllElements;
		return this; // returned to allow chaining of property setters
	}

	/**
	 * Indicates whether all elements are to be indented, including {@linkplain HTMLElements#getInlineLevelElementNames() inline-level elements} and those with preformatted contents.
	 * <p>
	 * See the {@link #setIndentAllElements(boolean)} method for a full description of this property.
	 * 
	 * @return <code>true</code> if all elements are to be indented, otherwise <code>false</code>.
	 */
	public boolean getIndentAllElements() {
		return this.indentAllElements;
	}
	
	/**
	 * Sets the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
	 * <p>
	 * The default is to use the same new line string as is used in the source document, which is determined via the {@link Source#getNewLine()} method.
	 * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
	 * or using the value from the static {@link Config#NewLine} property.
	 * <p>
	 * Specifying a <code>null</code> argument resets the property to its default value, which is to use the same new line string as is used in the source document.
	 * 
	 * @param newLine  the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output, may be <code>null</code>.
	 * @return this <code>SourceFormatter</code> instance, allowing multiple property setting methods to be chained in a single statement. 
	 * @see #getNewLine()
	 */
	public SourceFormatter setNewLine(final String newLine) {
		// A null value is stored as is; getNewLine() then falls back to the source document's new line string.
		this.newLine = newLine;
		return this; // returned to allow chaining of property setters
	}

	/**
	 * Returns the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
	 * <p>
	 * See the {@link #setNewLine(String)} method for a full description of this property.
	 *
	 * @return the string to be used to represent a <a target="_blank" href="http://en.wikipedia.org/wiki/Newline">newline</a> in the output.
	 */
	public String getNewLine() {
		if (newLine!=null) return newLine;
		// No explicit value has been set: take the best guess from the source document and remember it.
		newLine=segment.source.getBestGuessNewLine();
		return newLine;
	}

	/** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */
	private static final class Processor {
		// The segment being formatted.
		private final Segment segment;
		// Text of the source document that the segment belongs to; all index positions refer to this text.
		private final CharSequence sourceText;
		// String written once per level of indentation.
		private final String indentString;
		// Whether tags are replaced with the output of their tidy() method.
		private final boolean tidyTags;
		// Whether white space between tags is collapsed; always true when removeLineBreaks is true.
		private final boolean collapseWhiteSpace;
		private final boolean removeLineBreaks; // Indicates whether all non-essential line breaks are removed. Must be used with collapseWhiteSpace=true.
		// Whether every element is indented on a new line, including inline-level elements.
		private final boolean indentAllElements;
		private final boolean indentScriptElements; // at present this parameter is tied to indentAllElements.  SCRIPT elements need to be inline to keep functional equivalency of output
		// String written for every new line in the output.
		private final String newLine;
	
		// Destination of the formatted output, set by appendTo.
		private Appendable appendable;
		// Cursor into the source's tags, kept by updateNextTag at the first tag beginning at or after index; null when there are no more tags.
		private Tag nextTag;
		// Position in sourceText of the next character still to be written.
		private int index;
	
		public Processor(final Segment segment, final String indentString, final boolean tidyTags, final boolean collapseWhiteSpace, final boolean removeLineBreaks, final boolean indentAllElements, final boolean indentScriptElements, final String newLine) {
			// What is being formatted:
			this.segment=segment;
			this.sourceText=segment.source.toString();

			// How the output is laid out:
			this.indentString=indentString;
			this.newLine=newLine;
			this.tidyTags=tidyTags;
			this.indentAllElements=indentAllElements;
			this.indentScriptElements=indentScriptElements;

			// Removing line breaks always engages white space collapsing, regardless of the collapseWhiteSpace argument.
			this.removeLineBreaks=removeLineBreaks;
			this.collapseWhiteSpace=removeLineBreaks ? true : collapseWhiteSpace;
		}
	
		public void appendTo(final Appendable appendable) throws IOException {
			this.appendable=appendable;
			// Formatting an entire Source performs a full sequential parse first.
			if (segment instanceof Source) {
				final Source entireSource=(Source)segment;
				entireSource.fullSequentialParse();
			}
			// Position both cursors (character index and next tag) at the start of the segment.
			final int start=segment.begin;
			nextTag=segment.source.getNextTag(start);
			index=start;
			// The top-level content is written at indentation depth zero.
			appendContent(segment.end,segment.getChildElements(),0);
		}
	
		private void appendContent(final int end, final List<Element> childElements, final int depth) throws IOException {
			// Writes everything from index up to end: each child element that gets its own line is written
			// via appendElement, and the text between them (including inlinable elements) via appendText.
			assert index<=end;
			for (final Element child : childElements) {
				final int childBegin=child.begin;
				if (childBegin>=end) break;
				// Elements that can be inlined are skipped here and written as part of the surrounding text.
				if (!indentAllElements && inlinable(child)) continue;
				// Flush the text preceding the element before the element itself.
				appendText(childBegin,depth);
				final boolean preformatted;
				final boolean contentInline;
				if (indentAllElements) {
					preformatted=false;
					contentInline=false;
				} else {
					final String childName=child.getName();
					if (childName==HTMLElementName.PRE || childName==HTMLElementName.TEXTAREA) {
						preformatted=true;
						contentInline=true;
					} else if (childName==HTMLElementName.SCRIPT) {
						preformatted=true;
						contentInline=false;
					} else {
						preformatted=false;
						contentInline=!removeLineBreaks && containsOnlyInlineLevelChildElements(child);
					}
				}
				appendElement(child,depth,end,preformatted,contentInline);
			}
			// Whatever remains after the last written element is plain text.
			appendText(end,depth);
			assert index==end;
		}
	
		private boolean inlinable(final Element element) {
			// Returns true if the specified element should be written inline with the surrounding text.
			// Only normal elements are ever indented.  DOCTYPE declarations used to be exempted from this rule,
			// but that was removed because it caused an extra line break if the DOCTYPE is preceded by a server tag.
			if (element.getStartTag().getStartTagType()!=StartTagType.NORMAL) return true;
			final String name=element.getName();
			if (name==HTMLElementName.SCRIPT) return !indentScriptElements;
			if (removeLineBreaks) {
				// When removing line breaks, non-HTML elements are inlined, as is every inline-level element.
				if (!HTMLElements.getElementNames().contains(name)) return true;
				return HTMLElements.getInlineLevelElementNames().contains(name);
			}
			if (!HTMLElements.getInlineLevelElementNames().contains(name)) return false;
			// TEXTAREA is theoretically inlinable but we want to format its content in the same way as PRE,
			// and this is easiest when the entire element is treated like a block PRE element.
			if (name==HTMLElementName.TEXTAREA) return false;
			// Only inline an inline-level element if it doesn't illegally contain non-inline elements.
			return containsOnlyInlineLevelChildElements(element);
		}
	
		private void appendText(final int end, int depth) throws IOException {
			// Writes the text between index and end as an indented line (or lines), ignoring leading white space.
			assert index<=end;
			// Skip the leading white space; nothing at all is written if that is all there is.
			while (index!=end && Segment.isWhiteSpace(sourceText.charAt(index))) index++;
			if (index==end) return;
			appendIndent(depth);
			if (!collapseWhiteSpace) {
				appendTextInline(end,depth,false);
			} else {
				appendTextCollapseWhiteSpace(end,depth);
			}
			appendFormattingNewLine();
			assert index==end;
		}
	
		// Appends the specified element (start tag, content, and end tag if it lies within range) at the specified depth.
		//  element: the element to append; index must be positioned at its beginning.
		//  depth: indentation depth of the element's tags.
		//  end: position in the source text at which output must stop; may fall inside the element.
		//  preformatted: true if the content is written by one of the preformatted/script routines rather than laid out as markup.
		//  renderContentInline: true if the content should continue on the same line as the start tag.
		private void appendElement(final Element element, final int depth, final int end, final boolean preformatted, boolean renderContentInline) throws IOException {
			assert index==element.begin;
			assert index<end;
			final StartTag startTag=element.getStartTag();
			final EndTag endTag=element.getEndTag();
			appendIndent(depth);
			appendTag(startTag,depth,end);
			if (index==end) {
				// Writing the start tag already reached the end of the requested range, so there is nothing more to write.
				appendFormattingNewLine();
				assert index==Math.min(element.end,end) : index;
				return;
			}
			if (!renderContentInline) appendFormattingNewLine();
			// The content is written up to the end of the element's content or the end of the range, whichever comes first.
			int contentEnd=element.getContentEnd();
			if (end<contentEnd) contentEnd=end;
			if (index<contentEnd) {
				if (preformatted) {
					if (renderContentInline) {
						// Preformatted element such as PRE, TEXTAREA
						appendContentPreformatted(contentEnd,depth);
					} else {
						// SCRIPT element
						appendIndentedScriptContent(contentEnd,depth+1);
					}
				} else {
					if (renderContentInline) {
						// Element contains only inline-level elements, so don't bother putting start and end tags on separate lines
						if (collapseWhiteSpace) {
							appendTextCollapseWhiteSpace(contentEnd,depth);
						} else {
							if (!appendTextInline(contentEnd,depth,true)) {
								// The content spanned more than one line, so the end tag goes on a line of its own.
								appendFormattingNewLine();
								renderContentInline=false;
							}
						}
					} else {
						// Content containing non-inline elements is written one level deeper.
						appendContent(contentEnd,element.getChildElements(),depth+1);
					}
				}
			}
			if (endTag!=null && end>endTag.begin) {
				// The end tag exists and begins within range; it is only indented if it starts a new line.
				if (!renderContentInline) appendIndent(depth);
				assert index==endTag.begin;
				appendTag(endTag,depth,end);
				appendFormattingNewLine();
			} else if (renderContentInline) {
				// No end tag was written, but the line holding the inline content still needs terminating.
				appendFormattingNewLine();
			}
			assert index==Math.min(element.end,end) : index;
		}
	
		private void updateNextTag() {
			// Moves nextTag forward until it is the first tag beginning at or after index, or null if there is no such tag.
			while (nextTag!=null && nextTag.begin<index) nextTag=nextTag.getNextTag();
		}
	
		// Appends SCRIPT element content from index up to the specified end position, indenting each line at the specified depth.
		// The original indent of the first complete line is measured and that many leading spaces/tabs are
		// stripped from each following line, so indentation beyond it is kept.
		private void appendIndentedScriptContent(final int end, final int depth) throws IOException {
			assert index<end;
			if (removeLineBreaks) {
				// No indentation is written in this mode, so only the original indentation is stripped from each line.
				appendTextRemoveIndentation(end);
				assert index==end;
				return;
			}
			// Skip leading white space, noting where the line containing the first text starts.
			int startOfLinePos=getStartOfLinePos(end,false);
			if (index==end) return; // the content consisted only of spaces, tabs and line breaks
			if (startOfLinePos==-1) {
				// Script started on same line as start tag.  Use the start of the next line to determine the original indent.
				appendIndent(depth);
				appendLineKeepWhiteSpace(end,depth);
				appendEssentialNewLine();
				if (index==end) return;
				startOfLinePos=getStartOfLinePos(end,true);
				if (index==end) return;
			}
			// index-startOfLinePos is the length of the original indent of the line about to be written.
			appendTextPreserveIndentation(end,depth,index-startOfLinePos);
			appendEssentialNewLine();
			assert index==end;
		}
	
		private boolean appendTextPreserveIndentation(final int end, final int depth) throws IOException {
			// Writes the text from index up to end, keeping the relative indentation of its lines.
			// Returns true if all of the text was on one line, otherwise false.
			assert index<end;
			if (removeLineBreaks) {
				return appendTextRemoveIndentation(end);
			}
			// The first line continues from the current output position.
			appendLineKeepWhiteSpace(end,depth);
			if (index==end) return true;
			// The start of the next line containing text determines the original indent.
			final int lineStart=getStartOfLinePos(end,true);
			if (index==end) return true;
			final int originalIndentLength=index-lineStart;
			appendEssentialNewLine();
			// The remaining lines are written one level deeper than the first.
			appendTextPreserveIndentation(end,depth+1,originalIndentLength);
			assert index==end;
			return false;
		}
	
		private void appendTextPreserveIndentation(final int end, final int depth, final int originalIndentLength) throws IOException {
			// Writes the lines from index up to end, each indented at the specified depth, after removing at most
			// originalIndentLength leading spaces/tabs from every line following the first.
			assert index<end;
			appendIndent(depth);
			appendLineKeepWhiteSpace(end,depth);
			while (index!=end) {
				// Skip over the original indent, stopping early at the first character that is not a space or tab:
				int skipped=0;
				while (skipped<originalIndentLength) {
					final char ch=sourceText.charAt(index);
					if (ch!=' ' && ch!='\t') break;
					index++;
					if (index==end) return;
					skipped++;
				}
				appendEssentialNewLine();
				// Insert our indent, then the rest of the line including any indent greater than the first line's indent:
				appendIndent(depth);
				appendLineKeepWhiteSpace(end,depth);
			}
			assert index==end;
		}

		private boolean appendTextRemoveIndentation(final int end) throws IOException {
			// Writes the lines from index up to end with the leading spaces/tabs of every line after the first removed.
			// Returns true if all of the text was on one line, otherwise false.
			assert index<end;
			appendLineKeepWhiteSpace(end,0);
			if (index==end) return true;
			do {
				// Skip over the whole of the original indent:
				char ch=sourceText.charAt(index);
				while (ch==' ' || ch=='\t') {
					if (++index==end) return false;
					ch=sourceText.charAt(index);
				}
				appendEssentialNewLine();
				// Write the rest of the line:
				appendLineKeepWhiteSpace(end,0);
			} while (index!=end);
			assert index==end;
			return false;
		}
	
		private int getStartOfLinePos(final int end, final boolean atStartOfLine) {
			// Returns the starting position of the next complete line containing text,
			// or -1 if the text starts on the current line (hence not a complete line).
			// Sets index to the start of the text following the returned position, or end, whichever comes first.
			int lineStart=atStartOfLine ? index : -1;
			for (;;) {
				switch (sourceText.charAt(index)) {
					case '\n':
					case '\r':
						// A line break means the next line starts immediately after it.
						lineStart=index+1;
						break;
					case ' ':
					case '\t':
						// Indentation is simply skipped.
						break;
					default:
						// Found the start of the text.
						return lineStart;
				}
				if (++index==end) return lineStart;
			}
		}
	
		private void appendSpecifiedTextInline(final CharSequence text, int depth) throws IOException {
			// Writes the specified text, continuing the current line with its first line and
			// starting every subsequent line on a new line indented one level deeper than depth.
			final int length=text.length();
			final int continuationDepth=depth+1;
			int pos=appendSpecifiedLine(text,0);
			while (pos<length) {
				// Trim the white space at the start of the line; stop if nothing but white space remains.
				while (Segment.isWhiteSpace(text.charAt(pos))) {
					if (++pos>=length) return;
				}
				appendEssentialNewLine();
				appendIndent(continuationDepth);
				pos=appendSpecifiedLine(text,pos);
			}
		}
	
		private int appendSpecifiedLine(final CharSequence text, int i) throws IOException {
			// Writes one line of the specified text, starting from position i, without its line break characters.
			// A line ends at "\n" or "\r\n"; a carriage return not followed by a line feed is written like any other character.
			// Returns the position following the line break, or text.length() if no line break was found.
			final int length=text.length();
			do {
				final char ch=text.charAt(i);
				if (ch=='\n') return i+1;
				if (ch=='\r' && i+1<length && text.charAt(i+1)=='\n') return i+2;
				appendable.append(ch);
			} while (++i<length);
			return i;
		}
	
		private boolean appendTextInline(final int end, int depth, final boolean increaseIndentAfterFirstLineBreak) throws IOException {
			// Writes the source text from index up to end, keeping its line breaks but replacing the indentation of each line.
			// Returns true if all text was on one line, otherwise false.
			assert index<end;
			appendLineKeepWhiteSpace(end,depth);
			if (index==end) return true;
			int continuationDepth=depth;
			if (increaseIndentAfterFirstLineBreak) continuationDepth++;
			do {
				// Trim the white space at the start of the line; stop if nothing but white space remains.
				for (;;) {
					if (!Segment.isWhiteSpace(sourceText.charAt(index))) break;
					if (++index==end) return false;
				}
				// The new line is essential because we might be inside a tag attribute value.
				// If new lines in normal text aren't required this method wouldn't have been called.
				appendEssentialNewLine();
				appendIndent(continuationDepth);
				appendLineKeepWhiteSpace(end,continuationDepth);
			} while (index<end);
			assert index==end;
			return false;
		}
	
		private void appendLineKeepWhiteSpace(final int end, final int depth) throws IOException {
			// Writes the first line from the source text starting from index, ending at the specified end position.
			// The line break characters are not written.
			// Sets index to the position following the first line break character(s), or end if the text contains no line breaks, guaranteed index<=end.
			// Any tags encountered are written using the appendTag method, whose output may include line breaks.
			// The depth argument is only passed on to appendTag.
			assert index<end;
			updateNextTag();
			while (true) {
				// Write any tag(s) beginning at the current position; appendTag advances both index and nextTag.
				while (nextTag!=null && index==nextTag.begin) {
					appendTag(nextTag,depth,end);
					if (index==end) return;
				}
				final char ch=sourceText.charAt(index);
				if (ch=='\r') {
					// Only a carriage return immediately followed by a line feed (within range) ends the line here;
					// any other carriage return falls through and is written as an ordinary character.
					final int nextindex=index+1;
					if (nextindex<end && sourceText.charAt(nextindex)=='\n') {
						index+=2;
						assert index<=end;
						return;
					}
				}
				if (ch=='\n') {
					index++;
					assert index<=end;
					return;
				}
				appendable.append(ch);
				if (++index==end) return;
			}
		}		
	
		private void appendTextCollapseWhiteSpace(final int end, final int depth) throws IOException {
			// Writes the source text from index up to end, replacing every run of white space outside of tags with a single space.
			// Tags are written using the appendTag method.
			assert index<end;
			boolean pendingSpace=false; // true while a run of white space has been read but not yet written
			updateNextTag();
			while (index<end) {
				while (nextTag!=null && nextTag.begin==index) {
					// White space preceding a tag is written as a single space before the tag.
					if (pendingSpace) appendable.append(' ');
					pendingSpace=false;
					appendTag(nextTag,depth,end);
					if (index==end) return;
				}
				final int pos=index++;
				final char ch=sourceText.charAt(pos);
				if (Segment.isWhiteSpace(ch)) {
					pendingSpace=true;
					continue;
				}
				if (pendingSpace) appendable.append(' ');
				pendingSpace=false;
				appendable.append(ch);
			}
			// Trailing white space also collapses to a single space.
			if (pendingSpace) appendable.append(' ');
			assert index==end;
		}
	
		private void appendContentPreformatted(final int end, final int depth) throws IOException {
			// Writes the source text from index up to end character for character, without touching white space or line breaks.
			// Tags are written using the appendTag method.
			assert index<end;
			updateNextTag();
			for (;;) {
				while (nextTag!=null && nextTag.begin==index) {
					appendTag(nextTag,depth,end);
					if (index==end) return;
				}
				final char ch=sourceText.charAt(index);
				appendable.append(ch);
				index++;
				if (index>=end) break;
			}
			assert index==end;
		}
	
		// Appends the specified tag, which must begin at index, writing no further than the specified end position.
		// For an inline SCRIPT start tag or a server start tag, the content and end tag of its element are appended as well.
		//  tag: the tag to append.
		//  depth: indentation depth used for any new lines written inside the tag or element.
		//  end: position in the source text at which output must stop; may fall inside the tag.
		private void appendTag(final Tag tag, final int depth, final int end) throws IOException {
			// sets index to last position written
			assert index==tag.begin;
			assert index<end;
			// Move the tag cursor on before writing, as this tag is about to be consumed.
			nextTag=tag.getNextTag();
			final int tagEnd=(tag.end<end) ? tag.end : end;
			assert index<tagEnd;
			if (tag.getTagType()==StartTagType.COMMENT || tag.getTagType()==StartTagType.CDATA_SECTION || tag.getTagType().isServerTag()) {
				// Comments, CDATA sections and server tags keep the relative indentation of their lines.
				appendTextPreserveIndentation(tagEnd,depth);
			} else if (tidyTags) {
				final String tidyTag=tag.tidy();
				// A start tag that has attributes is written verbatim; any other tidied tag is written line by line with indentation.
				if ((tag instanceof StartTag) && ((StartTag)tag).getAttributes()!=null)
					appendable.append(tidyTag);
				else
					appendSpecifiedTextInline(tidyTag,depth);
				// The tidied text did not come from the source, so index has to be moved past the tag explicitly.
				index=tagEnd;
			} else {
				appendTextInline(tagEnd,depth,true); // Write tag keeping linefeeds. This will add an indent to any attribute values containing linefeeds, but the normal situation where line breaks are between attributes will look nice.
			}
			// Only a start tag that ends before the end position can have more of its element appended below.
			if (end<=tag.end || !(tag instanceof StartTag)) {
				assert index<=end;
				return;
			}
			if ((tag.name==HTMLElementName.SCRIPT && !indentScriptElements) || tag.getTagType().isServerTag()) {
				// NOTE SERVER ELEMENTS CONTAINING NON-INLINE TAGS WILL NOT FORMAT PROPERLY. NEED TO INVESTIGATE INCLUDING SUCH SERVER ELEMENTS IN DOCUMENT HIERARCHY.
				// this is a script or server start tag, we may need to append the whole element:
				final Element element=tag.getElement();
				final EndTag endTag=element.getEndTag();
				if (endTag==null) {
					// Without an end tag there is no delimited content to append here.
					assert index<=end;
					return;
				}
				final int contentEnd=(end<endTag.begin) ? end : endTag.begin;
				boolean singleLineContent=true;
				if (index!=contentEnd) {
					// elementContainsMarkup should be made into a TagType property one day.
					// for the time being assume all server element content is code, although this is not true for some Mason elements.
					final boolean elementContainsMarkup=false;
					if (elementContainsMarkup) {
						singleLineContent=appendTextInline(contentEnd,depth+1,false);
					} else {
						singleLineContent=appendTextPreserveIndentation(contentEnd,depth);
					}
				}
				if (endTag.begin>=end) {
					// The end tag lies outside the requested range.
					assert index<=end;
					return;
				}
				if (!singleLineContent) {
					appendEssentialNewLine(); // some server or client side scripting languages might need the final new line
					appendIndent(depth);
				}
				assert index==endTag.begin;
				appendTag(endTag,depth,end);
			}
			assert index<=end;
		}
		
	  private void appendIndent(final int depth) throws IOException {
			// Writes the indent string depth times; no indentation is written at all when line breaks are being removed.
			if (removeLineBreaks) return;
			int remaining=depth;
			while (remaining-->0) appendable.append(indentString);
	  }
	
		private void appendFormattingNewLine() throws IOException {
			// A formatting new line exists only for layout, so it is left out when line breaks are being removed.
			if (removeLineBreaks) return;
			appendable.append(newLine);
		}

		private void appendEssentialNewLine() throws IOException {
			// An essential new line is always written, even when line breaks are being removed.
			this.appendable.append(this.newLine);
		}
	
		private boolean containsOnlyInlineLevelChildElements(final Element element) {
			// Returns true if every descendant of the element is an inline-level element other than SCRIPT.
			// An element without child elements trivially qualifies.
			for (final Element child : element.getChildElements()) {
				final String childName=child.getName();
				if (childName==HTMLElementName.SCRIPT) return false;
				if (!HTMLElements.getInlineLevelElementNames().contains(childName)) return false;
				// The same rule applies recursively to the descendants of the child.
				if (!containsOnlyInlineLevelChildElements(child)) return false;
			}
			return true;
		}
	}
}