/* * reserved comment block * DO NOT REMOVE OR ALTER! */ /* * Copyright 1999-2002,2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ // Aug 21, 2000: // Added ability to omit DOCTYPE declaration. // Reported by Lars Martin <lars@smb-tec.com> // Aug 25, 2000: // Added ability to omit comments. // Contributed by Anupam Bagchi <abagchi@jtcsv.com> package com.sun.org.apache.xml.internal.serialize; import java.io.UnsupportedEncodingException; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; import org.w3c.dom.Node; import org.w3c.dom.html.HTMLDocument; /** * Specifies an output format to control the serializer. Based on the * XSLT specification for output format, plus additional parameters. * Used to select the suitable serializer and determine how the * document should be formatted on output. * <p> * The two interesting constructors are: * <ul> * <li>{@link #OutputFormat(String,String,boolean)} creates a format * for the specified method (XML, HTML, Text, etc), encoding and indentation * <li>{@link #OutputFormat(Document,String,boolean)} creates a format * compatible with the document type (XML, HTML, Text, etc), encoding and * indentation * </ul> * * * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a> * <a href="mailto:visco@intalio.com">Keith Visco</a> * @see Serializer * @see Method * @see LineSeparator */ public class OutputFormat { public static class DTD { /** * Public identifier for HTML 4.01 (Strict) document type. */ public static final String HTMLPublicId = "-//W3C//DTD HTML 4.01//EN"; /** * System identifier for HTML 4.01 (Strict) document type. */ public static final String HTMLSystemId = "http://www.w3.org/TR/html4/strict.dtd"; /** * Public identifier for XHTML 1.0 (Strict) document type. */ public static final String XHTMLPublicId = "-//W3C//DTD XHTML 1.0 Strict//EN"; /** * System identifier for XHTML 1.0 (Strict) document type. */ public static final String XHTMLSystemId = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; } public static class Defaults { /** * If indentation is turned on, the default identation * level is 4. * * @see #setIndenting(boolean) */ public static final int Indent = 4; /** * The default encoding for Web documents it UTF-8. * * @see #getEncoding() */ public static final String Encoding = "UTF-8"; /** * The default line width at which to break long lines * when identing. This is set to 72. */ public static final int LineWidth = 72; } /** * Holds the output method specified for this document, * or null if no method was specified. */ private String _method; /** * Specifies the version of the output method. */ private String _version; /** * The indentation level, or zero if no indentation * was requested. */ private int _indent = 0; /** * The encoding to use, if an input stream is used. * The default is always UTF-8. */ private String _encoding = Defaults.Encoding; /** * The EncodingInfo instance for _encoding. */ private EncodingInfo _encodingInfo = null; // whether java names for encodings are permitted private boolean _allowJavaNames = false; /** * The specified media type or null. */ private String _mediaType; /** * The specified document type system identifier, or null. */ private String _doctypeSystem; /** * The specified document type public identifier, or null. */ private String _doctypePublic; /** * Ture if the XML declaration should be ommited; */ private boolean _omitXmlDeclaration = false; /** * Ture if the DOCTYPE declaration should be ommited; */ private boolean _omitDoctype = false; /** * Ture if comments should be ommited; */ private boolean _omitComments = false; /** * Ture if the comments should be ommited; */ private boolean _stripComments = false; /** * True if the document type should be marked as standalone. */ private boolean _standalone = false; /** * List of element tag names whose text node children must * be output as CDATA. */ private String[] _cdataElements; /** * List of element tag names whose text node children must * be output unescaped. */ private String[] _nonEscapingElements; /** * The selected line separator. */ private String _lineSeparator = LineSeparator.Web; /** * The line width at which to wrap long lines when indenting. */ private int _lineWidth = Defaults.LineWidth; /** * True if spaces should be preserved in elements that do not * specify otherwise, or specify the default behavior. */ private boolean _preserve = false; /** If true, an empty string valued attribute is output as "". If false and * and we are using the HTMLSerializer, then only the attribute name is * serialized. Defaults to false for backwards compatibility. */ private boolean _preserveEmptyAttributes = false; /** * Constructs a new output format with the default values. */ public OutputFormat() { } /** * Constructs a new output format with the default values for * the specified method and encoding. If <tt>indent</tt> * is true, the document will be pretty printed with the default * indentation level and default line wrapping. * * @param method The specified output method * @param encoding The specified encoding * @param indenting True for pretty printing * @see #setEncoding * @see #setIndenting * @see #setMethod */ public OutputFormat( String method, String encoding, boolean indenting ) { setMethod( method ); setEncoding( encoding ); setIndenting( indenting ); } /** * Constructs a new output format with the proper method, * document type identifiers and media type for the specified * document. * * @param doc The document to output * @see #whichMethod */ public OutputFormat( Document doc ) { setMethod( whichMethod( doc ) ); setDoctype( whichDoctypePublic( doc ), whichDoctypeSystem( doc ) ); setMediaType( whichMediaType( getMethod() ) ); } /** * Constructs a new output format with the proper method, * document type identifiers and media type for the specified * document, and with the specified encoding. If <tt>indent</tt> * is true, the document will be pretty printed with the default * indentation level and default line wrapping. * * @param doc The document to output * @param encoding The specified encoding * @param indenting True for pretty printing * @see #setEncoding * @see #setIndenting * @see #whichMethod */ public OutputFormat( Document doc, String encoding, boolean indenting ) { this( doc ); setEncoding( encoding ); setIndenting( indenting ); } /** * Returns the method specified for this output format. * Typically the method will be <tt>xml</tt>, <tt>html</tt> * or <tt>text</tt>, but it might be other values. * If no method was specified, null will be returned * and the most suitable method will be determined for * the document by calling {@link #whichMethod}. * * @return The specified output method, or null */ public String getMethod() { return _method; } /** * Sets the method for this output format. * * @see #getMethod * @param method The output method, or null */ public void setMethod( String method ) { _method = method; } /** * Returns the version for this output method. * If no version was specified, will return null * and the default version number will be used. * If the serializerr does not support that particular * version, it should default to a supported version. * * @return The specified method version, or null */ public String getVersion() { return _version; } /** * Sets the version for this output method. * For XML the value would be "1.0", for HTML * it would be "4.0". * * @see #getVersion * @param version The output method version, or null */ public void setVersion( String version ) { _version = version; } /** * Returns the indentation specified. If no indentation * was specified, zero is returned and the document * should not be indented. * * @return The indentation or zero * @see #setIndenting */ public int getIndent() { return _indent; } /** * Returns true if indentation was specified. */ public boolean getIndenting() { return ( _indent > 0 ); } /** * Sets the indentation. The document will not be * indented if the indentation is set to zero. * Calling {@link #setIndenting} will reset this * value to zero (off) or the default (on). * * @param indent The indentation, or zero */ public void setIndent( int indent ) { if ( indent < 0 ) _indent = 0; else _indent = indent; } /** * Sets the indentation on and off. When set on, the default * indentation level and default line wrapping is used * (see {@link Defaults#Indent} and {@link Defaults#LineWidth}). * To specify a different indentation level or line wrapping, * use {@link #setIndent} and {@link #setLineWidth}. * * @param on True if indentation should be on */ public void setIndenting( boolean on ) { if ( on ) { _indent = Defaults.Indent; _lineWidth = Defaults.LineWidth; } else { _indent = 0; _lineWidth = 0; } } /** * Returns the specified encoding. If no encoding was * specified, the default is always "UTF-8". * * @return The encoding */ public String getEncoding() { return _encoding; } /** * Sets the encoding for this output method. If no * encoding was specified, the default is always "UTF-8". * Make sure the encoding is compatible with the one * used by the {@link java.io.Writer}. * * @see #getEncoding * @param encoding The encoding, or null */ public void setEncoding( String encoding ) { _encoding = encoding; _encodingInfo = null; } /** * Sets the encoding for this output method with an <code>EncodingInfo</code> * instance. */ public void setEncoding(EncodingInfo encInfo) { _encoding = encInfo.getIANAName(); _encodingInfo = encInfo; } /** * Returns an <code>EncodingInfo<code> instance for the encoding. * * @see #setEncoding */ public EncodingInfo getEncodingInfo() throws UnsupportedEncodingException { if (_encodingInfo == null) _encodingInfo = Encodings.getEncodingInfo(_encoding, _allowJavaNames); return _encodingInfo; } /** * Sets whether java encoding names are permitted */ public void setAllowJavaNames (boolean allow) { _allowJavaNames = allow; } /** * Returns whether java encoding names are permitted */ public boolean setAllowJavaNames () { return _allowJavaNames; } /** * Returns the specified media type, or null. * To determine the media type based on the * document type, use {@link #whichMediaType}. * * @return The specified media type, or null */ public String getMediaType() { return _mediaType; } /** * Sets the media type. * * @see #getMediaType * @param mediaType The specified media type */ public void setMediaType( String mediaType ) { _mediaType = mediaType; } /** * Sets the document type public and system identifiers. * Required only if the DOM Document or SAX events do not * specify the document type, and one must be present in * the serialized document. Any document type specified * by the DOM Document or SAX events will override these * values. * * @param publicId The public identifier, or null * @param systemId The system identifier, or null */ public void setDoctype( String publicId, String systemId ) { _doctypePublic = publicId; _doctypeSystem = systemId; } /** * Returns the specified document type public identifier, * or null. */ public String getDoctypePublic() { return _doctypePublic; } /** * Returns the specified document type system identifier, * or null. */ public String getDoctypeSystem() { return _doctypeSystem; } /** * Returns true if comments should be ommited. * The default is false. */ public boolean getOmitComments() { return _omitComments; } /** * Sets comment omitting on and off. * * @param omit True if comments should be ommited */ public void setOmitComments( boolean omit ) { _omitComments = omit; } /** * Returns true if the DOCTYPE declaration should * be ommited. The default is false. */ public boolean getOmitDocumentType() { return _omitDoctype; } /** * Sets DOCTYPE declaration omitting on and off. * * @param omit True if DOCTYPE declaration should be ommited */ public void setOmitDocumentType( boolean omit ) { _omitDoctype = omit; } /** * Returns true if the XML document declaration should * be ommited. The default is false. */ public boolean getOmitXMLDeclaration() { return _omitXmlDeclaration; } /** * Sets XML declaration omitting on and off. * * @param omit True if XML declaration should be ommited */ public void setOmitXMLDeclaration( boolean omit ) { _omitXmlDeclaration = omit; } /** * Returns true if the document type is standalone. * The default is false. */ public boolean getStandalone() { return _standalone; } /** * Sets document DTD standalone. The public and system * identifiers must be null for the document to be * serialized as standalone. * * @param standalone True if document DTD is standalone */ public void setStandalone( boolean standalone ) { _standalone = standalone; } /** * Returns a list of all the elements whose text node children * should be output as CDATA, or null if no such elements were * specified. */ public String[] getCDataElements() { return _cdataElements; } /** * Returns true if the text node children of the given elements * should be output as CDATA. * * @param tagName The element's tag name * @return True if should serialize as CDATA */ public boolean isCDataElement( String tagName ) { int i; if ( _cdataElements == null ) return false; for ( i = 0 ; i < _cdataElements.length ; ++i ) if ( _cdataElements[ i ].equals( tagName ) ) return true; return false; } /** * Sets the list of elements for which text node children * should be output as CDATA. * * @param cdataElements List of CDATA element tag names */ public void setCDataElements( String[] cdataElements ) { _cdataElements = cdataElements; } /** * Returns a list of all the elements whose text node children * should be output unescaped (no character references), or null * if no such elements were specified. */ public String[] getNonEscapingElements() { return _nonEscapingElements; } /** * Returns true if the text node children of the given elements * should be output unescaped. * * @param tagName The element's tag name * @return True if should serialize unescaped */ public boolean isNonEscapingElement( String tagName ) { int i; if ( _nonEscapingElements == null ) { return false; } for ( i = 0 ; i < _nonEscapingElements.length ; ++i ) if ( _nonEscapingElements[ i ].equals( tagName ) ) return true; return false; } /** * Sets the list of elements for which text node children * should be output unescaped (no character references). * * @param nonEscapingElements List of unescaped element tag names */ public void setNonEscapingElements( String[] nonEscapingElements ) { _nonEscapingElements = nonEscapingElements; } /** * Returns a specific line separator to use. The default is the * Web line separator (<tt>\n</tt>). A string is returned to * support double codes (CR + LF). * * @return The specified line separator */ public String getLineSeparator() { return _lineSeparator; } /** * Sets the line separator. The default is the Web line separator * (<tt>\n</tt>). The machine's line separator can be obtained * from the system property <tt>line.separator</tt>, but is only * useful if the document is edited on machines of the same type. * For general documents, use the Web line separator. * * @param lineSeparator The specified line separator */ public void setLineSeparator( String lineSeparator ) { if ( lineSeparator == null ) _lineSeparator = LineSeparator.Web; else _lineSeparator = lineSeparator; } /** * Returns true if the default behavior for this format is to * preserve spaces. All elements that do not specify otherwise * or specify the default behavior will be formatted based on * this rule. All elements that specify space preserving will * always preserve space. */ public boolean getPreserveSpace() { return _preserve; } /** * Sets space preserving as the default behavior. The default is * space stripping and all elements that do not specify otherwise * or use the default value will not preserve spaces. * * @param preserve True if spaces should be preserved */ public void setPreserveSpace( boolean preserve ) { _preserve = preserve; } /** * Return the selected line width for breaking up long lines. * When indenting, and only when indenting, long lines will be * broken at space boundaries based on this line width. * No line wrapping occurs if this value is zero. */ public int getLineWidth() { return _lineWidth; } /** * Sets the line width. If zero then no line wrapping will * occur. Calling {@link #setIndenting} will reset this * value to zero (off) or the default (on). * * @param lineWidth The line width to use, zero for default * @see #getLineWidth * @see #setIndenting */ public void setLineWidth( int lineWidth ) { if ( lineWidth <= 0 ) _lineWidth = 0; else _lineWidth = lineWidth; } /** * Returns the preserveEmptyAttribute flag. If flag is false, then' * attributes with empty string values are output as the attribute * name only (in HTML mode). * @return preserve the preserve flag */ public boolean getPreserveEmptyAttributes () { return _preserveEmptyAttributes; } /** * Sets the preserveEmptyAttribute flag. If flag is false, then' * attributes with empty string values are output as the attribute * name only (in HTML mode). * @param preserve the preserve flag */ public void setPreserveEmptyAttributes (boolean preserve) { _preserveEmptyAttributes = preserve; } /** * Returns the last printable character based on the selected * encoding. Control characters and non-printable characters * are always printed as character references. */ public char getLastPrintable() { if ( getEncoding() != null && ( getEncoding().equalsIgnoreCase( "ASCII" ) ) ) return 0xFF; else return 0xFFFF; } /** * Determine the output method for the specified document. * If the document is an instance of {@link org.w3c.dom.html.HTMLDocument} * then the method is said to be <tt>html</tt>. If the root * element is 'html' and all text nodes preceding the root * element are all whitespace, then the method is said to be * <tt>html</tt>. Otherwise the method is <tt>xml</tt>. * * @param doc The document to check * @return The suitable method */ public static String whichMethod( Document doc ) { Node node; String value; int i; // If document is derived from HTMLDocument then the default // method is html. if ( doc instanceof HTMLDocument ) return Method.HTML; // Lookup the root element and the text nodes preceding it. // If root element is html and all text nodes contain whitespace // only, the method is html. // FIXME (SM) should we care about namespaces here? node = doc.getFirstChild(); while (node != null) { // If the root element is html, the method is html. if ( node.getNodeType() == Node.ELEMENT_NODE ) { if ( node.getNodeName().equalsIgnoreCase( "html" ) ) { return Method.HTML; } else if ( node.getNodeName().equalsIgnoreCase( "root" ) ) { return Method.FOP; } else { return Method.XML; } } else if ( node.getNodeType() == Node.TEXT_NODE ) { // If a text node preceding the root element contains // only whitespace, this might be html, otherwise it's // definitely xml. value = node.getNodeValue(); for ( i = 0 ; i < value.length() ; ++i ) if ( value.charAt( i ) != 0x20 && value.charAt( i ) != 0x0A && value.charAt( i ) != 0x09 && value.charAt( i ) != 0x0D ) return Method.XML; } node = node.getNextSibling(); } // Anything else, the method is xml. return Method.XML; } /** * Returns the document type public identifier * specified for this document, or null. */ public static String whichDoctypePublic( Document doc ) { DocumentType doctype; /* DOM Level 2 was introduced into the code base*/ doctype = doc.getDoctype(); if ( doctype != null ) { // Note on catch: DOM Level 1 does not specify this method // and the code will throw a NoSuchMethodError try { return doctype.getPublicId(); } catch ( Error except ) { } } if ( doc instanceof HTMLDocument ) return DTD.XHTMLPublicId; return null; } /** * Returns the document type system identifier * specified for this document, or null. */ public static String whichDoctypeSystem( Document doc ) { DocumentType doctype; /* DOM Level 2 was introduced into the code base*/ doctype = doc.getDoctype(); if ( doctype != null ) { // Note on catch: DOM Level 1 does not specify this method // and the code will throw a NoSuchMethodError try { return doctype.getSystemId(); } catch ( Error except ) { } } if ( doc instanceof HTMLDocument ) return DTD.XHTMLSystemId; return null; } /** * Returns the suitable media format for a document * output with the specified method. */ public static String whichMediaType( String method ) { if ( method.equalsIgnoreCase( Method.XML ) ) return "text/xml"; if ( method.equalsIgnoreCase( Method.HTML ) ) return "text/html"; if ( method.equalsIgnoreCase( Method.XHTML ) ) return "text/html"; if ( method.equalsIgnoreCase( Method.TEXT ) ) return "text/plain"; if ( method.equalsIgnoreCase( Method.FOP ) ) return "application/pdf"; return null; } }