/* * @(#)Configuration.java 1.11 2000/08/16 * */ package org.w3c.tidy; /** * * Read configuration file and manage configuration properties. * * (c) 1998-2000 (W3C) MIT, INRIA, Keio University * See Tidy.java for the copyright notice. * Derived from <a href="http://www.w3.org/People/Raggett/tidy"> * HTML Tidy Release 4 Aug 2000</a> * * @author Dave Raggett <dsr@w3.org> * @author Andy Quick <ac.quick@sympatico.ca> (translation to Java) * @version 1.0, 1999/05/22 * @version 1.0.1, 1999/05/29 * @version 1.1, 1999/06/18 Java Bean * @version 1.2, 1999/07/10 Tidy Release 7 Jul 1999 * @version 1.3, 1999/07/30 Tidy Release 26 Jul 1999 * @version 1.4, 1999/09/04 DOM support * @version 1.5, 1999/10/23 Tidy Release 27 Sep 1999 * @version 1.6, 1999/11/01 Tidy Release 22 Oct 1999 * @version 1.7, 1999/12/06 Tidy Release 30 Nov 1999 * @version 1.8, 2000/01/22 Tidy Release 13 Jan 2000 * @version 1.9, 2000/06/03 Tidy Release 30 Apr 2000 * @version 1.10, 2000/07/22 Tidy Release 8 Jul 2000 * @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000 */ /* Configuration files associate a property name with a value. The format is that of a Java .properties file. */ import java.util.Enumeration; import java.util.Properties; import java.util.StringTokenizer; import java.io.FileInputStream; import java.io.IOException; public class Configuration implements java.io.Serializable { private static final long serialVersionUID = 1l; /* character encodings */ public static final int RAW = 0; public static final int ASCII = 1; public static final int LATIN1 = 2; public static final int UTF8 = 3; public static final int ISO2022 = 4; public static final int MACROMAN = 5; /* mode controlling treatment of doctype */ public static final int DOCTYPE_OMIT = 0; public static final int DOCTYPE_AUTO = 1; public static final int DOCTYPE_STRICT= 2; public static final int DOCTYPE_LOOSE = 3; public static final int DOCTYPE_USER = 4; protected int spaces = 2; /* default indentation */ protected int wraplen = 68; /* default wrap margin */ protected int CharEncoding = ASCII; protected int tabsize = 4; protected int docTypeMode = DOCTYPE_AUTO; /* see doctype property */ protected String altText = null; /* default text for alt attribute */ protected String slidestyle = null; /* style sheet for slides */ protected String docTypeStr = null; /* user specified doctype */ protected String errfile = null; /* file name to write errors to */ protected boolean writeback = false; /* if true then output tidied markup */ protected boolean OnlyErrors = false; /* if true normal output is suppressed */ protected boolean ShowWarnings = true; /* however errors are always shown */ protected boolean Quiet = false; /* no 'Parsing X', guessed DTD or summary */ protected boolean IndentContent = false; /* indent content of appropriate tags */ protected boolean SmartIndent = false; /* does text/block level content effect indentation */ protected boolean HideEndTags = false; /* suppress optional end tags */ protected boolean XmlTags = false; /* treat input as XML */ protected boolean XmlOut = false; /* create output as XML */ protected boolean xHTML = false; /* output extensible HTML */ protected boolean XmlPi = false; /* add <?xml?> for XML docs */ protected boolean RawOut = false; /* avoid mapping values > 127 to entities */ protected boolean UpperCaseTags = false; /* output tags in upper not lower case */ protected boolean UpperCaseAttrs = false; /* output attributes in upper not lower case */ protected boolean MakeClean = false; /* remove presentational clutter */ protected boolean LogicalEmphasis = false; /* replace i by em and b by strong */ protected boolean DropFontTags = false; /* discard presentation tags */ protected boolean DropEmptyParas = true; /* discard empty p elements */ protected boolean FixComments = true; /* fix comments with adjacent hyphens */ protected boolean BreakBeforeBR = false; /* o/p newline before <br> or not? */ protected boolean BurstSlides = false; /* create slides on each h2 element */ protected boolean NumEntities = false; /* use numeric entities */ protected boolean QuoteMarks = false; /* output " marks as " */ protected boolean QuoteNbsp = true; /* output non-breaking space as entity */ protected boolean QuoteAmpersand = true; /* output naked ampersand as & */ protected boolean WrapAttVals = false; /* wrap within attribute values */ protected boolean WrapScriptlets = false; /* wrap within JavaScript string literals */ protected boolean WrapSection = true; /* wrap within <![ ... ]> section tags */ protected boolean WrapAsp = true; /* wrap within ASP pseudo elements */ protected boolean WrapJste = true; /* wrap within JSTE pseudo elements */ protected boolean WrapPhp = true; /* wrap within PHP pseudo elements */ protected boolean FixBackslash = true; /* fix URLs by replacing \ with / */ protected boolean IndentAttributes = false; /* newline+indent before each attribute */ protected boolean XmlPIs = false; /* if set to yes PIs must end with ?> */ protected boolean XmlSpace = false; /* if set to yes adds xml:space attr as needed */ protected boolean EncloseBodyText = false; /* if yes text at body is wrapped in <p>'s */ protected boolean EncloseBlockText = false; /* if yes text in blocks is wrapped in <p>'s */ protected boolean KeepFileTimes = true; /* if yes last modied time is preserved */ protected boolean Word2000 = false; /* draconian cleaning for Word2000 */ protected boolean TidyMark = true; /* add meta element indicating tidied doc */ protected boolean Emacs = false; /* if true format error output for GNU Emacs */ protected boolean LiteralAttribs = false; /* if true attributes may use newlines */ protected TagTable tt; /* TagTable associated with this Configuration */ private transient Properties _properties = new Properties(); public Configuration() { } public void addProps( Properties p ) { Enumeration propenum = p.propertyNames(); while (propenum.hasMoreElements()) { String key = (String) propenum.nextElement(); String value = p.getProperty(key); _properties.put(key, value); } parseProps(); } public void parseFile( String filename ) { try { _properties.load( new FileInputStream( filename ) ); } catch (IOException e) { System.err.println(filename + e.toString()); return; } parseProps(); } private void parseProps() { String value; value = _properties.getProperty("indent-spaces"); if (value != null) spaces = parseInt(value, "indent-spaces"); value = _properties.getProperty("wrap"); if (value != null) wraplen = parseInt(value, "wrap"); value = _properties.getProperty("wrap-attributes"); if (value != null) WrapAttVals = parseBool(value, "wrap-attributes"); value = _properties.getProperty("wrap-script-literals"); if (value != null) WrapScriptlets = parseBool(value, "wrap-script-literals"); value = _properties.getProperty("wrap-sections"); if (value != null) WrapSection = parseBool(value, "wrap-sections"); value = _properties.getProperty("wrap-asp"); if (value != null) WrapAsp = parseBool(value, "wrap-asp"); value = _properties.getProperty("wrap-jste"); if (value != null) WrapJste = parseBool(value, "wrap-jste"); value = _properties.getProperty("wrap-php"); if (value != null) WrapPhp = parseBool(value, "wrap-php"); value = _properties.getProperty("literal-attributes"); if (value != null) LiteralAttribs = parseBool(value, "literal-attributes"); value = _properties.getProperty("tab-size"); if (value != null) tabsize = parseInt(value, "tab-size"); value = _properties.getProperty("markup"); if (value != null) OnlyErrors = parseInvBool(value, "markup"); value = _properties.getProperty("quiet"); if (value != null) Quiet = parseBool(value, "quiet"); value = _properties.getProperty("tidy-mark"); if (value != null) TidyMark = parseBool(value, "tidy-mark"); value = _properties.getProperty("indent"); if (value != null) IndentContent = parseIndent(value, "indent"); value = _properties.getProperty("indent-attributes"); if (value != null) IndentAttributes = parseBool(value, "ident-attributes"); value = _properties.getProperty("hide-endtags"); if (value != null) HideEndTags = parseBool(value, "hide-endtags"); value = _properties.getProperty("input-xml"); if (value != null) XmlTags = parseBool(value, "input-xml"); value = _properties.getProperty("output-xml"); if (value != null) XmlOut = parseBool(value, "output-xml"); value = _properties.getProperty("output-xhtml"); if (value != null) xHTML = parseBool(value, "output-xhtml"); value = _properties.getProperty("add-xml-pi"); if (value != null) XmlPi = parseBool(value, "add-xml-pi"); value = _properties.getProperty("add-xml-decl"); if (value != null) XmlPi = parseBool(value, "add-xml-decl"); value = _properties.getProperty("assume-xml-procins"); if (value != null) XmlPIs = parseBool(value, "assume-xml-procins"); value = _properties.getProperty("raw"); if (value != null) RawOut = parseBool(value, "raw"); value = _properties.getProperty("uppercase-tags"); if (value != null) UpperCaseTags = parseBool(value, "uppercase-tags"); value = _properties.getProperty("uppercase-attributes"); if (value != null) UpperCaseAttrs = parseBool(value, "uppercase-attributes"); value = _properties.getProperty("clean"); if (value != null) MakeClean = parseBool(value, "clean"); value = _properties.getProperty("logical-emphasis"); if (value != null) LogicalEmphasis = parseBool(value, "logical-emphasis"); value = _properties.getProperty("word-2000"); if (value != null) Word2000 = parseBool(value, "word-2000"); value = _properties.getProperty("drop-empty-paras"); if (value != null) DropEmptyParas = parseBool(value, "drop-empty-paras"); value = _properties.getProperty("drop-font-tags"); if (value != null) DropFontTags = parseBool(value, "drop-font-tags"); value = _properties.getProperty("enclose-text"); if (value != null) EncloseBodyText = parseBool(value, "enclose-text"); value = _properties.getProperty("enclose-block-text"); if (value != null) EncloseBlockText = parseBool(value, "enclose-block-text"); value = _properties.getProperty("alt-text"); if (value != null) altText = value; value = _properties.getProperty("add-xml-space"); if (value != null) XmlSpace = parseBool(value, "add-xml-space"); value = _properties.getProperty("fix-bad-comments"); if (value != null) FixComments = parseBool(value, "fix-bad-comments"); value = _properties.getProperty("split"); if (value != null) BurstSlides = parseBool(value, "split"); value = _properties.getProperty("break-before-br"); if (value != null) BreakBeforeBR = parseBool(value, "break-before-br"); value = _properties.getProperty("numeric-entities"); if (value != null) NumEntities = parseBool(value, "numeric-entities"); value = _properties.getProperty("quote-marks"); if (value != null) QuoteMarks = parseBool(value, "quote-marks"); value = _properties.getProperty("quote-nbsp"); if (value != null) QuoteNbsp = parseBool(value, "quote-nbsp"); value = _properties.getProperty("quote-ampersand"); if (value != null) QuoteAmpersand = parseBool(value, "quote-ampersand"); value = _properties.getProperty("write-back"); if (value != null) writeback = parseBool(value, "write-back"); value = _properties.getProperty("keep-time"); if (value != null) KeepFileTimes = parseBool(value, "keep-time"); value = _properties.getProperty("show-warnings"); if (value != null) ShowWarnings = parseBool(value, "show-warnings"); value = _properties.getProperty("error-file"); if (value != null) errfile = parseName(value, "error-file"); value = _properties.getProperty("slide-style"); if (value != null) slidestyle = parseName(value, "slide-style"); value = _properties.getProperty("new-inline-tags"); if (value != null) parseInlineTagNames(value, "new-inline-tags"); value = _properties.getProperty("new-blocklevel-tags"); if (value != null) parseBlockTagNames(value, "new-blocklevel-tags"); value = _properties.getProperty("new-empty-tags"); if (value != null) parseEmptyTagNames(value, "new-empty-tags"); value = _properties.getProperty("new-pre-tags"); if (value != null) parsePreTagNames(value, "new-pre-tags"); value = _properties.getProperty("char-encoding"); if (value != null) CharEncoding = parseCharEncoding(value, "char-encoding"); value = _properties.getProperty("doctype"); if (value != null) docTypeStr = parseDocType(value, "doctype"); value = _properties.getProperty("fix-backslash"); if (value != null) FixBackslash = parseBool(value, "fix-backslash"); value = _properties.getProperty("gnu-emacs"); if (value != null) Emacs = parseBool(value, "gnu-emacs"); } /* ensure that config is self consistent */ public void adjust() { if (EncloseBlockText) EncloseBodyText = true; /* avoid the need to set IndentContent when SmartIndent is set */ if (SmartIndent) IndentContent = true; /* disable wrapping */ if (wraplen == 0) wraplen = 0x7FFFFFFF; /* Word 2000 needs o:p to be declared as inline */ if (Word2000) { tt.defineInlineTag("o:p"); } /* XHTML is written in lower case */ if (xHTML) { XmlOut = true; UpperCaseTags = false; UpperCaseAttrs = false; } /* if XML in, then XML out */ if (XmlTags) { XmlOut = true; XmlPIs = true; } /* XML requires end tags */ if (XmlOut) { QuoteAmpersand = true; HideEndTags = false; } } private static int parseInt( String s, String option ) { int i = 0; try { i = Integer.parseInt( s ); } catch ( NumberFormatException e ) { Report.badArgument(option); i = -1; } return i; } private static boolean parseBool( String s, String option ) { boolean b = false; if ( s != null && s.length() > 0 ) { char c = s.charAt(0); if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y') || (c == '1')) b = true; else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n') || (c == '0')) b = false; else Report.badArgument(option); } return b; } private static boolean parseInvBool( String s, String option ) { boolean b = false; if ( s != null && s.length() > 0 ) { char c = s.charAt(0); if ((c == 't') || (c == 'T') || (c == 'Y') || (c == 'y')) b = true; else if ((c == 'f') || (c == 'F') || (c == 'N') || (c == 'n')) b = false; else Report.badArgument(option); } return !b; } private static String parseName( String s, String option ) { StringTokenizer t = new StringTokenizer( s ); String rs = null; if ( t.countTokens() >= 1 ) rs = t.nextToken(); else Report.badArgument(option); return rs; } private static int parseCharEncoding( String s, String option ) { int result = ASCII; if (Lexer.wstrcasecmp(s, "ascii") == 0) result = ASCII; else if (Lexer.wstrcasecmp(s, "latin1") == 0) result = LATIN1; else if (Lexer.wstrcasecmp(s, "raw") == 0) result = RAW; else if (Lexer.wstrcasecmp(s, "utf8") == 0) result = UTF8; else if (Lexer.wstrcasecmp(s, "iso2022") == 0) result = ISO2022; else if (Lexer.wstrcasecmp(s, "mac") == 0) result = MACROMAN; else Report.badArgument(option); return result; } /* slight hack to avoid changes to pprint.c */ private boolean parseIndent( String s, String option ) { boolean b = IndentContent; if (Lexer.wstrcasecmp(s, "yes") == 0) { b = true; SmartIndent = false; } else if (Lexer.wstrcasecmp(s, "true") == 0) { b = true; SmartIndent = false; } else if (Lexer.wstrcasecmp(s, "no") == 0) { b = false; SmartIndent = false; } else if (Lexer.wstrcasecmp(s, "false") == 0) { b = false; SmartIndent = false; } else if (Lexer.wstrcasecmp(s, "auto") == 0) { b = true; SmartIndent = true; } else Report.badArgument(option); return b; } private void parseInlineTagNames( String s, String option ) { StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); while ( t.hasMoreTokens() ) { tt.defineInlineTag( t.nextToken() ); } } private void parseBlockTagNames( String s, String option ) { StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); while ( t.hasMoreTokens() ) { tt.defineBlockTag( t.nextToken() ); } } private void parseEmptyTagNames( String s, String option ) { StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); while ( t.hasMoreTokens() ) { tt.defineEmptyTag( t.nextToken() ); } } private void parsePreTagNames( String s, String option ) { StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); while ( t.hasMoreTokens() ) { tt.definePreTag( t.nextToken() ); } } /* doctype: omit | auto | strict | loose | <fpi> where the fpi is a string similar to "-//ACME//DTD HTML 3.14159//EN" */ protected String parseDocType( String s, String option ) { s = s.trim(); /* "-//ACME//DTD HTML 3.14159//EN" or similar */ if (s.startsWith("\"")) { docTypeMode = DOCTYPE_USER; return s; } /* read first word */ String word = ""; StringTokenizer t = new StringTokenizer( s, " \t\n\r," ); if (t.hasMoreTokens()) word = t.nextToken(); if (Lexer.wstrcasecmp(word, "omit") == 0) docTypeMode = DOCTYPE_OMIT; else if (Lexer.wstrcasecmp(word, "strict") == 0) docTypeMode = DOCTYPE_STRICT; else if (Lexer.wstrcasecmp(word, "loose") == 0 || Lexer.wstrcasecmp(word, "transitional") == 0) docTypeMode = DOCTYPE_LOOSE; else if (Lexer.wstrcasecmp(word, "auto") == 0) docTypeMode = DOCTYPE_AUTO; else { docTypeMode = DOCTYPE_AUTO; Report.badArgument(option); } return null; } }