/* * Copyright (c) 1999, 2000, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package javax.swing.text.html; import java.io.*; /** {@collect.stats} * A CSS parser. This works by way of a delegate that implements the * CSSParserCallback interface. The delegate is notified of the following * events: * <ul> * <li>Import statement: <code>handleImport</code> * <li>Selectors <code>handleSelector</code>. This is invoked for each * string. For example if the Reader contained p, bar , a {}, the delegate * would be notified 4 times, for 'p,' 'bar' ',' and 'a'. * <li>When a rule starts, <code>startRule</code> * <li>Properties in the rule via the <code>handleProperty</code>. This * is invoked one per property/value key, eg font size: foo;, would * cause the delegate to be notified once with a value of 'font size'. * <li>Values in the rule via the <code>handleValue</code>, this is notified * for the total value. * <li>When a rule ends, <code>endRule</code> * </ul> * This will parse much more than CSS 1, and loosely implements the * recommendation for <i>Forward-compatible parsing</i> in section * 7.1 of the CSS spec found at: * <a href=http://www.w3.org/TR/REC-CSS1>http://www.w3.org/TR/REC-CSS1</a>. * If an error results in parsing, a RuntimeException will be thrown. * <p> * This will preserve case. If the callback wishes to treat certain poritions * case insensitively (such as selectors), it should use toLowerCase, or * something similar. * * @author Scott Violet */ class CSSParser { // Parsing something like the following: // (@rule | ruleset | block)* // // @rule (block | identifier)*; (block with {} ends @rule) // block matching [] () {} (that is, [()] is a block, [(){}{[]}] // is a block, ()[] is two blocks) // identifier "*" | '*' | anything but a [](){} and whitespace // // ruleset selector decblock // selector (identifier | (block, except block '{}') )* // declblock declaration* block* // declaration (identifier* stopping when identifier ends with :) // (identifier* stopping when identifier ends with ;) // // comments /* */ can appear any where, and are stripped. // identifier - letters, digits, dashes and escaped characters // block starts with { ends with matching }, () [] and {} always occur // in matching pairs, '' and "" also occur in pairs, except " may be // Indicates the type of token being parsed. private static final int IDENTIFIER = 1; private static final int BRACKET_OPEN = 2; private static final int BRACKET_CLOSE = 3; private static final int BRACE_OPEN = 4; private static final int BRACE_CLOSE = 5; private static final int PAREN_OPEN = 6; private static final int PAREN_CLOSE = 7; private static final int END = -1; private static final char[] charMapping = { 0, 0, '[', ']', '{', '}', '(', ')', 0}; /** {@collect.stats} Set to true if one character has been read ahead. */ private boolean didPushChar; /** {@collect.stats} The read ahead character. */ private int pushedChar; /** {@collect.stats} Temporary place to hold identifiers. */ private StringBuffer unitBuffer; /** {@collect.stats} Used to indicate blocks. */ private int[] unitStack; /** {@collect.stats} Number of valid blocks. */ private int stackCount; /** {@collect.stats} Holds the incoming CSS rules. */ private Reader reader; /** {@collect.stats} Set to true when the first non @ rule is encountered. */ private boolean encounteredRuleSet; /** {@collect.stats} Notified of state. */ private CSSParserCallback callback; /** {@collect.stats} nextToken() inserts the string here. */ private char[] tokenBuffer; /** {@collect.stats} Current number of chars in tokenBufferLength. */ private int tokenBufferLength; /** {@collect.stats} Set to true if any whitespace is read. */ private boolean readWS; // The delegate interface. static interface CSSParserCallback { /** {@collect.stats} Called when an @import is encountered. */ void handleImport(String importString); // There is currently no way to distinguish between '"foo,"' and // 'foo,'. But this generally isn't valid CSS. If it becomes // a problem, handleSelector will have to be told if the string is // quoted. void handleSelector(String selector); void startRule(); // Property names are mapped to lower case before being passed to // the delegate. void handleProperty(String property); void handleValue(String value); void endRule(); } CSSParser() { unitStack = new int[2]; tokenBuffer = new char[80]; unitBuffer = new StringBuffer(); } void parse(Reader reader, CSSParserCallback callback, boolean inRule) throws IOException { this.callback = callback; stackCount = tokenBufferLength = 0; this.reader = reader; encounteredRuleSet = false; try { if (inRule) { parseDeclarationBlock(); } else { while (getNextStatement()); } } finally { callback = null; reader = null; } } /** {@collect.stats} * Gets the next statement, returning false if the end is reached. A * statement is either an @rule, or a ruleset. */ private boolean getNextStatement() throws IOException { unitBuffer.setLength(0); int token = nextToken((char)0); switch (token) { case IDENTIFIER: if (tokenBufferLength > 0) { if (tokenBuffer[0] == '@') { parseAtRule(); } else { encounteredRuleSet = true; parseRuleSet(); } } return true; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: parseTillClosed(token); return true; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: // Shouldn't happen... throw new RuntimeException("Unexpected top level block close"); case END: return false; } return true; } /** {@collect.stats} * Parses an @ rule, stopping at a matching brace pair, or ;. */ private void parseAtRule() throws IOException { // PENDING: make this more effecient. boolean done = false; boolean isImport = (tokenBufferLength == 7 && tokenBuffer[0] == '@' && tokenBuffer[1] == 'i' && tokenBuffer[2] == 'm' && tokenBuffer[3] == 'p' && tokenBuffer[4] == 'o' && tokenBuffer[5] == 'r' && tokenBuffer[6] == 't'); unitBuffer.setLength(0); while (!done) { int nextToken = nextToken(';'); switch (nextToken) { case IDENTIFIER: if (tokenBufferLength > 0 && tokenBuffer[tokenBufferLength - 1] == ';') { --tokenBufferLength; done = true; } if (tokenBufferLength > 0) { if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' '); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACE_OPEN: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' '); } unitBuffer.append(charMapping[nextToken]); parseTillClosed(nextToken); done = true; // Skip a tailing ';', not really to spec. { int nextChar = readWS(); if (nextChar != -1 && nextChar != ';') { pushChar(nextChar); } } break; case BRACKET_OPEN: case PAREN_OPEN: unitBuffer.append(charMapping[nextToken]); parseTillClosed(nextToken); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: throw new RuntimeException("Unexpected close in @ rule"); case END: done = true; break; } } if (isImport && !encounteredRuleSet) { callback.handleImport(unitBuffer.toString()); } } /** {@collect.stats} * Parses the next rule set, which is a selector followed by a * declaration block. */ private void parseRuleSet() throws IOException { if (parseSelectors()) { callback.startRule(); parseDeclarationBlock(); callback.endRule(); } } /** {@collect.stats} * Parses a set of selectors, returning false if the end of the stream * is reached. */ private boolean parseSelectors() throws IOException { // Parse the selectors int nextToken; if (tokenBufferLength > 0) { callback.handleSelector(new String(tokenBuffer, 0, tokenBufferLength)); } unitBuffer.setLength(0); for (;;) { while ((nextToken = nextToken((char)0)) == IDENTIFIER) { if (tokenBufferLength > 0) { callback.handleSelector(new String(tokenBuffer, 0, tokenBufferLength)); } } switch (nextToken) { case BRACE_OPEN: return true; case BRACKET_OPEN: case PAREN_OPEN: parseTillClosed(nextToken); // Not too sure about this, how we handle this isn't very // well spec'd. unitBuffer.setLength(0); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: throw new RuntimeException("Unexpected block close in selector"); case END: // Prematurely hit end. return false; } } } /** {@collect.stats} * Parses a declaration block. Which a number of declarations followed * by a })]. */ private void parseDeclarationBlock() throws IOException { for (;;) { int token = parseDeclaration(); switch (token) { case END: case BRACE_CLOSE: return; case BRACKET_CLOSE: case PAREN_CLOSE: // Bail throw new RuntimeException("Unexpected close in declaration block"); case IDENTIFIER: break; } } } /** {@collect.stats} * Parses a single declaration, which is an identifier a : and another * identifier. This returns the last token seen. */ // identifier+: identifier* ;|} private int parseDeclaration() throws IOException { int token; if ((token = parseIdentifiers(':', false)) != IDENTIFIER) { return token; } // Make the property name to lowercase for (int counter = unitBuffer.length() - 1; counter >= 0; counter--) { unitBuffer.setCharAt(counter, Character.toLowerCase (unitBuffer.charAt(counter))); } callback.handleProperty(unitBuffer.toString()); token = parseIdentifiers(';', true); callback.handleValue(unitBuffer.toString()); return token; } /** {@collect.stats} * Parses identifiers until <code>extraChar</code> is encountered, * returning the ending token, which will be IDENTIFIER if extraChar * is found. */ private int parseIdentifiers(char extraChar, boolean wantsBlocks) throws IOException { int nextToken; int ubl; unitBuffer.setLength(0); for (;;) { nextToken = nextToken(extraChar); switch (nextToken) { case IDENTIFIER: if (tokenBufferLength > 0) { if (tokenBuffer[tokenBufferLength - 1] == extraChar) { if (--tokenBufferLength > 0) { if (readWS && unitBuffer.length() > 0) { unitBuffer.append(' '); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } return IDENTIFIER; } if (readWS && unitBuffer.length() > 0) { unitBuffer.append(' '); } unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: ubl = unitBuffer.length(); if (wantsBlocks) { unitBuffer.append(charMapping[nextToken]); } parseTillClosed(nextToken); if (!wantsBlocks) { unitBuffer.setLength(ubl); } break; case BRACE_CLOSE: // No need to throw for these two, we return token and // caller can do whatever. case BRACKET_CLOSE: case PAREN_CLOSE: case END: // Hit the end return nextToken; } } } /** {@collect.stats} * Parses till a matching block close is encountered. This is only * appropriate to be called at the top level (no nesting). */ private void parseTillClosed(int openToken) throws IOException { int nextToken; boolean done = false; startBlock(openToken); while (!done) { nextToken = nextToken((char)0); switch (nextToken) { case IDENTIFIER: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' '); } if (tokenBufferLength > 0) { unitBuffer.append(tokenBuffer, 0, tokenBufferLength); } break; case BRACKET_OPEN: case BRACE_OPEN: case PAREN_OPEN: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' '); } unitBuffer.append(charMapping[nextToken]); startBlock(nextToken); break; case BRACKET_CLOSE: case BRACE_CLOSE: case PAREN_CLOSE: if (unitBuffer.length() > 0 && readWS) { unitBuffer.append(' '); } unitBuffer.append(charMapping[nextToken]); endBlock(nextToken); if (!inBlock()) { done = true; } break; case END: // Prematurely hit end. throw new RuntimeException("Unclosed block"); } } } /** {@collect.stats} * Fetches the next token. */ private int nextToken(char idChar) throws IOException { readWS = false; int nextChar = readWS(); switch (nextChar) { case '\'': readTill('\''); if (tokenBufferLength > 0) { tokenBufferLength--; } return IDENTIFIER; case '"': readTill('"'); if (tokenBufferLength > 0) { tokenBufferLength--; } return IDENTIFIER; case '[': return BRACKET_OPEN; case ']': return BRACKET_CLOSE; case '{': return BRACE_OPEN; case '}': return BRACE_CLOSE; case '(': return PAREN_OPEN; case ')': return PAREN_CLOSE; case -1: return END; default: pushChar(nextChar); getIdentifier(idChar); return IDENTIFIER; } } /** {@collect.stats} * Gets an identifier, returning true if the length of the string is greater than 0, * stopping when <code>stopChar</code>, whitespace, or one of {}()[] is * hit. */ // NOTE: this could be combined with readTill, as they contain somewhat // similiar functionality. private boolean getIdentifier(char stopChar) throws IOException { boolean lastWasEscape = false; boolean done = false; int escapeCount = 0; int escapeChar = 0; int nextChar; int intStopChar = (int)stopChar; // 1 for '\', 2 for valid escape char [0-9a-fA-F], 3 for // stop character (white space, ()[]{}) 0 otherwise short type; int escapeOffset = 0; tokenBufferLength = 0; while (!done) { nextChar = readChar(); switch (nextChar) { case '\\': type = 1; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': type = 2; escapeOffset = nextChar - '0'; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': type = 2; escapeOffset = nextChar - 'a' + 10; break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': type = 2; escapeOffset = nextChar - 'A' + 10; break; case '\'': case '"': case '[': case ']': case '{': case '}': case '(': case ')': case ' ': case '\n': case '\t': case '\r': type = 3; break; case '/': type = 4; break; case -1: // Reached the end done = true; type = 0; break; default: type = 0; break; } if (lastWasEscape) { if (type == 2) { // Continue with escape. escapeChar = escapeChar * 16 + escapeOffset; if (++escapeCount == 4) { lastWasEscape = false; append((char)escapeChar); } } else { // no longer escaped lastWasEscape = false; if (escapeCount > 0) { append((char)escapeChar); // Make this simpler, reprocess the character. pushChar(nextChar); } else if (!done) { append((char)nextChar); } } } else if (!done) { if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else if (type == 3) { done = true; pushChar(nextChar); } else if (type == 4) { // Potential comment nextChar = readChar(); if (nextChar == '*') { done = true; readComment(); readWS = true; } else { append('/'); if (nextChar == -1) { done = true; } else { pushChar(nextChar); } } } else { append((char)nextChar); if (nextChar == intStopChar) { done = true; } } } } return (tokenBufferLength > 0); } /** {@collect.stats} * Reads till a <code>stopChar</code> is encountered, escaping characters * as necessary. */ private void readTill(char stopChar) throws IOException { boolean lastWasEscape = false; int escapeCount = 0; int escapeChar = 0; int nextChar; boolean done = false; int intStopChar = (int)stopChar; // 1 for '\', 2 for valid escape char [0-9a-fA-F], 0 otherwise short type; int escapeOffset = 0; tokenBufferLength = 0; while (!done) { nextChar = readChar(); switch (nextChar) { case '\\': type = 1; break; case '0': case '1': case '2': case '3': case '4':case '5': case '6': case '7': case '8': case '9': type = 2; escapeOffset = nextChar - '0'; break; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': type = 2; escapeOffset = nextChar - 'a' + 10; break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': type = 2; escapeOffset = nextChar - 'A' + 10; break; case -1: // Prematurely reached the end! throw new RuntimeException("Unclosed " + stopChar); default: type = 0; break; } if (lastWasEscape) { if (type == 2) { // Continue with escape. escapeChar = escapeChar * 16 + escapeOffset; if (++escapeCount == 4) { lastWasEscape = false; append((char)escapeChar); } } else { // no longer escaped if (escapeCount > 0) { append((char)escapeChar); if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else { if (nextChar == intStopChar) { done = true; } append((char)nextChar); lastWasEscape = false; } } else { append((char)nextChar); lastWasEscape = false; } } } else if (type == 1) { lastWasEscape = true; escapeChar = escapeCount = 0; } else { if (nextChar == intStopChar) { done = true; } append((char)nextChar); } } } private void append(char character) { if (tokenBufferLength == tokenBuffer.length) { char[] newBuffer = new char[tokenBuffer.length * 2]; System.arraycopy(tokenBuffer, 0, newBuffer, 0, tokenBuffer.length); tokenBuffer = newBuffer; } tokenBuffer[tokenBufferLength++] = character; } /** {@collect.stats} * Parses a comment block. */ private void readComment() throws IOException { int nextChar; for(;;) { nextChar = readChar(); switch (nextChar) { case -1: throw new RuntimeException("Unclosed comment"); case '*': nextChar = readChar(); if (nextChar == '/') { return; } else if (nextChar == -1) { throw new RuntimeException("Unclosed comment"); } else { pushChar(nextChar); } break; default: break; } } } /** {@collect.stats} * Called when a block start is encountered ({[. */ private void startBlock(int startToken) { if (stackCount == unitStack.length) { int[] newUS = new int[stackCount * 2]; System.arraycopy(unitStack, 0, newUS, 0, stackCount); unitStack = newUS; } unitStack[stackCount++] = startToken; } /** {@collect.stats} * Called when an end block is encountered )]} */ private void endBlock(int endToken) { int startToken; switch (endToken) { case BRACKET_CLOSE: startToken = BRACKET_OPEN; break; case BRACE_CLOSE: startToken = BRACE_OPEN; break; case PAREN_CLOSE: startToken = PAREN_OPEN; break; default: // Will never happen. startToken = -1; break; } if (stackCount > 0 && unitStack[stackCount - 1] == startToken) { stackCount--; } else { // Invalid state, should do something. throw new RuntimeException("Unmatched block"); } } /** {@collect.stats} * @return true if currently in a block. */ private boolean inBlock() { return (stackCount > 0); } /** {@collect.stats} * Skips any white space, returning the character after the white space. */ private int readWS() throws IOException { int nextChar; while ((nextChar = readChar()) != -1 && Character.isWhitespace((char)nextChar)) { readWS = true; } return nextChar; } /** {@collect.stats} * Reads a character from the stream. */ private int readChar() throws IOException { if (didPushChar) { didPushChar = false; return pushedChar; } return reader.read(); // Uncomment the following to do case insensitive parsing. /* if (retValue != -1) { return (int)Character.toLowerCase((char)retValue); } return retValue; */ } /** {@collect.stats} * Supports one character look ahead, this will throw if called twice * in a row. */ private void pushChar(int tempChar) { if (didPushChar) { // Should never happen. throw new RuntimeException("Can not handle look ahead of more than one character"); } didPushChar = true; pushedChar = tempChar; } }